Skip to content

Commit 6ee2585

Browse files
authored
Merge pull request #177 from openstates/use-dag-start-run-time
Use DAG run start time by default
2 parents a656b95 + ebb2693 commit 6ee2585

File tree

3 files changed

+9
-7
lines changed

3 files changed

+9
-7
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
 # Changelog
 
+## 6.22.3 - May 14, 2025
+
+* Use DAG run start time for archiving scrape output
 
 ## 6.22.2 - Apr 25, 2025
 
 * Bug fix due to metadata error during import
 

openstates/cli/update.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
 GCP_PROJECT = os.environ.get("GCP_PROJECT", None)
 BUCKET_NAME = os.environ.get("BUCKET_NAME", None)
 SCRAPE_LAKE_PREFIX = os.environ.get("BUCKET_PREFIX", "legislation")
+DAG_RUN_START = os.environ.get("DAG_RUN_START", None)
 
 
 class _Unset:
@@ -102,7 +103,7 @@ def do_scrape(
         ]
     )
 
-    last_scrape_end_datetime = datetime.datetime.utcnow()
+    last_scrape_datetime = DAG_RUN_START or datetime.datetime.utcnow().isoformat()
     for scraper_name, scrape_args in scrapers.items():
         ScraperCls = juris.scrapers[scraper_name]
         if (
@@ -131,7 +132,6 @@ def do_scrape(
             file_archiving_enabled=args.archive,
         )
         partial_report = scraper.do_scrape(**scrape_args, session=session)
-        last_scrape_end_datetime = partial_report["end"]
         stats.write_stats(
             [
                 {
@@ -165,7 +165,6 @@ def do_scrape(
                 file_archiving_enabled=args.archive,
             )
             report[scraper_name] = scraper.do_scrape(**scrape_args)
-            last_scrape_end_datetime = report[scraper_name]["end"]
             session = scrape_args.get("session", "")
             if session:
                 stats.write_stats(
@@ -201,13 +200,13 @@ def do_scrape(
     # optionally upload scrape output to cloud storage
     # but do not archive if realtime mode enabled, as realtime mode has its own archiving process
     if args.archive and not args.realtime:
-        archive_to_cloud_storage(datadir, juris, last_scrape_end_datetime)
+        archive_to_cloud_storage(datadir, juris, last_scrape_datetime)
 
     return report
 
 
 def archive_to_cloud_storage(
-    datadir: str, juris: State, last_scrape_end_datetime: datetime.datetime
+    datadir: str, juris: State, last_scrape_datetime: str
 ) -> None:
     # check if we have necessary settings
     if GCP_PROJECT is None or BUCKET_NAME is None:
@@ -224,7 +223,7 @@ def archive_to_cloud_storage(
     bucket = cloud_storage_client.bucket(BUCKET_NAME)
     jurisdiction_id = juris.jurisdiction_id.replace("ocd-jurisdiction/", "")
     destination_prefix = (
-        f"{SCRAPE_LAKE_PREFIX}/{jurisdiction_id}/{last_scrape_end_datetime.isoformat()}"
+        f"{SCRAPE_LAKE_PREFIX}/{jurisdiction_id}/{last_scrape_datetime}"
     )
 
     # read files in directory and upload

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "openstates"
-version = "6.22.2"
+version = "6.22.3"
 description = "core infrastructure for the openstates project"
 authors = ["James Turk <[email protected]>"]
 license = "MIT"

0 commit comments

Comments
 (0)