Skip to content

Commit a8b4729

Browse files
authored
Merge pull request #2 from openstates/more-audit-definitions
More audit definitions
2 parents 33fc1bd + 28813a9 commit a8b4729

File tree

10 files changed

+337
-49
lines changed

10 files changed

+337
-49
lines changed

audits/bill.sql

Lines changed: 108 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,111 @@
1-
-- Does bill have sponsors?
1+
-- all bills have an identifier
22
AUDIT (
3-
name assert_bills_have_sponsor,
3+
name assert_bills_have_identifier,
44
blocking false
55
);
6-
SELECT * from scraper.bill
7-
WHERE sponsorships IS NULL;
6+
SELECT * FROM scraper.bill
7+
WHERE identifier IS NULL;
8+
9+
-- all bills have a title
10+
AUDIT (
11+
name assert_bills_have_title,
12+
blocking false
13+
);
14+
SELECT * FROM scraper.bill
15+
WHERE title IS NULL;
16+
17+
-- all bills have sponsors?
18+
AUDIT (
19+
name assert_bills_have_sponsors,
20+
blocking false
21+
);
22+
SELECT * FROM scraper.bill
23+
-- a NULL list makes len(...) < 1 evaluate to NULL, not true, so a missing
-- sponsorships value must be tested for explicitly
WHERE sponsorships IS NULL
OR len(sponsorships) < 1;
24+
25+
-- all bills have actions?
26+
AUDIT (
27+
name assert_bills_have_actions,
28+
blocking false
29+
);
30+
SELECT * FROM scraper.bill
31+
-- a NULL list makes len(...) < 1 evaluate to NULL, not true, so a missing
-- actions value must be tested for explicitly
WHERE actions IS NULL
OR len(actions) < 1;
32+
33+
-- all bills have an abstract, exempt USA
34+
AUDIT (
35+
name assert_bills_have_abstracts,
36+
blocking false
37+
);
38+
SELECT * FROM scraper.bill
39+
-- a NULL list never satisfies len(...) < 1, so check for it explicitly;
-- parenthesized so the jurisdiction exemption below applies to both cases
WHERE (abstracts IS NULL OR len(abstracts) < 1)
40+
AND jurisdiction.name != 'United States';
41+
42+
-- all bills have a classification
43+
AUDIT (
44+
name assert_bills_have_classifications,
45+
blocking false
46+
);
47+
SELECT * FROM scraper.bill
48+
WHERE classification IS NULL;
49+
50+
-- all bills have a version, exempt USA
51+
AUDIT (
52+
name assert_bills_have_versions,
53+
blocking false
54+
);
55+
SELECT * FROM scraper.bill
56+
-- an empty versions list is as much a failure as a missing one;
-- parenthesized so the jurisdiction exemption below applies to both cases
WHERE (versions IS NULL OR len(versions) < 1)
57+
AND jurisdiction.name != 'United States';
58+
59+
-- all bill versions have a note
60+
AUDIT (
61+
name assert_bill_versions_have_note,
62+
blocking false
63+
);
64+
WITH bill_version_exploded AS (
65+
SELECT
66+
_id,
67+
unnest(versions) AS version,
68+
FROM
69+
scraper.bill
70+
)
71+
SELECT * FROM bill_version_exploded
72+
WHERE version.note IS NULL;
73+
74+
-- all bill versions have a non-empty links property
75+
AUDIT (
76+
name assert_bill_versions_have_links,
77+
blocking false
78+
);
79+
WITH bill_version_exploded AS (
80+
SELECT
81+
_id,
82+
unnest(versions) AS version,
83+
FROM
84+
scraper.bill
85+
)
86+
SELECT * FROM bill_version_exploded
87+
WHERE version.links IS NULL
88+
OR len(version.links) < 1;
89+
90+
-- all bill versions have a source document
91+
AUDIT (
92+
name assert_bill_versions_have_source_document,
93+
blocking false
94+
);
95+
WITH bill_version_exploded AS (
96+
SELECT
97+
unnest(versions) AS version
98+
FROM
99+
scraper.bill
100+
),
101+
bill_version_links AS (
102+
SELECT
103+
unnest(version.links) AS link
104+
FROM
105+
bill_version_exploded
106+
)
107+
SELECT *
108+
FROM bill_version_links
109+
WHERE
110+
link.url IS NULL
111+
OR link.media_type IS NULL;

audits/event.sql

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,31 @@
1+
-- all events have a classification?
12
AUDIT (
23
name assert_events_are_classified,
34
blocking false
45
);
56
SELECT * from scraper.event
67
WHERE classification IS NULL;
8+
9+
-- all events have sources?
10+
AUDIT (
11+
name assert_events_have_sources,
12+
blocking false
13+
);
14+
SELECT * FROM scraper.event
15+
-- a NULL list makes len(...) < 1 evaluate to NULL, not true, so a missing
-- sources value must be tested for explicitly
WHERE sources IS NULL
OR len(sources) < 1;
16+
17+
-- all events have participants?
18+
AUDIT (
19+
name assert_events_have_participants,
20+
blocking false
21+
);
22+
SELECT * FROM scraper.event
23+
-- a NULL list makes len(...) < 1 evaluate to NULL, not true, so a missing
-- participants value must be tested for explicitly
WHERE participants IS NULL
OR len(participants) < 1;
24+
25+
-- all events have start dates?
26+
AUDIT (
27+
name assert_events_have_start_dates,
28+
blocking false
29+
);
30+
SELECT * FROM scraper.event
31+
WHERE start_date IS NULL;

audits/vote_event.sql

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
-- all vote events have bill?
2+
AUDIT (
3+
name assert_vote_events_have_bill,
4+
blocking false
5+
);
6+
SELECT * from scraper.vote_event
7+
WHERE bill IS NULL;
8+
9+
-- all vote events have bill identifier?
10+
AUDIT (
11+
name assert_vote_events_have_bill_identifier,
12+
blocking false
13+
);
14+
SELECT * from scraper.vote_event
15+
WHERE bill_identifier IS NULL;
16+
17+
-- all vote events have start date?
18+
AUDIT (
19+
name assert_vote_events_have_start_date,
20+
blocking false
21+
);
22+
SELECT * from scraper.vote_event
23+
WHERE start_date IS NULL;
24+
25+
-- all vote events have result?
26+
AUDIT (
27+
name assert_vote_events_have_result,
28+
blocking false
29+
);
30+
SELECT * from scraper.vote_event
31+
WHERE result IS NULL;
32+
33+
-- all vote events have legislative session?
34+
AUDIT (
35+
name assert_vote_events_have_legislative_session,
36+
blocking false
37+
);
38+
SELECT * from scraper.vote_event
39+
WHERE legislative_session IS NULL;
40+
41+
-- all vote events have motion text?
42+
AUDIT (
43+
name assert_vote_events_have_motion_text,
44+
blocking false
45+
);
46+
SELECT * from scraper.vote_event
47+
WHERE motion_text IS NULL;
48+
49+
-- all vote events have organization?
50+
AUDIT (
51+
name assert_vote_events_have_organization,
52+
blocking false
53+
);
54+
SELECT * from scraper.vote_event
55+
WHERE organization IS NULL;
56+
57+
-- all vote events have motion classification?
58+
AUDIT (
59+
name assert_vote_events_have_motion_classification,
60+
blocking false
61+
);
62+
SELECT * FROM scraper.vote_event
63+
-- a NULL list makes len(...) < 1 evaluate to NULL, not true, so a missing
-- motion_classification value must be tested for explicitly
WHERE motion_classification IS NULL
OR len(motion_classification) < 1;
64+
65+
-- all vote events have counts for exactly four options?
66+
AUDIT (
67+
name assert_vote_events_count_have_four_options,
68+
blocking false
69+
);
70+
SELECT * FROM scraper.vote_event
71+
-- an audit query returns the violating rows, so select vote events whose
-- counts list is missing or does not hold exactly four entries
WHERE counts IS NULL
OR len(counts) != 4;

main.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,17 +19,17 @@
1919
parser.add_argument(
2020
"--entity",
2121
"-e",
22-
required=True,
23-
choices=["bill", "event"],
22+
choices=["bill", "event", "vote_event"],
2423
type=str,
2524
# describes the accepted choices; when the flag is omitted every entity is audited
help="Entity type: bill, event, or vote_event (omit to audit all entities)",
2625
)
2726

2827
args = parser.parse_args()
2928
entity = args.entity
3029
jurisdiction = args.jurisdiction
31-
report = sqlmesh_plan(entity, jurisdiction)
32-
if report:
33-
print("Audit failed:\n", report)
30+
31+
if entity:
32+
entities = [entity]
3433
else:
35-
print("Audit passed.")
34+
entities = ["bill", "event", "vote_event"]
35+
report = sqlmesh_plan(entities, jurisdiction)

models/bill.sql

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,21 @@
11
MODEL (
22
name staged.bill,
33
kind FULL,
4-
start '2024-04-24',
4+
start '2025-01-01',
55
cron '0 5 * * *',
66
interval_unit 'day',
77
grain (id),
88
audits (
9-
assert_bills_have_sponsor,
9+
assert_bills_have_sponsors,
10+
assert_bills_have_abstracts,
11+
assert_bills_have_classifications,
12+
assert_bills_have_actions,
13+
assert_bills_have_versions,
14+
assert_bills_have_title,
15+
assert_bills_have_identifier,
16+
assert_bill_versions_have_note,
17+
assert_bill_versions_have_links,
18+
assert_bill_versions_have_source_document,
1019
),
1120
);
1221

models/event.sql

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
11
MODEL (
22
name staged.event,
33
kind FULL,
4-
start '2024-04-24',
4+
start '2025-01-01',
55
cron '0 5 * * *',
66
interval_unit 'day',
77
grains (jurisdiction_id, start_date, 'name'),
8-
audits (assert_events_are_classified),
8+
audits (assert_events_are_classified,
9+
assert_events_have_start_dates,
10+
assert_events_have_participants,
11+
assert_events_have_sources,
12+
),
913
);
1014

1115
SELECT

models/vote_event.sql

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
MODEL (
2+
name staged.vote_event,
3+
kind FULL,
4+
start '2025-01-01',
5+
cron '0 5 * * *',
6+
interval_unit 'day',
7+
grains (identifier, start_date),
8+
audits (
9+
assert_vote_events_have_bill,
10+
assert_vote_events_have_bill_identifier,
11+
assert_vote_events_have_start_date,
12+
assert_vote_events_have_result,
13+
assert_vote_events_have_legislative_session,
14+
assert_vote_events_have_motion_text,
15+
assert_vote_events_have_organization,
16+
assert_vote_events_have_motion_classification,
17+
),
18+
);
19+
20+
SELECT
21+
identifier::TEXT AS identifier,
22+
motion_text::TEXT AS motion_text,
23+
motion_classification::TEXT[] AS motion_classification,
24+
NULLIF(start_date, '')::TIMESTAMP AS start_date,
25+
result::TEXT AS result,
26+
organization::TEXT AS organization,
27+
legislative_session::TEXT AS legislative_session,
28+
bill::TEXT AS bill,
29+
bill_action::TEXT AS bill_action,
30+
bill_identifier::TEXT AS bill_identifier,
31+
votes::JSON AS votes,
32+
counts::JSON AS counts,
33+
sources::JSON AS sources,
34+
extras::JSON AS extras,
35+
_id::TEXT AS _id
36+
FROM
37+
scraper.vote_event;

schema.yaml

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
- name: '"db"."scraper"."bills"'
1+
- name: '"db"."scraper"."bill"'
22
columns:
33
legislative_session: TEXT
44
identifier: TEXT
@@ -24,7 +24,7 @@
2424
_id: UUID
2525
jurisdictions: STRUCT(id TEXT, name TEXT, classification TEXT, division_id TEXT)
2626
scraped_at: TIMESTAMP
27-
- name: '"db"."scraper"."events"'
27+
- name: '"db"."scraper"."event"'
2828
columns:
2929
name: TEXT
3030
all_day: BOOLEAN
@@ -45,3 +45,20 @@
4545
jurisdiction: STRUCT(id TEXT, name TEXT, classification TEXT, division_id TEXT)
4646
scraped_at: TIMESTAMP
4747
_id: UUID
48+
- name: '"db"."scraper"."vote_event"'
49+
columns:
50+
identifier: TEXT
51+
motion_text: TEXT
52+
motion_classification: TEXT[]
53+
start_date: TIMESTAMP
54+
result: TEXT
55+
organization: JSON
56+
legislative_session: TEXT
57+
bill: JSON
58+
bill_action: TEXT
59+
bill_identifier: TEXT
60+
votes: STRUCT(option TEXT, voter_name TEXT, voter_id JSON, note TEXT)[]
61+
counts: STRUCT(option TEXT, value INTEGER)[]
62+
sources: STRUCT(url TEXT, note TEXT)[]
63+
extras: JSON
64+
_id: UUID

0 commit comments

Comments
 (0)