Skip to content

Commit 7fdd35d

Browse files
authored
Merge pull request #3 from openstates/build-docker-image
Build docker image
2 parents a8b4729 + 342256c commit 7fdd35d

File tree

5 files changed

+114
-2
lines changed

5 files changed

+114
-2
lines changed

.dockerignore

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
.cache/
2+
_data/
3+
logs/
4+
db.db
5+
**/*.pyc
6+
**/__pycache__/
7+
*.md
8+
.git/

.github/workflows/docker.yml

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
name: Build and Push Docker Image
2+
# Builds the repository's Docker image and pushes it to Docker Hub and to us-central1-docker.pkg.dev.
3+
on:  # only push events are configured; there is no pull_request trigger
4+
push:
5+
branches:
6+
- main  # pushes to the main branch
7+
tags:
8+
- '*'  # tag pushes whose name matches the glob '*'
9+
jobs:
10+
publish:  # builds the image and pushes it to the openstates/scraper-audit repository
11+
name: Build and Push to Docker Hub
12+
runs-on: ubuntu-latest
13+
steps:
14+
- uses: actions/checkout@v3  # check out the repository; it is the build context used below
15+
- name: Log in to Docker Hub
16+
uses: docker/login-action@v2
17+
with:
18+
username: ${{ secrets.DOCKER_USERNAME }}  # credentials are read from secrets
19+
password: ${{ secrets.DOCKER_PASSWORD }}
20+
- name: Build and Push Docker Image
21+
uses: docker/build-push-action@v3
22+
with:
23+
context: .  # build from the checkout root
24+
tags: "openstates/scraper-audit:latest,openstates/scraper-audit:${{ github.sha }}"  # moving 'latest' tag plus one tag per commit SHA
25+
push: true
26+
publish-plural:  # no 'needs:' is declared, so this job does not wait for 'publish'; pushes to us-central1-docker.pkg.dev
27+
runs-on: ubuntu-latest
28+
steps:
29+
- uses: actions/checkout@v3  # check out the repository; it is the build context used below
30+
- name: Authenticate with Google Cloud
31+
uses: google-github-actions/auth@v2
32+
with:
33+
project_id: civic-eagle-enview-prod
34+
credentials_json: "${{ secrets.PLURAL_OPEN_REPO_PUSHER_KEY }}"  # key is read from a secret
35+
- name: Setup Google Cloud SDK  # provides the gcloud CLI used by the next step
36+
uses: "google-github-actions/setup-gcloud@v2"
37+
- name: Docker Auth  # registers gcloud as Docker's credential helper for the registry host
38+
run: gcloud auth configure-docker us-central1-docker.pkg.dev --quiet
39+
- name: Build and Push Docker Image
40+
uses: docker/build-push-action@v3
41+
with:
42+
context: .  # build from the checkout root
43+
tags: "us-central1-docker.pkg.dev/civic-eagle-enview-prod/open-containers/scraper-audit:latest,us-central1-docker.pkg.dev/civic-eagle-enview-prod/open-containers/scraper-audit:${{ github.sha }}"  # moving 'latest' tag plus one tag per commit SHA
44+
push: true

Dockerfile

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
FROM python:3.11-slim
2+
# Python runtime flags: unbuffered stdout/stderr, no .pyc files written, UTF-8 for the standard streams
3+
ENV PYTHONUNBUFFERED=1 \
4+
PYTHONDONTWRITEBYTECODE=1 \
5+
PYTHONIOENCODING='utf-8'
6+
7+
# Export the working directory as $WORKDIR so shell commands in the container can reference it
8+
ENV WORKDIR='/app'
9+
WORKDIR /app
10+
11+
# Install the packaging toolchain (wheel, poetry, crcmod) with pip.
12+
# No OS-level packages are required, so apt is never invoked and no package
13+
# lists are left behind in this layer.
14+
# Virtualenv creation is disabled because the container is already isolated;
15+
# this saves some space and slightly speeds up startup.
16+
RUN pip install --disable-pip-version-check --no-cache-dir -q wheel \
17+
&& pip install --disable-pip-version-check --no-cache-dir -q poetry crcmod \
18+
&& poetry config virtualenvs.create false
19+
20+
# Copy only the poetry manifests first, so the dependency layer below is
21+
# rebuilt only when they change.
22+
COPY pyproject.toml /app
23+
COPY poetry.lock /app
24+
25+
# Install dependencies before copying the application code: this makes the
26+
# install step *slightly* faster and re-builds significantly faster.
27+
# The poetry download caches are deleted in the same layer so they never
28+
# reach the image; `-f` keeps the build from failing if poetry did not
29+
# create one of the directories.
30+
RUN poetry install --no-root \
31+
&& rm -rf /root/.cache/pypoetry/cache /root/.cache/pypoetry/artifacts/
32+
33+
# Add the application source last so code changes do not invalidate the dependency layers above
34+
COPY . .
35+
# Exec form: arguments given to `docker run` are appended to this command
36+
ENTRYPOINT ["poetry", "run", "python", "main.py"]

README.md

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,4 +75,29 @@ Updating virtual layer ━━━━━━━━━━━━━━━━━━
7575

7676
Audit failed:
7777
[WARNING] staged.bill: 'assert_bills_have_sponsor' audit error: 1179 rows failed. Learn more in logs: ~/scraper-audit/logs/sqlmesh_2025_05_06_20_58_30.log
78-
```
78+
```
79+
80+
### Docker Usage
81+
82+
To run the scraper-audit using Docker, you can build the image locally and execute it.
83+
Ensure your data directory is accessible to Docker.
84+
The examples below assume your JSON files are in a local `_data` folder.
85+
86+
#### Build the Docker Image
87+
88+
From the root of the project, run:
89+
```bash
90+
docker build -t scraper-audit .
91+
```
92+
93+
#### Run the Container
94+
95+
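Assuming your JSON data is in a local `_data` directory, run the command below (relative bind-mount paths such as `./_data` require Docker 23.0 or newer; on older versions use `"$(pwd)/_data"` instead):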
96+
97+
```bash
98+
docker run --rm -v ./_data:/app/_data scraper-audit --entity "bill"
99+
```
100+
101+
Note: The `--entity` flag is optional.
102+
If provided, it can be set to `bill`, `event`, or `vote_event` to audit a specific entity.
103+
If omitted, audits will run for all entities.

main.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import argparse
22

3-
43
from sqlmesh_tasks import sqlmesh_plan
54

65
if __name__ == "__main__":

0 commit comments

Comments
 (0)