Skip to content

Commit b3d8af0

Browse files
authored
RHOAIENG-16076: tests(gha): run Makefile tests on opendatahub-io/notebooks Github Actions (#775)
* RHOAIENG-16076: tests(gha): run Makefile tests in GitHub Actions * fixup, looks like I lost the second changed line from #761 (comment) when merging the work * fixup, linter wants space in the comments; IntelliJ is ok with it, so let's do that * fixup, add reference to OpenShift CI for the source of the make invocations * fixup, the ifNotPresent pull policy (for PR checks without image registry) and the symbolic links apparently needed to deploy rocm stuff
1 parent fb6e1b9 commit b3d8af0

File tree

7 files changed

+309
-11
lines changed

7 files changed

+309
-11
lines changed

.github/workflows/build-notebooks-TEMPLATE.yaml

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ jobs:
4646
username: ${{ github.actor }}
4747
password: ${{ secrets.GITHUB_TOKEN }}
4848

49+
# region Free up disk space
50+
4951
- name: Free up additional disk space
5052
# https://docs.github.com/en/actions/learn-github-actions/expressions
5153
if: "${{ contains(inputs.target, 'rocm') || contains(inputs.target, 'cuda') || contains(inputs.target, 'intel') ||
@@ -86,6 +88,10 @@ jobs:
8688
df -h
8789
free -h
8890
91+
# endregion
92+
93+
# region Podman setup
94+
8995
# https://github.com/containers/buildah/issues/2521#issuecomment-884779112
9096
- name: Workaround https://github.com/containers/podman/issues/22152#issuecomment-2027705598
9197
run: sudo apt-get -qq remove podman crun
@@ -156,6 +162,10 @@ jobs:
156162
echo "IMAGE_TAG=${IMAGE_TAG}" >> "$GITHUB_OUTPUT"
157163
echo "OUTPUT_IMAGE=${{ env.IMAGE_REGISTRY}}:${{ inputs.target }}-${IMAGE_TAG}" >> "$GITHUB_OUTPUT"
158164
165+
# endregion
166+
167+
# region Trivy init & DB pre-pull
168+
159169
- name: "pull_request|schedule: resolve target if Trivy scan should run"
160170
id: resolve-target
161171
if: ${{ fromJson(inputs.github).event_name == 'pull_request' || fromJson(inputs.github).event_name == 'schedule' }}
@@ -210,6 +220,10 @@ jobs:
210220
image \
211221
--download-java-db-only
212222
223+
# endregion
224+
225+
# region Image build
226+
213227
# https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#push
214228
- name: "push|schedule: make ${{ inputs.target }}"
215229
run: |
@@ -235,10 +249,34 @@ jobs:
235249
- name: "Show podman images information"
236250
run: podman images --digests
237251

252+
# endregion
253+
254+
# region Makefile image tests
255+
238256
- name: "Check if we have tests or not"
239257
id: have-tests
240258
run: "ci/cached-builds/has_tests.py --target ${{ inputs.target }}"
241259

260+
- name: "Change pull policy to IfNotPresent"
261+
run: |
262+
set -Eeuxo pipefail
263+
264+
find . \( -name "statefulset.yaml" -o -name "pod.yaml" \) -type f -exec \
265+
sed -i'' 's/imagePullPolicy: Always/imagePullPolicy: IfNotPresent/g' {} \;
266+
git diff
267+
268+
# [INFO] Running command (('make deploy9-runtimes-rocm-tensorflow-ubi9-python-3.11',), {'shell': True})
269+
# Deploying notebook from runtimes/rocm/tensorflow/ubi9-python-3.11/kustomize/base directory...
270+
# sed: can't read runtimes/rocm/tensorflow/ubi9-python-3.11/kustomize/base/kustomization.yaml: No such file or directory
271+
- name: "Fixup paths that prevent us from running rocm tests"
272+
if: ${{ steps.have-tests.outputs.tests == 'true' }}
273+
run: |
274+
set -Eeuxo pipefail
275+
276+
mkdir -p runtimes/rocm
277+
ln -s ../rocm-tensorflow runtimes/rocm/tensorflow
278+
ln -s ../rocm-pytorch runtimes/rocm/pytorch
279+
242280
# https://cri-o.io/
243281
- name: Install cri-o
244282
if: ${{ steps.have-tests.outputs.tests == 'true' }}
@@ -288,11 +326,11 @@ jobs:
288326
289327
# do this early, it's a good check that cri-o is not completely broken
290328
- name: "Show crio images information"
291-
if: ${{ steps.have-tests.outputs.tests == 'true' }}
329+
if: ${{ steps.have-tests.outputs.tests == 'true' }}
292330
run: sudo crictl images
293331

294332
- name: Install Kubernetes cluster
295-
if: ${{ steps.have-tests.outputs.tests == 'true' }}
333+
if: ${{ steps.have-tests.outputs.tests == 'true' }}
296334
run: |
297335
set -Eeuxo pipefail
298336
@@ -350,6 +388,18 @@ jobs:
350388
kubectl wait deployments --all --all-namespaces --for=condition=Available --timeout=100s
351389
kubectl wait pods --all --all-namespaces --for=condition=Ready --timeout=100s
352390
391+
- name: "Run image tests"
392+
if: ${{ steps.have-tests.outputs.tests == 'true' }}
393+
run: python3 ci/cached-builds/make_test.py --target ${{ inputs.target }}
394+
env:
395+
IMAGE_TAG: "${{ steps.calculated_vars.outputs.IMAGE_TAG }}"
396+
# for make deploy, mandatory to specify for the more exotic cases
397+
NOTEBOOK_TAG: "${{ inputs.target }}-${{ steps.calculated_vars.outputs.IMAGE_TAG }}"
398+
399+
# endregion
400+
401+
# region Trivy vulnerability scan
402+
353403
- name: Run Trivy vulnerability scanner
354404
if: ${{ steps.resolve-target.outputs.target }}
355405
run: |
@@ -391,6 +441,10 @@ jobs:
391441
392442
cat $REPORT_FOLDER/$REPORT_FILE >> $GITHUB_STEP_SUMMARY
393443
444+
# endregion
445+
446+
# region Typescript (browser) image tests
447+
394448
# https://playwright.dev/docs/ci
395449
# https://playwright.dev/docs/docker
396450
# we leave little free disk space after we mount LVM for podman storage
@@ -436,5 +490,7 @@ jobs:
436490
path: tests/browser/playwright-report/
437491
retention-days: 30
438492

493+
# endregion
494+
439495
- run: df -h
440496
if: "${{ !cancelled() }}"

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -553,7 +553,7 @@ validate-runtime-image: bin/kubectl
553553
fi; \
554554
if [ $$cmd == "python3" ]; then \
555555
echo "=> Checking notebook execution..." ; \
556-
$(KUBECTL_BIN) exec runtime-pod -- /bin/sh -c "curl https://raw.githubusercontent.com/opendatahub-io/elyra/refs/heads/main/etc/generic/requirements-elyra.txt --output req.txt && \
556+
$(KUBECTL_BIN) exec runtime-pod -- /bin/sh -c "curl https://raw.githubusercontent.com/opendatahub-io/elyra/refs/heads/main/etc/generic/requirements-elyra.txt --output req.txt && \
557557
python3 -m pip install -r req.txt > /dev/null && \
558558
curl https://raw.githubusercontent.com/nteract/papermill/main/papermill/tests/notebooks/simple_execute.ipynb --output simple_execute.ipynb && \
559559
python3 -m papermill simple_execute.ipynb output.ipynb > /dev/null" ; \

ci/cached-builds/make_test.py

Lines changed: 242 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,242 @@
1+
#!/usr/bin/env python3
2+
import argparse
3+
import contextlib
4+
import functools
5+
import re
6+
import subprocess
7+
import sys
8+
import time
9+
import typing
10+
import unittest
11+
import unittest.mock
12+
13+
"""Runs the make commands used to deploy, test, and undeploy image in Kubernetes
14+
15+
The make commands this runs are intended to reproduce the commands we define in our OpenShift CI config at
16+
https://github.com/openshift/release/blob/master/ci-operator/config/opendatahub-io/notebooks/opendatahub-io-notebooks-main.yaml#L1485
17+
"""
18+
19+
20+
class Args(argparse.Namespace):
    """Typed view of the parsed command line.

    Exists only so that editors and type checkers know which attributes the
    namespace returned by the argument parser carries.
    """

    # value of the --target command line option
    target: str
23+
24+
25+
def main() -> None:
    """Command line entry point: parses ``--target`` and runs the image tests for it.

    ``--target`` is mandatory. Without it argparse would hand over ``None`` and
    ``run_tests`` would fail later with an unhelpful ``AttributeError``; marking
    the option as required makes argparse print a usage error instead.
    """
    parser = argparse.ArgumentParser("make_test.py")
    parser.add_argument(
        "--target",
        type=str,
        required=True,
        help="image build target to deploy and test, e.g. jupyter-minimal-ubi9-python-3.11",
    )
    args = typing.cast(Args, parser.parse_args())

    run_tests(args.target)
31+
32+
33+
def run_tests(target: str) -> None:
    """Deploys the image for ``target`` into a fresh namespace, runs its make checks, undeploys.

    Steps, in order: create and select the ``ns-<target>`` namespace, wait for
    its default service account, ``make <deploy>-<deploy target>``, wait for
    the pods to settle, run the validation/test make target, dump diagnostics
    (always, even when the check failed), and finally ``make un<deploy>-...``.

    Args:
        target: image build target, e.g. ``jupyter-minimal-ubi9-python-3.11``.

    Raises:
        subprocess.CalledProcessError: when any ``check_call`` step exits non-zero.
        ValueError: when an rstudio target has no OS segment after ``rstudio-``.
    """
    # dots are replaced because the target is reused as part of Kubernetes object names
    prefix = target.translate(str.maketrans(".", "-"))
    # this is a pod name in statefulset, some tests deploy individual unmanaged pods, though
    pod = prefix + "-notebook-0"  # `$(kubectl get statefulset -o name | head -n 1)` would work too
    namespace = "ns-" + prefix

    deploy, deploy_target = _resolve_deploy(target)

    check_call(f"kubectl create namespace {namespace}", shell=True)
    check_call(f"kubectl config set-context --current --namespace={namespace}", shell=True)
    check_call(f"kubectl label namespace {namespace} fake-scc=fake-restricted-v2", shell=True)

    # Wait for the default service account to exist, otherwise creating the pod is refused:
    # $ bin/kubectl apply -k runtimes/minimal/ubi9-python-3.9/kustomize/base
    # configmap/runtime-req-config-9hhb2bhhmd created
    # Error from server (Forbidden): error when creating "runtimes/minimal/ubi9-python-3.9/kustomize/base": pods "runtime-pod" is forbidden: error looking up service account ns-runtime-minimal-ubi9-python-3-9/default: serviceaccount "default" not found
    # See https://github.com/kubernetes/kubernetes/issues/66689
    check_call("timeout 10s bash -c 'until kubectl get serviceaccount/default; do sleep 1; done'", shell=True)

    check_call(f"make {deploy}-{deploy_target}", shell=True)
    wait_for_stability(pod)

    try:
        check_call(_make_test_command(target), shell=True)
    finally:
        # dump a lot of info to the GHA logs; plain call() so a failing kubectl does not mask the test result
        with gha_log_group("pod and statefulset info"):
            call("kubectl get statefulsets", shell=True)
            call("kubectl describe statefulsets", shell=True)
            call("kubectl get pods", shell=True)
            call("kubectl describe pods", shell=True)
            # describe does not show everything about the pod
            call("kubectl get pods -o yaml", shell=True)

        with gha_log_group("kubernetes namespace events"):
            # events aren't all that useful, but they can tell what was happening in the current namespace
            call("kubectl get events", shell=True)

        with gha_log_group("previous pod logs"):
            # relevant if the pod is crashlooping, this shows the final lines
            # use the negative label selector as a trick to match all pods (as we don't have any pods with nosuchlabel)
            call("kubectl logs --selector=nosuchlabel!=nosuchvalue --all-pods --timestamps --previous", shell=True)
        with gha_log_group("current pod logs"):
            # regular logs from a running (or finished) pod
            call("kubectl logs --selector=nosuchlabel!=nosuchvalue --all-pods --timestamps", shell=True)

    # NOTE(review): undeploy is kept outside the finally block, so it is skipped when the check
    # above failed; this mirrors the original flow as far as it can be read - confirm it is intended
    check_call(f"make un{deploy}-{deploy_target}", shell=True)

    print(f"[INFO] Finished testing {target}")


def _resolve_deploy(target: str) -> tuple[str, str]:
    """Returns the ``(make deploy verb, deploy target suffix)`` pair for a build target."""
    if target.startswith("runtime-"):
        return "deploy9", target.replace("runtime-", "runtimes-")
    if target.startswith("intel-runtime-"):
        return "deploy9", target.replace("intel-runtime-", "intel-runtimes-")
    if target.startswith("rocm-runtime-"):
        return "deploy9", target.replace("rocm-runtime-", "runtimes-rocm-")
    if target.startswith("rocm-jupyter-"):
        return "deploy9", target.replace("rocm-jupyter-", "jupyter-rocm-")
    if target.startswith(("cuda-rstudio-", "rstudio-")):
        # cuda and plain rstudio images share one deploy target, named <os>-rstudio-<os>-...
        rstudio_target = target.removeprefix("cuda-")
        base_os = re.match(r"^rstudio-([^-]+-).*", rstudio_target)
        if base_os is None:
            raise ValueError(f"cannot determine the base OS segment of rstudio target {target!r}")
        return "deploy", base_os.group(1) + rstudio_target
    return "deploy9", target


def _make_test_command(target: str) -> str:
    """Returns the ``make`` command line that validates or tests the deployed image."""
    if target.startswith(("runtime-", "intel-runtime-")):
        return f"make validate-runtime-image image={target}"
    if target.startswith("rocm-runtime-"):
        # computed outside the f-string: nested quotes/newlines in a replacement field need Python 3.12+
        image = target.replace("rocm-runtime-", "runtime-rocm-")
        return f"make validate-runtime-image image={image}"
    if target.startswith(("rstudio-", "cuda-rstudio-")):
        return f"make validate-rstudio-image image={target}"
    if target.startswith("codeserver-"):
        return f"make validate-codeserver-image image={target}"
    if target.startswith("rocm-jupyter"):
        test_target = target.replace("rocm-jupyter-", "jupyter-rocm-")
        return f"make test-{test_target}"
    return f"make test-{target}"
117+
118+
119+
@functools.wraps(subprocess.check_call)
def check_call(*args, **kwargs) -> int:
    # Logging front for subprocess.check_call: accepts the same arguments and
    # routes them through execute(), which prints the command before and after.
    # functools.wraps copies the name and docstring of subprocess.check_call
    # onto this function, hence comments here rather than a docstring.
    runner = subprocess.check_call
    return execute(runner, args, kwargs)
122+
123+
124+
@functools.wraps(subprocess.call)
def call(*args, **kwargs) -> int:
    # Logging front for subprocess.call: accepts the same arguments and routes
    # them through execute(), which prints the command before and after.
    # functools.wraps copies the name and docstring of subprocess.call onto
    # this function, hence comments here rather than a docstring.
    runner = subprocess.call
    return execute(runner, args, kwargs)
127+
128+
129+
def execute(executor: typing.Callable, args: tuple, kwargs: dict) -> int:
    """Runs ``executor(*args, **kwargs)`` with a log line before and after.

    Args:
        executor: callable to invoke.
        args: positional arguments passed on to ``executor``.
        kwargs: keyword arguments passed on to ``executor``.

    Returns:
        Whatever ``executor`` returned. Exceptions raised by ``executor``
        propagate, in which case the closing log line is not printed.
    """
    invocation = (args, kwargs)
    # flush right away so our messages stay in order with the subprocess output
    print(f"[INFO] Running command {invocation}", flush=True)
    outcome = executor(*args, **kwargs)
    print(f"\tDONE running command {invocation}", flush=True)
    return outcome
136+
137+
138+
# TODO(jdanek) this is a dumb impl, needs to be improved
139+
def wait_for_stability(pod: str) -> None:
    """Gives the pods in the current namespace time to become (and stay) Ready.

    Runs three rounds of "retry ``kubectl wait`` until every pod is Ready",
    pausing three seconds after each round. The first round may take up to
    100 seconds, the following ones up to 50 seconds. Repeating the wait
    guards against probes that fail initially, which showed up as

    > error: Internal error occurred: error executing command in container: container is not created or running
    > error: unable to upgrade connection: container not found ("notebook")

    Exit codes are ignored, so this never raises when pods fail to get Ready.

    Args:
        pod: name of the pod under test; currently unused, all pods are awaited.
    """
    retry_loop = "until kubectl wait --for=condition=Ready pods --all --timeout 5s; do sleep 1; done"
    for round_timeout in (100, 50, 50):
        call(f"timeout {round_timeout}s bash -c '{retry_loop}'", shell=True)
        time.sleep(3)
150+
151+
152+
# https://docs.github.com/en/actions/writing-workflows/choosing-what-your-workflow-does/workflow-commands-for-github-actions#grouping-log-lines
153+
@contextlib.contextmanager
def gha_log_group(title):
    """Wraps the output of the ``with`` body in a collapsible GitHub Actions log group.

    Prints the ``::group::<title>`` marker on entry and ``::endgroup::`` on
    exit; the closing marker is printed even when the body raises.

    Args:
        title: heading shown for the group in the GitHub Actions log.
    """
    # flush each marker so it cannot be reordered with output of child processes
    print(f"::group::{title}", flush=True)
    try:
        yield
    finally:
        print("::endgroup::", flush=True)
163+
164+
165+
# https://docs.python.org/3/library/unittest.mock-examples.html#patch-decorators
166+
@unittest.mock.patch("time.sleep", unittest.mock.Mock())
class TestMakeTest(unittest.TestCase):
    """Checks which ``make`` commands ``run_tests`` issues for each family of targets.

    ``make_test.execute`` is mocked in every test, so no command is actually
    run; ``time.sleep`` is replaced by a mock for the whole class.
    """

    @staticmethod
    def _issued_commands(mock_execute: unittest.mock.Mock) -> list[str]:
        # execute() is invoked as execute(executor, args, kwargs); the command line is args[0]
        return [recorded[0][1][0] for recorded in mock_execute.call_args_list]

    def _assert_issued(self, mock_execute: unittest.mock.Mock, *expected: str) -> None:
        # every expected command line has to be among the recorded ones, in any position
        commands: list[str] = self._issued_commands(mock_execute)
        for command in expected:
            assert command in commands

    @unittest.mock.patch("make_test.execute")
    def test_make_commands_jupyter(self, mock_execute: unittest.mock.Mock) -> None:
        """Compares the commands with what we had in the openshift/release yaml"""
        run_tests("jupyter-minimal-ubi9-python-3.11")
        self._assert_issued(
            mock_execute,
            "make deploy9-jupyter-minimal-ubi9-python-3.11",
            "make test-jupyter-minimal-ubi9-python-3.11",
            "make undeploy9-jupyter-minimal-ubi9-python-3.11",
        )

    @unittest.mock.patch("make_test.execute")
    def test_make_commands_jupyter_rocm(self, mock_execute: unittest.mock.Mock) -> None:
        """Compares the commands with what we had in the openshift/release yaml"""
        run_tests("rocm-jupyter-tensorflow-ubi9-python-3.11")
        self._assert_issued(
            mock_execute,
            "make deploy9-jupyter-rocm-tensorflow-ubi9-python-3.11",
            "make test-jupyter-rocm-tensorflow-ubi9-python-3.11",
            "make undeploy9-jupyter-rocm-tensorflow-ubi9-python-3.11",
        )

    @unittest.mock.patch("make_test.execute")
    def test_make_commands_codeserver(self, mock_execute: unittest.mock.Mock) -> None:
        """Compares the commands with what we had in the openshift/release yaml"""
        run_tests("codeserver-ubi9-python-3.11")
        self._assert_issued(
            mock_execute,
            "make deploy9-codeserver-ubi9-python-3.11",
            "make validate-codeserver-image image=codeserver-ubi9-python-3.11",
            "make undeploy9-codeserver-ubi9-python-3.11",
        )

    @unittest.mock.patch("make_test.execute")
    def test_make_commands_rstudio(self, mock_execute: unittest.mock.Mock) -> None:
        """Compares the commands with what we had in the openshift/release yaml"""
        run_tests("rstudio-c9s-python-3.11")
        self._assert_issued(
            mock_execute,
            "make deploy-c9s-rstudio-c9s-python-3.11",
            "make validate-rstudio-image image=rstudio-c9s-python-3.11",
            "make undeploy-c9s-rstudio-c9s-python-3.11",
        )

    @unittest.mock.patch("make_test.execute")
    def test_make_commands_cuda_rstudio(self, mock_execute: unittest.mock.Mock) -> None:
        """Compares the commands with what we had in the openshift/release yaml"""
        run_tests("cuda-rstudio-c9s-python-3.11")
        self._assert_issued(
            mock_execute,
            "make deploy-c9s-rstudio-c9s-python-3.11",
            "make validate-rstudio-image image=cuda-rstudio-c9s-python-3.11",
            "make undeploy-c9s-rstudio-c9s-python-3.11",
        )

    @unittest.mock.patch("make_test.execute")
    def test_make_commands_runtime(self, mock_execute: unittest.mock.Mock) -> None:
        """Compares the commands with what we had in the openshift/release yaml"""
        run_tests("runtime-datascience-ubi9-python-3.11")
        self._assert_issued(
            mock_execute,
            "make deploy9-runtimes-datascience-ubi9-python-3.11",
            "make validate-runtime-image image=runtime-datascience-ubi9-python-3.11",
            "make undeploy9-runtimes-datascience-ubi9-python-3.11",
        )

    @unittest.mock.patch("make_test.execute")
    def test_make_commands_intel_runtime(self, mock_execute: unittest.mock.Mock) -> None:
        """Compares the commands with what we had in the openshift/release yaml"""
        run_tests("intel-runtime-ml-ubi9-python-3.11")
        self._assert_issued(
            mock_execute,
            "make deploy9-intel-runtimes-ml-ubi9-python-3.11",
            "make validate-runtime-image image=intel-runtime-ml-ubi9-python-3.11",
            "make undeploy9-intel-runtimes-ml-ubi9-python-3.11",
        )

    @unittest.mock.patch("make_test.execute")
    def test_make_commands_rocm_runtime(self, mock_execute: unittest.mock.Mock) -> None:
        """Compares the commands with what we had in the openshift/release yaml"""
        run_tests("rocm-runtime-pytorch-ubi9-python-3.11")
        self._assert_issued(
            mock_execute,
            "make deploy9-runtimes-rocm-pytorch-ubi9-python-3.11",
            "make validate-runtime-image image=runtime-rocm-pytorch-ubi9-python-3.11",
            "make undeploy9-runtimes-rocm-pytorch-ubi9-python-3.11",
        )
239+
240+
241+
if __name__ == "__main__":
242+
main()

jupyter/rocm/pytorch/ubi9-python-3.11/kustomize/base/kustomization.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
---
22
apiVersion: kustomize.config.k8s.io/v1beta1
33
kind: Kustomization
4-
namePrefix: rocm-jupyter-pytorch-ubi9-python-3-11-
4+
namePrefix: jupyter-rocm-pytorch-ubi9-python-3-11-
55
commonLabels:
6-
app: rocm-jupyter-pytorch-ubi9-python-3-11
6+
app: jupyter-rocm-pytorch-ubi9-python-3-11
77
resources:
88
- service.yaml
99
- statefulset.yaml

0 commit comments

Comments
 (0)