Skip to content

Commit 005c2a0

Browse files
z103cb authored and
astefanutti committed
CARRY: Add RHOAI manifests (#3)
1 parent ed82cdb commit 005c2a0

8 files changed

+164
-0
lines changed
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
# This file has been copied from ../overlays/kubeflow.
# The original Kubeflow aggregation labels have been commented out (not
# removed) for documentation purposes.
#
# ClusterRole "training-edit": read/write access to the kubeflow.org training
# job resources and read-only access to their status subresources.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: training-edit
  labels:
    # rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true"
    # rbac.authorization.kubeflow.org/aggregate-to-kubeflow-training-admin: "true"
    # Kubernetes aggregation labels used in place of the Kubeflow ones above.
    rbac.authorization.k8s.io/aggregate-to-edit: "true"
    rbac.authorization.k8s.io/aggregate-to-admin: "true"
rules:
  # Full management of the training job resources.
  - apiGroups:
      - kubeflow.org
    resources:
      - mpijobs
      - tfjobs
      - pytorchjobs
      - mxjobs
      - xgboostjobs
      - paddlejobs
    verbs:
      - create
      - delete
      - get
      - list
      - patch
      - update
      - watch
  # Status subresources can only be read.
  - apiGroups:
      - kubeflow.org
    resources:
      - mpijobs/status
      - tfjobs/status
      - pytorchjobs/status
      - mxjobs/status
      - xgboostjobs/status
      - paddlejobs/status
    verbs:
      - get
---
# ClusterRole "training-view": read-only access (get/list/watch) to the
# kubeflow.org training job resources and `get` on their status subresources.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: training-view
  labels:
    # rbac.authorization.kubeflow.org/aggregate-to-kubeflow-view: "true"
    # Kubernetes aggregation label used in place of the Kubeflow one above.
    rbac.authorization.k8s.io/aggregate-to-view: "true"
rules:
  # Read-only access to the training job resources.
  - apiGroups:
      - kubeflow.org
    resources:
      - mpijobs
      - tfjobs
      - pytorchjobs
      - mxjobs
      - xgboostjobs
      - paddlejobs
    verbs:
      - get
      - list
      - watch
  # Status subresources can only be read.
  - apiGroups:
      - kubeflow.org
    resources:
      - mpijobs/status
      - tfjobs/status
      - pytorchjobs/status
      - mxjobs/status
      - xgboostjobs/status
      - paddlejobs/status
    verbs:
      - get

manifests/rhoai/kustomization.yaml

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
# RHOAI configuration for Kubeflow Training Operator (KFTO)

# Namespace set on all resources.
namespace: opendatahub

# Prefix prepended to the names of all resources, e.g. a deployment named
# "training-operator" becomes "kubeflow-training-operator".
# NOTE(review): the upstream boilerplate says the prefix should match the text
# before '-' in the namespace above; that does not hold here ("opendatahub"
# vs "kubeflow-") - confirm this is intended.
namePrefix: kubeflow-

# Generates the rhoai-config ConfigMap from the key=value pairs in params.env.
configMapGenerator:
  - name: rhoai-config
    envs:
      - params.env

# Extra transformer configuration; params.yaml declares where $(var)
# references are expanded.
configurations:
  - params.yaml

# Exposes the controller image stored in the rhoai-config ConfigMap as the
# $(image) variable.
vars:
  - name: image
    objref:
      kind: ConfigMap
      name: rhoai-config
      apiVersion: v1
    fieldref:
      fieldpath: data.odh-training-operator-controller-image

# Labels to add to all resources and selectors.
commonLabels:
  app.kubernetes.io/name: training-operator
  app.kubernetes.io/component: controller

resources:
  - ../base
  - kubeflow-training-roles.yaml
  - monitor.yaml

# Patch files applied on top of the resources listed above, in this order.
patches:
  - path: manager_config_patch.yaml
  - path: manager_metrics_patch.yaml
  - path: manager_delete_metrics_service_patch.yaml
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
# Patch for the training-operator Deployment: sets the container image to the
# $(image) placeholder and passes the zap log level argument.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: training-operator
spec:
  template:
    spec:
      containers:
        - name: training-operator
          image: $(image)
          args:
            - "--zap-log-level=2"
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
# Delete the service created in base.
# The `$patch: delete` directive removes the Service identified below
# instead of merging fields into it.
$patch: delete
apiVersion: v1
kind: Service
metadata:
  name: training-operator
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
# Patch for the training-operator Deployment: declares container port 8080
# under the name "metrics" on the training-operator container.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: training-operator
spec:
  template:
    spec:
      containers:
        - name: training-operator
          ports:
            - containerPort: 8080
              name: metrics

manifests/rhoai/monitor.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
# Prometheus Pod Monitor (Metrics)
# Selects pods labelled as the training-operator controller and scrapes the
# container port named "metrics".
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
  name: training-operator-metrics-monitor
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: training-operator
      app.kubernetes.io/component: controller
  podMetricsEndpoints:
    - port: metrics

manifests/rhoai/params.env

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
odh-training-operator-controller-image=docker.io/kubeflow/training-operator:v1-855e096

manifests/rhoai/params.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
# Kustomize variable-reference configuration: allows $(var) substitution in
# the image field of every container of a Deployment.
varReference:
  - path: spec/template/spec/containers[]/image
    kind: Deployment

0 commit comments

Comments
 (0)