File tree 8 files changed +164
-0
lines changed 8 files changed +164
-0
lines changed Original file line number Diff line number Diff line change
1
+ # This file has been copied from ../overlays/kubeflow
2
+ # The original labels have been commented out for documentation purposes
3
+ apiVersion : rbac.authorization.k8s.io/v1
4
+ kind : ClusterRole
5
+ metadata :
6
+ name : training-edit
7
+ labels :
8
+ # rbac.authorization.kubeflow.org/aggregate-to-kubeflow-edit: "true"
9
+ # rbac.authorization.kubeflow.org/aggregate-to-kubeflow-training-admin: "true"
10
+ rbac.authorization.k8s.io/aggregate-to-edit : "true"
11
+ rbac.authorization.k8s.io/aggregate-to-admin : "true"
12
+ rules :
13
+ - apiGroups :
14
+ - kubeflow.org
15
+ resources :
16
+ - mpijobs
17
+ - tfjobs
18
+ - pytorchjobs
19
+ - mxjobs
20
+ - xgboostjobs
21
+ - paddlejobs
22
+ verbs :
23
+ - create
24
+ - delete
25
+ - get
26
+ - list
27
+ - patch
28
+ - update
29
+ - watch
30
+ - apiGroups :
31
+ - kubeflow.org
32
+ resources :
33
+ - mpijobs/status
34
+ - tfjobs/status
35
+ - pytorchjobs/status
36
+ - mxjobs/status
37
+ - xgboostjobs/status
38
+ - paddlejobs/status
39
+ verbs :
40
+ - get
41
+ ---
42
+ apiVersion : rbac.authorization.k8s.io/v1
43
+ kind : ClusterRole
44
+ metadata :
45
+ name : training-view
46
+ labels :
47
+ # rbac.authorization.kubeflow.org/aggregate-to-kubeflow-view: "true"
48
+ rbac.authorization.k8s.io/aggregate-to-view : "true"
49
+ rules :
50
+ - apiGroups :
51
+ - kubeflow.org
52
+ resources :
53
+ - mpijobs
54
+ - tfjobs
55
+ - pytorchjobs
56
+ - mxjobs
57
+ - xgboostjobs
58
+ - paddlejobs
59
+ verbs :
60
+ - get
61
+ - list
62
+ - watch
63
+ - apiGroups :
64
+ - kubeflow.org
65
+ resources :
66
+ - mpijobs/status
67
+ - tfjobs/status
68
+ - pytorchjobs/status
69
+ - mxjobs/status
70
+ - xgboostjobs/status
71
+ - paddlejobs/status
72
+ verbs :
73
+ - get
Original file line number Diff line number Diff line change
1
+ # RHOAI configuration for Kubeflow Training Operator (KFTO)
2
+
3
+ # Adds namespace to all resources.
4
+ namespace : opendatahub
5
+
6
+ # Value of this field is prepended to the
7
+ # names of all resources, e.g. a deployment named
8
+ # "wordpress" becomes "alices-wordpress".
9
+ # Note that it should also match with the prefix (text before '-') of the namespace
10
+ # field above.
11
+ namePrefix : kubeflow-
12
+
13
+ configMapGenerator :
14
+ - name : rhoai-config
15
+ envs :
16
+ - params.env
17
+
18
+ configurations :
19
+ - params.yaml
20
+
21
+ vars :
22
+ - name : image
23
+ objref :
24
+ kind : ConfigMap
25
+ name : rhoai-config
26
+ apiVersion : v1
27
+ fieldref :
28
+ fieldpath : data.odh-training-operator-controller-image
29
+
30
+ # Labels to add to all resources and selectors.
31
+ commonLabels :
32
+ app.kubernetes.io/name : training-operator
33
+ app.kubernetes.io/component : controller
34
+
35
+ resources :
36
+ - ../base
37
+ - kubeflow-training-roles.yaml
38
+ - monitor.yaml
39
+
40
+ patches :
41
+ # Mount the controller config file for loading manager configurations
42
+ # through a ComponentConfig type
43
+ - path : manager_config_patch.yaml
44
+ - path : manager_metrics_patch.yaml
45
+ - path : manager_delete_metrics_service_patch.yaml
Original file line number Diff line number Diff line change
1
+ apiVersion : apps/v1
2
+ kind : Deployment
3
+ metadata :
4
+ name : training-operator
5
+ spec :
6
+ template :
7
+ spec :
8
+ containers :
9
+ - name : training-operator
10
+ image : $(image)
11
+ args :
12
+ - "--zap-log-level=2"
Original file line number Diff line number Diff line change
1
+ # Delete the service created in base
2
+ $patch : delete
3
+ apiVersion : v1
4
+ kind : Service
5
+ metadata :
6
+ name : training-operator
Original file line number Diff line number Diff line change
1
+ apiVersion : apps/v1
2
+ kind : Deployment
3
+ metadata :
4
+ name : training-operator
5
+ spec :
6
+ template :
7
+ spec :
8
+ containers :
9
+ - name : training-operator
10
+ ports :
11
+ - containerPort : 8080
12
+ name : metrics
Original file line number Diff line number Diff line change
1
+ # Prometheus Pod Monitor (Metrics)
2
+ apiVersion : monitoring.coreos.com/v1
3
+ kind : PodMonitor
4
+ metadata :
5
+ name : training-operator-metrics-monitor
6
+ spec :
7
+ selector :
8
+ matchLabels :
9
+ app.kubernetes.io/name : training-operator
10
+ app.kubernetes.io/component : controller
11
+ podMetricsEndpoints :
12
+ - port : metrics
Original file line number Diff line number Diff line change
1
+ odh-training-operator-controller-image=docker.io/kubeflow/training-operator:v1-855e096
Original file line number Diff line number Diff line change
1
+ varReference :
2
+ - path : spec/template/spec/containers[]/image
3
+ kind : Deployment
You can’t perform that action at this time.
0 commit comments