3 files changed: +8 -4 lines changed
Original file line number Diff line number Diff line change 1
1
{
2
+ "accelerate_launch_args" : {
3
+ "num_processes" : 2 ,
4
+ "cpu" : true
5
+ },
2
6
"model_name_or_path" : "bigscience/bloom-560m" ,
3
7
"training_data_path" : "/etc/config/twitter_complaints_small.json" ,
4
8
"output_dir" : "/tmp/out" ,
Original file line number Diff line number Diff line change 1
- //go:build ignore
1
+ //go:build tuning_e2e
2
2
3
3
/*
4
4
Copyright 2023.
@@ -94,9 +94,9 @@ func TestPytorchjobWithSFTtrainer(t *testing.T) {
94
94
Containers : []corev1.Container {
95
95
{
96
96
Name : "pytorch" ,
97
- Image : "quay.io/tedchang/sft-trainer:dev" ,
97
+ Image : "quay.io/modh/fms-hf-tuning:5d8789723ec58ac1bc9c2df704395f162fed974a" ,
98
98
ImagePullPolicy : corev1.PullIfNotPresent ,
99
- Command : []string {"python" , "/app/launch_training.py" },
99
+ Command : []string {"python" , "/app/accelerate_launch.py" },
100
100
Env : []corev1.EnvVar {
101
101
{
102
102
Name : "SFT_TRAINER_CONFIG_JSON_PATH" ,
Original file line number Diff line number Diff line change @@ -19,4 +19,4 @@ docker build -t ${KFTO_IMG} -f ${BASE_DIR}/build/images/training-operator/Docker
19
19
20
20
echo " Load training operator image into cluster"
21
21
kind load --name training-operator-cluster docker-image training-operator:dev
22
- KFTO_IMG=training-operator:dev make deploy
22
+ IMG=training-operator:dev make deploy
You can’t perform that action at this time.
0 commit comments