@@ -15,44 +15,41 @@ jobs:
15
15
timeout-minutes : 1440 # 24 hours
16
16
environment : docker-s3-upload
17
17
env :
18
- BASE_CONDA_ENV : " torchbench"
19
18
CONDA_ENV : " optim"
20
19
PLATFORM_NAME : " gcp_a100"
21
20
TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN : ${{ secrets.TORCHBENCH_USERBENCHMARK_SCRIBE_GRAPHQL_ACCESS_TOKEN }}
22
21
AWS_ACCESS_KEY_ID : ${{ secrets.AWS_ACCESS_KEY_ID }}
23
22
AWS_SECRET_ACCESS_KEY : ${{ secrets.AWS_SECRET_ACCESS_KEY }}
24
- SETUP_SCRIPT : " /workspace/setup_instance.sh"
25
23
HUGGING_FACE_HUB_TOKEN : ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
26
24
steps :
27
25
# Unchanged context: checks out the repository with actions/checkout@v3 into the
# `benchmark` directory, which the later steps enter via `pushd benchmark`.
- name : Checkout TorchBench
28
26
uses : actions/checkout@v3
29
27
with :
30
28
path : benchmark
31
# This part of the hunk removes two steps ("Tune Nvidia GPU", which called
# nvidia-smi, and "Clone and setup Conda env", which cloned BASE_CONDA_ENV into
# CONDA_ENV after sourcing SETUP_SCRIPT) and adds a single "Install Conda" step
# that runs .ci/torchbench/install-conda.sh from inside the benchmark checkout.
# NOTE(review): presumably install-conda.sh creates the "${CONDA_ENV}" environment
# that later steps activate -- confirm against that script.
- - name : Tune Nvidia GPU
29
+ - name : Install Conda
32
30
run : |
33
- sudo nvidia-smi -pm 1
34
- sudo nvidia-smi -ac 1215,1410
35
- nvidia-smi
36
- - name : Clone and setup Conda env
37
- run : |
38
- CONDA_ENV=${BASE_CONDA_ENV} . "${SETUP_SCRIPT}"
39
- conda create --name "${CONDA_ENV}" --clone "${BASE_CONDA_ENV}"
31
+ set -x
32
+ pushd benchmark
33
+ bash ./.ci/torchbench/install-conda.sh
40
34
# Installs a fixed subset of TorchBench models with install.py. The change swaps
# sourcing SETUP_SCRIPT for sourcing conda.sh from ${HOME}/miniconda3 and
# explicitly activating the "${CONDA_ENV}" environment before running python.
- name : Install TorchBench
41
35
run : |
42
36
set -x
43
- . "${SETUP_SCRIPT}"
37
+ . "${HOME}"/miniconda3/etc/profile.d/conda.sh
38
+ conda activate "${CONDA_ENV}"
44
39
pushd benchmark
45
40
# only install the subset of models currently running.
46
41
python install.py BERT_pytorch DALLE2_pytorch hf_GPT2_large hf_T5_large resnet50 timm_vision_transformer_large yolov3
47
42
# Prints torch.version.git_version from the activated "${CONDA_ENV}" environment.
# The change replaces sourcing SETUP_SCRIPT with sourcing conda.sh from
# ${HOME}/miniconda3 followed by an explicit `conda activate`.
- name : Print torch.version.git_version
48
43
run : |
49
44
set -x
50
- . "${SETUP_SCRIPT}"
45
+ . "${HOME}"/miniconda3/etc/profile.d/conda.sh
46
+ conda activate "${CONDA_ENV}"
51
47
python -c "import torch; print(torch.version.git_version)"
52
48
- name : Run optim user benchmark
53
49
run : |
54
50
set -x
55
- . "${SETUP_SCRIPT}"
51
+ . "${HOME}"/miniconda3/etc/profile.d/conda.sh
52
+ conda activate "${CONDA_ENV}"
56
53
# remove old results
57
54
if [ -d benchmark-output ]; then rm -Rf benchmark-output; fi
58
55
pushd benchmark
65
62
- name : Detect potential regressions
66
63
continue-on-error : true
67
64
run : |
68
- . "${SETUP_SCRIPT}"
65
+ set -x
66
+ . "${HOME}"/miniconda3/etc/profile.d/conda.sh
67
+ conda activate "${CONDA_ENV}"
69
68
pushd benchmark
70
69
RESULTS=($(find ${PWD}/../benchmark-output -name "metrics-*.json" -maxdepth 2 | sort -r))
71
70
# TODO: the following assumes only one metrics-*.json is found. It will keep
86
85
torchbench-perf-report
87
86
- name : Upload result jsons to Scribe and S3
88
87
run : |
89
- . "${SETUP_SCRIPT}"
88
+ . "${HOME}"/miniconda3/etc/profile.d/conda.sh
89
+ conda activate "${CONDA_ENV}"
90
90
pushd benchmark
91
91
RESULTS=($(find ${PWD}/../benchmark-output -name "metrics-*.json" -maxdepth 2 | sort -r))
92
92
echo "Uploading result jsons: ${RESULTS}"
@@ -102,13 +102,13 @@ jobs:
102
102
# Runs regardless of earlier step outcomes (`if: always()`): if benchmark/errors.txt
# exists, prints it and exits with status 1. The only change in this hunk is the
# added `set -x`, which makes the shell echo each command as it runs.
- name : Finally, error if errors.txt exists
103
103
if : always()
104
104
run : |
105
+ set -x
105
106
# Do not error earlier as we want all artifacts and regressions to be reported first
106
107
# TODO: potentially move errors.txt to benchmark-output so it gets uploaded to S3
107
108
pushd benchmark
108
109
if [ -e errors.txt ]; then cat errors.txt && exit 1; fi
109
110
# Cleanup step that runs regardless of earlier step outcomes (`if: always()`):
# sources conda.sh from ${HOME}/miniconda3 and removes the "${CONDA_ENV}"
# environment. The old version sourced SETUP_SCRIPT and deactivated twice first.
# ${HOME} is quoted here to match the other steps that source conda.sh and to
# avoid word splitting if the home path ever contains whitespace.
- name : Remove conda environment
110
111
if : always()
111
112
run : |
112
- . "${SETUP_SCRIPT}"
113
- conda deactivate && conda deactivate
113
+ . "${HOME}"/miniconda3/etc/profile.d/conda.sh
114
114
conda remove -n "${CONDA_ENV}" --all
0 commit comments