Skip to content

Commit 849f0f5

Browse files
authored
Merge pull request #110 from opendatahub-io/sync-release-with-main
Sync release with main for RHOAI 2.12
2 parents 27d7746 + b145c20 commit 849f0f5

File tree

549 files changed

+42884
-11710
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

549 files changed

+42884
-11710
lines changed

.buildkite/download-images.sh

Lines changed: 0 additions & 18 deletions
This file was deleted.
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash ./run-lm-eval-gsm-vllm-baseline.sh -m deepseek-ai/DeepSeek-V2-Lite-Chat -b "auto" -l 1000 -f 5 -t 2
model_name: "deepseek-ai/DeepSeek-V2-Lite-Chat"
tasks:
- name: "gsm8k"
  metrics:
  - name: "exact_match,strict-match"
    value: 0.671
  - name: "exact_match,flexible-extract"
    value: 0.664
limit: 1000
num_fewshot: 5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh -m nm-testing/Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform -b auto -l 1000 -f 5
model_name: "nm-testing/Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform"
tasks:
- name: "gsm8k"
  metrics:
  - name: "exact_match,strict-match"
    value: 0.905
  - name: "exact_match,flexible-extract"
    value: 0.905
limit: 1000
num_fewshot: 5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh -m meta-llama/Meta-Llama-3-70B-Instruct -b 32 -l 250 -f 5
model_name: "meta-llama/Meta-Llama-3-70B-Instruct"
tasks:
- name: "gsm8k"
  metrics:
  - name: "exact_match,strict-match"
    value: 0.892
  - name: "exact_match,flexible-extract"
    value: 0.892
limit: 250
num_fewshot: 5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-Instruct-W8A8-FP8-Channelwise-compressed-tensors -b auto -l 1000 -f 5 -t 1
model_name: "nm-testing/Meta-Llama-3-8B-Instruct-W8A8-FP8-Channelwise-compressed-tensors"
tasks:
- name: "gsm8k"
  metrics:
  - name: "exact_match,strict-match"
    value: 0.752
  - name: "exact_match,flexible-extract"
    value: 0.754
limit: 1000
num_fewshot: 5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-Instruct-FBGEMM-nonuniform -b auto -l 1000 -f 5 -t 1
model_name: "nm-testing/Meta-Llama-3-8B-Instruct-FBGEMM-nonuniform"
tasks:
- name: "gsm8k"
  metrics:
  - name: "exact_match,strict-match"
    value: 0.753
  - name: "exact_match,flexible-extract"
    value: 0.753
limit: 1000
num_fewshot: 5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-FP8-compressed-tensors-test -b 32 -l 1000 -f 5 -t 1
model_name: "nm-testing/Meta-Llama-3-8B-FP8-compressed-tensors-test"
tasks:
- name: "gsm8k"
  metrics:
  - name: "exact_match,strict-match"
    value: 0.755
  - name: "exact_match,flexible-extract"
    value: 0.755
limit: 1000
num_fewshot: 5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m neuralmagic/Meta-Llama-3-8B-Instruct-FP8 -b 32 -l 250 -f 5 -t 1
model_name: "neuralmagic/Meta-Llama-3-8B-Instruct-FP8"
tasks:
- name: "gsm8k"
  metrics:
  - name: "exact_match,strict-match"
    value: 0.753
  - name: "exact_match,flexible-extract"
    value: 0.753
# NOTE(review): the reproduction command above uses -l 250 but limit here is
# 1000 — confirm which sample limit the baseline values were measured at.
limit: 1000
num_fewshot: 5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-Instruct-W8-Channel-A8-Dynamic-Per-Token-Test -b "auto" -l 250 -f 5 -t 1
model_name: "nm-testing/Meta-Llama-3-8B-Instruct-W8-Channel-A8-Dynamic-Per-Token-Test"
tasks:
- name: "gsm8k"
  metrics:
  - name: "exact_match,strict-match"
    value: 0.728
  - name: "exact_match,flexible-extract"
    value: 0.728
limit: 250
num_fewshot: 5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-Instruct-nonuniform-test -b auto -l 1000 -f 5 -t 1
model_name: "nm-testing/Meta-Llama-3-8B-Instruct-nonuniform-test"
tasks:
- name: "gsm8k"
  metrics:
  - name: "exact_match,strict-match"
    value: 0.758
  - name: "exact_match,flexible-extract"
    value: 0.759
limit: 1000
num_fewshot: 5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh -m meta-llama/Meta-Llama-3-8B-Instruct -b 32 -l 250 -f 5 -t 1
# NOTE(review): the hf baseline script parses only -m/-b/-l/-f (no -t flag) —
# confirm whether the trailing "-t 1" in the command above is stale.
model_name: "meta-llama/Meta-Llama-3-8B-Instruct"
tasks:
- name: "gsm8k"
  metrics:
  - name: "exact_match,strict-match"
    value: 0.756
  - name: "exact_match,flexible-extract"
    value: 0.752
limit: 250
num_fewshot: 5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash ./run-lm-eval-gsm-vllm-baseline.sh -m neuralmagic/Mixtral-8x22B-Instruct-v0.1-FP8-dynamic -b "auto" -l 250 -f 5 -t 8
model_name: "neuralmagic/Mixtral-8x22B-Instruct-v0.1-FP8-dynamic"
tasks:
- name: "gsm8k"
  metrics:
  - name: "exact_match,strict-match"
    value: 0.86
  - name: "exact_match,flexible-extract"
    value: 0.86
limit: 250
num_fewshot: 5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash ./run-lm-eval-gsm-vllm-baseline.sh -m neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8 -b "auto" -l 250 -f 5 -t 4
model_name: "neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8"
tasks:
- name: "gsm8k"
  metrics:
  - name: "exact_match,strict-match"
    value: 0.624
  - name: "exact_match,flexible-extract"
    value: 0.624
limit: 250
num_fewshot: 5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh -m neuralmagic/Mixtral-8x7B-Instruct-v0.1 -b 32 -l 250 -f 5 -t 4
# NOTE(review): the command above points at the neuralmagic/ org while
# model_name below uses mistralai/ — confirm which stub the baseline used.
model_name: "mistralai/Mixtral-8x7B-Instruct-v0.1"
tasks:
- name: "gsm8k"
  metrics:
  - name: "exact_match,strict-match"
    value: 0.616
  - name: "exact_match,flexible-extract"
    value: 0.632
limit: 250
num_fewshot: 5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m neuralmagic/Qwen2-1.5B-Instruct-quantized.w8a8 -b "auto" -l 1000 -f 5 -t 1
model_name: "neuralmagic/Qwen2-1.5B-Instruct-quantized.w8a8"
tasks:
- name: "gsm8k"
  metrics:
  - name: "exact_match,strict-match"
    value: 0.593
  - name: "exact_match,flexible-extract"
    value: 0.588
limit: 1000
num_fewshot: 5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Qwen2-1.5B-Instruct-W8A16-Channelwise -b "auto" -l 1000 -f 5 -t 1
model_name: "nm-testing/Qwen2-1.5B-Instruct-W8A16-Channelwise"
tasks:
- name: "gsm8k"
  metrics:
  - name: "exact_match,strict-match"
    value: 0.595
  - name: "exact_match,flexible-extract"
    value: 0.582
limit: 1000
num_fewshot: 5
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
# bash ./run-lm-eval-gsm-vllm-baseline.sh -m Qwen/Qwen2-57B-A14B-Instruct -b "auto" -l 250 -f 5 -t 4
model_name: "Qwen/Qwen2-57B-A14B-Instruct"
tasks:
- name: "gsm8k"
  metrics:
  - name: "exact_match,strict-match"
    value: 0.792
  - name: "exact_match,flexible-extract"
    value: 0.824
limit: 250
num_fewshot: 5
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform.yaml
Meta-Llama-3-70B-Instruct.yaml
Mixtral-8x7B-Instruct-v0.1.yaml
Qwen2-57B-A14-Instruct.yaml
DeepSeek-V2-Lite-Chat.yaml
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
Meta-Llama-3-8B-Instruct.yaml
Meta-Llama-3-8B-Instruct-FP8.yaml
Meta-Llama-3-8B-Instruct-FP8-compressed-tensors.yaml
Meta-Llama-3-8B-Instruct-INT8-compressed-tensors.yaml
Meta-Llama-3-8B-Instruct-nonuniform-compressed-tensors.yaml
Meta-Llama-3-8B-Instruct-Channelwise-compressed-tensors.yaml
Qwen2-1.5B-Instruct-INT8-compressed-tensors.yaml
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#!/bin/bash
# We can use this script to compute baseline accuracy on GSM for transformers.
#
# Make sure you have lm-eval-harness installed:
# pip install git+https://github.com/EleutherAI/lm-evaluation-harness.git@9516087b81a61d0e220b22cc1b75be76de23bc10

# Print help for this script to stdout.
usage() {
  # was: echo`` — an empty backtick command substitution fused onto echo;
  # a plain echo (blank line) is what was intended.
  echo
  echo "Runs lm eval harness on GSM8k using huggingface transformers."
  echo "This pathway is intended to be used to create baselines for "
  echo "our automated nm-test-accuracy workflow"
  echo
  echo "usage: ${0} <options>"
  echo
  echo "  -m    - huggingface stub or local directory of the model"
  echo "  -b    - batch size to run the evaluation at"
  echo "  -l    - limit number of samples to run"
  echo "  -f    - number of fewshot samples to use"
  echo
}

# Parse the short options; any unknown flag prints usage and aborts.
while getopts "m:b:l:f:" OPT; do
  case ${OPT} in
    m )
      MODEL="$OPTARG"
      ;;
    b )
      BATCH_SIZE="$OPTARG"
      ;;
    l )
      LIMIT="$OPTARG"
      ;;
    f )
      FEWSHOT="$OPTARG"
      ;;
    \? )
      usage
      exit 1
      ;;
  esac
done

# Expansions are quoted (SC2086) so model stubs/paths containing spaces or
# glob characters do not word-split; the --model_args value is one argument.
lm_eval --model hf \
  --model_args "pretrained=$MODEL,parallelize=True" \
  --tasks gsm8k --num_fewshot "$FEWSHOT" --limit "$LIMIT" \
  --batch_size "$BATCH_SIZE"
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#!/bin/bash
# We can use this script to compute baseline accuracy on GSM for vllm.
# We use this for fp8, which HF does not support.
#
# Make sure you have lm-eval-harness installed:
# pip install lm-eval==0.4.3

# Print help for this script to stdout.
usage() {
  # was: echo`` — an empty backtick command substitution fused onto echo;
  # a plain echo (blank line) is what was intended.
  echo
  echo "Runs lm eval harness on GSM8k using huggingface transformers."
  echo "This pathway is intended to be used to create baselines for "
  echo "our automated nm-test-accuracy workflow"
  echo
  echo "usage: ${0} <options>"
  echo
  echo "  -m    - huggingface stub or local directory of the model"
  echo "  -b    - batch size to run the evaluation at"
  echo "  -l    - limit number of samples to run"
  echo "  -f    - number of fewshot samples to use"
  echo "  -t    - tensor parallel size to run at"
  echo
}

# Parse the short options; any unknown flag prints usage and aborts.
while getopts "m:b:l:f:t:" OPT; do
  case ${OPT} in
    m )
      MODEL="$OPTARG"
      ;;
    b )
      BATCH_SIZE="$OPTARG"
      ;;
    l )
      LIMIT="$OPTARG"
      ;;
    f )
      FEWSHOT="$OPTARG"
      ;;
    t )
      TP_SIZE="$OPTARG"
      ;;
    \? )
      usage
      exit 1
      ;;
  esac
done

# Expansions are quoted (SC2086); the whole --model_args value is passed as a
# single argument, identical in content to the original unquoted form.
lm_eval --model vllm \
  --model_args "pretrained=$MODEL,tensor_parallel_size=$TP_SIZE,distributed_executor_backend=ray,trust_remote_code=true,max_model_len=4096" \
  --tasks gsm8k --num_fewshot "$FEWSHOT" --limit "$LIMIT" \
  --batch_size "$BATCH_SIZE"
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#!/bin/bash
# Runs the lm-eval GSM8k correctness test (test_lm_eval_correctness.py) once
# per model config listed in a .txt config file, comparing vllm results to
# precomputed HF-transformers baselines. Exits 0 only if every model passes.

# Print help for this script to stdout.
usage() {
  # was: echo`` — an empty backtick command substitution fused onto echo;
  # a plain echo (blank line) is what was intended.
  echo
  echo "Runs lm eval harness on GSM8k using vllm and compares to "
  echo "precomputed baseline (measured by HF transformers.)"
  echo
  echo "usage: ${0} <options>"
  echo
  echo "  -c    - path to the test data config (e.g. configs/small-models.txt)"
  echo "  -t    - tensor parallel size"
  echo
}

# Accumulates non-zero pytest exit codes across all models.
SUCCESS=0

while getopts "c:t:" OPT; do
  case ${OPT} in
    c )
      CONFIG="$OPTARG"
      ;;
    t )
      TP_SIZE="$OPTARG"
      ;;
    \? )
      usage
      exit 1
      ;;
  esac
done

# Parse list of configs: one yaml filename per line of $CONFIG.
# Quoted (SC2086) so a config path with spaces does not word-split.
IFS=$'\n' read -d '' -r -a MODEL_CONFIGS < "$CONFIG"

for MODEL_CONFIG in "${MODEL_CONFIGS[@]}"
do
  LOCAL_SUCCESS=0

  echo "=== RUNNING MODEL: $MODEL_CONFIG WITH TP SIZE: $TP_SIZE==="

  # test_lm_eval_correctness.py reads its parameters from these env vars.
  export LM_EVAL_TEST_DATA_FILE="$PWD/configs/${MODEL_CONFIG}"
  export LM_EVAL_TP_SIZE="$TP_SIZE"
  # Capture pytest's exit code without aborting the loop on failure.
  pytest -s test_lm_eval_correctness.py || LOCAL_SUCCESS=$?

  if [[ $LOCAL_SUCCESS == 0 ]]; then
    echo "=== PASSED MODEL: ${MODEL_CONFIG} ==="
  else
    echo "=== FAILED MODEL: ${MODEL_CONFIG} ==="
  fi

  SUCCESS=$((SUCCESS + LOCAL_SUCCESS))

done

# Any failed model makes the accumulated status non-zero.
if [ "${SUCCESS}" -eq "0" ]; then
  exit 0
else
  exit 1
fi

0 commit comments

Comments
 (0)