Skip to content

Commit 3875343

Browse files
authored
Bring back TorchBench AO dashboard (#6101)
After pytorch/benchmark#2561, TorchBench AO benchmark data is now available to query and we can finally use that dashboard again. If this proves useful, the next steps would be: 1. pytorch/benchmark#2561 only brings back one example model for each suite (TorchBench, HF, TIMM). We need to add more. 2. TorchBench AO dashboard shares the code with TorchInductor dashboard. While the former has been migrated to the new benchmark database, the latter hasn't. I will need to do that and clean this up in a later PR. 3. Looking at the results on the dashboard, it seems that `autoquant` works, but not `int8dynamic` and `int8weightonly`. I'm not sure if they are still relevant, but if they are, the ao team should know how to fix them (cc @jerryzh168). The run on TorchBench is at https://github.com/pytorch/benchmark/actions/workflows/torchao.yml ### Testing The two metrics speedup and abs execution time are now showing up https://torchci-git-fork-huydhn-ch-migrate-torchao-queries-fbopensource.vercel.app/benchmark/torchao?dashboard=torchao&startTime=Sun%2C%2015%20Dec%202024%2011%3A06%3A45%20GMT&stopTime=Sun%2C%2022%20Dec%202024%2011%3A06%3A45%20GMT&granularity=hour&mode=inference&dtype=autoquant&deviceName=cuda%20(a100)&lBranch=main&lCommit=07e6ef43fca2e95bc6cf59f97ba6251e618ef0e3&rBranch=main&rCommit=c03fa7c6c1bd03242a9de1fddb77a9c778106afd
1 parent 1f11622 commit 3875343

File tree

13 files changed

+330
-319
lines changed

13 files changed

+330
-319
lines changed
Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,13 @@
11
{
2-
"branches": "String",
3-
"commits": "String",
4-
"compilers": "String",
2+
"branches": "Array(String)",
3+
"commits": "Array(String)",
54
"device": "String",
6-
"dtypes": "String",
7-
"getJobId": "Bool",
5+
"dtypes": "Array(String)",
86
"granularity": "String",
97
"mode": "String",
8+
"repo": "String",
109
"startTime": "DateTime64(3)",
1110
"stopTime": "DateTime64(3)",
12-
"suites": "String",
13-
"timezone": "String",
11+
"suites": "Array(String)",
1412
"workflowId": "Int64"
15-
}
13+
}
Lines changed: 72 additions & 156 deletions
Original file line numberDiff line numberDiff line change
@@ -1,160 +1,76 @@
1-
-- !!! Query is not converted to CH syntax yet. Delete this line when it gets converted
2-
WITH performance_results AS (
3-
SELECT
4-
name,
5-
IF(speedup = 'infra_error', NULL, speedup) AS speedup, -- Handle the recent burst of infra error
6-
REPLACE(
7-
filename,
8-
CONCAT(
9-
'_', : dtypes, '_', : mode, '_', : device,
10-
'_performance'
11-
)
12-
) AS filename,
13-
compilation_latency,
14-
compression_ratio,
15-
abs_latency,
16-
mfu,
17-
memory_bandwidth,
18-
dynamo_peak_mem,
19-
eager_peak_mem,
20-
workflow_id,
21-
CAST(job_id AS INT) AS job_id,
22-
FORMAT_ISO8601(
23-
DATE_TRUNC(: granularity, _event_time)
24-
) AS granularity_bucket,
25-
head_sha,
26-
head_branch,
27-
FROM
28-
inductor.torchao_perf_stats
29-
WHERE
30-
filename LIKE '%_performance'
31-
AND filename LIKE CONCAT(
32-
'%_', : dtypes, '_', : mode, '_', : device,
33-
'_%'
34-
)
35-
AND _event_time >= PARSE_DATETIME_ISO8601(:startTime)
36-
AND _event_time < PARSE_DATETIME_ISO8601(:stopTime)
37-
AND (workflow_id = :workflowId OR :workflowId = 0)
38-
),
39-
accuracy_results AS (
40-
SELECT
41-
name,
42-
accuracy,
43-
REPLACE(
44-
filename,
45-
CONCAT(
46-
'_', : dtypes, '_', : mode, '_', : device,
47-
'_accuracy'
48-
)
49-
) AS filename,
50-
workflow_id,
51-
CAST(job_id AS INT) AS job_id,
52-
FROM
53-
inductor.torchao_perf_stats
54-
WHERE
55-
filename LIKE '%_accuracy'
56-
AND filename LIKE CONCAT(
57-
'%_', : dtypes, '_', : mode, '_', : device,
58-
'_%'
59-
)
60-
AND _event_time >= PARSE_DATETIME_ISO8601(:startTime)
61-
AND _event_time < PARSE_DATETIME_ISO8601(:stopTime)
62-
AND (workflow_id = :workflowId OR :workflowId = 0)
63-
AND accuracy != 'model_fail_to_load'
64-
AND accuracy != 'eager_fail_to_run'
65-
),
66-
results AS (
67-
SELECT
68-
performance_results.granularity_bucket AS granularity_bucket,
69-
performance_results.workflow_id AS workflow_id,
70-
performance_results.job_id AS job_id,
71-
performance_results.head_branch AS head_branch,
72-
performance_results.head_sha AS head_sha,
73-
CASE
74-
WHEN performance_results.filename LIKE '%_torchbench' THEN 'torchbench'
75-
WHEN performance_results.filename LIKE '%_timm_models' THEN 'timm_models'
76-
WHEN performance_results.filename LIKE '%_huggingface' THEN 'huggingface'
77-
ELSE NULL
78-
END AS suite,
79-
CASE
80-
WHEN performance_results.filename LIKE '%_torchbench' THEN REPLACE(
81-
performance_results.filename, '_torchbench'
82-
)
83-
WHEN performance_results.filename LIKE '%_timm_models' THEN REPLACE(
84-
performance_results.filename, '_timm_models'
85-
)
86-
WHEN performance_results.filename LIKE '%_huggingface' THEN REPLACE(
87-
performance_results.filename, '_huggingface'
88-
)
89-
ELSE NULL
90-
END AS compiler,
91-
performance_results.name,
92-
IF(TRY_CAST(speedup AS FLOAT) IS NOT NULL,
93-
CAST(speedup AS FLOAT),
94-
0.0
95-
) AS speedup,
96-
accuracy_results.accuracy AS accuracy,
97-
IF(TRY_CAST(compilation_latency AS FLOAT) IS NOT NULL,
98-
CAST(compilation_latency AS FLOAT),
99-
0.0
100-
) AS compilation_latency,
101-
IF(TRY_CAST(compression_ratio AS FLOAT) IS NOT NULL,
102-
CAST(compression_ratio AS FLOAT),
103-
0.0
104-
) AS compression_ratio,
105-
IF(TRY_CAST(abs_latency AS FLOAT) IS NOT NULL,
106-
CAST(abs_latency AS FLOAT),
107-
0.0
108-
) AS abs_latency,
109-
IF(TRY_CAST(mfu AS FLOAT) IS NOT NULL,
110-
CAST(mfu AS FLOAT),
111-
0.0
112-
) AS mfu,
113-
IF(TRY_CAST(memory_bandwidth AS FLOAT) IS NOT NULL,
114-
CAST(memory_bandwidth AS FLOAT),
115-
0.0
116-
) AS memory_bandwidth,
117-
IF(TRY_CAST(dynamo_peak_mem AS FLOAT) IS NOT NULL,
118-
CAST(dynamo_peak_mem AS FLOAT),
119-
0.0
120-
) AS dynamo_peak_mem,
121-
IF(TRY_CAST(eager_peak_mem AS FLOAT) IS NOT NULL,
122-
CAST(eager_peak_mem AS FLOAT),
123-
0.0
124-
) AS eager_peak_mem,
125-
FROM
126-
performance_results
127-
LEFT JOIN accuracy_results ON performance_results.name = accuracy_results.name
128-
AND performance_results.filename = accuracy_results.filename
129-
AND performance_results.workflow_id = accuracy_results.workflow_id
1+
-- This powers HUD TorchAO benchmarks dashboards
2+
WITH benchmarks AS (
3+
SELECT
4+
o.model.origins [ 1 ] AS suite,
5+
o.model.name AS model,
6+
tupleElement(o.benchmark, 'extra_info') [ 'quantization' ] AS dtype,
7+
o.metric.name AS metric,
8+
floor(arrayAvg(o.metric.benchmark_values), 2) AS value,
9+
tupleElement(o.metric, 'extra_info') AS extra_info,
10+
replaceOne(o.head_branch, 'refs/heads/', '') AS head_branch,
11+
o.head_sha AS head_sha,
12+
o.workflow_id AS workflow_id,
13+
o.job_id AS job_id,
14+
DATE_TRUNC(
15+
{granularity: String },
16+
fromUnixTimestamp(o.timestamp)
17+
) AS granularity_bucket
18+
FROM
19+
benchmark.oss_ci_benchmark_v3 o
20+
WHERE
21+
o.timestamp >= toUnixTimestamp({startTime: DateTime64(3) })
22+
AND o.timestamp < toUnixTimestamp({stopTime: DateTime64(3) })
23+
AND o.repo = {repo: String }
24+
AND (
25+
has({commits: Array(String) }, o.head_sha)
26+
OR empty({commits: Array(String) })
27+
)
28+
AND (
29+
has({suites: Array(String) }, suite)
30+
OR empty({suites: Array(String) })
31+
)
32+
AND (
33+
has({dtypes: Array(String) }, dtype)
34+
OR empty({dtypes: Array(String) })
35+
)
36+
AND tupleElement(o.benchmark, 'mode') = {mode: String }
37+
AND tupleElement(o.benchmark, 'extra_info') [ 'device' ] = {device: String }
38+
AND (
39+
workflow_id = {workflowId: Int64}
40+
OR {workflowId: Int64} = 0
41+
)
42+
AND (
43+
o.metric.name in [ 'accuracy',
44+
'speedup',
45+
'compilation_latency',
46+
'compression_ratio',
47+
'abs_latency',
48+
'mfu',
49+
'memory_bandwidth',
50+
'dynamo_peak_mem',
51+
'eager_peak_mem' ]
52+
)
13053
)
131-
SELECT DISTINCT
132-
results.workflow_id,
133-
-- As the JSON response is pretty big, only return the field if it's needed
134-
IF(:getJobId, results.job_id, NULL) AS job_id,
135-
results.suite,
136-
results.compiler,
137-
results.name,
138-
results.speedup,
139-
results.accuracy,
140-
results.compilation_latency,
141-
results.compression_ratio,
142-
results.abs_latency,
143-
results.mfu,
144-
results.memory_bandwidth,
145-
results.dynamo_peak_mem,
146-
results.eager_peak_mem,
147-
results.granularity_bucket,
54+
SELECT
55+
suite,
56+
model,
57+
dtype,
58+
metric,
59+
value,
60+
extra_info,
61+
workflow_id,
62+
job_id,
63+
granularity_bucket
14864
FROM
149-
results
65+
benchmarks
15066
WHERE
151-
ARRAY_CONTAINS(SPLIT(:suites, ','), LOWER(results.suite))
152-
AND (ARRAY_CONTAINS(SPLIT(:compilers, ','), LOWER(results.compiler)) OR :compilers = '')
153-
AND (ARRAY_CONTAINS(SPLIT(:branches, ','), results.head_branch) OR :branches = '')
154-
AND (ARRAY_CONTAINS(SPLIT(:commits, ','), results.head_sha) OR :commits = '')
67+
(
68+
has({branches: Array(String) }, head_branch)
69+
OR empty({branches: Array(String) })
70+
)
15571
ORDER BY
156-
granularity_bucket DESC,
157-
workflow_id DESC,
158-
suite ASC,
159-
compiler ASC,
160-
name ASC
72+
granularity_bucket DESC,
73+
workflow_id DESC,
74+
suite ASC,
75+
dtype ASC,
76+
model ASC
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
{
2+
"dtypes": "Array(String)",
23
"device": "String",
3-
"dtypes": "String",
4-
"granularity": "String",
54
"mode": "String",
5+
"repo": "String",
66
"startTime": "DateTime64(3)",
77
"stopTime": "DateTime64(3)"
8-
}
8+
}
Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,35 @@
1-
-- !!! Query is not converted to CH syntax yet. Delete this line when it gets converted
1+
-- This query is used to get the list of branches and commits used by different
2+
-- OSS CI benchmark experiments. This powers HUD TorchAO benchmarks dashboards
3+
WITH benchmarks AS (
4+
SELECT
5+
o.head_branch AS head_branch,
6+
o.head_sha AS head_sha,
7+
o.workflow_id AS id,
8+
toStartOfDay(fromUnixTimestamp(o.timestamp)) AS event_time
9+
FROM
10+
benchmark.oss_ci_benchmark_v3 o
11+
WHERE
12+
o.timestamp >= toUnixTimestamp({startTime: DateTime64(3) })
13+
AND o.timestamp < toUnixTimestamp({stopTime: DateTime64(3) })
14+
AND o.repo = {repo: String }
15+
AND tupleElement(o.benchmark, 'extra_info') [ 'performance' ] = 'true'
16+
AND (
17+
has(
18+
{dtypes: Array(String) },
19+
tupleElement(o.benchmark, 'extra_info') [ 'quantization' ]
20+
)
21+
OR empty({dtypes: Array(String) })
22+
)
23+
AND tupleElement(o.benchmark, 'mode') = {mode: String }
24+
AND tupleElement(o.benchmark, 'extra_info') [ 'device' ] = {device: String }
25+
)
226
SELECT
3-
DISTINCT head_branch,
4-
head_sha,
5-
FORMAT_ISO8601(
6-
DATE_TRUNC(
7-
: granularity, _event_time
8-
)
9-
) AS event_time,
27+
DISTINCT replaceOne(head_branch, 'refs/heads/', '') AS head_branch,
28+
head_sha,
29+
id,
30+
event_time
1031
FROM
11-
inductor.torchao_perf_stats
12-
WHERE
13-
torchao_perf_stats._event_time >= PARSE_DATETIME_ISO8601(: startTime)
14-
AND torchao_perf_stats._event_time < PARSE_DATETIME_ISO8601(: stopTime)
15-
AND torchao_perf_stats.filename LIKE '%_performance'
16-
AND torchao_perf_stats.filename LIKE CONCAT(
17-
'%_', : dtypes, '_', : mode, '_', : device,
18-
'_%'
19-
)
32+
benchmarks
2033
ORDER BY
21-
head_branch,
22-
event_time DESC
34+
head_branch,
35+
event_time DESC

torchci/components/benchmark/compilers/CompilerGraphGroup.tsx

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,25 +12,34 @@ const GraphCardGroup = styled(Card)({
1212
/** Mui Styles */
1313

1414
export default function CompilerGraphGroup({
15+
dashboard,
1516
suiteConfig,
1617
queryParams,
1718
granularity,
1819
lBranchAndCommit,
1920
rBranchAndCommit,
2021
}: {
21-
lBranchAndCommit: BranchAndCommit;
22-
rBranchAndCommit: BranchAndCommit;
22+
dashboard: string;
23+
suiteConfig: SuiteConfig;
2324
queryParams: { [key: string]: any };
2425
granularity: Granularity;
25-
suiteConfig: SuiteConfig;
26+
lBranchAndCommit: BranchAndCommit;
27+
rBranchAndCommit: BranchAndCommit;
2628
}) {
29+
// TODO (huydhn): Remove this once TorchInductor dashboard is migrated to the
30+
// new database schema
31+
const queryName =
32+
dashboard === "torchao"
33+
? "torchao_query"
34+
: "compilers_benchmark_performance";
35+
2736
return (
2837
<>
2938
<GraphCardGroup>
3039
<CardHeader title={`Suite: ${suiteConfig.name}`} />
3140
<CardContent>
3241
<GraphPanel
33-
queryName={"compilers_benchmark_performance"}
42+
queryName={queryName}
3443
queryParams={queryParams}
3544
granularity={granularity}
3645
suite={suiteConfig.id}

torchci/components/benchmark/compilers/ModelGraphPanel.tsx

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import {
1010
TimeSeriesPanelWithData,
1111
} from "components/metrics/panels/TimeSeriesPanel";
1212
import dayjs from "dayjs";
13+
import { convertToCompilerPerformanceData } from "lib/benchmark/aoUtils";
1314
import { augmentData } from "lib/benchmark/compilerUtils";
1415
import { fetcher } from "lib/GeneralUtils";
1516
import { CompilerPerformanceData } from "lib/types";
@@ -49,6 +50,12 @@ export function GraphPanel({
4950
let { data, error } = useSWR(url, fetcher, {
5051
refreshInterval: 60 * 60 * 1000, // refresh every hour
5152
});
53+
// TODO (huydhn): Remove this once TorchInductor dashboard is migrated to the
54+
// new database schema
55+
data =
56+
queryName === "torchao_query"
57+
? convertToCompilerPerformanceData(data)
58+
: data;
5259
data = augmentData(data);
5360

5461
if (data === undefined || data.length === 0) {

0 commit comments

Comments (0)