Commit 942979e

desertfire authored and facebook-github-bot committed
Update how peak memory is measured (#150534)
Summary: In the dashboard measurement script, AOTI needs to run Eager first to register the output pytree, so the peak memory compression ratio on the dashboard is always close to 1. Update the AOTI run to use an extra warmup run, so that the peak memory compression ratio measures memory at run time instead of at compile time.

X-link: pytorch/pytorch#150534
Approved by: https://github.com/yushangdi
Reviewed By: clee2000
Differential Revision: D72395560
fbshipit-source-id: f37e493d851ea665f88972effbd225e8250a022f
1 parent: e5c9164 · commit: 942979e

File tree

1 file changed: +4, -0 lines


userbenchmark/dynamo/dynamobench/common.py

Lines changed: 4 additions & 0 deletions
@@ -3735,6 +3735,10 @@ def run(runner, args, original_dir=None):
         # AOTInductor doesn't support control flow yet
         runner.skip_models.update(runner.skip_models_due_to_control_flow)
         runner.skip_models.update(runner.skip_models_due_to_export_not_supported)
+
+        # For AOTI, we only measure the memory compression ratio at the run time
+        # instead of the compile time, so use a warmup run to trigger AOTI compilation.
+        args.use_warm_peak_memory = True
     elif args.backend == "torchao":
         assert "cuda" in args.devices, "Quantization requires CUDA device."
         assert args.bfloat16, "Quantization requires dtype bfloat16."
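The idea behind `use_warm_peak_memory` is that a warmup run absorbs one-time allocations (here, AOTI compilation and pytree registration), after which the peak-memory counter is reset so the reported peak reflects only steady-state execution. A minimal sketch of that measurement pattern, using `tracemalloc` as a stand-in for the benchmark's CUDA memory counters (the workload and helper names below are hypothetical, not the dashboard script's actual code):

```python
import tracemalloc

def run_once():
    # Hypothetical workload: allocate a transient buffer, then release it.
    buf = [0] * 100_000
    return sum(buf)

def measure_warm_peak(fn):
    """Return peak memory (bytes) of fn, excluding first-run setup costs."""
    tracemalloc.start()
    fn()                       # warmup run: absorbs one-time "compile-time" allocations
    tracemalloc.reset_peak()   # discard the warmup's peak
    fn()                       # measured run: peak now reflects run time only
    _, peak = tracemalloc.get_traced_memory()
    tracemalloc.stop()
    return peak

peak_bytes = measure_warm_peak(run_once)
```

Without the warmup run and reset, the peak would include compilation-time allocations, which is why the dashboard's compression ratio sat near 1 before this change.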
