
Commit 6693f58

huydhn authored and facebook-github-bot committed
Don't upload compiler benchmark debug info to the benchmark database (#153769)
Summary: During our debug session, wdvr and I found that the benchmark database is growing much faster than we expect. On closer inspection, the majority of the records come from the TorchInductor benchmark, and the top 3 metrics are all debug information not used by any dashboard at the moment. Over a period of 7 days, there were close to 6 million such records ([query](https://paste.sh/GUVCBa0v#UzszFCZaWQxh7oSVsZtfZdVE)):

```
Benchmark,Metric,Count
"TorchInductor","user_stack","1926014"
"TorchInductor","reason","1926014"
"TorchInductor","model","1926014"
```

Let's skip uploading them to avoid bloating the database.

X-link: pytorch/pytorch#153769

Approved by: https://github.com/malfet

Reviewed By: yangw-dev

Differential Revision: D75295298

fbshipit-source-id: 621e582c8908bf58fe79fc5069ea814494d8f78c
1 parent 227fb01 commit 6693f58

File tree

1 file changed

+8
-2
lines changed


userbenchmark/dynamo/dynamobench/common.py

Lines changed: 8 additions & 2 deletions
```diff
@@ -343,7 +343,7 @@ def load_model_from_path(path_and_class_str):
     return model, inputs


-def write_outputs(filename, headers, row):
+def write_outputs(filename, headers, row, upload_to_benchmark_db: bool = True):
     """
     Write both CSV and JSON outputs using the original CSV output interface
     """
@@ -352,7 +352,8 @@ def write_outputs(filename, headers, row):
         return

     output_csv(filename, headers, row)
-    output_json(filename, headers, row)
+    if upload_to_benchmark_db:
+        output_json(filename, headers, row)


 def output_csv(filename, headers, row):
@@ -2847,10 +2848,15 @@ def add_double_quotes(x):
                 user_stack = add_double_quotes(
                     ", ".join([str(x) for x in graph_break.user_stack])
                 )
+
+                # NB: Don't upload them to the benchmark database as they are debugging
+                # infomation. There are also around a million records a day which is
+                # wasteful to store
                 write_outputs(
                     filename,
                     ["model", "reason", "user_stack"],
                     [current_name, reason, user_stack],
+                    False,
                 )

                 if self.args.stats:
```
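The change boils down to a simple opt-out flag: the CSV output (used locally) is always written, while the JSON output (the path that feeds the benchmark database) is now gated behind `upload_to_benchmark_db`. A minimal standalone sketch of that pattern is below; the helper names mirror those in `common.py`, but their bodies here are hypothetical stand-ins, not the real implementations from the repo.

```python
# Sketch of the opt-out pattern from this commit. output_csv/output_json
# below are hypothetical stand-ins for the helpers in common.py.
import csv
import json
import os


def output_csv(filename, headers, row):
    # Stand-in: append one row to a local CSV file, writing the header
    # only when the file is first created.
    exists = os.path.exists(filename)
    with open(filename, "a", newline="") as f:
        writer = csv.writer(f)
        if not exists:
            writer.writerow(headers)
        writer.writerow(row)


def output_json(filename, headers, row):
    # Stand-in for the path that feeds the benchmark database:
    # append one JSON record per line to a sibling .json file.
    record = dict(zip(headers, row))
    with open(os.path.splitext(filename)[0] + ".json", "a") as f:
        f.write(json.dumps(record) + "\n")


def write_outputs(filename, headers, row, upload_to_benchmark_db: bool = True):
    # The CSV output is always written; the database-bound JSON output
    # is skipped when the caller opts out, as in the committed change.
    output_csv(filename, headers, row)
    if upload_to_benchmark_db:
        output_json(filename, headers, row)


# Debug info such as graph-break stacks is kept in the local CSV but
# never uploaded to the database:
write_outputs(
    "graph_breaks.csv",
    ["model", "reason", "user_stack"],
    ["some_model", "some_reason", "some_stack"],
    False,
)
```

Defaulting the flag to `True` keeps every existing call site uploading as before, so only the graph-break debug path needs to pass `False` explicitly.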
