@@ -32,7 +32,7 @@
 from gpt_engineer.applications.cli.main import load_env_if_needed
 from gpt_engineer.benchmark.bench_config import BenchConfig
 from gpt_engineer.benchmark.benchmarks.load import get_benchmark
-from gpt_engineer.benchmark.run import print_results, run
+from gpt_engineer.benchmark.run import export_json_results, print_results, run

 app = typer.Typer()  # creates a CLI app

@@ -72,8 +72,12 @@ def main(
         ),
     ],
     bench_config: Annotated[
-        Optional[str], typer.Argument(help="optional task name in benchmark")
+        str, typer.Argument(help="optional task name in benchmark")
     ] = os.path.join(os.path.dirname(__file__), "default_bench_config.toml"),
+    json_output: Annotated[
+        Optional[str],
+        typer.Option(help="path to a json file to write results to", show_default=False),
+    ] = None,
     verbose: Annotated[
         bool, typer.Option(help="print results for each task", show_default=False)
     ] = False,
@@ -85,13 +89,12 @@ def main(
     ----------
     path_to_agent : str
         The file path to the Python module that contains a function called 'default_config_agent'.
-    benchmarks : str
-        A comma-separated string of benchmark names to run.
-    bench_config : Optional[str], default=default_bench_config.toml
+    bench_config : str, default=default_bench_config.toml
         Configuration file for choosing which benchmark problems to run. See default config for more details.
+    json_output : Optional[str], default=None
+        Pass a path to a JSON file to have results written to file.
     verbose : bool, default=False
         A flag to indicate whether to print results for each task.
-
     Returns
     -------
     None
@@ -101,6 +104,7 @@ def main(
     config = BenchConfig.from_toml(bench_config)
     print("using config file: " + bench_config)
     benchmarks = list()
+    benchmark_results = dict()
     for specific_config_name in vars(config):
         specific_config = getattr(config, specific_config_name)
         if hasattr(specific_config, "active"):
@@ -124,6 +128,11 @@ def main(
         )
         print_results(results)
         print()
+        benchmark_results[benchmark_name] = {
+            "detailed": [result.to_dict() for result in results]
+        }
+    if json_output is not None:
+        export_json_results(json_output, benchmark_results)


 if __name__ == "__main__":
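
Note: export_json_results is imported from gpt_engineer.benchmark.run, but its definition sits outside this diff. A minimal sketch of what such a helper could look like, assuming it simply serializes the accumulated benchmark_results dict (shaped like {"<benchmark_name>": {"detailed": [<result dicts>]}}) to the given path; the real implementation may also aggregate summary statistics before writing:

import json


def export_json_results(json_output_path, benchmark_results):
    # Hypothetical sketch -- not the actual code in gpt_engineer/benchmark/run.py.
    # Writes the nested per-benchmark results dict built in main() to disk as JSON.
    with open(json_output_path, "w") as file:
        json.dump(benchmark_results, file, indent=4)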
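With this change, results survive the run: passing the new option (e.g. --json_output results.json; Typer may expose it as --json-output depending on the app's settings) writes each active benchmark's per-task results, serialized via each result's to_dict(), to that file, while the existing print_results console output is unchanged.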