11
11
from sklearn .metrics import roc_auc_score
12
12
from scipy .stats import spearmanr
13
13
14
- from dataworkspaces .lineage import LineageBuilder
14
+ # from dataworkspaces.lineage import LineageBuilder
15
15
16
16
17
17
# Maps each run setting to a pair ``(pattern, converter)``: the pattern's
# first capture group holds the raw text of the setting as it is embedded in a
# predictions file name, and the converter turns that text into a typed value.
# Settings appear as ``<name>-<value>`` fields separated by commas; the
# dataset is whatever follows the last comma before the ``.preds`` suffix.
ARGS_REGEX = {
    "hlwt": (re.compile(r"hlwt-([^,]*)"), float),
    "l2wt": (re.compile(r"l2wt-([^,]*)"), float),
    "epochs": (re.compile(r"epochs-([0-9]*)"), int),
    "method": (re.compile(r"method-([^,]*)"), str),
    "shuffle": (re.compile(r"shuffle-([0-9]*)"), int),
    "training_fraction": (re.compile(r"training_fraction-([^,]*)"), float),
    "dataset": (re.compile(r",([^,]*)\.preds"), str),
}
26
26
27
27
@@ -38,47 +38,52 @@ def parse_args(file_name):
38
38
39
39
40
40
def _analysis_worker(op_file):
    """Compute the evaluation metrics for one predictions file.

    The file is read as a tab-separated table and must provide the columns
    ``p``, ``pp``, ``h`` and ``hh``.
    NOTE(review): presumably ``p``/``h`` are the observed values and
    ``pp``/``hh`` the model's predictions -- confirm against the producer
    of the ``*.preds`` files.

    Args:
        op_file: Path of the predictions file; it is also handed to
            ``parse_args`` to recover the run settings from its name.

    Returns:
        The object returned by ``parse_args(op_file)`` (assumed to be a
        dict), extended with the keys ``MAE``, ``AUC``, ``COR_p`` and
        ``COR_h``.
    """
    preds = pd.read_csv(op_file, sep="\t")
    args = parse_args(op_file)

    # Mean absolute error between the two `p` columns.
    args["MAE"] = np.mean(np.abs(preds["p"] - preds["pp"]))
    # NOTE(review): roc_auc_score expects discrete labels as its first
    # argument; this relies on `p` holding such labels -- confirm.
    args["AUC"] = roc_auc_score(preds["p"], preds["pp"])
    # spearmanr returns (correlation, p-value); only the correlation is kept.
    args["COR_p"] = spearmanr(preds["p"], preds["pp"])[0]
    args["COR_h"] = spearmanr(preds["h"], preds["hh"])[0]

    return args
50
50
51
51
52
52
@click.command()
@click.argument("results_dir", type=click.Path())
@click.argument("output_csv", type=click.Path())
@click.option(
    "--debug/--no-debug",
    default=False,
    help="Run in single threaded mode for debugging.",
)
def run(results_dir, output_csv, debug):
    """Read all *.preds files from RESULTS_DIR, calculate the metrics, and
    save output to OUTPUT_CSV."""
    # glob returns matches in arbitrary order; sorting keeps the row order of
    # the resulting CSV reproducible from run to run.
    op_files = sorted(glob.glob(os.path.join(results_dir, "*.preds")))

    # NOTE: lineage tracking through dataworkspaces' LineageBuilder is
    # currently disabled (its import is commented out at the top of the
    # file), so the metrics are computed without recording lineage.

    if debug:
        # Single process: keeps tracebacks and debuggers usable.
        data = [_analysis_worker(op_file) for op_file in op_files]
    else:
        # One task per predictions file, spread over a process pool.
        with MP.Pool() as pool:
            data = pool.map(_analysis_worker, op_files)

    # One CSV row per predictions file.
    pd.DataFrame(data).to_csv(output_csv, index=False)

    print("Done.")
81
86
82
87
83
# Script entry point: click parses the command line and invokes `run`.
if __name__ == "__main__":
    run()
0 commit comments