@@ -1,92 +1,87 @@
-import json
-from pathlib import Path
-
 import argparse
+import json
 import logging
+from collections import defaultdict
+from itertools import combinations
 from pathlib import Path
 from typing import Optional
 
+import datasets
+import matplotlib.pyplot as plt
+import numpy as np
 import pandas as pd
 import seaborn as sns
-import matplotlib.pyplot as plt
 from huggingface_hub import snapshot_download
-import datasets
-import json
-
-import numpy as np
-import matplotlib.pyplot as plt
-from itertools import combinations
-from collections import defaultdict
 
-
 FONT_SIZES = {"small": 12, "medium": 16, "large": 18}
 
 PLOT_PARAMS = {
-    "font.family": "serif",
-    "font.serif": ["Times New Roman", "STIX"],
-    "font.size": FONT_SIZES.get("medium"),
-    "axes.titlesize": FONT_SIZES.get("large"),
-    "axes.labelsize": FONT_SIZES.get("large"),
-    "xtick.labelsize": FONT_SIZES.get("large"),
-    "ytick.labelsize": FONT_SIZES.get("small"),
-    "legend.fontsize": FONT_SIZES.get("medium"),
-    "figure.titlesize": FONT_SIZES.get("medium"),
-    "text.usetex": False,
+    "font.family": "serif",
+    "font.serif": ["Times New Roman", "STIX"],
+    "font.size": FONT_SIZES.get("medium"),
+    "axes.titlesize": FONT_SIZES.get("large"),
+    "axes.labelsize": FONT_SIZES.get("large"),
+    "xtick.labelsize": FONT_SIZES.get("large"),
+    "ytick.labelsize": FONT_SIZES.get("small"),
+    "legend.fontsize": FONT_SIZES.get("medium"),
+    "figure.titlesize": FONT_SIZES.get("medium"),
+    "text.usetex": False,
 }
 
 logging.basicConfig(level=logging.INFO)
 
 plt.rcParams.update(PLOT_PARAMS)
 
+
 def load_json(json_file_path):
-    with open(json_file_path, "r") as file:
-        json_data = json.load(file)
-    return json_data
+    with open(json_file_path, "r") as file:
+        json_data = json.load(file)
+    return json_data
 
-results_dir = 'data/eval-results-maple'
+
+results_dir = "data/eval-results-maple"
 results_path = Path(results_dir)
 
 results_all = []
 for result_file in results_path.glob("*.json"):
-    raw_results = load_json(result_file)
-    if "leaderboard" in raw_results.keys():
-        model_id = raw_results["model"]
-        subset_results = raw_results['subset']
-        overall = raw_results['scores']['accuracy']
-        remove_key = ['model', 'model_type', 'chat_template']
-        for key in remove_key:
-            del subset_results[key]
-    elif "subset_results" in raw_results.keys():
-        model_id = raw_results["model"]
-        subset_results = raw_results['subset_results']
-        overall = raw_results['accuracy']
-    else:
-        model_id = raw_results["model"]
-        subset_results = raw_results['extra_results']
-        overall = raw_results['accuracy']
-    # print(model_id, overall)
-    # print("\t", subset_results)
-    # results_all.append([model_id, overall, subset_results])
-    results_all.append({'Model': model_id, 'Avg': overall, **subset_results})
-
-# import ipdb; ipdb.set_trace()
-
-TOP = 10
+    raw_results = load_json(result_file)
+    if "leaderboard" in raw_results.keys():
+        model_id = raw_results["model"]
+        subset_results = raw_results["subset"]
+        overall = raw_results["scores"]["accuracy"]
+        remove_key = ["model", "model_type", "chat_template"]
+        for key in remove_key:
+            del subset_results[key]
+    elif "subset_results" in raw_results.keys():
+        model_id = raw_results["model"]
+        subset_results = raw_results["subset_results"]
+        overall = raw_results["accuracy"]
+    else:
+        model_id = raw_results["model"]
+        subset_results = raw_results["extra_results"]
+        overall = raw_results["accuracy"]
+    # print(model_id, overall)
+    # print("\t", subset_results)
+    # results_all.append([model_id, overall, subset_results])
+    results_all.append({"Model": model_id, "Avg": overall, **subset_results})
+
+# import ipdb; ipdb.set_trace()
+
+TOP = 10
 # results_all.sort(key=lambda x: x[1], reverse=True)
 # results_all = results_all[:TOP]
 # print(results_all)
 
 df_results = pd.DataFrame(results_all)
-df_results = df_results.sort_values(by='Avg', ascending=False).reset_index(drop=True)
+df_results = df_results.sort_values(by="Avg", ascending=False).reset_index(drop=True)
 df_results = df_results.head(10).reset_index(drop=True)
 
-df_results.columns = df_results.columns.str.replace('^maple-', '', regex=True)
+df_results.columns = df_results.columns.str.replace("^maple-", "", regex=True)
 df_results = df_results.set_index("Model")
 df_results = df_results * 100
 fig, ax = plt.subplots(1, 1, figsize=(18, 5))
 
-sns.heatmap(df_results, ax=ax, cmap="YlGn", annot=True, annot_kws={"size": 16},
-            fmt=".1f", cbar=False)
+sns.heatmap(df_results, ax=ax, cmap="YlGn", annot=True, annot_kws={"size": 16}, fmt=".1f", cbar=False)
 
 ax.xaxis.set_ticks_position("top")
 ax.tick_params(axis="x", labelrotation=45)
@@ -97,5 +92,3 @@ def load_json(json_file_path):
 
 plt.savefig("plots/maple.pdf", bbox_inches="tight")
 # import ipdb; ipdb.set_trace()
-
-
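For reference, the loop above accepts three result layouts per JSON file (a "leaderboard" dump with per-subset scores, a "subset_results" dict plus a top-level "accuracy", or an "extra_results" dict) and flattens each into one row per model. The sketch below, which is not part of the commit and uses a made-up model id and subset names, shows the middle ("subset_results") layout and the same row-building step, assuming only the keys visible in the script.

# Sketch only: fabricated model id and subset scores, illustrating the
# "subset_results" layout that the loading loop normalizes into rows.
import json
import tempfile
from pathlib import Path

import pandas as pd

sample = {
    "model": "org/example-model",   # hypothetical model id
    "accuracy": 0.78,               # overall score -> "Avg" column
    "subset_results": {             # per-subset accuracies -> heatmap columns
        "maple-chat": 0.81,
        "maple-reasoning": 0.75,
    },
}

tmp_dir = Path(tempfile.mkdtemp())
(tmp_dir / "example.json").write_text(json.dumps(sample))

rows = []
for result_file in tmp_dir.glob("*.json"):
    raw = json.loads(result_file.read_text())
    rows.append({"Model": raw["model"], "Avg": raw["accuracy"], **raw["subset_results"]})

# Roughly the same post-processing as the script: scale to percent, index by model.
df = pd.DataFrame(rows).set_index("Model") * 100
print(df.round(1))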