1
1
2
+ import re
2
3
from numpy .core import numeric
4
+ from numpy .core .arrayprint import printoptions
3
5
from lib .utils import StatisticResult , statistic_test
4
6
import scipy .stats
7
+ # import argparse
5
8
from collections import defaultdict
6
9
import pandas as pd
7
10
import sys , os
8
11
from typing import final
9
12
sys .path .insert (0 , os .path .abspath ('lib' ))
10
13
from lib .RecRunner import NameType , RecRunner
11
- # rr=RecRunner("usg","geocat","madison",80,20,"../data")
12
- # print(rr.get_base_rec_file_name())
13
- # print(rr.get_final_rec_file_name())
14
-
15
- # rr.load_base()
16
- # rr.run_base_recommender()
17
- # rr.run_final_recommender()
18
14
import inquirer
19
15
from lib .constants import METRICS_PRETTY , RECS_PRETTY , experiment_constants , CITIES_PRETTY
20
16
17
# Preamble lines of the standalone LaTeX document that wraps the generated
# table; joined with newlines (plus a trailing newline) to form LATEX_HEADER.
_LATEX_PREAMBLE_LINES = (
    r"\documentclass{article}",
    r"\usepackage{graphicx}",
    r"\usepackage[utf8]{inputenc}",
    r"\usepackage{xcolor}",
    r"\usepackage{amsmath}",
    r"\usepackage{amssymb}",
    r"\usepackage{underscore}",
    r"\usepackage[margin=0.5in]{geometry}",
    r"\usepackage{booktabs}",
    r"\begin{document}",
)

# Text written before the table body.
LATEX_HEADER = "\n".join(_LATEX_PREAMBLE_LINES) + "\n"

# Text written after the table body; closes the document opened above.
LATEX_FOOT = "\n" + r"\end{document}"
31
+
32
+ # argparser = argparse.ArgumentParser()
33
+ # argparser.add_argument('-f')
34
+ # args= argparser.parse_args()
35
+
21
36
# questions = [
22
37
# inquirer.Checkbox('city',
23
38
# message="City to use",
40
55
41
56
# cities = ['lasvegas', 'phoenix']
42
57
# --- Experiment selection ---------------------------------------------------
# Every (city, base recommender, final recommender) combination listed here
# is loaded and compared further down in the script.
cities = ['lasvegas']
# e.g. ['usg', 'geosoca', 'geomf'] to compare several base recommenders
base_recs = ['geomf']
final_recs = ['geodiv']
final_rec_list_size = 20

# A single runner is created from the first entries; its city / base_rec /
# final_rec attributes are reassigned before each metrics load below.
# NOTE(review): 80 is presumably the base recommendation list size -- confirm
# against the RecRunner constructor.
rr = RecRunner(
    base_recs[0],
    final_recs[0],
    cities[0],
    80,
    final_rec_list_size,
    "../data",
)

# Cut-off values (the k in metric@k) for which metrics are loaded.
metrics_k = experiment_constants.METRICS_K

# Loaded metric tables, filled by the loading loops:
#   final_recs_metrics[city][base_rec][final_rec][metric_k] -> DataFrame
#   base_recs_metrics[city][base_rec][metric_k]             -> DataFrame
# The innermost level is a factory-less defaultdict, so a missing metric_k
# still raises KeyError.
final_recs_metrics = defaultdict(
    lambda: defaultdict(
        lambda: defaultdict(
            lambda: defaultdict()
        )
    )
)
base_recs_metrics = defaultdict(
    lambda: defaultdict(
        lambda: defaultdict()
    )
)

# Accumulates the body of the LaTeX table.
latex_table = ""

# Metric keys reported in the table, in column order.
main_metrics = ['precision', 'recall', 'gc', 'ild', 'pr', 'epc']
69
def get_metrics_renamed_order(METRICS_PRETTY_k):
    """Return the display names of the main metrics in column order.

    Args:
        METRICS_PRETTY_k: Mapping from metric key to display name; it must
            contain every key listed in ``main_metrics``.

    Returns:
        A list with one display name per entry of ``main_metrics``, in the
        same order.

    Raises:
        KeyError: If a key from ``main_metrics`` is missing in the mapping.
    """
    ordered_names = []
    for metric_key in main_metrics:
        ordered_names.append(METRICS_PRETTY_k[metric_key])
    return ordered_names
72
def get_metrics_pretty_k(metric_k: int) -> dict:
    """Return the pretty metric names with an ``@<metric_k>`` suffix.

    Args:
        metric_k: Cut-off value appended to every display name.

    Returns:
        A new dict with the keys of ``METRICS_PRETTY`` whose values are the
        original display names followed by ``@`` and ``metric_k``.
    """
    # The suffix has no surrounding whitespace, so the names are usable
    # verbatim as DataFrame column labels and LaTeX table headers.
    suffix = f'@{metric_k}'
    return {key: pretty + suffix for key, pretty in METRICS_PRETTY.items()}
74
def df_format(df_unf: pd.DataFrame, metric_k):
    """Keep only the main metric columns and give them display names.

    Args:
        df_unf: Metrics table whose columns include every key in
            ``main_metrics``.
        metric_k: Cut-off value used to build the ``name@k`` column labels.

    Returns:
        A DataFrame restricted to the ``main_metrics`` columns (in that
        order), renamed through ``get_metrics_pretty_k(metric_k)``.
    """
    pretty_names = get_metrics_pretty_k(metric_k)
    selected = df_unf[main_metrics]
    return selected.rename(columns=pretty_names)
80
+
81
def get_base_name(base_name):
    """Look up the display name of a base recommender in ``RECS_PRETTY``.

    Raises:
        KeyError: If ``base_name`` is not a key of ``RECS_PRETTY``.
    """
    pretty_name = RECS_PRETTY[base_name]
    return pretty_name
83
+
84
def get_final_name(base_name, final_name):
    """Build the display name of a base + final recommender combination.

    Returns:
        The base recommender's display name and the final recommender's
        ``RECS_PRETTY`` entry, joined by ``+``.
    """
    name_parts = (get_base_name(base_name), RECS_PRETTY[final_name])
    return '+'.join(name_parts)
86
+
58
87
# Load the per-k metric tables of every base recommender and of every
# base + final combination, keeping only the main metrics under their
# display names (see df_format).
for city in cities:
    for base_rec in base_recs:
        # Point the shared runner at the current city / base recommender.
        rr.city = city
        rr.base_rec = base_rec

        metrics = rr.load_metrics(
            base=True,
            name_type=NameType.PRETTY,
            METRICS_KS=metrics_k,
        )
        base_tables = base_recs_metrics[city][base_rec]
        for metric_k in metrics_k:
            base_tables[metric_k] = df_format(
                pd.DataFrame(metrics[metric_k]), metric_k)

        # Final recommenders are loaded for the base recommender selected
        # above, so this loop stays nested inside the base_rec loop.
        for final_rec in final_recs:
            rr.final_rec = final_rec
            metrics = rr.load_metrics(
                base=False,
                name_type=NameType.PRETTY,
                METRICS_KS=metrics_k,
            )
            final_tables = final_recs_metrics[city][base_rec][final_rec]
            for metric_k in metrics_k:
                final_tables[metric_k] = df_format(
                    pd.DataFrame(metrics[metric_k]), metric_k)
79
108
# The table has one column per reported metric plus a leading column with
# the algorithm name. Deriving the count from main_metrics keeps the column
# specification in sync with the columns actually selected for the table.
num_metrics = len(main_metrics)
num_columns = num_metrics + 1

# Opening of the tabular environment: one centred ('c') column per table
# column. Braces are doubled because the template goes through str.format.
latex_table_header = """
\\begin{{tabular}}{{{}}}
""".format('c' * num_columns)

# Closing of the tabular environment.
latex_table_footer = r"""
\end{tabular}
"""
86
- for city in cities :
87
- latex_table += r'\toprule\n'
88
- latex_table += '\\ multicolumn{{{}}}{{l}}{}\\ \\ \n ' .format (num_metrics ,CITIES_PRETTY [city ])
89
- latex_table += r'\bottomrule\n'
116
+
117
# top_count[metric_key][recommender] counts, over all cities and cut-offs,
# how often a recommender is highlighted for a metric. Base recommenders are
# keyed by their name, combinations by a (base_rec, final_rec) tuple.
top_count = defaultdict(lambda: defaultdict(int))
for count1, city in enumerate(cities):
    # The top rule is emitted once, before the first city section.
    if count1 == 0:
        latex_table += '\\toprule\n'
    # City title row spanning every column, followed by a rule.
    latex_table += '\\multicolumn{{{}}}{{l}}{{{}}}\\\\\n'.format(
        num_columns, CITIES_PRETTY[city])
    latex_table += '\\bottomrule\n'

    for metric_k in metrics_k:
        dfs = []
        names_recs_in_order = []
        # Display name -> original identifier, for every method in the
        # table. It is filled inside the loops so that methods of *all* base
        # recommenders are covered, not only those of the last one.
        names_recs_to_og = {}
        for base_rec in base_recs:
            base_name = get_base_name(base_rec)
            current_metrics = {
                base_name: base_recs_metrics[city][base_rec][metric_k],
            }
            names_recs_in_order.append(base_name)
            names_recs_to_og[base_name] = base_rec
            for final_rec in final_recs:
                final_name = get_final_name(base_rec, final_rec)
                current_metrics[final_name] = (
                    final_recs_metrics[city][base_rec][final_rec][metric_k])
                names_recs_in_order.append(final_name)
                names_recs_to_og[final_name] = (base_rec, final_rec)
            # Columns become a (method, metric) MultiIndex.
            dfs.append(pd.concat(current_metrics, axis=1))
        df = pd.concat(dfs, axis=1)

        # Swap the column levels to (metric, method) and average each column.
        df_reordered = df.reorder_levels([1, 0], axis=1)
        df_reordered_means = df_reordered.mean()

        # {metric: method with the highest mean}
        top_methods = (
            df_reordered_means.reset_index()
            .set_index('level_1')
            .groupby('level_0')
            .idxmax()
            .to_dict()[0]
        )
        # Drop each metric's best column, then repeat the search to obtain
        # the runner-up per metric.
        df_top2_methods = df_reordered.copy()
        for k, v in top_methods.items():
            df_top2_methods = df_top2_methods.drop((k, v), axis=1)
        top2_methods = (
            df_top2_methods.mean()
            .reset_index()
            .set_index('level_1')
            .groupby('level_0')
            .idxmax()
            .to_dict()[0]
        )

        highlight_elements = defaultdict(list)
        # Display metric name (with the @k suffix) -> original metric key.
        metrics_og_name = {
            v: k for k, v in get_metrics_pretty_k(metric_k).items()}
        for k, v in top_methods.items():
            top1_values = df_reordered[k, v]
            v_top2 = top2_methods[k]
            top2_values = df_reordered[k, v_top2]
            # NOTE(review): presumably a significance test at level 0.05
            # comparing best vs. runner-up -- confirm in lib.utils.
            statistic_result = statistic_test(top1_values, top2_values, 0.05)
            if statistic_result == StatisticResult.GAIN:
                highlight_elements[k].extend([v])
            elif statistic_result == StatisticResult.TIE:
                highlight_elements[k].extend([v, v_top2])
            elif statistic_result == StatisticResult.LOSS:
                highlight_elements[k].extend([v_top2])
            for hige in highlight_elements[k]:
                top_count[metrics_og_name[k]][names_recs_to_og[hige]] += 1

        # Format the means as text and set the highlighted cells in bold.
        df_reordered_means = df_reordered_means.map(lambda x: f'{x:.4f}')
        for metric, methods in highlight_elements.items():
            for method in methods:
                df_reordered_means.at[metric, method] = '\\textbf{{{}}}'.format(
                    df_reordered_means.at[metric, method])

        # Rows: methods in insertion order; columns: metrics in
        # main_metrics order.
        table_df_result: pd.DataFrame = df_reordered_means.unstack(level=0)
        table_df_result = table_df_result[
            get_metrics_renamed_order(get_metrics_pretty_k(metric_k))]
        table_df_result = table_df_result.reindex(names_recs_in_order)

        # escape=False keeps the \textbf markup intact.
        table_df_result_latex = table_df_result.to_latex(
            header=True, escape=False)
        # Label the empty header cell of the index column.
        table_df_result_latex = re.sub(
            r'\{\}', 'Algorithm', table_df_result_latex)
        # Drop the first two and the last two lines of the to_latex output;
        # the enclosing tabular environment is added once for the whole
        # document.
        table_df_result_latex = table_df_result_latex.split('\n')[:-2][2:]
        table_df_result_latex = '\n'.join(table_df_result_latex)
        latex_table += table_df_result_latex + '\n'
130
182
# raise SystemError
131
183
132
- latex_table = latex_table_header + latex_table + latex_table_footer
133
- print (latex_table )
184
# Wrap the table body in the tabular environment and the standalone document
# preamble/footer, then write it out. The preamble declares utf8 input
# encoding, so the file is written as UTF-8 regardless of the platform
# default.
latex_table = (
    LATEX_HEADER + latex_table_header + latex_table
    + latex_table_footer + LATEX_FOOT
)
with open('../data/result/util/main_benchmark_table.tex', 'w',
          encoding='utf-8') as f:
    f.write(latex_table)


# Summary of how often each recommender was highlighted per metric:
# rows are recommenders, columns are metrics.
table_top_count = pd.DataFrame.from_dict(top_count).fillna(0)
# Select the metrics in main_metrics order; a metric that never had a
# highlighted recommender gets a zero column instead of raising KeyError.
table_top_count = table_top_count.reindex(columns=main_metrics, fill_value=0)
table_top_count.columns = [METRICS_PRETTY[i] for i in table_top_count.columns]

# Row order: each base recommender followed by its combinations with the
# final recommenders, using the same identifiers as the top_count keys.
recs_order = []
for base_rec in base_recs:
    recs_order.append(base_rec)
    for final_rec in final_recs:
        recs_order.append((base_rec, final_rec))

table_top_count = table_top_count.reindex(recs_order)
199
+ # for i in table_top_count.index:
200
+
201
+ # table_df_result=table_df_result.reindex(names_recs_in_order)
202
+ # table_df_result=table_df_result.reindex(names_recs_in_order)
0 commit comments