Skip to content

Commit c0c56c3

Browse files
committed
new table
1 parent 5351740 commit c0c56c3

File tree

3 files changed

+115
-45
lines changed

3 files changed

+115
-45
lines changed

algorithms/lib/constants.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ class usg_constants:
2323
METRICS_PRETTY = {'precision':'Prec',
2424
'recall':'Rec',
2525
'ild': 'ILD',
26-
'gc': 'GC',
26+
'gc': 'Cov',
2727
'pr':'PRg',
2828
'epc':'EPC',
2929
'ndcg':'NDCG',
@@ -40,7 +40,8 @@ class usg_constants:
4040
# "geocat": "DisCovER",
4141
"geocat": "DisCovER",
4242
"persongeocat": "PersonDisCovER",
43-
"geodiv": "Geo-Div(PR)",
43+
# "geodiv": "Geo-Div(PR)",
44+
"geodiv": "GeoDiv",
4445
"ld": "LD",
4546
"binomial": "Binom",
4647
"pm2": "PM2",

algorithms/lib/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ class StatisticResult(Enum):
2929
def statistic_test(x, y, p):
3030
# try:
3131
statistic, pvalue = scipy.stats.ttest_ind(x, y)
32-
y_mean = np.mean(y)
3332
x_mean = np.mean(x)
33+
y_mean = np.mean(y)
3434
if pvalue < p:
3535
if x_mean > y_mean:
3636
return StatisticResult.GAIN

algorithms/print_table.py

Lines changed: 111 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,38 @@
11

2+
import re
23
from numpy.core import numeric
4+
from numpy.core.arrayprint import printoptions
35
from lib.utils import StatisticResult, statistic_test
46
import scipy.stats
7+
# import argparse
58
from collections import defaultdict
69
import pandas as pd
710
import sys, os
811
from typing import final
912
sys.path.insert(0, os.path.abspath('lib'))
1013
from lib.RecRunner import NameType, RecRunner
11-
# rr=RecRunner("usg","geocat","madison",80,20,"../data")
12-
# print(rr.get_base_rec_file_name())
13-
# print(rr.get_final_rec_file_name())
14-
15-
# rr.load_base()
16-
# rr.run_base_recommender()
17-
# rr.run_final_recommender()
1814
import inquirer
1915
from lib.constants import METRICS_PRETTY, RECS_PRETTY, experiment_constants, CITIES_PRETTY
2016

17+
LATEX_HEADER = r"""\documentclass{article}
18+
\usepackage{graphicx}
19+
\usepackage[utf8]{inputenc}
20+
\usepackage{xcolor}
21+
\usepackage{amsmath}
22+
\usepackage{amssymb}
23+
\usepackage{underscore}
24+
\usepackage[margin=0.5in]{geometry}
25+
\usepackage{booktabs}
26+
\begin{document}
27+
"""
28+
29+
LATEX_FOOT = r"""
30+
\end{document}"""
31+
32+
# argparser = argparse.ArgumentParser()
33+
# argparser.add_argument('-f')
34+
# args= argparser.parse_args()
35+
2136
# questions = [
2237
# inquirer.Checkbox('city',
2338
# message="City to use",
@@ -40,94 +55,148 @@
4055

4156
# cities = ['lasvegas', 'phoenix']
4257
cities = ['lasvegas']
43-
base_recs = ['geomf', 'usg']
44-
# base_recs = ['geomf']
45-
final_recs = ['geocat']
46-
# print(cities)
47-
# print(base_recs)
48-
# print(final_recs)
58+
# base_recs = [ 'usg','geosoca','geomf',]
59+
base_recs = ['geomf']
60+
final_recs = ['geodiv']
4961
final_rec_list_size = 20
5062
rr=RecRunner(base_recs[0],final_recs[0],cities[0],80,final_rec_list_size,"../data")
5163

52-
# rr.print_latex_vert_cities_metrics_table(cities=city)
5364
metrics_k = experiment_constants.METRICS_K
5465
final_recs_metrics= defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict())))
5566
base_recs_metrics= defaultdict(lambda: defaultdict(lambda: defaultdict()))
5667
latex_table = ""
57-
num_metrics = None
68+
main_metrics = ['precision','recall','gc','ild','pr','epc']
69+
def get_metrics_renamed_order(METRICS_PRETTY_k):
    """Return the display names of ``main_metrics`` in table column order.

    ``METRICS_PRETTY_k`` is indexed by raw metric key (the caller passes the
    mapping built by ``get_metrics_pretty_k``). The result holds one entry
    per element of the module-level ``main_metrics`` list, in that order.
    """
    ordered_names = []
    for metric in main_metrics:
        ordered_names.append(METRICS_PRETTY_k[metric])
    return ordered_names
72+
def get_metrics_pretty_k(metric_k: int) -> dict:
    """Build a copy of ``METRICS_PRETTY`` with ``@<metric_k>`` appended to every display name."""
    suffix = f'@{metric_k}'
    pretty_at_k = {}
    for raw_name, pretty_name in METRICS_PRETTY.items():
        pretty_at_k[raw_name] = pretty_name + suffix
    return pretty_at_k
74+
def df_format(df_unf: pd.DataFrame, metric_k):
    """Keep only the ``main_metrics`` columns and give them their ``<pretty>@<metric_k>`` names.

    Columns are selected first (in ``main_metrics`` order) and then renamed
    with the mapping produced by ``get_metrics_pretty_k(metric_k)``. A new
    DataFrame is returned.
    """
    selected = df_unf[main_metrics]
    return selected.rename(columns=get_metrics_pretty_k(metric_k))
80+
81+
def get_base_name(base_name):
    """Look up the display name of a base recommender in ``RECS_PRETTY``."""
    pretty_name = RECS_PRETTY[base_name]
    return pretty_name
83+
84+
def get_final_name(base_name, final_name):
    """Return the row label ``<base display name>+<final display name>``.

    Both display names come from ``RECS_PRETTY``; the base part goes through
    ``get_base_name``.
    """
    base_label = get_base_name(base_name)
    final_label = RECS_PRETTY[final_name]
    return base_label + '+' + final_label
86+
5887
for city in cities:
5988
for base_rec in base_recs:
6089
rr.city = city
6190
rr.base_rec = base_rec
6291
metrics = rr.load_metrics(
6392
base=True, name_type=NameType.PRETTY, METRICS_KS=metrics_k)
6493
for metric_k in metrics_k:
65-
METRICS_PRETTY_k = {k:v+f'@{metric_k}' for k,v in METRICS_PRETTY.items()}
6694
base_recs_metrics[city][base_rec][metric_k] = pd.DataFrame(
67-
metrics[metric_k]).rename(columns=METRICS_PRETTY_k)
95+
metrics[metric_k])
96+
base_recs_metrics[city][base_rec][metric_k]=df_format(base_recs_metrics[city][base_rec][metric_k],metric_k)
97+
# base_recs_metrics[city][base_rec][metric_k]=base_recs_metrics[city][base_rec][metric_k].rename(columns=METRICS_PRETTY_k)
6898
# rr.final_rec_list_size = final_rec_list_size
6999

70100
for final_rec in final_recs:
71101
rr.final_rec = final_rec
72102
metrics = rr.load_metrics(
73103
base=False, name_type=NameType.PRETTY, METRICS_KS=metrics_k)
74104
for metric_k in metrics_k:
75-
METRICS_PRETTY_k = {k: v+f'@{metric_k}' for k, v in METRICS_PRETTY.items()}
76105
final_recs_metrics[city][base_rec][final_rec][metric_k] = pd.DataFrame(
77-
metrics[metric_k]).rename(columns=METRICS_PRETTY_k)
78-
106+
metrics[metric_k])
107+
final_recs_metrics[city][base_rec][final_rec][metric_k] =df_format(final_recs_metrics[city][base_rec][final_rec][metric_k],metric_k)
79108
num_metrics = 6
109+
num_columns= num_metrics+1
80110
latex_table_header= """
81111
\\begin{{tabular}}{{{}}}
82-
""".format('c'*num_metrics)
112+
""".format('c'*(num_columns))
83113
latex_table_footer= r"""
84114
\end{tabular}
85115
"""
86-
for city in cities:
87-
latex_table += r'\toprule\n'
88-
latex_table += '\\multicolumn{{{}}}{{l}}{}\\\\\n'.format(num_metrics,CITIES_PRETTY[city])
89-
latex_table += r'\bottomrule\n'
116+
117+
top_count = defaultdict(lambda:defaultdict(int))
118+
for count1, city in enumerate(cities):
119+
if count1 == 0:
120+
latex_table += '\\toprule\n'
121+
latex_table += '\\multicolumn{{{}}}{{l}}{{{}}}\\\\\n'.format((num_columns),CITIES_PRETTY[city])
122+
latex_table += '\\bottomrule\n'
90123
for metric_k in metrics_k:
91124
dfs = []
125+
names_recs_in_order = []
92126
for base_rec in base_recs:
93127
current_metrics = {}
94-
current_metrics[RECS_PRETTY[base_rec]] = base_recs_metrics[city][base_rec][metric_k].drop(columns='user_id')
128+
current_metrics[get_base_name(base_rec)] = base_recs_metrics[city][base_rec][metric_k]
129+
names_recs_in_order.append(get_base_name(base_rec))
95130
for final_rec in final_recs:
96-
current_metrics[RECS_PRETTY[base_rec]+'+'+RECS_PRETTY[final_rec]] = final_recs_metrics[city][base_rec][final_rec][metric_k].drop(columns='user_id')
131+
current_metrics[get_final_name(base_rec,final_rec)] = final_recs_metrics[city][base_rec][final_rec][metric_k]
132+
names_recs_in_order.append(get_final_name(base_rec,final_rec))
97133
df = pd.concat(current_metrics, axis=1)
134+
# print(df)
98135
dfs.append(df)
99-
df = pd.concat(dfs)
136+
df = pd.concat(dfs,axis=1)
137+
# print(df)
100138

101139
df_reordered = df.reorder_levels([1,0],axis=1)
140+
# print(df_reordered)
141+
df_reordered_means = df_reordered.mean()
102142
top_methods = df_reordered.mean().reset_index().set_index('level_1').groupby('level_0').idxmax().to_dict()[0]
103143
df_top2_methods = df_reordered.copy()
104-
105-
print(df_top2_methods)
106144
for k, v in top_methods.items():
107-
print(k,v)
108145
df_top2_methods=df_top2_methods.drop((k,v),axis=1)
109146
top2_methods = df_top2_methods.mean().reset_index().set_index('level_1').groupby('level_0').idxmax().to_dict()[0]
110-
print(top_methods)
111-
print(top2_methods)
112147
highlight_elements = defaultdict(list)
148+
metrics_og_name = {v: k for k,v in get_metrics_pretty_k(metric_k).items()}
149+
# print(metrics_og_name)
150+
# Map every pretty row label back to its original identifier: a base
# recommender maps to its key, a base+final combination to a (base, final)
# tuple. The map must cover ALL base recommenders, not just the value that
# `base_rec` kept after the loop above finished; otherwise the top_count
# lookup below raises KeyError as soon as `base_recs` has more than one
# entry and a highlighted method belongs to an earlier one.
names_recs_to_og = {get_base_name(b): b for b in base_recs}
names_recs_to_og.update(
    {get_final_name(b, f): (b, f) for b in base_recs for f in final_recs}
)
113154
for k,v in top_methods.items():
114155
top1_values = df_reordered[k,v]
115156
v_top2 = top2_methods[k]
116157
top2_values =df_reordered[k,v_top2]
117158
statistic_result= statistic_test(top1_values,top2_values,0.05)
118-
if statistic_test == StatisticResult.GAIN:
159+
if statistic_result == StatisticResult.GAIN:
119160
highlight_elements[k].extend([v])
120-
elif statistic_test == StatisticResult.TIE:
121-
highlight_elements[k].extend([v,top2_values])
122-
else:
123-
highlight_elements[k].extend([top2_values])
161+
elif statistic_result == StatisticResult.TIE:
162+
highlight_elements[k].extend([v,v_top2])
163+
elif statistic_result == StatisticResult.LOSS:
164+
highlight_elements[k].extend([v_top2])
165+
for hige in highlight_elements[k]:
166+
top_count[metrics_og_name[k]][names_recs_to_og[hige]] += 1
124167

125-
table_df_result = df_reordered.mean().unstack(level=0)
126-
table_df_result_latex = table_df_result.to_latex(header=False)
168+
169+
df_reordered_means=df_reordered_means.map(lambda x: f'{x:.4f}')
170+
for metric, methods in highlight_elements.items():
171+
for method in methods:
172+
df_reordered_means.at[metric,method] = '\\textbf{{{}}}'.format(df_reordered_means.at[metric,method])
173+
table_df_result : pd.DataFrame = df_reordered_means.unstack(level=0)
174+
table_df_result=table_df_result[get_metrics_renamed_order(get_metrics_pretty_k(metric_k))]
175+
table_df_result=table_df_result.reindex(names_recs_in_order)
176+
177+
table_df_result_latex = table_df_result.to_latex(header=True,escape=False)
178+
# Replace the literal `{}` in the generated header (presumably the empty
# index-header cell; confirm against the pandas output) with a column title.
# The pattern is a raw string: '\{' and '\}' are invalid escapes in a normal
# string literal, which newer Python versions warn about.
table_df_result_latex = re.sub(r'\{\}', 'Algorithm', table_df_result_latex)
127179
table_df_result_latex = table_df_result_latex.split('\n')[:-2][2:]
128180
table_df_result_latex = '\n'.join(table_df_result_latex)
129181
latex_table+=table_df_result_latex+'\n'
130182
# raise SystemError
131183

132-
latex_table = latex_table_header+latex_table+latex_table_footer
133-
print(latex_table)
184+
latex_table = LATEX_HEADER+latex_table_header+latex_table+latex_table_footer+LATEX_FOOT
185+
with open('../data/result/util/main_benchmark_table.tex','w') as f:
186+
f.write(latex_table)
187+
188+
189+
table_top_count = pd.DataFrame.from_dict(top_count).fillna(0)
190+
table_top_count=table_top_count[main_metrics]
191+
table_top_count.columns = [METRICS_PRETTY[i] for i in table_top_count.columns]
192+
recs_order = []
193+
for base_rec in base_recs:
194+
recs_order.append(base_rec)
195+
for final_rec in final_recs:
196+
recs_order.append((base_rec,final_rec))
197+
198+
table_top_count = table_top_count.reindex(recs_order)
199+
# for i in table_top_count.index:
200+
201+
# table_df_result=table_df_result.reindex(names_recs_in_order)
202+
# table_df_result=table_df_result.reindex(names_recs_in_order)

0 commit comments

Comments
 (0)