Commit

code refactor for utileval before and after.
hosseinfani committed Mar 10, 2023
1 parent 015e578 commit 040b34b
Showing 4 changed files with 48 additions and 15 deletions.
@@ -1,5 +1,14 @@
metric,mean.before,mean.after
-map_cut_2,0.21666666666666665,0.15
-map_cut_5,0.30333333333333334,0.2033333333333333
+P_2,0.3,
+P_5,0.24,
+P_10,0.1399999999999999,
+recall_2,0.2166666666666666,
+recall_5,0.4833333333333333,
+recall_10,0.5833333333333333,
+ndcg_cut_2,0.367888315659275,
+ndcg_cut_5,0.4257486373712523,
+ndcg_cut_10,0.4666251168889495,
+map_cut_2,0.2166666666666666,0.15
+map_cut_5,0.3033333333333333,0.2033333333333333
map_cut_10,0.3176190476190476,0.3005555555555556
aucroc,0.6316568047337279,0.6494082840236687
@@ -1,5 +1,14 @@
metric,mean.before,mean.after
-map_cut_2,0.10833333333333332,0.10833333333333332
-map_cut_5,0.21944444444444441,0.175
+P_2,0.3,
+P_5,0.24,
+P_10,0.2,
+recall_2,0.2166666666666666,
+recall_5,0.4333333333333333,
+recall_10,0.8,
+ndcg_cut_2,0.232111684340725,
+ndcg_cut_5,0.333128018266259,
+ndcg_cut_10,0.4926083647102192,
+map_cut_2,0.1083333333333333,0.10833333333333332
+map_cut_5,0.2194444444444444,0.175
map_cut_10,0.3319444444444444,0.3256547619047619
aucroc,0.6442307692307693,0.6479289940828403
@@ -1,5 +1,14 @@
metric,mean.before,mean.after
+P_2,0.1,
+P_5,0.04,
+P_10,0.16,
+recall_2,0.05,
+recall_5,0.05,
+recall_10,0.6333333333333333,
+ndcg_cut_2,0.0773705614469083,
+ndcg_cut_5,0.049260477748146,
+ndcg_cut_10,0.3069278887774607,
map_cut_2,0.025,0.31666666666666665
map_cut_5,0.025,0.31666666666666665
-map_cut_10,0.14384920634920634,0.4896825396825396
-aucroc,0.45192307692307687,0.6923076923076923
+map_cut_10,0.1438492063492063,0.4896825396825396
+aucroc,0.4519230769230768,0.6923076923076923
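
The three metric CSVs above (the *.utileval.csv outputs, per the commit title) share one layout: a row per IR metric, mean.before for the original predictions, and mean.after for the re-ranked ones, left empty where a metric exists only in the cached before-evaluation. A minimal pandas sketch for inspecting such a file, with a hypothetical filename:

import pandas as pd

# Hypothetical path; any of the *.utileval.csv files diffed above has this layout.
df = pd.read_csv('f0.test.pred.det_cons.5.utileval.csv')
df['delta'] = df['mean.after'] - df['mean.before']  # NaN where mean.after is empty
print(df.dropna(subset=['mean.after'])[['metric', 'mean.before', 'mean.after', 'delta']])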
24 changes: 15 additions & 9 deletions src/main.py
@@ -113,11 +113,12 @@ def reranked_preds(teamsvecs_members, splits, reranked_idx, reranked_probs, outp
return sparse_matrix_reranked

@staticmethod
-def eval_utility(teamsvecs_members, reranked_preds, preds, splits, metrics, output, algorithm, k_max) -> None:
+def eval_utility(teamsvecs_members, reranked_preds, fpred, preds, splits, metrics, output, algorithm, k_max) -> None:
"""
Args:
teamsvecs_members: teamsvecs pickle file
reranked_preds: re-ranked teams
+fpred: .pred filename (to see if .pred.eval.mean.csv exists)
preds: loaded predictions from a .pred file
splits: indices of test and train samples
metrics: desired utility metrics
@@ -128,7 +129,11 @@ def eval_utility(teamsvecs_members, reranked_preds, preds, splits, metrics, outp
"""
# predictions = torch.load(self.predictions_address)
y_test = teamsvecs_members[splits['test']]
-_, df_mean_before, _, _ = calculate_metrics(y_test, preds, False, metrics) #although we already have this at test.pred.eval.mean.csv
+try:
+df_mean_before = pd.read_csv(f'{fpred}.eval.mean.csv', names=['mean'], header=0)#we should already have it at f*.test.pred.eval.mean.csv
+except FileNotFoundError:
+_, df_mean_before, _, _ = calculate_metrics(y_test, preds, False, metrics)
+df_mean_before.to_csv(f'{fpred}.eval.mean.csv', columns=['mean'])
df_mean_before.rename(columns={'mean': 'mean.before'}, inplace=True)
_, df_mean_after, _, _ = calculate_metrics(y_test, reranked_preds.toarray(), False, metrics)
df_mean_after.rename(columns={'mean': 'mean.after'}, inplace=True)
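
The eval_utility change above replaces an unconditional calculate_metrics call for the "before" scores with a load-or-compute step: the per-metric means are read from f'{fpred}.eval.mean.csv' when that file already exists and are recomputed and cached only on FileNotFoundError. A standalone sketch of the same pattern, with the metric computation passed in as a callable since calculate_metrics lives elsewhere in the codebase:

import pandas as pd

def mean_metrics_before(fpred, compute_fn):
    # Load the cached per-metric means if present; otherwise compute and cache them.
    fcache = f'{fpred}.eval.mean.csv'
    try:
        df_mean = pd.read_csv(fcache, names=['mean'], header=0)  # metric names become the index
    except FileNotFoundError:
        df_mean = compute_fn()  # e.g. lambda: calculate_metrics(y_test, preds, False, metrics)[1]
        df_mean.to_csv(fcache, columns=['mean'])
    return df_mean.rename(columns={'mean': 'mean.before'})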
@@ -190,24 +195,23 @@ def run(fpred, output, fteamsvecs, fsplits, np_ratio, algorithm='det_cons', k_ma
#not sure os handles file locking for append during parallel run ...
# with open(f'{new_output}.rerank.time', 'a') as file: file.write(f'{elapsed_time} {new_output} {algorithm} {k_max}\n')
with open(f'{output}/rerank.time', 'a') as file: file.write(f'{elapsed_time} {new_output} {algorithm} {k_max}\n')
-try:
-with open(f'{new_output}.{algorithm}.{k_max}.rerank.pred', 'rb') as f: reranked_preds = pickle.load(f)
-except FileNotFoundError: reranked_preds = Reranking.reranked_preds(teamsvecs['member'], splits, reranked_idx, probs, new_output, algorithm, k_max)
-
try:
-print('Loading fairness evaluation results ...')
+print('Loading fairness evaluation results before and after reranking ...')
fairness_eval = pd.read_csv(f'{new_output}.{algorithm}.{k_max}.faireval.csv')
except FileNotFoundError:
print(f'Loading fairness results failed! Evaluating fairness metric {fairness_metrics} ...') #for now, it's hardcoded for 'ndkl'
Reranking.eval_fairness(preds, labels, reranked_idx, ratios, new_output, algorithm, k_max)

try:
+with open(f'{new_output}.{algorithm}.{k_max}.rerank.pred', 'rb') as f: reranked_preds = pickle.load(f)
+except FileNotFoundError:
+reranked_preds = Reranking.reranked_preds(teamsvecs['member'], splits, reranked_idx, probs, new_output, algorithm, k_max)
try:
-print('Loading utility metric evaluation results ...')
+print('Loading utility metric evaluation results before and after reranking ...')
utility_before = pd.read_csv(f'{new_output}.{algorithm}.{k_max}.utileval.csv')
except:
print(f' Loading utility metric results failed! Evaluating utility metric {utility_metrics} ...')
-Reranking.eval_utility(teamsvecs['member'], reranked_preds, preds, splits, utility_metrics, new_output, algorithm, k_max=k_max)
+Reranking.eval_utility(teamsvecs['member'], reranked_preds, fpred, preds, splits, utility_metrics, new_output, algorithm, k_max=k_max)

print(f'Pipeline for the baseline {fpred} completed by {multiprocessing.current_process()}! {time() - st}')
print('#'*100)
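
run() now applies the same cache-first convention to every intermediate artifact: each try block first loads a previously saved .rerank.pred pickle, .faireval.csv, or .utileval.csv and only recomputes on a miss, so re-running the pipeline skips work that already finished. A hedged sketch of that convention for the pickled re-ranked predictions, with the rebuild step passed in as a callable:

import pickle

def load_or_build_rerank(new_output, algorithm, k_max, build_fn):
    # Return the cached re-ranked predictions if the pickle exists; otherwise rebuild them
    # (build_fn stands in for the Reranking.reranked_preds call in the diff above).
    fcache = f'{new_output}.{algorithm}.{k_max}.rerank.pred'
    try:
        with open(fcache, 'rb') as f:
            return pickle.load(f)
    except FileNotFoundError:
        return build_fn()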
@@ -233,13 +237,15 @@ def addargs(parser):

"""
A running example of arguments
+# single *.pred file
python -u main.py
-fteamsvecs ../data/preprocessed/dblp/toy.dblp.v12.json/teamsvecs.pkl
-fsplit ../output/toy.dblp.v12.json/splits.json
-fpred ../output/toy.dblp.v12.json/bnn/t31.s11.m13.l[100].lr0.1.b4096.e20.s1/f0.test.pred
-reranker det_cons
-output ../output/toy.dblp.v12.json/
+# root folder containing many *.pred files.
python -u main.py
-fteamsvecs ../data/preprocessed/dblp/toy.dblp.v12.json/teamsvecs.pkl
-fsplit ../output/toy.dblp.v12.json/splits.json

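The two invocations in the docstring differ only in what -fpred points at: a single f*.test.pred file versus a root folder, in which case main.py presumably discovers every prediction file under that tree and runs the same re-ranking pipeline per file. A hedged sketch of that discovery step, with the glob pattern inferred from the single-file example:

from pathlib import Path

root = Path('../output/toy.dblp.v12.json')        # hypothetical -fpred root folder
for fpred in sorted(root.rglob('f*.test.pred')):  # one prediction file per fold/baseline folder
    print(f'would hand {fpred} to the re-ranking pipeline')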