forked from hibagus/ISCA-2021-Script
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy paths04_pcconflict_merge_hotcrp.py
81 lines (67 loc) · 3.34 KB
/
s04_pcconflict_merge_hotcrp.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# Project: ISCA 2021 Script
# Filename: s04_pcconflict_merge_hotcrp.py
# Date: March 16, 2021
# Author: Bagus Hanindhito (hanindhito[at]bagus[dot]my[dot]id)
# Title: PC Member Co-Authors DBLP Merge to HotCRP
# Description:
## This script merges the co-authors obtained from DBLP that are not yet included
## in the collaborators list in HotCRP.
## Please make sure to back up the HotCRP configuration before uploading the CSV file generated by this script.
#%% Import some libraries that are needed
import pandas as pd
import numpy as np
import tqdm
from s00_function import request_affiliation
#%% Input and output CSV locations
# CSV files this script reads
pc_info_hotcrp_filename = "sample-data/input/isca2021-pcinfo.csv"
pc_conflict_crosscheck_filename = "sample-data/output/isca2021-pcconflict-crosscheck.csv"
# CSV file this script writes
pc_info_hotcrp_update_filename = "sample-data/output/isca2021-pcinfo-update.csv"
# %% Load Input CSV to Pandas Dataframe
# PC info from HotCRP. An empty collaborators cell is replaced by a single
# space so the column always holds strings that can be concatenated.
pc_info_hotcrp_df = pd.read_csv(pc_info_hotcrp_filename)
collaborators_filled = pc_info_hotcrp_df['collaborators'].fillna(' ')
pc_info_hotcrp_df['collaborators'] = collaborators_filled

# PC conflict crosscheck. The three conflict columns are converted from their
# text form back into Python objects while the file is being read.
# NOTE(review): eval() executes whatever expression a cell contains. If these
# cells only ever hold list literals, ast.literal_eval would parse them without
# running code -- confirm against the script that writes this CSV.
evaluated_columns = (
    'conflict_only_dblp_name',
    'conflict_only_dblp_url',
    'conflict_only_hotcrp',
)
pc_conflict_crosscheck_df = pd.read_csv(
    pc_conflict_crosscheck_filename,
    converters={column: eval for column in evaluated_columns},
)
# %% Iterate over PC member
# Index the crosscheck rows by email once, so each PC member costs one
# dictionary lookup instead of two scans over the whole crosscheck table.
# setdefault keeps the first row of a repeated email, which is the row the
# former `.to_list()[0]` selected.
dblp_only_conflicts_by_email = {}
for crosscheck_email, crosscheck_names, crosscheck_urls in zip(
        pc_conflict_crosscheck_df['email'],
        pc_conflict_crosscheck_df['conflict_only_dblp_name'],
        pc_conflict_crosscheck_df['conflict_only_dblp_url']):
    dblp_only_conflicts_by_email.setdefault(
        crosscheck_email, (crosscheck_names, crosscheck_urls))

new_collaborators_list = []
for _, pc_member in tqdm.tqdm(pc_info_hotcrp_df.iterrows(), total=pc_info_hotcrp_df.shape[0]):
    email = pc_member['email']
    if email in dblp_only_conflicts_by_email:
        dblp_only_conflict_name, dblp_only_conflict_url = dblp_only_conflicts_by_email[email]
    else:
        # A PC member without a crosscheck row used to abort the whole run with
        # an IndexError. Report it and leave their collaborators unchanged.
        # tqdm.write prints without breaking the progress bar.
        tqdm.tqdm.write('No crosscheck entry for ' + str(email) + '; collaborators left unchanged')
        dblp_only_conflict_name, dblp_only_conflict_url = [], []

    conflict_strings = []
    for name_dblp, url_dblp in zip(dblp_only_conflict_name, dblp_only_conflict_url):
        # !!! THIS WILL TAKE VERY LONG TIME !!!
        # comment these two lines below if you don't want affiliation information
        affiliation = request_affiliation(url_dblp)
        conflict_string = name_dblp + ' (' + affiliation + ')'
        # uncomment this line below to put generic affiliation to reduce runtime
        #conflict_string = name_dblp + ' (NONE <dblp>)'
        conflict_strings.append(conflict_string)

    # One added collaborator per line; the leading newline separates the
    # additions from the text that is already in the collaborators cell.
    if conflict_strings:
        new_conflict_strings = '\n' + '\n'.join(conflict_strings)
    else:
        new_conflict_strings = ''

    new_collaborators_list.append({
        "email": email,
        "new_collaborators": pc_member['collaborators'] + new_conflict_strings,
    })

# Passing columns= keeps the frame well-formed even when there are no PC members.
new_collaborators_df = pd.DataFrame(new_collaborators_list, columns=['email', 'new_collaborators'])
# The entries were appended in row order, so assign them by position; this does
# not depend on the index labels of the two frames lining up.
pc_info_hotcrp_df['collaborators'] = [
    entry['new_collaborators'] for entry in new_collaborators_list
]
# %% Dump to CSV and Post-Processing
#print(pc_info_hotcrp_df.dtypes)
pc_info_hotcrp_df.to_csv(pc_info_hotcrp_update_filename, index=False, encoding='utf-8')

# Post-processing to match the CSV header: the first line of the generated file
# is replaced by the first line of the original file, copied verbatim.
# The files are opened as UTF-8 explicitly. pandas reads and writes UTF-8 by
# default, while open() without an encoding uses the locale's encoding, which
# can fail on or corrupt non-ASCII names on systems whose locale is not UTF-8.
with open(pc_info_hotcrp_filename, encoding='utf-8') as orig_file:
    # Only the header line is needed, so the rest of the file is not read.
    header_orig = orig_file.readline()

with open(pc_info_hotcrp_update_filename, encoding='utf-8') as target_file:
    lines_targ = target_file.readlines()

lines_targ[0] = header_orig

with open(pc_info_hotcrp_update_filename, "w", encoding='utf-8') as target_file:
    target_file.writelines(lines_targ)
# %%