Skip to content

Commit a64beff

Browse files
author
luli2949
committed
added dummy labels to user study candidate labels
1 parent b941210 commit a64beff

File tree

1 file changed

+27
-4
lines changed

1 file changed

+27
-4
lines changed

cartograph/user_study_label.py

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,50 @@
11
import pandas as pd
2+
import random
23

34
"""
4-
This script creates the user study candidate labels for each map.
5+
This script creates the user study candidate labels for each map.
56
"""
67

78

def union_label_candidates(path, k, project, num_countries=7, dummies_per_country=2):
    """Write the user-study candidate labels for one experiment.

    Real candidates are the union, per country, of the top ``k`` labels
    ranked by the "simple" score (``tf * idf``) and by the "complex" score
    (``tf * idf * sum``).  Randomly chosen dummy labels are added for each
    country and flagged with ``isDummy == True``; real labels have no value
    in that column.

    Args:
        path: Experiment directory.  ``check.csv`` is read from it and
            ``candidate_labels.csv`` is written to it.
        k: Number of top labels to keep per country for each score.
        project: Project name; dummy labels are read from
            ``./data/<project>/dummy_labels.csv`` (relative to the current
            working directory).  Labels are taken from the second column.
        num_countries: Number of countries (numbered from 0) that receive
            dummy labels.
        dummies_per_country: Number of distinct dummy labels per country.

    Returns:
        The DataFrame of real candidate labels only (without dummy rows),
        with columns ``country``, ``new_name``, ``simple``, ``complex``.

    Raises:
        ValueError: If the dummy file has fewer unique labels than
            ``num_countries * dummies_per_country``.
    """
    # NOTE(review): two dummies per country matches the original nested
    # ``range(7)`` / ``range(2)`` loops -- confirm this is the intended count.
    dummy_source = pd.read_csv("./data/" + project + "/dummy_labels.csv")
    dummy_pool = dummy_source.iloc[:, 1].drop_duplicates().tolist()

    needed = num_countries * dummies_per_country
    if len(dummy_pool) < needed:
        raise ValueError(
            "need %d unique dummy labels but only %d are available"
            % (needed, len(dummy_pool))
        )

    # Sampling without replacement guarantees no dummy label is reused and
    # cannot spin forever the way rejection sampling can.
    chosen = random.sample(dummy_pool, needed)
    dummy_columns = ['country', 'new_name', 'simple', 'complex', 'isDummy']
    dummy_rows = [
        {
            'country': position // dummies_per_country,
            'new_name': label,
            'simple': 0,
            'complex': 0,
            'isDummy': True,
        }
        for position, label in enumerate(chosen)
    ]
    dummy = pd.DataFrame(dummy_rows, columns=dummy_columns)

    all_labels = pd.read_csv(path + "/check.csv")
    all_labels['complex'] = all_labels['tf'] * all_labels['idf'] * all_labels['sum']
    all_labels['simple'] = all_labels['tf'] * all_labels['idf']
    all_labels = all_labels[['country', 'new_name', 'simple', 'complex']].drop_duplicates()

    # ``k`` must remain the caller's value here; it is deliberately not
    # reused as a loop variable anywhere above.
    simple_top = (all_labels.sort_values('simple', ascending=False)
                  .groupby("country").head(k).sort_values('country'))
    complex_top = (all_labels.sort_values('complex', ascending=False)
                   .groupby("country").head(k).sort_values('country'))
    union = (pd.concat([simple_top, complex_top])
             .drop_duplicates()
             .sort_values('country')
             .reset_index()
             .drop(columns=['index']))

    with_dummies = pd.concat([union, dummy])[dummy_columns]
    with_dummies = with_dummies.astype({"country": int})
    with_dummies.to_csv(path + "/candidate_labels.csv")
    return union
1834

if __name__ == '__main__':
    import argparse

    # Command-line entry point:
    #   --experiment      directory containing check.csv (output is written there)
    #   --num_top_labels  number of top labels kept per country for each score
    #   --project         project name used to locate ./data/<project>/dummy_labels.csv
    parser = argparse.ArgumentParser(description='path to the check.csv file')
    parser.add_argument('--experiment', required=True)
    parser.add_argument('--num_top_labels', required=True, type=int)
    parser.add_argument('--project', required=True)

    args = parser.parse_args()

    union_label_candidates(args.experiment, args.num_top_labels, args.project)

0 commit comments

Comments
 (0)