|
1 | 1 | import pandas as pd
|
| 2 | +import random |
2 | 3 |
|
3 | 4 | """
|
4 |
| -This script creates the user study candidate labels for each map. |
| 5 | +This script creates the user study candidate labels for each map. |
5 | 6 | """
|
6 | 7 |
|
7 | 8 |
|
8 |
def _sample_dummy_rows(project, num_countries, dummies_per_country):
    """Draw distinct dummy labels and return them as candidate-label rows.

    Reads ``./data/<project>/dummy_labels.csv`` (relative to the current
    working directory) and takes the label text from its second column.

    :param project: name of the sub-directory of ``./data`` to read from.
    :param num_countries: number of country ids (``0 .. num_countries - 1``)
        that receive dummy labels.
    :param dummies_per_country: number of dummy labels given to each id.
    :return: DataFrame with columns ``country``, ``new_name``, ``simple``,
        ``complex`` and ``isDummy``; the scores are 0 and ``isDummy`` is True.
    :raises ValueError: if the file holds fewer distinct labels than needed.
    """
    dum = pd.read_csv("./data/" + project + "/dummy_labels.csv")
    # De-duplicate while keeping file order so no dummy label is used twice.
    pool = list(dict.fromkeys(dum.iloc[:, 1].tolist()))
    needed = num_countries * dummies_per_country
    if needed > len(pool):
        # Re-drawing until an unused label turns up would never terminate here.
        raise ValueError(
            "need %d distinct dummy labels but only %d are available"
            % (needed, len(pool)))
    chosen = random.sample(pool, needed)
    rows = [
        {'country': idx // dummies_per_country, 'new_name': label,
         'simple': 0, 'complex': 0, 'isDummy': True}
        for idx, label in enumerate(chosen)
    ]
    # Explicit columns keep the frame well-formed even when no rows are drawn.
    return pd.DataFrame(
        rows, columns=['country', 'new_name', 'simple', 'complex', 'isDummy'])


def union_label_candidates(path, k, project, num_countries=7, dummies_per_country=2):
    """Write the candidate labels (top-scoring plus dummy ones) for a user study.

    Reads ``<path>/check.csv``, scores every label two ways
    (``simple = tf * idf`` and ``complex = tf * idf * sum``), keeps the top
    ``k`` labels per ``country`` under each score and takes the union of the
    two selections.  Randomly drawn dummy labels are added to that union and
    the result is written to ``<path>/candidate_labels.csv``.

    :param path: directory containing ``check.csv``; also the output directory.
    :param k: number of top labels kept per country for each score.
    :param project: name of the ``./data`` sub-directory holding
        ``dummy_labels.csv``.
    :param num_countries: number of country ids that receive dummy labels.
    :param dummies_per_country: number of dummy labels per country id.
    :return: the union of real candidate labels only; the dummy rows appear
        in the written CSV but not in the returned frame.
    :raises ValueError: if there are not enough distinct dummy labels.
    """
    # The dummy rows are built in a helper so that its loop variables cannot
    # clobber ``k``, which the top-k selection below depends on.
    dummy = _sample_dummy_rows(project, num_countries, dummies_per_country)

    all_labels = pd.read_csv(path + "/check.csv")
    all_labels['complex'] = all_labels['tf'] * all_labels['idf'] * all_labels['sum']
    all_labels['simple'] = all_labels['tf'] * all_labels['idf']
    all_labels = all_labels[['country', 'new_name', 'simple', 'complex']].drop_duplicates()

    top_simple = (all_labels.sort_values('simple', ascending=False)
                  .groupby("country").head(k).sort_values('country'))
    top_complex = (all_labels.sort_values('complex', ascending=False)
                   .groupby("country").head(k).sort_values('country'))
    union = (pd.concat([top_simple, top_complex])
             .drop_duplicates()
             .sort_values('country')
             .reset_index(drop=True))

    # Real labels have no ``isDummy`` value, so that column is empty for them.
    u = pd.concat([union, dummy])[['country', 'new_name', 'simple', 'complex', 'isDummy']]
    u = u.astype({"country": int})
    u.to_csv(path + "/candidate_labels.csv")
    return union
|
18 | 34 |
|
if __name__ == '__main__':
    import argparse

    # Command-line entry point: every option is mandatory.
    cli = argparse.ArgumentParser(description='path to the check.csv file')
    option_specs = (
        ('--experiment', {}),
        ('--num_top_labels', {'type': int}),
        ('--project', {}),
    )
    for flag, extra in option_specs:
        cli.add_argument(flag, required=True, **extra)

    opts = cli.parse_args()

    # Build and write the candidate labels for the requested experiment.
    union_label_candidates(opts.experiment, opts.num_top_labels, opts.project)
| 46 | + |
| 47 | + |
| 48 | + |
# NOTE(review): these two statements follow the ``__main__`` block; the
# indentation is not recoverable from this view, so confirm whether they run
# at import time. The CSV path is an absolute location under one user's home
# directory, and neither ``project`` nor ``candi`` is used in the visible
# code -- presumably leftover debugging; confirm before relying on them.
project = "food"
candi = pd.read_csv("/Users/luli/PycharmProjects/cartograph-alg/experiments/food/0296/candidate_labels.csv")
0 commit comments