-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgetsounds.py
156 lines (119 loc) · 5.89 KB
/
getsounds.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import glob
import json
import os
from collections import Counter
# Default value of the N parameter of select_relevant_sounds (how many sound ids it returns).
default_N_assign_more_sounds = 10
# Root directory of the FSL10K data; the paths below and the annotators folder are built from it.
PATH_TO_FSL10K = "/static/FSL10K"
# Directory checked by select_relevant_sounds for "<name>_analysis.json" files.
PATH_TO_AC_ANALYSIS = os.path.join(PATH_TO_FSL10K, 'ac_analysis/')
# NOTE(review): not referenced in the visible part of this file; presumably used by callers - confirm.
PATH_TO_METADATA = os.path.join(PATH_TO_FSL10K, 'fs_analysis/')
def compile_annotated_sounds(annotations_path):
    """Merge every annotation file under *annotations_path* into one entry per sound.

    Files are matched as ``<annotations_path>/<subdir>/<name>.json``, i.e.
    exactly one directory level below *annotations_path*. The sound id is the
    file name without its extension and without the ``sound-`` substring.
    Each file must contain a JSON object with a ``"genres"`` list.

    Args:
        annotations_path: Directory whose immediate sub-directories hold the
            annotation JSON files.

    Returns:
        dict mapping sound id (str) to
        ``{"genres": [...], "num_annotations": int}``. ``num_annotations`` is
        the number of files found for the sound; once a sound has more than
        one file, ``genres`` is the de-duplicated union of their genres in
        first-seen order.

    Raises:
        KeyError: if an annotation file has no ``"genres"`` entry.
        ValueError: if an annotation file is not valid JSON.
    """
    annotated_sounds = {}
    # The pattern contains no "**", so only one sub-directory level is scanned
    # (the former recursive=True flag had no effect and was dropped).
    for an_file in glob.glob(annotations_path + '/*/*.json'):
        # The context manager closes the handle; previously one file
        # descriptor was leaked per annotation file.
        with open(an_file, 'rb') as handle:
            an = json.load(handle)
        # Remove the path, then the extension and then the "sound-" string.
        sound_id = os.path.basename(os.path.splitext(an_file)[0]).replace('sound-', '')
        entry = annotated_sounds.get(sound_id)
        if entry is None:
            annotated_sounds[sound_id] = {"genres": list(an["genres"]),
                                          "num_annotations": 1}
        else:
            # dict.fromkeys de-duplicates like set() did, but keeps a
            # deterministic first-seen order.
            entry["genres"] = list(dict.fromkeys(entry["genres"] + an["genres"]))
            entry["num_annotations"] += 1
    return annotated_sounds
def collect_assigned_sounds():
    """Gather the sounds listed in every annotator file.

    Reads each ``*.json`` file located directly inside
    ``<PATH_TO_FSL10K>/annotators/`` and concatenates the items obtained by
    iterating its parsed content.

    Returns:
        list with one entry per item of every file; a sound listed in
        several files appears several times.

    Raises:
        OSError: if the annotators directory or one of its files cannot be read.
        ValueError: if a file is not valid JSON.
    """
    assigned_sounds = []
    user_path = os.path.join(PATH_TO_FSL10K, 'annotators/')
    for user_file in os.listdir(user_path):
        if not user_file.endswith(".json"):
            continue
        # The context manager closes the handle; previously one file
        # descriptor was leaked per annotator file.
        with open(os.path.join(user_path, user_file), 'rb') as handle:
            assigned_sounds.extend(json.load(handle))
    return assigned_sounds
def collect_authors(sounds_annotated, metadata):
    """Count the annotated sounds of each author.

    Args:
        sounds_annotated: Iterable of sound ids (iterating a dict yields its keys).
        metadata: Mapping from sound id to a record with a ``"username"`` entry.

    Returns:
        dict mapping each username to the number of ids in
        *sounds_annotated* whose metadata carries that username.
    """
    counts = {}
    for sound_id in sounds_annotated:
        username = metadata[sound_id]["username"]
        counts[username] = counts.get(username, 0) + 1
    return counts
def collect_packs(sounds_annotated, metadata):
    """Count the annotated sounds of each pack.

    Args:
        sounds_annotated: Iterable of sound ids (iterating a dict yields its keys).
        metadata: Mapping from sound id to a record with a ``"pack_name"`` entry.

    Returns:
        dict mapping each pack name to the number of ids in
        *sounds_annotated* that belong to it. Sounds whose ``"pack_name"``
        is ``None`` are not counted.
    """
    tally = {}
    for sound_id in sounds_annotated:
        pack_name = metadata[sound_id]["pack_name"]
        if pack_name is None:
            continue
        tally[pack_name] = tally.get(pack_name, 0) + 1
    return tally
def collect_genres(sounds_annotated):
    """Count how many times each genre occurs across the annotated sounds.

    Args:
        sounds_annotated: dict mapping sound id to a record with a
            ``"genres"`` iterable.

    Returns:
        dict mapping each genre to its number of occurrences over all
        records' ``"genres"`` entries.
    """
    totals = {}
    for annotation in sounds_annotated.values():
        for genre in annotation["genres"]:
            totals[genre] = totals.get(genre, 0) + 1
    return totals
def genre_importance(sound_genres, genre_sounds):
    """Count how many of a sound's genres are among the two least-annotated ones.

    Args:
        sound_genres: Iterable of genre labels of the sound being scored.
        genre_sounds: dict mapping genre label to its annotation count.

    Returns:
        int: number of entries of *sound_genres* (duplicates included) that
        are one of the (at most) two genres with the lowest count in
        *genre_sounds*. Returns 0 when *genre_sounds* is empty.
    """
    ranked = sorted(genre_sounds.items(), key=lambda item: item[1])
    # Slice instead of indexing [0] and [1]: with fewer than two annotated
    # genres (e.g. before any annotation exists) indexing raised IndexError.
    least_annotated = [genre for genre, _ in ranked[:2]]
    return sum(1 for genre in sound_genres if genre in least_annotated)
def discard_packs(all_sound_ids, metadata, packs_to_discard=None):
    """Return the sound ids that do not belong to a discarded pack.

    Args:
        all_sound_ids: Iterable of sound ids to filter.
        metadata: Mapping from sound id to a record with a ``"pack"`` entry.
        packs_to_discard: Optional iterable of ``"pack"`` values to exclude.
            Defaults to ``None`` (nothing is discarded), which matches the
            previously hard-coded empty list.

    Returns:
        A new list with the ids of *all_sound_ids*, in their original order,
        whose ``metadata[id]["pack"]`` is not in *packs_to_discard*. The
        input is not modified.

    Raises:
        KeyError: if an id is missing from *metadata* or has no ``"pack"`` entry.
    """
    excluded = list(packs_to_discard) if packs_to_discard is not None else []
    # Build a new list rather than removing from the one being iterated:
    # the old in-place removal referenced an undefined name and would also
    # have skipped elements while iterating.
    return [sound for sound in all_sound_ids
            if metadata[sound]["pack"] not in excluded]
def select_relevant_sounds(annotations_path, metadata, genre_metadata, all_sound_ids, N=default_N_assign_more_sounds):
    """Pick the N sounds that are most worth annotating next.

    Every candidate sound gets an "irrelevance" score built from how often
    it has already been annotated or assigned, how often its author and
    pack have been annotated, and whether its genres are among the least
    annotated ones. Sounds are returned in ascending score order.

    Args:
        annotations_path: Directory passed to compile_annotated_sounds.
        metadata: Mapping from sound id to a record with at least
            ``"username"``, ``"pack"`` and ``"preview_url"`` entries
            (``"pack_name"`` is read when present).
        genre_metadata: Mapping from sound id to the genres of that sound;
            ids missing from it are treated as having no genres.
        all_sound_ids: Iterable of candidate sound ids.
        N: Maximum number of sound ids to return.

    Returns:
        list of at most N sound ids, lowest irrelevance first. Only sounds
        whose ``<name>_analysis.json`` file exists under PATH_TO_AC_ANALYSIS
        are considered.
    """
    sounds_annotated = compile_annotated_sounds(annotations_path)

    # These weights are used to create an irrelevance metric for each loop
    # based on the existing annotated loops.
    # If a loop has been annotated (or assigned) already, the number of times
    # is multiplied by this weight. It is large relative to the other weights
    # so that sounds which haven't been annotated are favoured first.
    anno_weight = 500
    # The number of times the author has been annotated should only lightly
    # influence the overall score, letting more important metrics such as
    # the number of annotations predominate.
    auth_weight = 1
    # Same applies to the number of times a pack has been annotated.
    pack_weight = 1
    # The genre importance pushes the algorithm towards the less annotated
    # genres. It grows with the importance of the loop, hence the negative
    # weight: it makes the loop "less irrelevant".
    genre_weight = -10

    authors_sounds = collect_authors(sounds_annotated, metadata)
    pack_sounds = collect_packs(sounds_annotated, metadata)
    genre_sounds = collect_genres(sounds_annotated)
    sounds_to_rate = discard_packs(all_sound_ids, metadata)
    # Count assignments once instead of scanning the whole list with
    # list.count() for every candidate sound.
    assigned_counts = Counter(collect_assigned_sounds())

    sound_irrelevance_list = []
    for sound in sounds_to_rate:
        info = metadata[sound]

        annotation = sounds_annotated.get(sound)
        num_annotated = annotation["num_annotations"] if annotation is not None else 0
        num_author = authors_sounds.get(info["username"], 0)
        # collect_packs keys its counts by "pack_name"; looking them up with
        # "pack" (as before) does not match that key.
        # NOTE(review): assumes "pack" and "pack_name" hold different values - confirm.
        num_pack = pack_sounds.get(info.get("pack_name"), 0)
        num_assigned = assigned_counts[sound]
        gen_importance = genre_importance(genre_metadata.get(sound, []), genre_sounds)

        irrelevance = ((num_annotated + num_assigned) * anno_weight
                       + num_author * auth_weight
                       + num_pack * pack_weight
                       + gen_importance * genre_weight)

        # The analysis file is named after the preview file: everything in
        # the last URL segment before "-hq", plus "_analysis.json".
        preview_name = os.path.basename(info["preview_url"])
        base_name = preview_name.partition("-hq")[0]
        analysis_path = os.path.join(PATH_TO_AC_ANALYSIS, base_name + "_analysis.json")
        if os.path.exists(analysis_path):
            sound_irrelevance_list.append((sound, irrelevance))

    # Stable sort: sounds with equal scores keep their input order.
    sound_irrelevance_list.sort(key=lambda pair: pair[1])
    return [sound for sound, _ in sound_irrelevance_list[:N]]