-
Notifications
You must be signed in to change notification settings - Fork 2
/
dcase2018bad.py
56 lines (42 loc) · 1.33 KB
/
dcase2018bad.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import itertools
import urllib.request
import pandas
import numpy
import dask
import dask.array
# Base location (URL prefix) used by the functions in this file whenever they
# are called with location=None; folder and file names are appended with '/'.
default_location = 'https://storage.googleapis.com/dcase2018-bad'
def chunk_sequence(iterable, size):
    """Yield consecutive lists of at most `size` items drawn from `iterable`.

    Every chunk except possibly the last holds exactly `size` items; the
    last one holds whatever remains. Nothing is yielded for an empty
    iterable. Items are pulled lazily, one chunk at a time.
    """
    source = iter(iterable)
    while True:
        chunk = list(itertools.islice(source, size))
        if not chunk:
            # An empty slice means the underlying iterator is exhausted.
            return
        yield chunk
def load_dataset(location=None, folders=None):
    """Return one pandas.DataFrame listing the files of the whole dataset.

    For each folder, ``<location>/<folder>/files.csv`` is read with
    ``pandas.read_csv`` (the ``itemid`` column is forced to string so ids
    are not reinterpreted as numbers), a ``folder`` column holding the
    folder name is added, and the per-folder frames are concatenated in
    the order the folders are given.

    Args:
        location: Base URL or path that contains the dataset folders.
            When None, the module-level ``default_location`` is used.
        folders: Optional iterable of folder names to load. When None, the
            six standard folders are loaded. Must not be empty, because
            ``pandas.concat`` raises ValueError when given no frames.

    Returns:
        pandas.DataFrame with the CSV columns plus ``folder``. The row index
        of each CSV is kept as read (``ignore_index`` is not set), so index
        labels repeat across folders.

    Raises:
        TypeError: if ``itemid`` did not load as a string/object column.
    """
    if location is None:
        location = default_location
    if folders is None:
        folders = (
            'polandnfc',
            'birdvox',
            'chern',
            'ff1010bird',
            'warblr10k_public',
            'warblr10k_test',
        )

    def load_folder(folder):
        url = '/'.join((location, folder, 'files.csv'))
        df = pandas.read_csv(url, dtype={'itemid': str})
        # Explicit check instead of `assert`: it survives `python -O`, and it
        # accepts both the classic object dtype and a dedicated string dtype
        # (which newer pandas versions may return for dtype=str).
        itemid_dtype = df.itemid.dtype
        is_textual = (
            pandas.api.types.is_object_dtype(itemid_dtype)
            or pandas.api.types.is_string_dtype(itemid_dtype)
        )
        if not is_textual:
            raise TypeError(
                'itemid column of {!r} has unexpected dtype {}'.format(
                    url, itemid_dtype))
        df['folder'] = folder
        return df

    return pandas.concat([load_folder(f) for f in folders])
def wav_url(folder, item, location=None):
    """Return the URL of a single wav file.

    The result has the form ``<location>/<folder>/wav/<item>.wav``. When
    `location` is None, the module-level ``default_location`` is the base.
    """
    base = default_location if location is None else location
    filename = item + '.wav'
    return '/'.join([base, folder, 'wav', filename])
def wav_urls(dataset, location=None):
    """Return a lazy iterable of wav URLs, one per row of `dataset`.

    `dataset` must expose ``folder`` and ``itemid`` attributes that can be
    iterated in lockstep; each pair is turned into a URL by ``wav_url``
    using the given `location`. The result is a generator, so it can be
    consumed only once.
    """
    pairs = zip(dataset.folder, dataset.itemid)
    return (
        wav_url(folder, itemid, location=location)
        for folder, itemid in pairs
    )