-
Notifications
You must be signed in to change notification settings - Fork 0
/
imagenet_download.py
55 lines (46 loc) · 1.47 KB
/
imagenet_download.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
import os
import subprocess as sp
from progress.bar import IncrementalBar as Bar
def countlines(fname):
    """Return the number of lines in the file at *fname*.

    Lines are counted the same way iterating over an open file yields
    them, so a final line that lacks a trailing newline is still counted.
    (The previous ``wc -l`` implementation counted newline characters and
    therefore reported one line too few for such files, and required an
    external ``wc`` binary.)

    Args:
        fname: Path of the file whose lines are counted.

    Returns:
        The line count as an ``int``; ``0`` for an empty file.

    Raises:
        IOError: If the file cannot be opened or read (``IOError`` is an
            alias of ``OSError``, which ``open`` raises).
    """
    # Binary mode: counting lines needs no text decoding, and the context
    # manager guarantees the handle is closed even if reading fails.
    with open(fname, 'rb') as f:
        return sum(1 for _ in f)
# --- Read the synset list -------------------------------------------------
# Each non-blank line of synsets.txt is "<wnid> <label>". The two columns
# are collected into parallel lists.
wnids = []
labels = []
with open('synsets.txt', 'r') as synsets:
    for line in synsets:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. an empty line at the end of file).
            continue
        # Split on the first run of whitespace only, so that a label
        # consisting of several words does not break the unpacking.
        w, l = line.split(None, 1)
        wnids.append(w)
        labels.append(l)

# --- Create the output directory layout -----------------------------------
# imagenet/urls holds one URL list per wnid; imagenet/images/<wnid> holds
# the images downloaded for that wnid. exist_ok makes re-runs harmless.
os.makedirs('imagenet/images', exist_ok=True)
os.makedirs('imagenet/urls', exist_ok=True)
for w in wnids:
    os.makedirs('imagenet/images/' + w, exist_ok=True)
print()

# --- Fetch the URL list of every synset -----------------------------------
bar = Bar('Fetching URL lists...', max=len(wnids))
for w in wnids:
    # wget's output is silenced and its exit status is not checked:
    # a failed fetch does not stop the script.
    sp.call(
        [
            'wget',
            '-O', 'imagenet/urls/{}.txt'.format(w),
            'http://www.image-net.org/api/text/imagenet.synset.geturls?wnid={}'.format(w),
        ],
        stdout=sp.DEVNULL,
        stderr=sp.DEVNULL,
    )
    bar.next()
print()

# wget settings for the image downloads, passed to -T and -t respectively
# (kept as strings because they go straight into the argument list).
timeout = '5'
tries = '3'

# Echo the wnids that are about to be processed.
for w in wnids:
    print(w)
print()

# --- Download the images listed for every synset --------------------------
for w in wnids:
    fname = 'imagenet/urls/{}.txt'.format(w)
    bar = Bar('Downloading images for label {}'.format(w), max=countlines(fname))
    # The context manager closes the list file even if a download step raises.
    with open(fname, 'r') as urls:
        for url in urls:
            u = url.strip()
            if u:
                # The URLs come from a downloaded file, so '--' is passed
                # first to stop wget from parsing a line that starts with
                # '-' as a command-line option.
                sp.call(
                    [
                        'wget',
                        '-T', timeout,
                        '-t', tries,
                        '--directory-prefix', 'imagenet/images/{}'.format(w),
                        '--', u,
                    ],
                    stdout=sp.DEVNULL,
                    stderr=sp.DEVNULL,
                )
            # Advance for blank lines too, so the bar stays in step with
            # the line count used as its maximum.
            bar.next()
    print()

print('completed')