data_downloader.py
import os
import pickle
from glob import glob

import cv2
from tensorflow.keras.utils import get_file


class Div2k:
"""
Class that can download the specified version of DIV2k dataset and transform it to a pickle file if required.
Parameters
----------
path : str
The path to the folder containing the dataset on your disk (Required if you want to pickle the data)
subsets : list, tuple
download training or validation or both.
Default=["train"]
downgrade : str
the type of downgrade on the low resolution images, can be either "bicubic" or "unknown"
Defalut="bicubic"
scale: int
the downscale of the low resolution images.
Default=4
Methods
-------
download()
Fetch and download the desired split of DIV2K dataset.
pickle()
Read the dataset and sava each subset in pickle file(s).
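
    Example
    -------
    A minimal usage sketch ("./datasets" is a placeholder path):

    >>> div2k = Div2k("./datasets", subsets=["train", "valid"])
    >>> div2k.download()
    >>> div2k.pickle()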
"""

    def __init__(self, path: str, subsets: list = None, downgrade: str = "bicubic", scale: int = 4):
        self._DL_URL = "https://data.vision.ee.ethz.ch/cvl/DIV2K/"
        self.path = path
        self.subsets = subsets if subsets else ["train"]
        self.downgrade = downgrade
        self.scale = scale
        # HR images extract to a flat directory; LR images extract into a
        # per-scale subdirectory (X{scale}), so the scale must not be hard-coded:
        self.data_dirs = {f"{subset}_hr": f"{self.path}/div2k/DIV2K_{subset}_HR"
                          for subset in self.subsets}
        self.data_dirs.update({f"{subset}_lr": f"{self.path}/div2k/DIV2K_{subset}_LR_{self.downgrade}/X{self.scale}"
                               for subset in self.subsets})
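    # For the defaults (subsets=["train"], downgrade="bicubic", scale=4),
    # data_dirs resolves to, e.g.:
    #   {"train_hr": "<path>/div2k/DIV2K_train_HR",
    #    "train_lr": "<path>/div2k/DIV2K_train_LR_bicubic/X4"}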
def download(self):
"""
Fetch and download the desired split of DIV2K dataset.
"""
for split in self.subsets:
HR_DATA = f"DIV2K_{split}_HR"
LR_DATA = f"DIV2K_{split}_LR_{self.downgrade}_X{self.scale}"
for data in [HR_DATA, LR_DATA]:
path_to_downloaded_file = get_file(fname = f"{data}.zip",
cache_dir = self.path,
cache_subdir = self.path + "/div2k",
origin = f"{self._DL_URL}{data}.zip",
extract = True,
archive_format = 'zip')
#Remove the zip file after extraction to save disk space:
os.remove(path_to_downloaded_file)
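
    # A minimal verification sketch (an assumed helper, not part of the
    # original class): confirms that every expected subset directory
    # exists on disk after download() has run.
    def verify(self) -> bool:
        """Return True if all expected dataset directories are present."""
        return all(os.path.isdir(d) for d in self.data_dirs.values())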
def pickle(self):
"""
Read the dataset and sava each subset in pickle file(s).
"""
for subset_name, subset_path in self.data_dirs.items():
i = 0 #Images counter
images = list()
for image_path in sorted(glob(f"{subset_path}/*.png")):
i += 1
#Read the image:
images.append(cv2.imread(image_path))
#Remove the image from disk to save space:
os.remove(image_path)
#Save every 100 images in a pickle file:
if i % 100 == 0:
with open(f"{subset_path}/{subset_name}_{i//100}.p","wb") as pickle_file:
pickle.dump(images,pickle_file)
images = list()
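

# A minimal loading sketch (a hypothetical helper, not part of the original
# module): reads one pickle batch written by Div2k.pickle() back into a
# list of BGR image arrays.
def load_pickle_batch(pickle_path: str) -> list:
    with open(pickle_path, "rb") as pickle_file:
        return pickle.load(pickle_file)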


if __name__ == '__main__':
    PATH = "./datasets"
    div2k = Div2k(PATH, ["train", "valid"])
    div2k.download()
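    # To serialize the downloaded images into pickle batches, uncomment the
    # line below (note that pickle() deletes the source PNGs as it reads them):
    # div2k.pickle()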