-
Notifications
You must be signed in to change notification settings - Fork 5
/
dataset.py
executable file
·84 lines (66 loc) · 2.88 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# TODO(theis): DELETE THIS from project root!!!!
# only copied here to make previous pickled datasets load...
import pathlib
import pickle
from collections import namedtuple, OrderedDict
from typing import Any, Mapping, Optional, Sequence

import lmdb
import torch
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset

from interactive_spectrogram_inpainting.utils.datasets.label_encoders import (
    load_label_encoders_from_file)
# Lightweight record with the fields ``top``, ``bottom``, ``attributes`` and
# ``filename``.
# NOTE(review): presumably this is the type of the rows pickled into the
# database; if so, the type name and the field order must stay unchanged for
# previously pickled datasets to keep loading -- confirm before editing.
CodeRow = namedtuple('CodeRow', 'top bottom attributes filename')
class LMDBDataset(Dataset):
    """Read-only dataset based on a LMDB database

    DEPRECATED: the constructor prints a warning pointing to the version in
    the ``utils.datasets`` submodule.

    Arguments:
        * path, str or pathlib.Path:
            The path to the directory containing the database
        * classes_for_conditioning, optional sequence of str:
            Names of the attribute classes returned along with every item.
            ``None`` or an empty sequence disables conditioning, in which
            case no label encoders are loaded.

    Attributes set by the constructor:
        * env: the opened LMDB environment
        * length: number of items, read from the ``length`` database key
        * classes_for_conditioning: the requested class names (never None)
        * label_encoders: mapping from class name to label encoder
    """

    def __init__(self, path: pathlib.Path,
                 classes_for_conditioning: Optional[Sequence[str]] = None):
        super().__init__()
        # Opened read-only and without file locking
        self.env = lmdb.open(
            str(path),
            max_readers=32,
            readonly=True,
            lock=False,
            readahead=False,
            meminit=False,
        )
        print("\n\nWARNING: DEPRECATED, use version in submodule utils.datasets\n\n")

        # A None default (instead of a shared mutable list) is normalised to
        # an empty list, so that __getitem__ can always iterate over it.
        self.classes_for_conditioning = (
            [] if classes_for_conditioning is None
            else classes_for_conditioning)

        if not self.env:
            raise IOError('Cannot open lmdb dataset', path)

        self.label_encoders: Mapping[str, LabelEncoder]
        with self.env.begin(write=False) as txn:
            # The item count is stored as UTF-8 text under the 'length' key
            self.length = int(
                txn.get('length'.encode('utf-8')).decode('utf-8'))

            if len(self.classes_for_conditioning) == 0:
                self.label_encoders = {}
            else:
                try:
                    # NOTE(security): pickle.loads executes arbitrary code
                    # stored in the payload; only open trusted databases.
                    self.label_encoders = self._filter_classes_labels(
                        pickle.loads(
                            txn.get('label_encoders'.encode('utf-8'))))
                except Exception:
                    # Best-effort fallback: the encoders are missing from the
                    # database or cannot be unpickled, read them from the
                    # JSON file expected next to the database instead.
                    # Exception (not a bare except) lets KeyboardInterrupt
                    # and SystemExit propagate.
                    # NOTE(review): unlike the pickled encoders, the result
                    # is not filtered by classes_for_conditioning -- confirm
                    # whether this is intended.
                    self.label_encoders = (
                        load_label_encoders_from_file(
                            pathlib.Path(path) / 'label_encoders.json'))

    def __len__(self):
        """Return the number of items, as recorded in the database."""
        return self.length

    def _filter_classes_labels(self, class_labels: Mapping[str, Any]
                               ) -> Mapping[str, Any]:
        """Keep only the entries whose key is a conditioning class.

        Arguments:
            * class_labels, mapping:
                Values indexed by class name

        Returns a new dict restricted to the class names listed in
        ``self.classes_for_conditioning``, in the order of ``class_labels``.
        """
        return {class_name: class_label
                for class_name, class_label in class_labels.items()
                if class_name in self.classes_for_conditioning}

    def __getitem__(self, index):
        """Load and unpickle the row stored under ``str(index)``.

        Returns a ``(top, bottom, attributes)`` tuple where ``top`` and
        ``bottom`` are tensors built with ``torch.from_numpy`` and
        ``attributes`` is an OrderedDict holding, for each conditioning
        class and in that order, the row's attribute reshaped by ``view(1)``.

        Raises IndexError if the database holds no entry for ``index``.
        """
        with self.env.begin(write=False) as txn:
            key = str(index).encode('utf-8')
            payload = txn.get(key)

        if payload is None:
            # Raising IndexError (rather than letting pickle choke on None)
            # follows the sequence protocol, so that plain iteration over
            # the dataset terminates cleanly.
            raise IndexError(
                f'index {index!r} not found in LMDB dataset')

        # NOTE(security): pickle.loads executes arbitrary code stored in the
        # payload; only read trusted databases.
        row = pickle.loads(payload)

        # assumes row.attributes maps class names to tensors -- TODO confirm
        attributes = OrderedDict(
            (class_name, row.attributes[class_name].view(1))
            for class_name in self.classes_for_conditioning)

        return (torch.from_numpy(row.top), torch.from_numpy(row.bottom),
                attributes)