-
Notifications
You must be signed in to change notification settings - Fork 3
/
utils.py
107 lines (70 loc) · 2.59 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# -*- coding: utf-8 -*-
import gzip
import json
import os
import pickle
import joblib
def make_dirs(dirname):
    """Create ``dirname`` together with any missing parent directories.

    A falsy ``dirname`` (such as the empty string ``os.path.dirname`` returns
    for a bare file name) is ignored. Directories that already exist are not
    treated as an error.
    """
    if not dirname:
        return
    os.makedirs(dirname, exist_ok=True)
def read_file(filename):
    """Return the full contents of ``filename`` as ``bytes``."""
    with open(filename, mode="rb") as stream:
        payload = stream.read()
    return payload
def write_file(data, filename):
    """Write the bytes ``data`` to ``filename``, replacing any existing file.

    The parent directory of ``filename`` is created first via ``make_dirs``.
    """
    parent = os.path.dirname(filename)
    make_dirs(parent)

    with open(filename, mode="wb") as stream:
        stream.write(data)
def read_text(filename, encoding="UTF-8"):
    """Return the full contents of ``filename`` decoded with ``encoding``."""
    with open(filename, mode="r", encoding=encoding) as stream:
        content = stream.read()
    return content
def write_text(text, filename, encoding="UTF-8"):
    """Write the string ``text`` to ``filename`` using ``encoding``.

    The parent directory of ``filename`` is created first via ``make_dirs``;
    an existing file is overwritten.
    """
    parent = os.path.dirname(filename)
    make_dirs(parent)

    with open(filename, mode="w", encoding=encoding) as stream:
        stream.write(text)
def read_lines(filename, encoding="UTF-8"):
    """Lazily yield the lines of a text file with their endings removed.

    Every trailing carriage return, line feed and vertical tab character is
    stripped from each line before it is yielded. The file stays open only
    while the generator is being consumed.
    """
    with open(filename, mode="r", encoding=encoding) as stream:
        yield from (raw.rstrip("\r\n\v") for raw in stream)
def write_lines(lines, filename, linesep="\n", encoding="UTF-8"):
    """Write each string of ``lines`` to ``filename`` followed by ``linesep``.

    The parent directory of ``filename`` is created first via ``make_dirs``
    and an existing file is overwritten. ``lines`` may be any iterable of
    strings (including a generator); it is consumed one item at a time.

    Args:
        lines: Iterable of strings, without trailing line separators.
        filename: Destination path.
        linesep: Separator appended after every line; written verbatim.
        encoding: Text encoding of the output file.
    """
    make_dirs(os.path.dirname(filename))

    # newline="" disables newline translation so ``linesep`` reaches the file
    # unchanged. With the default translation every "\n" is rewritten to
    # os.linesep, so on Windows the default separator silently became "\r\n"
    # and an explicit linesep="\r\n" became "\r\r\n".
    with open(filename, mode="w", encoding=encoding, newline="") as f:
        for line in lines:
            f.write(line)
            f.write(linesep)
            # Flushed after every line so output already produced is handed
            # to the OS while a slow iterable is still running.
            # NOTE(review): confirm the per-line flush is intended.
            f.flush()
def read_json(filename, encoding="UTF-8"):
    """Parse the JSON document stored in ``filename`` and return the result."""
    with open(filename, mode="r", encoding=encoding) as stream:
        parsed = json.load(stream)
    return parsed
def write_json(obj, filename, indent=None, encoding="UTF-8"):
    """Serialize ``obj`` as JSON into ``filename``.

    Non-ASCII characters are written as-is (``ensure_ascii=False``) and
    ``indent`` is forwarded to ``json.dump``. The parent directory of
    ``filename`` is created first via ``make_dirs``.
    """
    parent = os.path.dirname(filename)
    make_dirs(parent)

    with open(filename, mode="w", encoding=encoding) as stream:
        json.dump(obj, stream, ensure_ascii=False, indent=indent)
def read_jsonlines(filename, encoding="UTF-8"):
    """Lazily yield one decoded JSON value per line of ``filename``.

    Lines are obtained from ``read_lines`` and each one is passed to
    ``json.loads``.
    """
    rows = read_lines(filename, encoding=encoding)
    yield from (json.loads(row) for row in rows)
def write_jsonlines(objs, filename, linesep="\n", encoding="UTF-8"):
    """Write every item of ``objs`` as one JSON document per line.

    Items are encoded lazily with ``ensure_ascii=False`` and the resulting
    strings are handed to ``write_lines`` together with ``linesep`` and
    ``encoding``.
    """
    encoded = map(lambda item: json.dumps(item, ensure_ascii=False), objs)
    write_lines(encoded, filename=filename, linesep=linesep, encoding=encoding)
def deserialize_object(filename, mmap_mode=None):
    """Load and return the object stored in ``filename`` with ``joblib.load``.

    ``mmap_mode`` is forwarded to ``joblib.load`` unchanged.

    NOTE: joblib persistence is pickle-based, so only load files from a
    trusted source.
    """
    restored = joblib.load(filename, mmap_mode=mmap_mode)
    return restored
def serialize_object(obj, filename, compress=3, protocol=None, cache_size=None):
    """Persist ``obj`` to ``filename`` with ``joblib.dump``.

    The parent directory of ``filename`` is created first via ``make_dirs``.
    ``compress``, ``protocol`` and ``cache_size`` are forwarded to
    ``joblib.dump`` unchanged.
    """
    parent = os.path.dirname(filename)
    make_dirs(parent)

    dump_options = {
        "compress": compress,
        "protocol": protocol,
        "cache_size": cache_size,
    }
    joblib.dump(obj, filename=filename, **dump_options)
def deserialize_objects(filename):
    """Lazily yield every pickled object stored in the gzip file ``filename``.

    Objects are read back one ``pickle.load`` call at a time, in the order
    they were written; iteration ends at the first ``EOFError``.

    NOTE: unpickling can execute arbitrary code, so only read files from a
    trusted source.
    """
    with gzip.open(filename, mode="rb") as stream:
        try:
            while True:
                yield pickle.load(stream)
        except EOFError:
            # End of the stream: nothing more to unpickle.
            return
def serialize_objects(objs, filename):
    """Pickle every item of ``objs`` back to back into the gzip file ``filename``.

    The parent directory of ``filename`` is created first via ``make_dirs``
    and an existing file is overwritten. Each item is written with its own
    ``pickle.dump`` call.
    """
    parent = os.path.dirname(filename)
    make_dirs(parent)

    with gzip.open(filename, mode="wb") as stream:
        for item in objs:
            pickle.dump(item, stream)