-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathutils.py
86 lines (71 loc) · 2.56 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import dataclasses
import logging
import math
import os
import io
import sys
import time
import json
from typing import Optional, Sequence, Union, Any, Mapping, Iterable, Union, List, Callable
import openai
import tqdm
from openai import openai_object
import copy
def _make_w_io_base(f, mode: str):
    """Return a file object suitable for writing to `f`.

    Args:
        f: Either an `io.IOBase` instance, which is returned unchanged, or a
            path. For a path, the parent directory is created when the path
            has a directory component, and the file is opened with `mode`.
        mode: Mode passed to `open` when `f` is a path.

    Returns:
        The original file object, or the newly opened file.
    """
    # Already a file object: nothing to open.
    if isinstance(f, io.IOBase):
        return f

    parent_dir = os.path.dirname(f)
    # A bare file name has no directory component, so there is nothing to create.
    if parent_dir != "":
        os.makedirs(parent_dir, exist_ok=True)
    return open(f, mode=mode)
def _make_r_io_base(f, mode: str):
    """Return a file object suitable for reading from `f`.

    Args:
        f: Either an `io.IOBase` instance, which is returned unchanged, or a
            path that is opened with `mode`.
        mode: Mode passed to `open` when `f` is a path.

    Returns:
        The original file object, or the newly opened file.
    """
    # Already a file object: pass it straight through.
    if isinstance(f, io.IOBase):
        return f
    return open(f, mode=mode)
def jdump(obj, f, mode="w", indent=4, default=str):
    """Dump a str, dict or list to a file.

    A dict or list is serialized as JSON; a str is written verbatim.

    Args:
        obj: An object to be written.
        f: A string path to the location on disk, or an already-open
            `io.IOBase` file object (it is closed when writing finishes).
        mode: Mode for opening the file.
        indent: Indent for storing json dictionaries.
        default: A function to handle non-serializable entries; defaults to `str`.

    Raises:
        ValueError: If `obj` is not a dict, list or str.
    """
    # Validate before touching the filesystem so an unsupported `obj` neither
    # creates/truncates the target file nor leaves a handle open.
    if not isinstance(obj, (dict, list, str)):
        raise ValueError(f"Unexpected type: {type(obj)}")

    # Using the file object as a context manager guarantees it is closed even
    # when serialization or the write itself raises.
    with _make_w_io_base(f, mode) as fp:
        if isinstance(obj, (dict, list)):
            json.dump(obj, fp, indent=indent, default=default)
        else:
            fp.write(obj)
def jload(f, mode="r"):
    """Load a .json file and return the decoded content.

    Args:
        f: A string path to the file on disk, or an already-open `io.IOBase`
            file object (it is closed when reading finishes).
        mode: Mode for opening the file.

    Returns:
        The value produced by `json.load` for the file's content.
    """
    # The context manager closes the file even when the content is not valid
    # JSON and `json.load` raises.
    with _make_r_io_base(f, mode) as fp:
        return json.load(fp)
### jsonl utils
def read_jsonlines(filename: str) -> Iterable[Mapping[str, Any]]:
    """Yield the decoded JSON value of each line in the input file.

    Args:
        filename: Path of the jsonlines file to read.

    Yields:
        The result of `json.loads` for each line, in file order.

    Raises:
        json.JSONDecodeError: If a line is not valid JSON. The offending line
            and the error arguments are logged before the error is re-raised.
    """
    # Read everything up front: tqdm gets a total for its progress bar, and
    # the file handle is released before the first value is yielded instead
    # of staying open while the generator is suspended.
    with open(filename) as fp:
        lines = fp.readlines()

    for line in tqdm.tqdm(lines, desc=f"Reading JSON lines from {filename}", unit="lines"):
        try:
            example = json.loads(line)
        except json.JSONDecodeError as ex:
            logging.error(f'Input text: "{line}"')
            logging.error(ex.args)
            raise
        # Yield outside the `try` so that an exception thrown into the
        # generator by the consumer is not mistaken for a parse failure.
        yield example
def load_jsonlines(filename: str) -> List[Mapping[str, Any]]:
    """Read a jsonlines file fully and return its records as a list.

    Args:
        filename: Path of the jsonlines file to read.

    Returns:
        A list holding every value yielded by `read_jsonlines(filename)`,
        in order.
    """
    records: List[Mapping[str, Any]] = []
    # Drain the generator into the list in one step.
    records.extend(read_jsonlines(filename))
    return records
def write_jsonlines(
    objs: Iterable[Mapping[str, Any]], filename: str, to_dict: Callable = lambda x: x
):
    """Write each object as one JSON document per line to the given file.

    Args:
        objs: The objects to write.
        filename: Path of the output file; it is opened in "w" mode.
        to_dict: Callable applied to every object before it is passed to
            `json.dumps`; defaults to the identity function.
    """
    with open(filename, "w") as fp:
        progress = tqdm.tqdm(objs, desc=f"Writing JSON lines at {filename}")
        for item in progress:
            serialized = json.dumps(to_dict(item))
            # One record per line.
            fp.write(f"{serialized}\n")