-
Notifications
You must be signed in to change notification settings - Fork 1
/
parser.py
179 lines (147 loc) · 3.69 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
from collections import deque
class Parser:
    '''
    Abstract parser for some filetypes with buffering.

    Subclasses must implement _parse_data(), which yields one data unit per
    call. Parsed units are staged in a bounded FIFO buffer and handed out
    through read(), iteration, or data().
    '''

    def __init__(self, filepath, buffer_size):
        '''
        Parser(filepath, buffer_size)
        Open the parser of file with name filepath and with the given buffer size.
        '''
        self._buffer = deque(maxlen=buffer_size)
        self._buffer_size = buffer_size
        self.open(filepath)

    def __iter__(self):
        '''
        iter(P) -> return itself
        '''
        return self

    def __next__(self):
        '''
        next(P) -> data
        Return the next data unit; raise StopIteration when read() yields
        nothing (or a falsy value).
        '''
        data = self.read()
        # Truthiness (not "is None") is kept on purpose: it matches the check
        # used in _parse_to_buffer(), so a falsy unit also ends iteration.
        if data:
            return data
        raise StopIteration

    def _parse_data(self):
        '''
        P._parse_data() -> one data unit or None if no data left in the file.
        Parse the input file and return one data unit or None if no data left.
        Must be overridden by subclasses.
        '''
        raise NotImplementedError

    def _parse_to_buffer(self):
        '''
        P._parse_to_buffer() -> None
        Parse up to buffer_size data units from the input file into the
        buffer. Stops early at the first falsy result from _parse_data();
        in that case the buffer is simply left with fewer (possibly zero)
        units. Within this class it is only called on an empty buffer.
        '''
        for _ in range(self._buffer_size):
            data = self._parse_data()
            if not data:
                break
            self._buffer.append(data)

    def read(self):
        '''
        P.read() -> one data unit or None if no data left in file.
        Get one data record or None if no data left.
        '''
        # Refill lazily: only touch the file when the buffer has run dry.
        if not self._buffer:
            self._parse_to_buffer()
        # Still empty after a refill attempt: nothing left to return.
        if not self._buffer:
            return None
        # Hand out the oldest buffered unit (FIFO order).
        return self._buffer.popleft()

    def open(self, filepath):
        '''
        P.open(filepath)
        Open the input file for parsing.
        '''
        self._file = open(filepath)

    def is_eof(self):
        '''
        P.is_eof() -> bool
        Check if the parser has reached the end of the input file.
        Return True only when the buffer is empty and a refill attempt
        produced no data; return False while data is still available.
        '''
        if not self._buffer:
            self._parse_to_buffer()
        return not self._buffer

    def data(self):
        '''
        P.data() -> list of data units
        Return a snapshot (as a list, oldest first) of everything currently
        in the buffer, then clear the buffer and refill it from the file.
        '''
        # deque does not support slicing, so copy via list() instead of [:].
        snapshot = list(self._buffer)
        self._buffer.clear()
        self._parse_to_buffer()
        return snapshot

    def close(self):
        '''
        P.close()
        Close the input file and drop any buffered data.
        '''
        self._file.close()
        self._buffer.clear()
class PairendParser:
    '''
    Abstract parser for a pair of files, built on two underlying Parser objects.

    Subclasses must implement _open_parser() (to create the underlying
    parser for one file) and read() (to produce one pair of data units).
    '''

    def __init__(self, filepath1, filepath2, buffer_size):
        '''
        P(filepath1, filepath2, buffer_size)
        Open the given two files with the given buffer size for underlying Parser objects.
        '''
        self._buffer_size = buffer_size
        self.open(filepath1, filepath2)

    def __iter__(self):
        '''
        iter(P) -> return itself
        '''
        return self

    def __next__(self):
        '''
        next(P) -> data
        Return the next result of read(); raise StopIteration when read()
        yields nothing (or a falsy value).
        '''
        data = self.read()
        if data:
            return data
        raise StopIteration

    def _open_parser(self, filepath, buffer_size):
        '''
        P._open_parser(filepath, buffer_size) -> Parser
        Open and return Parser to the given file with the given buffer_size.
        Must be overridden by subclasses.
        '''
        raise NotImplementedError

    def read(self):
        '''
        P.read() -> (left_data, right_data)
        Get the pair of data where left_data is from the first file and right_data is from the second file.
        Must be overridden by subclasses.
        '''
        raise NotImplementedError

    def open(self, filepath1, filepath2):
        '''
        P.open(filepath1, filepath2)
        Open the given two files.
        If opening the second file fails, the already opened first parser is
        closed before the exception is re-raised.
        '''
        self._parser_alpha = self._open_parser(filepath1, self._buffer_size)
        try:
            self._parser_beta = self._open_parser(filepath2, self._buffer_size)
        except BaseException:
            # Do not leave the first parser open when the pair is incomplete.
            self._parser_alpha.close()
            raise

    def close(self):
        '''
        P.close()
        Close both underlying parsers.
        The second parser is closed even if closing the first one raises.
        '''
        try:
            self._parser_alpha.close()
        finally:
            self._parser_beta.close()

    def is_eof(self):
        '''
        P.is_eof() -> bool
        Check if the parser has reached the end of the input files.
        Return True only when both underlying parsers report end of file.
        '''
        return self._parser_alpha.is_eof() and self._parser_beta.is_eof()