-
Notifications
You must be signed in to change notification settings - Fork 4
/
Utils.py
220 lines (161 loc) · 5.8 KB
/
Utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
import matplotlib
import math
import collections
import re
import sys
from Line import Line
from Bar import Bar
def getGoldenRatioDimensions(width):
    """
    Returns a `(width, height)` tuple in which `height` is `width` multiplied
    by (sqrt(5) - 1) / 2, the reciprocal of the golden ratio.
    """
    heightPerWidth = (math.sqrt(5) - 1.0) / 2.0
    height = heightPerWidth * width
    return (width, height)
def getXYValsFromFile(filename, regex, postFunction=None,
                      autofillXValues=False):
    """
    Parses `filename` line by line with the regular expression `regex` and
    returns the matched values split into x-axis and y-axis series.

    Each line is stripped of surrounding whitespace and matched against
    `regex` from its start; lines that do not match are ignored. The matching
    groups of a line form one row. If `postFunction` is not None, it is called
    with the tuple of matching groups and its result is used as the row;
    otherwise every group is converted to a float. If the row has fewer than
    two entries, or `autofillXValues` is True, the zero-based line number is
    prepended to the row to serve as its x-axis value.

    Rows are sorted before being split, so x-axis values are returned in
    ascending order.

    Returns a two-element list `[xValues, yValues]`, where `xValues` holds the
    first entry of every row and `yValues` holds one list per remaining
    column.
    """
    pattern = re.compile(regex)
    rows = []

    # The context manager closes the file even if `postFunction` or the float
    # conversion raises part-way through the file.
    with open(filename) as fp:
        # NOTE(review): the counter advances on every line of the file,
        # matching or not, so autofilled x values are line numbers - confirm
        # this is the intended meaning of "line number".
        for lineNumber, text in enumerate(fp):
            match = pattern.match(text.strip())
            if match is None:
                continue

            if postFunction is not None:
                # Copy into a list so that a tuple result can still take the
                # prepended x value and sorts consistently with other rows.
                row = list(postFunction(match.groups()))
            else:
                row = [float(group) for group in match.groups()]

            if len(row) < 2 or autofillXValues:
                row.insert(0, lineNumber)

            rows.append(row)

    rows.sort()

    xValues = []
    yValues = []

    for row in rows:
        if not yValues:
            # One y series per column after the leading x column
            yValues = [[] for _ in range(len(row) - 1)]

        xValues.append(row[0])

        for column, value in enumerate(row[1:]):
            yValues[column].append(value)

    return [xValues, yValues]
def getLinesFromFile(filename, regex, postFunction=None, autofillXValues=False):
    r"""
    Turn a regularly-structured file into a collection of
    :class:`boomslang.Line.Line` objects.

    Parses each line in `filename` using the regular expression `regex`. By
    default, the first matching group from the regular expression gives the
    x-axis value for a set of points and all subsequent matching groups give
    the y-axis values for each line. If `postFunction` is not None, it is a
    function that is applied to the matching groups before they are inserted
    into the lines; otherwise every matching group is converted to a float.
    If `autofillXValues` is True, all matching groups are treated as y-axis
    values for lines and the x-axis value is the line number, indexed from 0.

    Returns a list of :class:`boomslang.Line.Line` objects.

    **Example:** Suppose I had a file `blah.txt` that looked like this::

       1980 - 1, 2, 3
       1981 - 4, 5, 6
       1982 - 7, 8, 9

    The snippet below shows the result of running
    :py:func:`boomslang.Utils.getLinesFromFile` on `blah.txt`:

    >>> lines = boomslang.Utils.getLinesFromFile("blah.txt", r"(\d+) - (\d+), (\d+), (\d+)")
    >>> len(lines)
    3
    >>> lines[0].xValues
    [1980.0, 1981.0, 1982.0]
    >>> lines[1].xValues
    [1980.0, 1981.0, 1982.0]
    >>> lines[2].xValues
    [1980.0, 1981.0, 1982.0]
    >>> lines[0].yValues
    [1.0, 4.0, 7.0]
    >>> lines[1].yValues
    [2.0, 5.0, 8.0]
    >>> lines[2].yValues
    [3.0, 6.0, 9.0]
    """
    (xValues, yValues) = getXYValsFromFile(filename, regex, postFunction,
                                           autofillXValues)

    lines = []

    for ySeries in yValues:
        line = Line()
        # Each line receives its own copies of the value lists, so the
        # returned lines do not share list objects with one another.
        line.xValues = xValues[:]
        line.yValues = ySeries[:]
        lines.append(line)

    return lines
def getBarsFromFile(filename, regex, postFunction=None, autofillXValues=False):
    """
    Turns a regularly-structured file into a collection of
    :class:`boomslang.Bar.Bar` objects.

    For more details on arguments, see :py:func:`getLinesFromFile`.

    Returns a list of :class:`boomslang.Bar.Bar` objects, one per column of
    y-axis values parsed from the file.
    """
    (xValues, yValues) = getXYValsFromFile(filename, regex, postFunction,
                                           autofillXValues)

    bars = []

    for ySeries in yValues:
        bar = Bar()
        # Each bar receives its own copies of the value lists, so the
        # returned bars do not share list objects with one another.
        bar.xValues = xValues[:]
        bar.yValues = ySeries[:]
        bars.append(bar)

    return bars
def cdf(values):
    """
    Returns a :class:`boomslang.Line.Line` representing the CDF of the list of
    values given in `values`.

    The line's x-axis values are the entries of `values` in ascending order.
    The y-axis value paired with the k-th smallest entry (counting from 1) is
    k / len(values). `values` may be any iterable and is left unmodified.
    """
    line = Line()

    # sorted() always builds a new list, so the caller's data is never
    # reordered, even when slicing `values` would not produce a copy.
    sortedValues = sorted(values)
    count = len(sortedValues)

    line.xValues = sortedValues
    # float() keeps the division fractional on Python 2 as well as Python 3
    line.yValues = [float(rank) / count for rank in range(1, count + 1)]

    return line
def getCDF(values):
    """
    Alias for :py:func:`cdf`: returns the result of calling it on `values`.
    """
    cdfLine = cdf(values)
    return cdfLine
def histogram(values, binSize):
    """
    Returns a :class:`boomslang.Line.Line` representing a histogram of the list
    of values given in `values` with bin size `binSize`.

    Each value is counted in the bin with index floor(value / binSize). The
    line has one point per bin, in ascending bin order: the x-axis value is
    the bin's lower edge (index * binSize) and the y-axis value is the number
    of values counted in that bin. Every populated bin whose immediate
    successor is unpopulated is followed by a bin with a count of zero,
    presumably so that the 'post' step drawing drops back to zero after the
    last bin of each run.
    """
    line = Line()
    line.stepFunction('post')

    bins = collections.defaultdict(int)

    for value in values:
        # Floor division yields the bin index for ints and floats alike, on
        # both Python 2 and Python 3.
        bins[value // binSize] += 1

    # Iterate over a snapshot of the keys because bins gains entries inside
    # the loop. Bins are keyed by index, so the successor is index + 1.
    for currentBin in list(bins):
        nextBin = currentBin + 1

        if nextBin not in bins:
            bins[nextBin] = 0

    for currentBin in sorted(bins):
        line.xValues.append(currentBin * binSize)
        line.yValues.append(bins[currentBin])

    return line
def _check_min_matplotlib_version(*min_version_pieces):
    """
    Compares the installed matplotlib version against a minimum version.

    `min_version_pieces` are the numeric components of the minimum version,
    most significant first, e.g. ``_check_min_matplotlib_version(1, 0, 1)``.
    ``matplotlib.__version__`` is cut off at the first "rc" marker, split on
    dots, and each component is converted to an integer before the comparison
    is delegated to :py:func:`_check_min_version`, whose result is returned.
    """
    def version_piece_to_int(piece):
        # Use the leading run of digits so that components carrying a
        # pre-release, dev or post suffix (e.g. "0b1") do not raise
        # ValueError. Components without leading digits, such as the "x"
        # placeholder or "dev0", count as 0.
        digits = re.match(r"[0-9]+", piece.strip())

        if digits is None:
            return 0

        return int(digits.group(0))

    def trimrc(ver):
        # Drop a release-candidate suffix: keeps everything before "rc"
        rc_match = re.match("(.*?)rc[0-9]*", ver)

        if rc_match is not None:
            return rc_match.group(1)
        else:
            return ver

    version_pieces = [version_piece_to_int(x)
                      for x in trimrc(matplotlib.__version__).split('.')]

    return _check_min_version(version_pieces, min_version_pieces)
def _check_min_version(version_pieces, min_version_pieces):
    """
    Returns True if the version given by `version_pieces` is at least the
    version given by `min_version_pieces`, and False otherwise.

    Both arguments are sequences of integers, most significant component
    first. Only as many components as `min_version_pieces` contains take part
    in the comparison; components missing from `version_pieces` are treated
    as 0. An empty `min_version_pieces` yields False.
    """
    required = list(min_version_pieces)

    if not required:
        return False

    actual = list(version_pieces[:len(required)])
    # Pad a short version with zeros (e.g. 1.0 compared against 1.0.1)
    # instead of failing with an IndexError.
    actual.extend([0] * (len(required) - len(actual)))

    # List comparison is lexicographic: the first differing component decides
    # the outcome, and fully equal prefixes satisfy the minimum.
    return actual >= required