-
Notifications
You must be signed in to change notification settings - Fork 0
/
timeseries.py
84 lines (73 loc) · 2.75 KB
/
timeseries.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import statistics
import datetime
import manage_articles as mng
def getFirstDateFromIsoWeek(p_year,p_week):
    '''
    Returns the first day (Monday) as full date of the ISO 8601 calendar week ``p_week`` in ISO year ``p_year``.

    Both arguments may be integers or numeric strings. The result is a ``datetime.date``
    and may lie in the previous calendar year (ISO week 1 of 2019 starts on 2018-12-31).
    Whether ``p_year`` really has a week 53 is not checked; for a year with only 52 weeks,
    week 53 yields the Monday of week 1 of the following ISO year.

    Raises ``ValueError`` if ``p_week`` is outside 1..53 or an argument is not numeric.
    '''
    year = int(p_year)
    week = int(p_week)
    if not 1 <= week <= 53:
        raise ValueError(f'ISO week must be in 1..53, got {week}')
    # January 4th always belongs to ISO week 1, so the Monday on or before it
    # is the first day of week 1. (The strptime directive %W counts weeks from
    # the first Monday of the year instead, which disagrees with ISO numbering
    # for most years.)
    jan4 = datetime.date(year, 1, 4)
    week1_monday = jan4 - datetime.timedelta(days=jan4.weekday())
    return week1_monday + datetime.timedelta(weeks=week - 1)
class Timepoint:
    '''
    A single weekly data point (used by class ``Timeseries``).

    Built from one entry that is indexed as ``((year, week), count)``. Stores the
    week key as ``isoweek``, its first element as ``year``, the first day of that
    week as ``date`` (via ``getFirstDateFromIsoWeek``) and the count as ``count``.
    '''
    def __init__(self,countPerWeek):
        week_key = countPerWeek[0]
        iso_year = week_key[0]
        self.isoweek = week_key
        self.year = iso_year
        # the date is derived from the year/week pair, not stored in the input
        self.date = getFirstDateFromIsoWeek(iso_year, week_key[1])
        self.count = countPerWeek[1]
class Timeseries:
    '''
    Ordered collection of ``Timepoint`` objects with accessors for their fields.

    The constructor takes a sequence of entries and wraps each one in a
    ``Timepoint``; the resulting list is kept in ``self.timepoints`` in input order.
    '''
    def __init__(self,countsPerWeek):
        self.timepoints = []
        self.timepoints.extend(Timepoint(entry) for entry in countsPerWeek)

    def getCounts(self):
        '''Returns the ``count`` of every timepoint, in stored order.'''
        return [point.count for point in self.timepoints]

    def getDates(self):
        '''Returns the ``date`` of every timepoint, in stored order.'''
        return [point.date for point in self.timepoints]

    def getIsoWeeks(self):
        '''Returns the ``isoweek`` key of every timepoint, in stored order.'''
        return [point.isoweek for point in self.timepoints]

    def getStartDate(self):
        '''Returns the earliest date in the series (``IndexError`` if the series is empty).'''
        ordered = self.getDates()
        ordered.sort()
        return ordered[0]

    def getEndDate(self):
        '''Returns the latest date in the series (``IndexError`` if the series is empty).'''
        ordered = self.getDates()
        ordered.sort()
        return ordered[-1]

    # NOTE(review): the standard-library ``statistics`` module provides neither
    # ``findOutliers`` nor ``getVariance``; presumably a project-local module of
    # the same name is imported here - confirm.
    def getOutliers(self):
        '''Passes the counts to ``statistics.findOutliers`` and returns its result.'''
        counts = self.getCounts()
        return statistics.findOutliers(counts)

    def getVariance(self):
        '''Passes the counts to ``statistics.getVariance`` and returns its result.'''
        counts = self.getCounts()
        return statistics.getVariance(counts)
def parseWikipediaCounts(wiki):
    '''
    Aggregates per-date counts into per-ISO-week totals and returns them as a ``Timeseries``.

    ``wiki[0]`` holds objects offering ``isocalendar()`` and ``wiki[1]`` holds the
    count belonging to the same position. Counts sharing the same (year, week)
    pair are summed; weeks keep the order in which they first appear.
    '''
    weekly_totals = {}
    for position, day in enumerate(wiki[0]):
        iso = day.isocalendar()
        week_key = (iso[0], iso[1])
        amount = wiki[1][position]
        # first occurrence stores the count as-is, later ones are added on top
        weekly_totals[week_key] = weekly_totals[week_key] + amount if week_key in weekly_totals else amount
    return Timeseries(list(weekly_totals.items()))
def alignTimeseries(seriesA, seriesB):
    '''
    Removes timepoints of format ((year,week),count) from Timeseries ``seriesA`` and ``seriesB`` if timepoint does not occur in both series.

    Both series are modified in place (their existing ``timepoints`` list objects
    are kept and the order of the remaining timepoints is unchanged) and are also
    returned as the tuple ``(seriesA, seriesB)``. Week keys must be hashable.
    '''
    # A set gives constant-time membership tests for the filtering below.
    weeks_in_both = set(seriesA.getIsoWeeks()) & set(seriesB.getIsoWeeks())
    for series in (seriesA, seriesB):
        # Slice assignment filters in one pass while keeping the same list
        # object, so outside references to ``timepoints`` stay valid.
        series.timepoints[:] = [
            point for point in series.timepoints if point.isoweek in weeks_in_both
        ]
    return seriesA,seriesB