-
Notifications
You must be signed in to change notification settings - Fork 0
/
viewcounts.py
executable file
·43 lines (34 loc) · 1.2 KB
/
viewcounts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# -*- coding: utf-8 -*-
from __future__ import division, print_function
import pdb
import random
import re
import time
import urllib2
from decorators import Cached
class WikipediaViewCounts(object):
    """Fetch page-view counts for article titles from stats.grok.se.

    Counts are read from the per-month pages under
    ``http://stats.grok.se/en/2015<MM>/<title>`` by extracting the number
    that follows the phrase "has been viewed" in the returned page.
    """

    # Zero-padded months of 2015 whose counts are summed. To cover the whole
    # year use: [unicode(i).zfill(2) for i in range(1, 13)]
    MONTHS = (u'02',)

    # Failed requests tolerated per month before the month is given up on.
    MAX_TRIALS = 5

    def __init__(self):
        self.count = {}
        self.url = 'http://stats.grok.se/en/2015'

    @Cached
    def get_frequency(self, title):
        """Return the view count for ``title``.

        Delegates to :meth:`retrieve_frequency`; the call is wrapped by the
        project's ``Cached`` decorator (presumably memoising results per
        title -- confirm against ``decorators.Cached``).
        """
        return self.retrieve_frequency(title)

    def retrieve_frequency(self, title):
        """Download and sum the view counts of ``title`` over ``MONTHS``.

        Spaces in ``title`` are replaced by underscores and an escaped
        ``%2F`` is turned back into ``/`` before the title is UTF-8 encoded
        and URL-quoted.

        A month whose page cannot be downloaded after ``MAX_TRIALS`` failed
        attempts is reported on stdout and contributes 0 views, so the call
        always terminates.

        Returns:
            int: the summed number of views.

        Raises:
            IndexError: if a downloaded page does not contain the
                "has been viewed <number>" phrase.
        """
        title = title.replace(' ', '_').replace('%2F', '/')
        quoted_title = urllib2.quote(title.encode('utf-8'))
        views = 0
        for month in self.MONTHS:
            # Each month gets its own download and its own retry budget;
            # reusing the previous month's page would double-count it.
            data = self._download(self.url + month + '/' + quoted_title, title)
            if not data:
                continue
            views += int(re.findall(r'has been viewed (\d+)', data)[0])
        return views

    def _download(self, url, title):
        """Return the body of ``url``, or ``''`` once retries are exhausted.

        Both request errors and empty bodies count as failed trials. Between
        trials the method sleeps for a random whole number of seconds
        (0 to 4) before asking again.
        """
        trials = 0
        while True:
            try:
                response = urllib2.urlopen(url)
                try:
                    data = response.read()
                finally:
                    # Always release the connection, even if read() fails.
                    response.close()
            except (urllib2.HTTPError, urllib2.URLError) as e:
                problem = e
            else:
                if data:
                    return data
                problem = 'empty response'
            trials += 1
            if trials > self.MAX_TRIALS:
                print(title, problem)
                return ''
            time.sleep(int(random.random() * 5))
# Module-level WikipediaViewCounts instance, created when this module is
# imported.
viewcount = WikipediaViewCounts()