# rankingsScraper.py
# Scrapes the BBC Premier League table and writes the ranking to rankings.csv.
import urllib
import urllib.error
import urllib.request

from bs4 import BeautifulSoup
def convertNameToBetfairName(name):
    """Shorten a full club name by dropping or abbreviating common suffixes.

    Names containing ``'Man'`` (e.g. "Manchester United") keep a suffix:
    ``'chester'`` is removed and ``'United'`` becomes ``'Utd'``. Every other
    name has each fragment listed in ``extensions`` deleted. The target
    format is presumably Betfair's team naming (going by the function
    name); that cannot be confirmed from this file.

    Removal is plain substring replacement, so whitespace next to a removed
    fragment is left in place (``'Tottenham Hotspur'`` -> ``'Tottenham '``).
    Strip the result if surrounding whitespace matters.

    Args:
        name: Team name as a string.

    Returns:
        The shortened team name.
    """
    # Fragments need not be whole words: removing 'rystal' turns "Crystal"
    # into "C", and removing 'wich' turns "Bromwich" into "Brom".
    extensions = ('City', 'United', 'Hotspur', 'Albion', 'rystal', 'wich')

    if 'Man' in name:
        # These names keep their suffix so they stay distinguishable
        # ("Man City" vs "Man Utd"), so the generic stripping is skipped.
        return name.replace('chester', '').replace('United', 'Utd')

    for ext in extensions:
        name = name.replace(ext, '')
    return name
# Fetch the BBC Premier League table page and write the club ranking to
# rankings.csv as a "Ranking,Team" header followed by one "<rank>,<team>" row
# per club.
try:
    # The context manager closes the HTTP response once the page is parsed.
    with urllib.request.urlopen("http://www.bbc.co.uk/sport/football/premier-league/table") as web_page:
        soup = BeautifulSoup(web_page, 'lxml')
except urllib.error.HTTPError:
    # HTTPError is a subclass of URLError, so it has to be handled first.
    print("HTTPERROR!")
except urllib.error.URLError:
    print("URLERROR!")
else:
    # Only the first 20 'team-name' cells are used; they are presumably in
    # league-table order, which depends on the page markup.
    teams = [td.find('a').getText() for td in soup.findAll('td', class_='team-name')[0:20]]

    # `with` guarantees the file is flushed and closed even if a write or the
    # name conversion raises part-way through.
    with open('rankings.csv', 'w') as out:
        out.write('Ranking,Team\n')
        for rank, team in enumerate(teams, start=1):
            # The converter can leave whitespace where a suffix was removed.
            out.write(str(rank) + ',' + convertNameToBetfairName(team).strip() + '\n')