-
Notifications
You must be signed in to change notification settings - Fork 5
/
main.py
82 lines (65 loc) · 2.4 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env python3
#packages to install:
# python3 -m pip install soupsieve
# python3 -m pip install beautifulsoup4
# python3 -m pip install bs4
# python3 -m pip install dnspython
# python3 -m pip install pymongo
import requests #Lib to make web requests
from bs4 import BeautifulSoup
from datetime import datetime
#Read in url-list
# Files and settings used by the script.
URL_LIST_PATH = 'url_list1.txt'   # one playlist URL per line
PLAYLIST_DIR = 'playlists/'       # one "<station>.txt" file per station
STATS_PATH = 'stat.txt'           # append-only log: date, station, song count
STATION_MARKER = 'de.'            # the station name follows this text in the URL
REQUEST_TIMEOUT = 30              # seconds; without it a stalled server blocks the run forever


def _read_lines(path):
    """Return the lines of text file *path* with trailing whitespace removed.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    with open(path) as handle:
        return [line.rstrip() for line in handle]


def _station_from_url(url):
    """Return the station name embedded in *url*, or None if there is none.

    The station name is the text after the first 'de.' in the URL (up to the
    next 'de.', if the marker occurs more than once).
    """
    parts = url.split(STATION_MARKER)
    return parts[1] if len(parts) > 1 else None


def _scrape_titles(url):
    """Download *url* and return the link texts found in its playlist table.

    Raises:
        requests.RequestException: if the page cannot be downloaded.
        LookupError: if the page has no table of class "tablelist-schedule".
    """
    html = requests.get(url, timeout=REQUEST_TIMEOUT).text
    soup = BeautifulSoup(html, "html.parser")
    table = soup.find("table", attrs={"class": "tablelist-schedule"})
    if table is None:
        # find() returns None when nothing matches; fail explicitly instead
        # of tripping over an AttributeError on the next line.
        raise LookupError('playlist table not found')
    return [link.text for link in table.find_all("a")]


def _save_playlist(station, titles):
    """Overwrite the playlist file of *station* with *titles*, one per line."""
    with open(PLAYLIST_DIR + station + '.txt', 'w') as handle:
        # A single joined write is enough; no trailing newline is written.
        handle.write('\n'.join(titles))


def _append_stats(station, count):
    """Append a "date<TAB>station<TAB>count" line to the statistics file."""
    today = datetime.now().strftime("%m/%d/%Y")
    with open(STATS_PATH, 'a') as handle:
        handle.write(today + '\t' + station + '\t' + str(count) + '\n')


def _update_station(url):
    """Merge the online playlist at *url* into the station's stored playlist.

    Progress and errors are reported with print(); a failure only abandons
    this URL, it never raises to the caller for the handled error cases.
    """
    print(url)
    station = _station_from_url(url)
    if station is None:
        # Previously an uncaught IndexError here aborted the whole run.
        print('error: No station name found in URL')
        return
    print(station)

    # Start from the stored playlist, if there is one.
    try:
        titles = _read_lines(PLAYLIST_DIR + station + '.txt')
    except OSError:
        print('error: Playlist File does not exist')
        titles = []
    print(len(titles))  # number of songs before the update

    # Add whatever the website currently lists.
    try:
        titles.extend(_scrape_titles(url))
    except (requests.RequestException, LookupError):
        print('error: No online data found')
        return

    # Drop duplicates and sort alphabetically.
    titles = sorted(set(titles))
    print(len(titles))  # number of songs after the update

    try:
        _save_playlist(station, titles)
        _append_stats(station, len(titles))
    except OSError:
        # Reported separately so a write failure (e.g. a missing playlists
        # directory) is not mistaken for a scraping problem.
        print('error: Could not write playlist or statistics file')


def main(url_list_path=URL_LIST_PATH):
    """Update the playlist of every URL listed in *url_list_path*.

    Args:
        url_list_path: text file with one playlist URL per line; blank lines
            are ignored.
    """
    try:
        urls = _read_lines(url_list_path)
    except OSError:
        print('error: URL-File does not exist')
        return

    for url in urls:
        if url:
            _update_station(url)


# Runs at module level, exactly like the original flat script did.
main()
# EOF