-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlyricsscrap.py
36 lines (33 loc) · 1.18 KB
/
lyricsscrap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import random
import re
import urllib.parse
import urllib.request
from time import sleep
# Scrape the lyrics of every song linked from an artist's AZLyrics page and
# write them, one song after another, to lyrics.txt.
url = "https://www.azlyrics.com/a/artist.html"

# Tail of the HTML comment that this script uses to locate the lyrics: the
# text between this marker and the next '</div>' is treated as the lyrics.
LYRICS_MARKER = (
    'content by any third-party lyrics provider is prohibited by our '
    'licensing agreement. Sorry about that. -->'
)


def _fetch_html(page_url):
    """Download *page_url* and return its body as decoded text.

    The charset announced in the response headers is used when present,
    otherwise UTF-8. Undecodable bytes are replaced instead of raising, so a
    single bad byte does not abort the scrape. Network errors raised by
    ``urlopen`` propagate to the caller.
    """
    with urllib.request.urlopen(page_url) as response:
        charset = response.headers.get_content_charset() or 'utf-8'
        # Decode the bytes; str(bytes) would yield the "b'...'" repr with
        # escaped newlines and mangled non-ASCII characters.
        return response.read().decode(charset, errors='replace')


def _extract_lyrics(song_html):
    """Return the plain-text lyrics found in *song_html*.

    Returns ``None`` when the page does not contain ``LYRICS_MARKER``, so the
    caller can skip pages that are not lyrics pages.
    """
    _, marker, tail = song_html.partition(LYRICS_MARKER)
    if not marker:
        return None
    lyrics = tail.split('</div>', 1)[0]
    lyrics = lyrics.replace('\r\n', '\n')
    # A <br> and the source newline right after it are one visual line break;
    # fold the pair into a single newline so lines are not double-spaced.
    lyrics = re.sub(r'<br>\n?', '\n', lyrics)
    for token in ('<i>', '</i>', '[Chorus]'):
        lyrics = lyrics.replace(token, '')
    return lyrics


# The context manager closes the file even if a download fails part-way.
# UTF-8 is explicit so non-ASCII lyrics do not depend on the platform default.
with open('lyrics.txt', 'w', encoding='utf-8') as originalLyrics:
    # Find all links on the artist's AZLyrics page.
    artistHtmlStr = _fetch_html(url)
    links = re.findall('href="([^"]+)"', artistHtmlStr)

    # Keep only song links and resolve them against the artist page URL, which
    # handles '../lyrics/...', '/lyrics/...' and absolute hrefs alike.
    songLinks = [
        urllib.parse.urljoin(url, link)
        for link in links
        if "lyrics/artist" in link
    ]

    for songLink in songLinks:
        lyrics = _extract_lyrics(_fetch_html(songLink))
        if lyrics is not None:
            originalLyrics.write(lyrics)
        # Random 2-10 second pause after each request (presumably to throttle
        # the scraper; kept from the original script).
        sleep(random.randint(2, 10))