forked from KenT2/tboplayer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhtmlparsers.py
33 lines (25 loc) · 834 Bytes
/
htmlparsers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
try:
    from HTMLParser import HTMLParser  # Python 2
except ImportError:
    from html.parser import HTMLParser  # Python 3
class LyricWikiParser(HTMLParser):
    """Collect lyric text from an HTML page into ``result``.

    Collection starts at a ``<div>`` that has any attribute value containing
    ``"lyricbox"`` and stops at the next ``</div>`` end tag (including one
    that closes a nested div).  While collecting:

    * numeric character references (``&#72;``, ``&#x48;``, ``&#X48;``) are
      decoded and appended to ``result``;
    * ``<br>`` and ``<br/>`` each append a newline.

    Plain text and named entities are not collected: only the handlers
    defined below add to ``result``.

    Usage: create an instance, call ``feed(html)``, then read ``result``.
    """

    # Class-level defaults; __init__ re-binds both per instance.
    result = ""   # text collected so far
    grab = False  # True while inside the lyric <div>

    def __init__(self):
        HTMLParser.__init__(self)
        # Python 3's parser decodes character references itself by default
        # and then never calls handle_charref; switch that off so numeric
        # references keep reaching this class.  On Python 2 this is just an
        # unused attribute.
        self.convert_charrefs = False
        self.result = ""
        self.grab = False

    def handle_starttag(self, tag, attrs):
        """Start collecting at the lyric ``<div>``; treat ``<br>`` as newline."""
        if tag == "div":
            # Valueless attributes (``<div hidden>``) arrive as (name, None),
            # so guard before the substring test.
            if any(value and "lyricbox" in value for _, value in attrs):
                self.grab = True
        else:
            self._handle_line_break(tag)

    def handle_startendtag(self, tag, attrs):
        """Treat a self-closing ``<br/>`` as a newline while collecting."""
        self._handle_line_break(tag)

    def handle_endtag(self, tag):
        """Stop collecting at the first ``</div>`` seen while collecting."""
        if self.grab and tag == "div":
            self.grab = False

    def handle_charref(self, name):
        """Append the character for a numeric reference while collecting.

        ``name`` is the reference without ``&#`` and ``;``: decimal digits,
        or ``x``/``X`` followed by hex digits.  References that are malformed
        or outside the interpreter's code-point range are skipped so one bad
        reference does not abort the whole parse.
        """
        if not self.grab:
            return
        try:
            to_char = unichr  # Python 2
        except NameError:
            to_char = chr  # Python 3
        try:
            if name.startswith(("x", "X")):
                code_point = int(name[1:], 16)
            else:
                code_point = int(name)
            char = to_char(code_point)
        except (ValueError, OverflowError):
            # Deliberate best-effort: drop the undecodable reference.
            return
        self.result += char

    def _handle_line_break(self, tag):
        """Append a newline for a ``br`` tag while collecting."""
        if self.grab and tag == "br":
            self.result += "\n"