lyrics · executable file · 160 lines (140 loc) · 4.76 KB
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# lyrics is a Python 3 script to retrieve lyrics from azlyrics.com
# The basic usage is
#
#     lyrics [<args>] keyword...
#
# For more information on the arguments, run
#
#     lyrics --help
#
# lyrics can be used to retrieve lyrics for the currently playing song
# if your audio player is capable of printing the details of the current
# song on the standard output.
#
# * Example with keyword:
#
#     lyrics rebecca black friday
#
# * Example with MPD:
#
#     lyrics $(mpc --format '%artist% %title%' | head -n 1)
#
# * Example with DeaDBeeF:
#
#     lyrics $(deadbeef --nowplaying '%a %t' 2>/dev/null)
#
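# * Example with the -c/--cat flag, which writes to stdout so the output
#   can be piped to other tools (grep here is just one possible consumer):
#
#     lyrics -c rebecca black friday | grep -i friday
#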
# Requires: Python 3 with BeautifulSoup >= 4.0 and Requests
#
import argparse
import os
import re
import requests
import subprocess
import sys
from bs4 import BeautifulSoup
from pydoc import pager
from unicodedata import normalize
# Use Google's "I'm Feeling Lucky" to "fuzzy" search for the right page.
SEARCH_URL = "https://www.google.com/search?hl=en&btnI&q=site:www.azlyrics.com"
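# get_lyrics() appends the sanitized keyword plus the word "lyrics", so a
# search for "rebecca black friday" requests
#     .../search?hl=en&btnI&q=site:www.azlyrics.com+rebecca+black+friday+lyrics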
REDIRECT_RE = r"href=['\"](https?://www.azlyrics.com/[^'\"]*)['\"]"
HEADERS = {"User-Agent": "Mozilla/5.0 (compatible)", "Referer": "www.google.com"}
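# Downloaded lyrics are cached as plain-text files so that repeat lookups
# (and -e/--edit) do not hit the network again.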
CACHE_DIR = os.path.join(os.path.expanduser("~"), ".cache/lyrics")
if not os.path.exists(CACHE_DIR):
    os.makedirs(CACHE_DIR)
EDITOR = os.environ.get("EDITOR", "vi")


class PrintError(Exception):
    pass


def sanitize(string):
    """Normalize a keyword into an ASCII, '+'-separated query string.

    e.g. 'Rebecca Black "Friday" (Official)' -> 'rebecca+black+friday'
    """
    # Transliterate to ASCII and lowercase.
    string = normalize("NFKD", string).encode("ASCII", "ignore")
    string = string.decode("ASCII")
    string = string.lower()
    # Drop quotes and any bracketed annotations such as "(remix)".
    string = re.sub(r"['\"]+", "", string)
    string = re.sub(r"[([{][^\]})]*[\]})]", "", string)
    # Trim leading/trailing punctuation, then join the words with '+'.
    string = re.sub(r"^\W+", "", string)
    string = re.sub(r"\W+$", "", string)
    string = re.sub(r"\W+", "+", string, flags=re.ASCII)
    return string


def get_lyrics(keyword):
    """Fetch the lyrics page and extract artist, title and lyrics with BeautifulSoup."""
    keyword = sanitize(keyword)
    url = "+".join([SEARCH_URL, keyword, "lyrics"])
    req = requests.get(url, headers=HEADERS)
    # Google seems to be using JavaScript for redirection these days,
    # so follow the azlyrics.com link embedded in the interstitial page.
    if re.search(r"unauthorizedredirect", req.text):
        new_url = re.search(REDIRECT_RE, req.text).group(1)
        req = requests.get(new_url, headers=HEADERS)
    # requests seems to have trouble realizing that azlyrics.com
    # uses UTF-8 encoding, so specify it explicitly.
    req.encoding = "utf-8"
    # If lxml is available, prefer it over the stdlib parser.
    if "lxml" in sys.modules:
        soup = BeautifulSoup(req.text, features="lxml")
    else:
        soup = BeautifulSoup(req.text, features="html.parser")
    div = soup.find("div", attrs={"class": "col-xs-12 col-lg-8 text-center"})
    try:
        # The artist heading reads "<Artist> Lyrics"; the title is the bare,
        # quoted <b> child of the main column.
        artist = div.find("div", attrs={"class": "lyricsh"}).find("h2").text
        artist = re.sub(r"(^\s*|\s*Lyrics\s*$)", "", artist)
        title = div.find("b", recursive=False).text
        title = re.sub(r'(^[\s"]*|[\s"]*$)', "", title)
    except AttributeError:
        raise PrintError("Error retrieving lyrics for the given keyword.\n" + req.url)
    # The lyrics live in <div> elements with neither class nor id attributes.
    lyrics = div.find_all("div", attrs={"class": None, "id": None})
    if not lyrics:
        raise PrintError("Error extracting lyrics from HTML.\n" + req.url)
    lyrics = "".join([block.get_text().strip() for block in lyrics])
    return artist, title, lyrics, req.url


def main():
    """Parse arguments, fetch or load cached lyrics, and display them."""
    arg_parser = argparse.ArgumentParser(
        prog="lyrics",
        description="retrieve lyrics from azlyrics.com using keywords",
    )
    arg_parser.add_argument(
        "-f",
        "--force",
        action="store_true",
        default=False,
        help="force download lyrics (overwrites cache)",
    )
    group = arg_parser.add_mutually_exclusive_group()
    group.add_argument(
        "-e",
        "--edit",
        action="store_true",
        help="edit cached lyrics file using $EDITOR",
    )
    group.add_argument(
        "-c",
        "--cat",
        action="store_true",
        help="write to stdout instead of using pager",
    )
    arg_parser.add_argument("keyword", help="search keywords", nargs="+")
    args = arg_parser.parse_args()

    keyword = " ".join(args.keyword)
    cache_file = os.path.join(CACHE_DIR, sanitize(keyword) + ".txt")
    if args.edit:
        subprocess.call([EDITOR, cache_file])
        return
    # Reuse the cached copy unless --force asks for a fresh download.
    if os.path.exists(cache_file) and not args.force:
        with open(cache_file, "r") as fd:
            text = fd.read()
    else:
        try:
            data = get_lyrics(keyword)
            text = "{} - {}\n---\n\n{}\n\n{}".format(*data)
            with open(cache_file, "w") as fd:
                fd.write(text)
        except PrintError as e:
            # Report the failure and exit non-zero so callers can detect it.
            print(e, file=sys.stderr)
            sys.exit(1)
    if args.cat:
        print(text)
    else:
        pager(text)


if __name__ == "__main__":
    sys.exit(main())