Skip to content

Commit 8d3fd8f

Browse files
authored
Fix issue (#9) and Improve performance (#10)
* remove dependencies & add node-html-parser * use node https builtin * improve scrape performance * eslint allow explicit any * adjust logic with multiple sources * prettier * remove .npmignore * update scripts * bump package version to v2.4.0 * fix typo
1 parent 3b14860 commit 8d3fd8f

File tree

15 files changed

+235
-547
lines changed

15 files changed

+235
-547
lines changed

.eslintrc.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@
5151
"comma-dangle": ["error", "always-multiline"],
5252
"no-trailing-spaces": "error",
5353
"prettier/prettier": "off",
54-
"@typescript-eslint/no-explicit-any": "warn",
54+
"@typescript-eslint/no-explicit-any": "off",
5555
"@typescript-eslint/no-unused-vars": [
5656
"error",
5757
{ "argsIgnorePattern": "^_" }

.npmignore

Whitespace-only changes.

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
<!-- URLs -->
2+
23
[repo]: https://github.com/vookav2/songlyrics
34
[issues]: https://github.com/vookav2/songlyrics/issues
45
[nodejs]: https://nodejs.org/
@@ -125,7 +126,6 @@ Should you have any questions or concerns please contact me directly via [email]
125126

126127
## License
127128

128-
Distributed under the [MIT](https://github.com/vookav2/searchmusic/blob/main/LICENSE) License.
129-
130-
([⬆ back to top](#songlyrics))
129+
Distributed under the [MIT](https://github.com/vookav2/songlyrics/blob/main/LICENSE) License.
131130

131+
([⬆ back to top](#songlyrics))

package.json

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,18 @@
11
{
22
"name": "songlyrics",
3-
"version": "2.3.3",
3+
"version": "2.4.0",
44
"description": "Find a lyrics just enter the song title",
55
"keywords": [
66
"lyrics",
77
"songlyrics",
88
"unofficial"
99
],
1010
"scripts": {
11-
"ts:watch": "ts-node-dev --respawn --transpile-only --poll ./src/index.ts",
12-
"ts:build": "rimraf ./build && tsc",
11+
"dev": "ts-node-dev --respawn --cls --transpile-only --poll ./src/index.ts",
1312
"build": "rimraf ./build && tsc",
1413
"pretty": "prettier --write .",
1514
"lint": "eslint . --ext .ts",
16-
"prepare": "husky install && yarn build"
15+
"prepare": "husky install"
1716
},
1817
"main": "./build/index.js",
1918
"types": "./build/index.d.ts",
@@ -27,10 +26,7 @@
2726
"author": "mr687 <davinomoehdanino@gmail.com>",
2827
"license": "MIT",
2928
"dependencies": {
30-
"cheerio": "^1.0.0-rc.11",
31-
"duck-duck-scrape": "^2.2.1",
32-
"got": "^11.8.2",
33-
"user-agents": "^1.0.793"
29+
"node-html-parser": "^5.3.3"
3430
},
3531
"devDependencies": {
3632
"@types/node": "^16.10.2",

src/index.ts

Lines changed: 2 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1,58 +1,4 @@
1-
/* eslint-disable no-await-in-loop */
2-
/* eslint-disable complexity */
3-
4-
import { request, webSearch } from './lib'
5-
6-
import { format } from 'util'
7-
import sources from './sources'
8-
9-
const cleanTitleRegexp = /\s(-.+|\[.+\]|\(.+\))/g
10-
const cleanTitle = (title: string) => title.replace(cleanTitleRegexp, '').trim()
11-
const lyricsNotFound = () => {
12-
throw new Error('No lyrics found!')
13-
}
14-
15-
export type TLyrics = {
16-
lyrics: string
17-
source: {
18-
name: string
19-
url: string
20-
link: string
21-
}
22-
}
23-
24-
const songlyrics = async (title: string): Promise<TLyrics | undefined> => {
25-
const queryFormat = '%s site:%s'
26-
const cleanedTitle = cleanTitle(title).toLowerCase()
27-
const query = (sourceUrl: string) =>
28-
format(queryFormat, cleanedTitle, sourceUrl)
29-
30-
for (const source of sources) {
31-
const sourceUrl = `${source.hostname}${source.path}`
32-
const { noResults, results } = await webSearch(query(sourceUrl))
33-
if (noResults) {
34-
lyricsNotFound()
35-
}
36-
37-
const [result] = results
38-
if (result.hostname !== source.hostname || !result.url) {
39-
lyricsNotFound()
40-
}
41-
42-
const response = await request(result.url)
43-
const lyrics = await source.parse(response.body)
44-
45-
return {
46-
lyrics: lyrics,
47-
source: {
48-
name: result.title.replace(/\s(\|.+)/g, ''),
49-
url: `https://${result.hostname}`,
50-
link: result.url,
51-
},
52-
}
53-
}
54-
55-
lyricsNotFound()
56-
}
1+
import { TLyrics, songlyrics } from './lib'
572

3+
export type { TLyrics }
584
export default songlyrics

src/lib/duckduckgo.ts

Lines changed: 62 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,63 @@
1-
import { SafeSearchType, search } from 'duck-duck-scrape'
2-
3-
export const webSearch = (query: string) =>
4-
search(query, {
5-
locale: 'en-us', // The locale(?) of the search
6-
region: 'wt-wt', // All regions
7-
marketRegion: 'US', // The market region(?) of the search
8-
safeSearch: SafeSearchType.OFF,
1+
import { makeRequest } from './request'
2+
3+
const VQD_REGEX = /vqd='(\d+-\d+-\d+)'/
4+
const SEARCH_REGEX =
5+
/DDG\.pageLayout\.load\('d',(\[.+\])\);DDG\.duckbar\.load\('images'/
6+
7+
const queryString = (query: Record<string, string>) =>
8+
new URLSearchParams(query).toString()
9+
10+
const getVQD = async (query: string) => {
11+
try {
12+
const vqdRequestUrl = new URL(
13+
`https://duckduckgo.com/?${queryString({
14+
q: query,
15+
ia: 'web',
16+
})}`,
17+
)
18+
const html = await makeRequest(vqdRequestUrl)
19+
return VQD_REGEX.exec(html)?.at(1)
20+
} catch (err) {
21+
throw new Error(`Failed to get the VQD for query "${query}".`)
22+
}
23+
}
24+
25+
// eslint-disable-next-line complexity
26+
/**
 * Runs a DuckDuckGo web search through the `links.duckduckgo.com/d.js`
 * endpoint and returns the result entries embedded in the response script.
 *
 * @param query - Search query.
 * @returns The entries of the embedded result array that carry string `c`,
 *   `t` and `i` fields. `undefined` stays in the signature for backward
 *   compatibility, but this implementation never returns it.
 * @throws Error when no VQD can be obtained, when the response contains the
 *   `DDG.deep.is506` marker, when no result payload is found, or when the
 *   payload is not valid JSON.
 */
export const webSearch = async (
  query: string,
): Promise<{ c: string; t: string; i: string }[] | undefined> => {
  type SearchEntry = { c: string; t: string; i: string }

  // Runtime check backing the declared return type: JSON.parse gives no
  // guarantee about the shape of the decoded entries.
  const isSearchEntry = (entry: unknown): entry is SearchEntry => {
    if (typeof entry !== 'object' || entry === null) {
      return false
    }
    const candidate = entry as Record<string, unknown>
    return (
      typeof candidate.c === 'string' &&
      typeof candidate.t === 'string' &&
      typeof candidate.i === 'string'
    )
  }

  const vqd = await getVQD(query)
  if (!vqd) {
    throw new Error(`Failed to get the VQD for query "${query}".`)
  }

  const requestParams = new URLSearchParams({
    q: query,
    vqd,
    kl: 'wt-wt',
    l: 'en-us',
    dl: 'en',
    ct: 'US',
    sp: '1',
    df: 'a',
    ss_mkt: 'us',
    s: '0',
    bpa: '1',
    biaexp: 'b',
    msvrtexp: 'b',
    nadse: 'b',
    eclsexp: 'b',
    tjsexp: 'b',
  })
  const requestUrl = new URL(
    `https://links.duckduckgo.com/d.js?${requestParams.toString()}`,
  )
  const responseString = await makeRequest(requestUrl)

  // Dots are escaped so only the literal marker matches.
  if (/DDG\.deep\.is506/.test(responseString)) {
    throw new Error('A server error occurred!')
  }

  // Tabs are replaced before parsing, as in the original implementation.
  const raw = SEARCH_REGEX.exec(responseString)?.[1]?.replace(/\t/g, ' ')
  if (!raw) {
    throw new Error('No results found!')
  }

  let parsed: unknown
  try {
    parsed = JSON.parse(raw)
  } catch (err: unknown) {
    const reason = err instanceof Error ? err.message : String(err)
    throw new Error(`Failed to parse the search results: ${reason}`)
  }

  return Array.isArray(parsed) ? parsed.filter(isSearchEntry) : []
}

src/lib/index.ts

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,3 @@
1-
import { makeHttpRequest } from './request'
2-
import userAgent from 'user-agents'
3-
4-
const randomUserAgent = () =>
5-
new userAgent({ deviceCategory: 'desktop' }).random().toString()
6-
7-
export const request = makeHttpRequest({ randomUserAgent })
8-
export { webSearch } from './duckduckgo'
1+
export * from './duckduckgo'
2+
export * from './request'
3+
export * from './songlyrics'

src/lib/request.ts

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,26 @@
1-
import got, { OptionsOfTextResponseBody } from 'got'
1+
import { RequestOptions, request } from 'https'
22

3-
export const makeHttpRequest = ({
4-
randomUserAgent,
5-
}: {
6-
randomUserAgent: () => string
7-
}) => {
8-
const request = (url: string, options?: OptionsOfTextResponseBody) =>
9-
got(url, {
10-
headers: {
11-
'User-Agent': randomUserAgent(),
12-
},
13-
...options,
3+
export const makeRequest = (url: URL, method = 'GET') => {
4+
const options: RequestOptions = {
5+
hostname: url.hostname,
6+
port: url.protocol.startsWith('https') ? 443 : 80,
7+
path: `${url.pathname}${url.search}`,
8+
method: method,
9+
}
10+
return new Promise<string>((resolve, reject) => {
11+
const req = request(options, res => {
12+
if (res.statusCode && (res.statusCode < 200 || res.statusCode > 299)) {
13+
return reject(
14+
new Error(`Failed to get the response: ${res.statusCode}`),
15+
)
16+
}
17+
const data: any[] = []
18+
res.on('data', chunk => {
19+
data.push(chunk)
20+
})
21+
res.on('end', () => resolve(Buffer.concat(data).toString('utf8')))
1422
})
15-
return request
23+
req.on('error', reject)
24+
req.end()
25+
})
1626
}

src/lib/songlyrics.ts

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
import { makeRequest, webSearch } from '../lib'
2+
3+
import htmlParser from 'node-html-parser'
4+
import { makeSources } from '../sources'
5+
6+
const cleanTitleRegexp = /\s(-.+|\[.+\]|\(.+\))/g
7+
const cleanTitle = (title: string) => title.replace(cleanTitleRegexp, '').trim()
8+
export type TLyrics = {
9+
lyrics: string
10+
source: {
11+
name: string
12+
url: string
13+
link: string
14+
}
15+
}
16+
/**
 * Looks up the lyrics for a song title.
 *
 * The title is passed through `cleanTitle`, lower-cased and sent to
 * `webSearch` as `<title> intitle:lyrics`. Only the first search hit is
 * considered: its `i` field is reduced to a bare source name, which must be a
 * key of the map returned by `makeSources()`. The page at the hit's `c` URL is
 * then downloaded and handed to that source's `parse`.
 *
 * @param title - Song title to search for.
 * @returns The lyrics with their origin, or `undefined` when there is no
 *   first hit or its site is not a registered source.
 * @throws Whatever `webSearch`, `makeRequest` or the source's `parse` rejects
 *   with.
 */
export const songlyrics = async (
  title: string,
): Promise<TLyrics | undefined> => {
  const clean = cleanTitle(title).toLowerCase()
  const query = `${clean} intitle:lyrics`

  const ddgResults = await webSearch(query)
  const ddgResult = ddgResults?.[0]
  if (!ddgResult) {
    return undefined
  }

  // The dots must be escaped: the unescaped pattern also stripped any
  // character followed by "com" (e.g. "ecom" inside a hostname).
  const sourceName = ddgResult.i.toLowerCase().replace(/www\.|\.com/g, '')

  // A single lookup replaces the has() + get()! pair.
  const source = makeSources().get(sourceName)
  if (!source) {
    return undefined
  }

  const html = await makeRequest(new URL(ddgResult.c))
  const lyrics = await source.parse(htmlParser(html))
  return {
    lyrics,
    source: {
      // Drop the trailing "| Site name" part of the page title.
      name: ddgResult.t.replace(/\|.+/g, '').trim(),
      url: ddgResult.i,
      link: ddgResult.c,
    },
  }
}

src/sources/azlyrics.ts

Lines changed: 7 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,11 @@
1-
import { TSource, useSpacingLyrics } from '.'
1+
import { HTMLElement } from 'node-html-parser'
22

3-
import { load } from 'cheerio'
4-
5-
const azlyrics: TSource = {
3+
export const azlyrics = {
64
name: 'AZLyrics',
7-
hostname: 'www.azlyrics.com',
8-
path: '/lyrics',
9-
parse: (html: string): Promise<string> => {
10-
const $ = load(html)
11-
12-
const content = $('div.main-page div div.col-xs-12 > div:nth(4)')
13-
.text()
14-
.trim()
15-
16-
return Promise.resolve(useSpacingLyrics(content))
5+
parse: (html: HTMLElement): Promise<string> => {
6+
const content = html
7+
.querySelector('div.ringtone')
8+
?.nextElementSibling.nextElementSibling.nextElementSibling.nextElementSibling.textContent.trim()
9+
return Promise.resolve(`${content}`)
1710
},
1811
}
19-
20-
export default azlyrics

src/sources/genius.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import { HTMLElement } from 'node-html-parser'
2+
3+
/**
 * Lyrics source for Genius pages.
 *
 * `parse` collects every `div[data-lyrics-container=true]` element of the
 * parsed page, takes each element's `structuredText`, and removes bracketed
 * tags such as `[Verse 1]`.
 */
export const genius = {
  name: 'Genius',
  /**
   * @param html - Root element of the parsed page.
   * @returns The lyrics text, trimmed; an empty string when the page has no
   *   lyrics container.
   */
  parse: (html: HTMLElement): Promise<string> => {
    const content = html
      .querySelectorAll('div[data-lyrics-container=true]')
      .map(x => x.structuredText)
      // Join with a line break so the last line of one container is not
      // fused with the first line of the next.
      .join('\n')
      // Match one bracketed tag at a time; a greedy `.+` would also delete
      // the lyric text between two tags on the same line.
      .replace(/\[[^\]\n]+\]/g, '')
      .trim()

    return Promise.resolve(content)
  },
}

src/sources/index.ts

Lines changed: 15 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,19 @@
1-
import azlyrics from './azlyrics'
2-
import lyrics from './lyrics'
3-
import musixmatch from './musixmatch'
1+
import { HTMLElement } from 'node-html-parser'
2+
import { azlyrics } from './azlyrics'
3+
import { genius } from './genius'
4+
import { lyrics } from './lyrics'
5+
import { musixmatch } from './musixmatch'
46

5-
const chunkArray = <T>(arr: T[], size: number): T[][] => {
6-
const chunk: T[][] = []
7-
let i = 0
8-
while (i < arr.length) {
9-
chunk.push(arr.slice(i, (i += size)))
10-
}
11-
return chunk
12-
}
13-
14-
export const useSpacingLyrics = (lyrics: string): string => {
15-
if (lyrics.includes('\n\n')) {
16-
return lyrics
17-
}
18-
const splitLyrics = lyrics.split('\n')
19-
const chunkLyrics = chunkArray(splitLyrics, 4)
20-
return chunkLyrics.map(x => x.join('\n')).join('\n\n')
21-
}
22-
23-
export type TSource = {
7+
type Source = {
248
name: string
25-
hostname: string
26-
path: string
27-
parse: (html: string) => Promise<string>
9+
parse: (html: HTMLElement) => Promise<string>
2810
}
11+
export const makeSources = () => {
12+
const sources: Map<string, Source> = new Map()
13+
sources.set('genius', genius)
14+
sources.set('azlyrics', azlyrics)
15+
sources.set('musixmatch', musixmatch)
16+
sources.set('lyrics', lyrics)
2917

30-
const sources: TSource[] = [musixmatch, azlyrics, lyrics]
31-
32-
export default sources
18+
return sources
19+
}

0 commit comments

Comments
 (0)