Skip to content

Commit e09bae7

Browse files
committed
fix: always unescape html content
1 parent 2ee8bac commit e09bae7

File tree

2 files changed

+4
-4
lines changed

2 files changed

+4
-4
lines changed

dvtag/scrape.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -31,13 +31,13 @@ def scrape(workno: str) -> DoujinVoice:
3131
html = _get_200(url).text
3232

3333
if m := re.search(r'data-product-name="(.+)"\s*data-maker-name="(.+)"', html):
34-
name = m.group(1)
35-
circle = m.group(2)
34+
name = unescape(m.group(1))
35+
circle = unescape(m.group(2))
3636
else:
3737
raise ParsingError(f"no work name found", workno)
3838

3939
if m := re.search(r"\"og:image\"[\s\S]*?content=\"(.+?)\"", html):
40-
image_url = urljoin("https://www.dlsite.com", m.group(1))
40+
image_url = urljoin("https://www.dlsite.com", unescape(m.group(1)))
4141
else:
4242
raise ParsingError(f"no cover image url found", workno)
4343

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
[metadata]
22
name = dvtag
3-
version = 0.7.2
3+
version = 0.7.3
44
author = Nobe Kanai
55
author_email = nobekanai@gmail.com
66
description = A tool for tagging your doujin voice library.

0 commit comments

Comments
 (0)