Skip to content

Commit

Permalink
Fix azuki chapter parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
s0hv committed Dec 6, 2023
1 parent 1b6fab9 commit aa6e1d2
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 2 deletions.
10 changes: 8 additions & 2 deletions src/scrapers/azuki.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@

from lxml import etree

from src.scrapers.base_scraper import BaseChapterSimple, ScrapeServiceRetVal, \
BaseScraperWhole
from src.scrapers.base_scraper import (BaseChapterSimple, ScrapeServiceRetVal,
BaseScraperWhole)
from src.utils.utilities import utctoday

logger = logging.getLogger('debug')

# Parses chapter titles of the form "Chapter <number>[suffix][.<decimal>][ – <title>]".
# Named groups:
#   chapter_number  - the leading integer chapter number
#   special_chapter - optional suffix: "ex<digits>", "extra <digits>", or a single
#                     letter optionally preceded by a dot (e.g. "b" or ".b")
#   chapter_decimal - optional digits after a dot (e.g. the "5" in "5.5")
#   chapter_title   - optional free text following " – " (en dash)
# A numeric range such as "126-127" is accepted but only the first number is captured.
# The single-letter alternative uses [a-z] rather than [A-z]: the latter spans
# ASCII 0x41-0x7A and therefore also matches the punctuation [ \ ] ^ _ `.
# re.I already makes [a-z] cover upper-case letters.
chapter_regex = re.compile(
    r'^Chapter (?P<chapter_number>\d+)'
    r'((-\d+)| ?(?P<special_chapter>ex\d*|\.?[a-z]|extra *\d*))?'
    r'(\.(?P<chapter_decimal>\d+))?'
    r'( – (?P<chapter_title>.+?))?$',
    re.I,
)
# Matches titles that consist solely of "chapter announcement" (any case,
# surrounding whitespace allowed); such entries are not real chapters.
ignore_chapter_regex = re.compile(r'^\s*chapter announcement\s*$', re.I)


class ParsedChapter(BaseChapterSimple, ABC):
Expand All @@ -35,7 +36,12 @@ def title(self) -> str:
return self.chapter_title

def parse_title(self, title: str) -> Optional[Tuple[str, int, Optional[int]]]:
if ignore_chapter_regex.match(title):
self.invalid = True
return None

match = chapter_regex.match(title)

if not match:
logger.error(f'Failed to parse title from {title}')
self.invalid = True
Expand Down
1 change: 1 addition & 0 deletions src/tests/scrapers/azuki/test_azuki.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ def test_parse_releases_page(self):
('Chapter 3b – Special Broadcast: His Unhinged Passions', ('Special Broadcast: His Unhinged Passions', 3, 2)),
('Chapter 126-127', ('Chapter 126-127', 126, None)),
('Chapter 10.b', ('Chapter 10.b', 10, 2)),
('Chapter Announcement', None),
])
def test_parse_chapter_title(title: str, correct):
ch = TempChapter()
Expand Down

0 comments on commit aa6e1d2

Please sign in to comment.