Skip to content

Commit

Permalink
Refactor: most viewed scraper and endpoint logic
Browse files Browse the repository at this point in the history
  • Loading branch information
moonlitgrace committed Oct 9, 2023
1 parent 0466b1c commit 3569795
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 28 deletions.
4 changes: 2 additions & 2 deletions app/api/endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,10 @@ async def get_top_ten(offset: int = 0, limit: int = Query(10, le=10)):
)
@return_on_404()
async def get_most_viewed(chart: str, offset: int = 0, limit: int = Query(10, le=10)):
most_viewed_scraper = MostViewedScraper()
most_viewed_scraper = MostViewedScraper(chart)

if chart in most_viewed_scraper.CHARTS:
response = most_viewed_scraper.scrape(chart)
response = most_viewed_scraper.scrape
return response[offset : offset + limit]
else:
raise HTTPException(status_code=404, detail=f"Invalid chart {chart}")
Expand Down
65 changes: 39 additions & 26 deletions app/api/scrapers/most_viewed.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from types import ClassMethodDescriptorType
from selectolax.parser import Node

from app.api.decorators.return_decorator import return_on_error
from ..utils import get_text, get_attribute
from ..helpers.html_helper import HTMLHelper

Expand All @@ -8,37 +11,58 @@ class MostViewedScraper:
# e.g. "today" | "week" | "month"
CHARTS = ["today", "week", "month"]

def __init__(self) -> None:
def __init__(self, chart: str) -> None:
url = "https://mangareader.to/home"
self.chart = chart
# Facades
self.html_helper = HTMLHelper()
# Parser
self.parser = self.html_helper.get_parser(url)

def __get_slug(self, node: Node) -> str | None:
@property
@return_on_error([])
def scrape(self) -> list:
mangas_list = []
container = self.parser.css_first(f"#main-sidebar #chart-{self.chart}")
node_list = container.css("ul > li")

for index, node in enumerate(node_list, start=1):
manga_dict = {"id": index, **self.__build_dict(node)}

mangas_list.append(manga_dict)
return mangas_list

@return_on_error("")
def __get_slug(self, node: Node) -> str:
slug = get_attribute(node, ".manga-detail .manga-name a", "href")
return slug.replace("/", "") if slug else None
return slug.replace("/", "") if slug else ""

def __get_cover(self, node: Node) -> str | None:
@return_on_error("")
def __get_cover(self, node: Node) -> str:
cover = get_attribute(node, "img.manga-poster-img", "src")
return cover.replace("200x300", "500x800") if cover else None
return cover.replace("200x300", "500x800") if cover else ""

def __get_views(self, node: Node) -> str | None:
@return_on_error("")
def __get_views(self, node: Node) -> str:
views_string = get_text(node, ".fd-infor .fdi-view")
return views_string.split()[0].replace(",", "") if views_string else None
return views_string.split()[0].replace(",", "") if views_string else ""

def __get_langs(self, node: Node) -> list | None:
@return_on_error([])
def __get_langs(self, node: Node) -> list:
langs_string = get_text(node, ".fd-infor > span:nth-child(1)")
return [lang for lang in langs_string.split("/")] if langs_string else None
return [lang for lang in langs_string.split("/")] if langs_string else []

def __get_chapters_volumes(self, node: Node, index: int) -> str | None:
@return_on_error("")
def __get_chapters_volumes(self, node: Node, index: int) -> str:
data_string = get_text(node, f".d-block span:nth-child({index})")
return data_string.split()[1] if data_string else None
return data_string.split()[1] if data_string else ""

def __get_genres(self, node: Node) -> list | None:
@return_on_error([])
def __get_genres(self, node: Node) -> list:
genres = node.css(".fd-infor .fdi-cate a")
return [genre.text() for genre in genres] if genres else None
return [genre.text() for genre in genres] if genres else []

@return_on_error({})
def __build_dict(self, node: Node) -> dict:
manga_dict = {
"rank": get_text(node, ".ranking-number span"),
Expand All @@ -47,20 +71,9 @@ def __build_dict(self, node: Node) -> dict:
"cover": self.__get_cover(node),
"views": self.__get_views(node),
"langs": self.__get_langs(node),
"chapters": self.__get_chapters_volumes(node, 1),
"volumes": self.__get_chapters_volumes(node, 2),
"chapters": self.__get_chapters_volumes(node, 1), # chapter index
"volumes": self.__get_chapters_volumes(node, 2), # volume index
"genres": self.__get_genres(node),
}

return manga_dict

def scrape(self, chart) -> list:
mangas_list = []
container = self.parser.css_first(f"#main-sidebar #chart-{chart}")
node_list = container.css("ul > li")

for index, node in enumerate(node_list, start=1):
manga_dict = {"id": index, **self.__build_dict(node)}

mangas_list.append(manga_dict)
return mangas_list

0 comments on commit 3569795

Please sign in to comment.