Skip to content

Commit

Permalink
Add ability to set number of workers to fix rate limiting issue (#61)
Browse files Browse the repository at this point in the history
* Add ability to set number of workers

* disable traceback by default

* raise error on non 200 response

* improve logger

---------

Co-authored-by: Zehina <zehinadev@gmail.com>
  • Loading branch information
Zehina and Zehina authored Oct 20, 2024
1 parent 4609cc7 commit 0758b3c
Show file tree
Hide file tree
Showing 10 changed files with 263 additions and 82 deletions.
68 changes: 55 additions & 13 deletions webtoon_downloader/cmd/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,24 @@

from webtoon_downloader import logger
from webtoon_downloader.cmd.exceptions import (
LatestWithStartOrEndError,
SeparateOptionWithNonImageSaveAsError,
CLIInvalidConcurrentCountError,
CLIInvalidStartAndEndRangeError,
CLILatestWithStartOrEndError,
CLISeparateOptionWithNonImageSaveAsError,
handle_deprecated_options,
)
from webtoon_downloader.cmd.progress import ChapterProgressManager, init_progress
from webtoon_downloader.core.exceptions import WebtoonDownloadError
from webtoon_downloader.core.webtoon.downloaders import comic
from webtoon_downloader.core.webtoon.downloaders.options import StorageType, WebtoonDownloadOptions
from webtoon_downloader.core.webtoon.downloaders.options import (
DEFAULT_CONCURENT_CHAPTER_DOWNLOADS,
DEFAULT_CONCURENT_IMAGE_DOWNLOADS,
StorageType,
WebtoonDownloadOptions,
)
from webtoon_downloader.core.webtoon.exporter import DataExporterFormat
from webtoon_downloader.transformers.image import ImageFormat

log, console = logger.setup()
help_config = click.RichHelpConfiguration(
show_metavars_column=False,
append_metavars_help=True,
Expand All @@ -33,6 +40,13 @@ class GracefulExit(SystemExit):
code = 1


def validate_concurrent_count(ctx: Any, param: Any, value: int | None) -> int | None:
if value is not None and value <= 0:
raise CLIInvalidConcurrentCountError(value)

return value


@click.command()
@click.version_option()
@click.pass_context
Expand All @@ -44,12 +58,7 @@ class GracefulExit(SystemExit):
type=int,
help="Start chapter",
)
@click.option(
"--end",
"-e",
type=int,
help="End chapter",
)
@click.option("--end", "-e", type=int, help="End chapter")
@click.option(
"--latest",
"-l",
Expand Down Expand Up @@ -113,6 +122,21 @@ class GracefulExit(SystemExit):
hidden=True,
help="[Deprecated] Use --export-metadata instead",
)
@click.option(
"--concurrent-chapters",
type=int,
default=DEFAULT_CONCURENT_CHAPTER_DOWNLOADS,
callback=validate_concurrent_count,
help="Number of workers for concurrent chapter downloads",
)
@click.option(
"--concurrent-pages",
type=int,
default=DEFAULT_CONCURENT_IMAGE_DOWNLOADS,
callback=validate_concurrent_count,
help="Number of workers for concurrent image downloads. This value is shared between all concurrent chapter downloads.",
)
@click.option("--debug", type=bool, is_flag=True, help="Enable debug mode")
def cli(
ctx: click.Context,
url: str,
Expand All @@ -125,17 +149,28 @@ def cli(
export_metadata: bool,
export_format: DataExporterFormat,
save_as: StorageType,
concurrent_chapters: int,
concurrent_pages: int,
debug: bool,
) -> None:
log, console = logger.setup(
log_filename="webtoon_downloader.log" if debug else None,
enable_traceback=debug,
enable_console_logging=debug,
)

loop = asyncio.get_event_loop()
if not url:
console.print(
'[red]A Webtoon URL of the form [green]"https://www.webtoons.com/.../list?title_no=??"[/] of is required.'
)
ctx.exit(1)
if latest and (start or end):
raise LatestWithStartOrEndError(ctx)
raise CLILatestWithStartOrEndError(ctx)
if separate and (save_as != "images"):
raise SeparateOptionWithNonImageSaveAsError(ctx)
raise CLISeparateOptionWithNonImageSaveAsError(ctx)
if start is not None and end is not None and start > end:
raise CLIInvalidStartAndEndRangeError(ctx)

progress = init_progress(console)
series_download_task = progress.add_task(
Expand All @@ -160,6 +195,8 @@ def cli(
save_as=save_as,
chapter_progress_callback=progress_manager.advance_progress,
on_webtoon_fetched=progress_manager.on_webtoon_fetched,
concurrent_chapters=concurrent_chapters,
concurrent_pages=concurrent_pages,
)

loop = asyncio.get_event_loop()
Expand All @@ -181,11 +218,16 @@ def _raise_graceful_exit(*_: Any) -> None:
signal.signal(signal.SIGINT, _raise_graceful_exit)
signal.signal(signal.SIGTERM, _raise_graceful_exit)
with contextlib.suppress(GracefulExit):
loop.run_until_complete(main_task)
try:
loop.run_until_complete(main_task)
except WebtoonDownloadError as exc:
console.print(f"[red][bold]Download error:[/bold] {exc}[/]")
log.exception("Download error")


def run() -> None:
    """
    CLI entrypoint.

    When the program is started without any argument, ``--help`` is appended to
    ``sys.argv`` so that the usage text is displayed instead of a bare error.
    """
    if len(sys.argv) <= 1:
        sys.argv.append("--help")

    cli()  # pylint: disable=no-value-for-parameter
33 changes: 28 additions & 5 deletions webtoon_downloader/cmd/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,20 @@
import rich_click as click


class LatestWithStartOrEndError(click.UsageError):
class CLIInvalidStartAndEndRangeError(click.UsageError):
    """
    This error is raised when the user provides a start chapter that is greater
    than the end chapter.

    Args:
        ctx: The Click context associated with the error, if any.
    """

    def __init__(self, ctx: click.Context | None = None) -> None:
        message = "Start chapter cannot be greater than end chapter."
        super().__init__(message, ctx)


class CLILatestWithStartOrEndError(click.UsageError):
"""
This error is raised when the user attempts to use --latest in conjunction
with either --start or --end options, which is not allowed due to their
Expand All @@ -20,7 +33,7 @@ def __init__(self, ctx: click.Context | None = None) -> None:
super().__init__(message, ctx)


class SeparateOptionWithNonImageSaveAsError(click.UsageError):
class CLISeparateOptionWithNonImageSaveAsError(click.UsageError):
"""
This error is raised when the user attempts to use --separate with a save-as
option other than 'images'. The --separate option is only compatible with
Expand All @@ -35,7 +48,7 @@ def __init__(self, ctx: click.Context | None = None) -> None:
super().__init__(message, ctx)


class DeprecatedOptionError(click.UsageError):
class CLIDeprecatedOptionError(click.UsageError):
"""
Custom error for handling deprecated options in the CLI.
"""
Expand All @@ -45,9 +58,19 @@ def __init__(self, deprecated_option: str, use_instead_option: str):
super().__init__(message)


class CLIInvalidConcurrentCountError(click.BadParameter):
    """
    Custom error for handling an invalid number of concurrent workers in the CLI.

    Args:
        value: The rejected value supplied by the user.
    """

    def __init__(self, value: Any):
        # Click already prefixes BadParameter messages with "Invalid value ...",
        # so the message only states what was received and what is expected.
        message = f"{value} is not a valid number of concurrent workers; expected a positive integer."
        super().__init__(message)


def handle_deprecated_options(_: click.Context, param: click.Parameter, value: Any) -> None:
    """
    Handler for deprecated options.

    Args:
        _: The Click context (unused).
        param: The parameter the callback is attached to; its name selects the message.
        value: The value supplied for the parameter.

    Raises:
        CLIDeprecatedOptionError: If a deprecated option was actually supplied.
    """
    if param.name == "export_texts" and value:
        raise CLIDeprecatedOptionError(deprecated_option="--export-texts", use_instead_option="--export-metadata")

    if param.name == "dest" and value is not None:
        raise CLIDeprecatedOptionError(deprecated_option="--dest", use_instead_option="--out")
13 changes: 12 additions & 1 deletion webtoon_downloader/core/downloaders/image.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

import asyncio
import logging
from dataclasses import dataclass, field
from typing import Awaitable, Callable

Expand All @@ -9,6 +11,8 @@
from webtoon_downloader.storage import AioWriter
from webtoon_downloader.transformers.base import AioImageTransformer

log = logging.getLogger(__name__)

ImageProgressCallback = Callable[[int], Awaitable[None]]
"""
Progress callback called for each image download.
Expand All @@ -32,9 +36,15 @@ class ImageDownloadResult:
@dataclass
class ImageDownloader:
client: httpx.AsyncClient
concurent_downloads_limit: int
transformers: list[AioImageTransformer] = field(default_factory=list)
progress_callback: ImageProgressCallback | None = None

_semaphore: asyncio.Semaphore = field(init=False)

def __post_init__(self) -> None:
    """
    Create the semaphore that gates image downloads.

    Raises:
        ValueError: If ``concurent_downloads_limit`` is lower than 1.
    """
    if self.concurent_downloads_limit < 1:
        # A semaphore initialised to 0 can never be acquired: every download would wait forever.
        raise ValueError(
            f"concurent_downloads_limit must be a positive integer, got {self.concurent_downloads_limit}"
        )

    self._semaphore = asyncio.Semaphore(self.concurent_downloads_limit)

async def run(self, url: str, target: str, storage: AioWriter) -> ImageDownloadResult:
"""
Initiates the downloading of an image from a specified URL.
Expand All @@ -50,7 +60,8 @@ async def run(self, url: str, target: str, storage: AioWriter) -> ImageDownloadR
ImageDownloadError: If an error occurs during the download process.
"""
try:
return await self._download_image(self.client, url, target, storage)
async with self._semaphore:
return await self._download_image(self.client, url, target, storage)
except Exception as exc:
raise ImageDownloadError(url=url, cause=exc) from exc

Expand Down
57 changes: 56 additions & 1 deletion webtoon_downloader/core/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,20 @@ class DownloadError(Exception):
def __str__(self) -> str:
    """
    Build the human-readable description of the download failure.

    An explicit ``message`` takes precedence. Otherwise the text is derived from
    ``url`` and, when available, from the underlying ``cause``.
    """
    if self.message:
        return self.message

    if self.cause is None:
        return f"Failed to download from {self.url}"

    cause_msg = str(self.cause)
    if cause_msg:
        return f"Failed to download from {self.url} => {cause_msg}"

    # The cause stringifies to an empty text, so fall back to its type name
    # to avoid printing a dangling "=>" with nothing after it.
    return f"Failed to download from {self.url} due to: {self.cause.__class__.__name__}"


@dataclass
class WebtoonDownloadError(DownloadError):
    """
    Exception raised for Webtoon download errors.

    Declares no additional fields or behavior on top of ``DownloadError``.
    """


@dataclass
Expand All @@ -31,21 +44,63 @@ class ChapterDownloadError(DownloadError):
chapter_info: ChapterInfo | None = None


@dataclass
class WebtoonGetError(Exception):
    """
    Exception raised due to a fetch error when retrieving Webtoon information.

    Attributes:
        series_url  : URL of the series that was requested.
        status_code : Status code reported in the error message.
    """

    series_url: str
    status_code: int

    def __str__(self) -> str:
        return f"Failed to fetch Webtoon information from {self.series_url}. Status code: {self.status_code}"


@dataclass
class FetchError(Exception):
"""Exception raised due to a fetch error"""

msg: str | None = None


@dataclass
class ChapterURLFetchError(FetchError):
"""Exception raised due to a fetch error when retreiving the chapter URL"""

def __str__(self) -> str:
if self.msg:
return self.msg

return "Failed to fetch chapter URL"


@dataclass
class ChapterTitleFetchError(FetchError):
"""Exception raised due to a fetch error when retreiving the chapter title"""

def __str__(self) -> str:
if self.msg:
return self.msg

return "Failed to fetch chapter title"


@dataclass
class ChapterDataEpisodeNumberFetchError(FetchError):
"""Exception raised due to a fetch error when retreiving data chapter number"""

def __str__(self) -> str:
if self.msg:
return self.msg

return "Failed to fetch data episode number"


@dataclass
class SeriesTitleFetchError(FetchError):
"""Exception raised due to a fetch error when retreiving the series title"""

def __str__(self) -> str:
if self.msg:
return self.msg

return "Failed to fetch series title"
24 changes: 17 additions & 7 deletions webtoon_downloader/core/webtoon/downloaders/chapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import asyncio
import logging
from dataclasses import dataclass
from dataclasses import dataclass, field
from os import PathLike
from pathlib import Path

Expand All @@ -27,20 +27,27 @@ class ChapterDownloader:
Downloads chapters from a Webtoon.
Attributes:
client : HTTP client for making web requests.
image_downloader : Downloader for Webtoon images.
file_name_generator : Generator for file names based on chapter and page details.
exporter : Optional data exporter for exporting chapter details.
progress_callback : Optional callback for reporting chapter download progress.
client : HTTP client for making web requests.
image_downloader : Downloader for Webtoon images.
file_name_generator : Generator for file names based on chapter and page details.
concurrent_downloads_limit : The number of chapters to download concurrently.
exporter : Optional data exporter for exporting chapter details.
progress_callback : Optional callback for reporting chapter download progress.
"""

client: httpx.AsyncClient
image_downloader: ImageDownloader
file_name_generator: FileNameGenerator
concurrent_downloads_limit: int

exporter: DataExporter | None = None
progress_callback: ChapterProgressCallback | None = None

_semaphore: asyncio.Semaphore = field(init=False)

def __post_init__(self) -> None:
    """
    Create the semaphore that gates chapter downloads.

    Raises:
        ValueError: If ``concurrent_downloads_limit`` is lower than 1.
    """
    if self.concurrent_downloads_limit < 1:
        # A semaphore initialised to 0 can never be acquired: every chapter would wait forever.
        raise ValueError(
            f"concurrent_downloads_limit must be a positive integer, got {self.concurrent_downloads_limit}"
        )

    self._semaphore = asyncio.Semaphore(self.concurrent_downloads_limit)

async def run(
self, chapter_info: ChapterInfo, directory: str | PathLike[str], storage: AioWriter
) -> list[DownloadResult]:
Expand All @@ -59,7 +66,8 @@ async def run(
ChapterDownloadError in case of error downloading the chapter.
"""
try:
return await self._run(chapter_info, directory, storage)
async with self._semaphore:
return await self._run(chapter_info, directory, storage)
except Exception as exc:
raise ChapterDownloadError(chapter_info.viewer_url, exc, chapter_info=chapter_info) from exc

Expand Down Expand Up @@ -104,7 +112,9 @@ def _create_task(self, chapter_info: ChapterInfo, url: str, name: str, storage:
"""

async def _task() -> ImageDownloadResult:
log.debug('Downloading: "%s" from "%s" from chapter "%s"', name, url, chapter_info.viewer_url)
res = await self.image_downloader.run(url, name, storage)
log.debug('Finished downloading: "%s" from "%s" from chapter "%s"', name, url, chapter_info.viewer_url)
await self._report_progress(chapter_info, "PageCompleted")
return res

Expand Down
Loading

0 comments on commit 0758b3c

Please sign in to comment.