Skip to content

Commit

Permalink
Add ability to set number of workers
Browse files Browse the repository at this point in the history
  • Loading branch information
Zehina committed Oct 19, 2024
1 parent 4609cc7 commit c4aef88
Show file tree
Hide file tree
Showing 8 changed files with 150 additions and 73 deletions.
54 changes: 43 additions & 11 deletions webtoon_downloader/cmd/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@

from webtoon_downloader import logger
from webtoon_downloader.cmd.exceptions import (
LatestWithStartOrEndError,
SeparateOptionWithNonImageSaveAsError,
CLIInvalidConcurrentCountError,
CLIInvalidStartAndEndRangeError,
CLILatestWithStartOrEndError,
CLISeparateOptionWithNonImageSaveAsError,
handle_deprecated_options,
)
from webtoon_downloader.cmd.progress import ChapterProgressManager, init_progress
Expand All @@ -20,7 +22,6 @@
from webtoon_downloader.core.webtoon.exporter import DataExporterFormat
from webtoon_downloader.transformers.image import ImageFormat

log, console = logger.setup()
help_config = click.RichHelpConfiguration(
show_metavars_column=False,
append_metavars_help=True,
Expand All @@ -33,6 +34,13 @@ class GracefulExit(SystemExit):
code = 1


def validate_concurrent_count(*_: Any, value: int | None) -> int | None:
if value is not None and value <= 0:
raise CLIInvalidConcurrentCountError(value)

return value


@click.command()
@click.version_option()
@click.pass_context
Expand All @@ -44,12 +52,7 @@ class GracefulExit(SystemExit):
type=int,
help="Start chapter",
)
@click.option(
"--end",
"-e",
type=int,
help="End chapter",
)
@click.option("--end", "-e", type=int, help="End chapter")
@click.option(
"--latest",
"-l",
Expand Down Expand Up @@ -113,6 +116,26 @@ class GracefulExit(SystemExit):
hidden=True,
help="[Deprecated] Use --export-metadata instead",
)
@click.option(
"--concurrent-chapters",
type=int,
default=1,
callback=validate_concurrent_count,
help="Number of workers for concurrent chapter downloads",
)
@click.option(
"--concurrent-pages",
type=int,
default=1,
callback=validate_concurrent_count,
help="Number of workers for concurrent image downloads. This value is shared between all concurrent chapter downloads.",
)
@click.option(
"--log-to-file",
type=bool,
is_flag=True,
help="Write debug logs to the log file",
)
def cli(
ctx: click.Context,
url: str,
Expand All @@ -125,17 +148,24 @@ def cli(
export_metadata: bool,
export_format: DataExporterFormat,
save_as: StorageType,
concurrent_chapters: int,
concurrent_pages: int,
log_to_file: bool,
) -> None:
_, console = logger.setup(log_filename="webtoon_downloader.log" if log_to_file else None)

loop = asyncio.get_event_loop()
if not url:
console.print(
'[red]A Webtoon URL of the form [green]"https://www.webtoons.com/.../list?title_no=??"[/] of is required.'
)
ctx.exit(1)
if latest and (start or end):
raise LatestWithStartOrEndError(ctx)
raise CLILatestWithStartOrEndError(ctx)
if separate and (save_as != "images"):
raise SeparateOptionWithNonImageSaveAsError(ctx)
raise CLISeparateOptionWithNonImageSaveAsError(ctx)
if start > end:
raise CLIInvalidStartAndEndRangeError(ctx)

progress = init_progress(console)
series_download_task = progress.add_task(
Expand All @@ -160,6 +190,8 @@ def cli(
save_as=save_as,
chapter_progress_callback=progress_manager.advance_progress,
on_webtoon_fetched=progress_manager.on_webtoon_fetched,
concurrent_chapters=concurrent_chapters,
concurrent_pages=concurrent_pages,
)

loop = asyncio.get_event_loop()
Expand Down
33 changes: 28 additions & 5 deletions webtoon_downloader/cmd/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,20 @@
import rich_click as click


class LatestWithStartOrEndError(click.UsageError):
class CLIInvalidStartAndEndRangeError(click.UsageError):
    """
    Usage error reporting that the start chapter is greater than the end chapter.

    Args:
        ctx: The Click context to attach to the error, if any.
    """

    def __init__(self, ctx: click.Context | None = None) -> None:
        super().__init__("Start chapter cannot be greater than end chapter.", ctx)


class CLILatestWithStartOrEndError(click.UsageError):
"""
This error is raised when the user attempts to use --latest in conjunction
with either --start or --end options, which is not allowed due to their
Expand All @@ -20,7 +33,7 @@ def __init__(self, ctx: click.Context | None = None) -> None:
super().__init__(message, ctx)


class SeparateOptionWithNonImageSaveAsError(click.UsageError):
class CLISeparateOptionWithNonImageSaveAsError(click.UsageError):
"""
This error is raised when the user attempts to use --separate with a save-as
option other than 'images'. The --separate option is only compatible with
Expand All @@ -35,7 +48,7 @@ def __init__(self, ctx: click.Context | None = None) -> None:
super().__init__(message, ctx)


class DeprecatedOptionError(click.UsageError):
class CLIDeprecatedOptionError(click.UsageError):
"""
Custom error for handling deprecated options in the CLI.
"""
Expand All @@ -45,9 +58,19 @@ def __init__(self, deprecated_option: str, use_instead_option: str):
super().__init__(message)


class CLIInvalidConcurrentCountError(click.BadParameter):
    """
    Bad-parameter error for an invalid concurrent worker count passed on the CLI.
    """

    def __init__(self, value: Any):
        # The offending value is embedded in the message shown to the user.
        super().__init__(f"Invalid value for concurrent workers {value}.")


def handle_deprecated_options(_: click.Context, param: click.Parameter, value: Any) -> None:
"""Handler for deprecated options"""
if param.name == "export_texts" and value:
raise DeprecatedOptionError(deprecated_option="--export-texts", use_instead_option="--export-metadata")
raise CLIDeprecatedOptionError(deprecated_option="--export-texts", use_instead_option="--export-metadata")
elif param.name == "dest" and value is not None:
raise DeprecatedOptionError(deprecated_option="--dest", use_instead_option="--out")
raise CLIDeprecatedOptionError(deprecated_option="--dest", use_instead_option="--out")
13 changes: 12 additions & 1 deletion webtoon_downloader/core/downloaders/image.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

import asyncio
import logging
from dataclasses import dataclass, field
from typing import Awaitable, Callable

Expand All @@ -9,6 +11,8 @@
from webtoon_downloader.storage import AioWriter
from webtoon_downloader.transformers.base import AioImageTransformer

log = logging.getLogger(__name__)

ImageProgressCallback = Callable[[int], Awaitable[None]]
"""
Progress callback called for each image download.
Expand All @@ -32,9 +36,15 @@ class ImageDownloadResult:
@dataclass
class ImageDownloader:
client: httpx.AsyncClient
concurent_downloads_limit: int
transformers: list[AioImageTransformer] = field(default_factory=list)
progress_callback: ImageProgressCallback | None = None

_semaphore: asyncio.Semaphore = field(init=False)

def __post_init__(self) -> None:
self._semaphore = asyncio.Semaphore(self.concurent_downloads_limit)

async def run(self, url: str, target: str, storage: AioWriter) -> ImageDownloadResult:
"""
Initiates the downloading of an image from a specified URL.
Expand All @@ -50,7 +60,8 @@ async def run(self, url: str, target: str, storage: AioWriter) -> ImageDownloadR
ImageDownloadError: If an error occurs during the download process.
"""
try:
return await self._download_image(self.client, url, target, storage)
async with self._semaphore:
return await self._download_image(self.client, url, target, storage)
except Exception as exc:
raise ImageDownloadError(url=url, cause=exc) from exc

Expand Down
24 changes: 17 additions & 7 deletions webtoon_downloader/core/webtoon/downloaders/chapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import asyncio
import logging
from dataclasses import dataclass
from dataclasses import dataclass, field
from os import PathLike
from pathlib import Path

Expand All @@ -27,20 +27,27 @@ class ChapterDownloader:
Downloads chapters from a Webtoon.
Attributes:
client : HTTP client for making web requests.
image_downloader : Downloader for Webtoon images.
file_name_generator : Generator for file names based on chapter and page details.
exporter : Optional data exporter for exporting chapter details.
progress_callback : Optional callback for reporting chapter download progress.
client : HTTP client for making web requests.
image_downloader : Downloader for Webtoon images.
file_name_generator : Generator for file names based on chapter and page details.
concurrent_downloads_limit : The number of chapters to download concurrently.
exporter : Optional data exporter for exporting chapter details.
progress_callback : Optional callback for reporting chapter download progress.
"""

client: httpx.AsyncClient
image_downloader: ImageDownloader
file_name_generator: FileNameGenerator
concurrent_downloads_limit: int

exporter: DataExporter | None = None
progress_callback: ChapterProgressCallback | None = None

_semaphore: asyncio.Semaphore = field(init=False)

def __post_init__(self) -> None:
self._semaphore = asyncio.Semaphore(self.concurrent_downloads_limit)

async def run(
self, chapter_info: ChapterInfo, directory: str | PathLike[str], storage: AioWriter
) -> list[DownloadResult]:
Expand All @@ -59,7 +66,8 @@ async def run(
ChapterDownloadError in case of error downloading the chapter.
"""
try:
return await self._run(chapter_info, directory, storage)
async with self._semaphore:
return await self._run(chapter_info, directory, storage)
except Exception as exc:
raise ChapterDownloadError(chapter_info.viewer_url, exc, chapter_info=chapter_info) from exc

Expand Down Expand Up @@ -104,7 +112,9 @@ def _create_task(self, chapter_info: ChapterInfo, url: str, name: str, storage:
"""

async def _task() -> ImageDownloadResult:
log.debug('Downloading: "%s" from "%s" from chapter "%s"', name, url, chapter_info.viewer_url)
res = await self.image_downloader.run(url, name, storage)
log.debug('Finished downloading: "%s" from "%s" from chapter "%s"', name, url, chapter_info.viewer_url)
await self._report_progress(chapter_info, "PageCompleted")
return res

Expand Down
34 changes: 14 additions & 20 deletions webtoon_downloader/core/webtoon/downloaders/comic.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,6 @@

log = logging.getLogger(__name__)

DEFAULT_CHAPTER_LIMIT = 8
"""Default number of asynchronous workers. More == More likely to get server rate limited"""


@dataclass
class WebtoonDownloader:
Expand All @@ -38,15 +35,14 @@ class WebtoonDownloader:
Manages the entire process of downloading multiple chapters from a Webtoon series, including fetching chapter details, setting up storage, and handling concurrency.
Attributes:
url : URL of the Webtoon series to download.
chapter_downloader : The downloader responsible for individual chapters.
storage_type : The type of storage to use for the downloaded chapters.
start_chapter : The first chapter to download.
end_chapter : The last chapter to download.
concurrent_chapters : The number of chapters to download concurrently.
directory : The directory where the downloaded chapters will be stored.
exporter : Optional data exporter for exporting series details.
on_webtoon_fetched : Optional callback executed after fetching Webtoon information.
url : URL of the Webtoon series to download.
chapter_downloader : The downloader responsible for individual chapters.
storage_type : The type of storage to use for the downloaded chapters.
start_chapter : The first chapter to download.
end_chapter : The last chapter to download.
directory : The directory where the downloaded chapters will be stored.
exporter : Optional data exporter for exporting series details.
on_webtoon_fetched : Optional callback executed after fetching Webtoon information.
"""

url: str
Expand All @@ -55,7 +51,6 @@ class WebtoonDownloader:

start_chapter: int | None = None
end_chapter: int | None | Literal["latest"] = None
concurrent_chapters: int = DEFAULT_CHAPTER_LIMIT
directory: str | PathLike[str] | None = None
exporter: DataExporter | None = None
on_webtoon_fetched: OnWebtoonFetchCallback | None = None
Expand Down Expand Up @@ -83,11 +78,9 @@ async def run(self) -> list[DownloadResult]:

await self._export_data(extractor)

# Semaphore to limit the number of concurrent chapter downloads
semaphore = asyncio.Semaphore(self.concurrent_chapters)
tasks = []
for chapter_info in chapter_list:
task = self._create_task(chapter_info, semaphore)
task = self._create_task(chapter_info)
tasks.append(task)

results = await asyncio.gather(*tasks, return_exceptions=False)
Expand All @@ -113,7 +106,7 @@ async def _get_chapters(self, client: httpx.AsyncClient) -> list[ChapterInfo]:

return chapters

def _create_task(self, chapter_info: ChapterInfo, semaphore: asyncio.Semaphore) -> asyncio.Task:
def _create_task(self, chapter_info: ChapterInfo) -> asyncio.Task:
"""
Creates an asynchronous task for downloading a Webtoon chapter.
Expand All @@ -126,9 +119,8 @@ def _create_task(self, chapter_info: ChapterInfo, semaphore: asyncio.Semaphore)
"""

async def task() -> list[DownloadResult]:
async with semaphore:
storage = await self._get_storage(chapter_info)
return await self.chapter_downloader.run(chapter_info, self._directory, storage)
storage = await self._get_storage(chapter_info)
return await self.chapter_downloader.run(chapter_info, self._directory, storage)

return asyncio.create_task(task())

Expand Down Expand Up @@ -180,6 +172,7 @@ async def download_webtoon(opts: WebtoonDownloadOptions) -> list[DownloadResult]
image_downloader = ImageDownloader(
client=webtoon.client.new_image_client(),
transformers=[AioImageFormatTransformer(opts.image_format)],
concurent_downloads_limit=opts.concurrent_pages,
)

exporter = DataExporter(opts.exporter_format) if opts.export_metadata else None
Expand All @@ -189,6 +182,7 @@ async def download_webtoon(opts: WebtoonDownloadOptions) -> list[DownloadResult]
progress_callback=opts.chapter_progress_callback,
image_downloader=image_downloader,
file_name_generator=file_name_generator,
concurrent_downloads_limit=opts.concurrent_chapters,
)

end: int | None | Literal["latest"]
Expand Down
Loading

0 comments on commit c4aef88

Please sign in to comment.