From 2310fb5587512bb14164091f934328ce3ca4b94b Mon Sep 17 00:00:00 2001 From: NonVideri Date: Wed, 20 Sep 2023 12:06:47 +0200 Subject: [PATCH 1/4] Add --name-format and --direct options --- blinkist/book.py | 19 +++++++++---------- blinkist/chapter.py | 4 ++-- blinkist/config.py | 7 ++++--- main.py | 37 +++++++++++++++++++++++++++++++------ 4 files changed, 46 insertions(+), 21 deletions(-) diff --git a/blinkist/book.py b/blinkist/book.py index 18282c7..7a76745 100644 --- a/blinkist/book.py +++ b/blinkist/book.py @@ -6,10 +6,9 @@ from .chapter import Chapter from .common import api_request_web, download, request -from .config import BASE_URL, FILENAME_COVER, FILENAME_RAW, FILENAME_TEXT +from .config import BASE_URL, DEFAULT_FILENAME_COVER, DEFAULT_FILENAME_RAW, DEFAULT_FILENAME_TEXT from .console import track - class Book: def __init__(self, book_data: dict) -> None: self.data = book_data @@ -17,8 +16,8 @@ def __init__(self, book_data: dict) -> None: # pylint: disable=C0103 self.id = book_data['id'] self.language = book_data['language'] - self.slug = book_data['slug'] - self.title = book_data['title'] + self.slug: str = book_data['slug'] + self.title: str = book_data['title'] self.is_audio: bool = book_data['isAudio'] def __repr__(self) -> str: @@ -57,7 +56,7 @@ def chapters(self) -> List[Chapter]: ] return chapters - def download_cover(self, target_dir: Path) -> None: + def download_cover(self, target_dir: Path, file_name: str | None) -> None: """ Downloads the cover image to the given directory, in the highest resolution available. 
@@ -70,12 +69,12 @@ def download_cover(self, target_dir: Path) -> None: # example: 'https://images.blinkist.io/images/books/617be9b56cee07000723559e/1_1/470.jpg' → 470 url = sorted(urls, key=lambda x: int(x.split('/')[-1].rstrip('.jpg')), reverse=True)[0] - file_path = target_dir / f"{FILENAME_COVER}.jpg" + file_path = target_dir / f"{file_name or DEFAULT_FILENAME_COVER}.jpg" assert url.endswith('.jpg') download(url, file_path) - def download_text_md(self, target_dir: Path) -> None: + def download_text_md(self, target_dir: Path, file_name: str | None) -> None: """ Downloads the text content as Markdown to the given directory. """ @@ -120,7 +119,7 @@ def md_section(level: int, title: str, text: str) -> str: markdown_text = "\n\n\n".join(parts) - file_path = target_dir / f"{FILENAME_TEXT}.md" + file_path = target_dir / f"{file_name or DEFAULT_FILENAME_TEXT}.md" file_path.write_text(markdown_text, encoding='utf-8') def serialize(self) -> dict: @@ -135,11 +134,11 @@ def serialize(self) -> dict: ], } - def download_raw_yaml(self, target_dir: Path) -> None: + def download_raw_yaml(self, target_dir: Path, file_name: str | None) -> None: """ Downloads the raw YAML to the given directory. """ - file_path = target_dir / f"{FILENAME_RAW}.yaml" + file_path = target_dir / f"{file_name or DEFAULT_FILENAME_RAW}.yaml" file_path.write_text( yaml.dump( self.serialize(), diff --git a/blinkist/chapter.py b/blinkist/chapter.py index 04a6107..525b7d1 100644 --- a/blinkist/chapter.py +++ b/blinkist/chapter.py @@ -22,13 +22,13 @@ def serialize(self) -> dict: """ return self.data - def download_audio(self, target_dir: Path) -> None: + def download_audio(self, target_dir: Path, file_name: str | None) -> None: if not self.data.get('signed_audio_url'): # NOTE: In books where is_audio is true, every chapter should have audio, so this should never happen. 
logging.warning(f'No audio for chapter {self.id}') return - file_path = target_dir / f"chapter_{self.data['order_no']}.m4a" + file_path = target_dir / f"{f'{file_name} ' if file_name else ''}chapter_{self.data['order_no']}.m4a" assert 'm4a' in self.data['signed_audio_url'] download(self.data['signed_audio_url'], file_path) diff --git a/blinkist/config.py b/blinkist/config.py index e967a5b..0980beb 100644 --- a/blinkist/config.py +++ b/blinkist/config.py @@ -14,6 +14,7 @@ LANGUAGES = ['en', 'de'] -FILENAME_COVER = "cover" -FILENAME_TEXT = "book" -FILENAME_RAW = "book" +# Default names for downloaded files if --name-format is not specified. +DEFAULT_FILENAME_COVER = "cover" +DEFAULT_FILENAME_TEXT = "book" +DEFAULT_FILENAME_RAW = "book" diff --git a/main.py b/main.py index b4b65fe..9482dcc 100755 --- a/main.py +++ b/main.py @@ -25,6 +25,8 @@ def download_book( book: Book, language: str, library_dir: Path, + name_format: str = None, + direct: bool = False, # --- yaml: bool = True, markdown: bool = True, @@ -43,11 +45,24 @@ def download_book( # setup book directory # book_dir = library_dir / f"{datetime.today().strftime('%Y-%m-%d')} – {book.slug}" book_dir = library_dir / book.slug - if book_dir.exists() and not redownload: + if direct: + book_dir = library_dir + if book_dir.exists() and not redownload and not direct: logging.info(f"Skipping “{book.title}” – already downloaded.") # TODO: this doss not check if the download was complete! Can we do something about that return - book_dir.mkdir(exist_ok=True) # We don't make parents in order to avoid user error. + if not direct: + book_dir.mkdir(exist_ok=True) # We don't make parents in order to avoid user error. 
+ + file_name = None + if name_format == "slug": + file_name = book.slug + elif name_format == "title": + file_name = book.title + elif name_format == "title-upper": + file_name = book.title.upper() + elif name_format == "id": + file_name = book.id try: # prefetch chapter_list and chapters for nicer progress info @@ -60,25 +75,25 @@ def download_book( # This comes first so we have all information saved as early as possible. if yaml: with status("Downloading raw YAML…"): - book.download_raw_yaml(book_dir) + book.download_raw_yaml(book_dir, file_name) # download text (Markdown) if markdown: with status("Downloading text…"): - book.download_text_md(book_dir) + book.download_text_md(book_dir, file_name) # download audio if audio: if book.is_audio: for chapter in track(book.chapters, description="Downloading audio…"): - chapter.download_audio(book_dir) + chapter.download_audio(book_dir, file_name) else: logging.warning("This book has no audio.") # download cover if cover: with status("Downloading cover…"): - book.download_cover(book_dir) + book.download_cover(book_dir, file_name) except Exception as e: logging.error(f"Error downloading “{book.title}”: {e}") @@ -117,6 +132,12 @@ def download_book( @click.option('--trending', help="Download trending books. Limited to 8 results by default. Use --limit to override.", is_flag=True, default=False) # ▒▒ meta @click.option('--limit', help="Limit the number of books to download. Defaults to no limit.", type=int, default=None) +@click.option('--name-format', '-n', help='''Sets file names format. By default no format is set, and generic names from `config.py` are used. Supported values: + - "slug": Book title slug (e.g. "the-4-hour-workweek") + - "title": Book title (e.g. "The 4-Hour Workweek") + - "title-upper": Book title in uppercase (e.g. "THE 4-HOUR WORKWEEK") + - "id": Book ID (e.g. 
"617be9b56cee07000723559e")''', type=str, default=None) +@click.option('--direct', help="Save directly in parent folder, instead of creating a new folder for the book. Requires --file-format to be set.", is_flag=True, default=False) # ▒ file format switches ↓ # ▒▒ raw @click.option('--audio/--no-audio', help="Download audio", default=True) @@ -128,6 +149,10 @@ def main(**kwargs): languages_to_download = [kwargs['language']] if kwargs['language'] else LANGUAGES # default to all languages books_to_download = set() + if kwargs['direct'] and not kwargs['name_format']: + logging.error("Error: --direct requires --name-format to be set.") + return + if kwargs['book_slug']: books_to_download.add(Book.from_slug(kwargs['book_slug'])) From be9984dc12b5171e067f78bb79c68db5b1d7f7fa Mon Sep 17 00:00:00 2001 From: NonVideri Date: Wed, 20 Sep 2023 12:07:00 +0200 Subject: [PATCH 2/4] Update .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index bee8a64..b0f2192 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ __pycache__ +.venv \ No newline at end of file From 06416ff43b0aa5f8e22a136859e08f82720e8a33 Mon Sep 17 00:00:00 2001 From: NonVideri Date: Wed, 20 Sep 2023 12:28:20 +0200 Subject: [PATCH 3/4] Move click options, update help descriptions --- main.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/main.py b/main.py index 9482dcc..2c26c0b 100755 --- a/main.py +++ b/main.py @@ -132,12 +132,6 @@ def download_book( @click.option('--trending', help="Download trending books. Limited to 8 results by default. Use --limit to override.", is_flag=True, default=False) # ▒▒ meta @click.option('--limit', help="Limit the number of books to download. Defaults to no limit.", type=int, default=None) -@click.option('--name-format', '-n', help='''Sets file names format. By default no format is set, and generic names from `config.py` are used. Supported values: - - "slug": Book title slug (e.g. 
"the-4-hour-workweek") - - "title": Book title (e.g. "The 4-Hour Workweek") - - "title-upper": Book title in uppercase (e.g. "THE 4-HOUR WORKWEEK") - - "id": Book ID (e.g. "617be9b56cee07000723559e")''', type=str, default=None) -@click.option('--direct', help="Save directly in parent folder, instead of creating a new folder for the book. Requires --file-format to be set.", is_flag=True, default=False) # ▒ file format switches ↓ # ▒▒ raw @click.option('--audio/--no-audio', help="Download audio", default=True) @@ -145,6 +139,13 @@ def download_book( @click.option('--yaml/--no-yaml', help="Save content as YAML", default=True) # ▒▒ processed @click.option('--markdown/--no-markdown', help="Save content as Markdown", default=True) +# ▒▒ output format +@click.option('--name-format', '-n', help='''Sets the format for output file names. By default no format is set, and generic names from config.py are used. Supported values: + - "slug": Book title slug (e.g. "the-4-hour-workweek") + - "title": Book title (e.g. "The 4-Hour Workweek") + - "title-upper": Book title in uppercase (e.g. "THE 4-HOUR WORKWEEK") + - "id": Book ID (e.g. "617be9b56cee07000723559e")''', type=str, default=None) +@click.option('--direct', help="Saves files directly in the parent folder, instead of creating a new folder for the book. 
Requires --name-format to be set.", is_flag=True, default=False) def main(**kwargs): languages_to_download = [kwargs['language']] if kwargs['language'] else LANGUAGES # default to all languages books_to_download = set() From aa03d10a434014c84d01fbaf18a68cef542b5cbc Mon Sep 17 00:00:00 2001 From: NonVideri Date: Thu, 21 Sep 2023 12:10:19 +0200 Subject: [PATCH 4/4] Don't rename output directory if --direct is set --- main.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/main.py b/main.py index 2c26c0b..b4eaee4 100755 --- a/main.py +++ b/main.py @@ -97,14 +97,17 @@ def download_book( except Exception as e: logging.error(f"Error downloading “{book.title}”: {e}") - error_dir = book_dir.parent / f"{book.slug} – ERROR" - i = 0 - while error_dir.exists() and any(error_dir.iterdir()): - i += 1 - error_dir = book_dir.parent / f"{book.slug} – ERROR ({i})" - - book_dir.replace(target=error_dir) - logging.warning(f"Renamed output directory to “{error_dir.relative_to(book_dir.parent)}”") + if not direct: + error_dir = book_dir.parent / f"{book.slug} – ERROR" + i = 0 + while error_dir.exists() and any(error_dir.iterdir()): + i += 1 + error_dir = book_dir.parent / f"{book.slug} – ERROR ({i})" + + book_dir.replace(target=error_dir) + logging.warning(f"Renamed output directory to “{error_dir.relative_to(book_dir.parent)}”") + else: + logging.warning(f"Leaving output directory as “{book_dir.relative_to(book_dir.parent)}” because --direct was set.") if continue_on_error: logging.info("Continuing with next book… (--continue-on-error was set)")