From 92a40227b8ce49415394af771eb4766a3993951c Mon Sep 17 00:00:00 2001 From: getzze Date: Tue, 10 Sep 2024 00:02:21 +0100 Subject: [PATCH] Add two options for language name style in saved subtitles (#1155) * add an option for language name style in saved subtitles * fix mypy --- changelog.d/1022.change.rst | 2 + subliminal/cli.py | 23 ++++++++- subliminal/core.py | 12 ++++- subliminal/subtitle.py | 95 ++++++++++++++++++++++++++++++++----- tests/test_subtitle.py | 40 ++++++++++++++-- 5 files changed, 156 insertions(+), 16 deletions(-) create mode 100644 changelog.d/1022.change.rst diff --git a/changelog.d/1022.change.rst b/changelog.d/1022.change.rst new file mode 100644 index 00000000..eee4f57f --- /dev/null +++ b/changelog.d/1022.change.rst @@ -0,0 +1,2 @@ +Add an option to change the style of the language suffix of saved subtitles. +Allow adding the language type, hi or forced. diff --git a/subliminal/cli.py b/subliminal/cli.py index cb832294..222760a7 100644 --- a/subliminal/cli.py +++ b/subliminal/cli.py @@ -419,6 +419,17 @@ def cache(ctx: click.Context, clear_subliminal: bool) -> None: default=0, help='Minimum score for a subtitle to be downloaded (0 to 100).', ) +@click.option( + '--language-type-suffix', + is_flag=True, + default=False, + help='Add a suffix to the saved subtitle name to indicate a hearing impaired or foreign part subtitle.', +) +@click.option( + '--language-format', + default='alpha2', + help='Format of the language code in the saved subtitle name. Default is a 2-letter language code.', +) @click.option('-w', '--max-workers', type=click.IntRange(1, 50), default=None, help='Maximum number of threads to use.') @click.option( '-z/-Z', @@ -459,6 +470,8 @@ def download( force: bool, hearing_impaired: bool, min_score: int, + language_type_suffix: bool, + language_format: str, max_workers: int, archives: bool, name: str | None, @@ -651,7 +664,15 @@ def download( # save subtitles total_subtitles = 0 for v, subtitles in downloaded_subtitles.items(): - saved_subtitles = save_subtitles(v, subtitles, single=single, directory=directory, encoding=encoding) + saved_subtitles = save_subtitles( + v, + subtitles, + single=single, + directory=directory, + encoding=encoding, + language_type_suffix=language_type_suffix, + language_format=language_format, + ) total_subtitles += len(saved_subtitles) if verbose > 0: diff --git a/subliminal/core.py b/subliminal/core.py index b6d429ff..d511bf84 100644 --- a/subliminal/core.py +++ b/subliminal/core.py @@ -838,6 +838,8 @@ def save_subtitles( directory: str | os.PathLike | None = None, encoding: str | None = None, extension: str | None = None, + language_type_suffix: bool = False, + language_format: str = 'alpha2', ) -> list[Subtitle]: """Save subtitles on filesystem. @@ -855,6 +857,8 @@ def save_subtitles( :param str directory: path to directory where to save the subtitles, default is next to the video. :param str encoding: encoding in which to save the subtitles, default is to keep original encoding. :param (str | None) extension: the subtitle extension, default is to match to the subtitle format. + :param bool language_type_suffix: add a suffix 'hi' or 'forced' if needed. Default to False. + :param str language_format: format of the language suffix. Default to 'alpha2'. :return: the saved subtitles :rtype: list of :class:`~subliminal.subtitle.Subtitle` @@ -872,7 +876,13 @@ def save_subtitles( continue # create subtitle path - subtitle_path = subtitle.get_path(video, single=single, extension=extension) + subtitle_path = subtitle.get_path( + video, + single=single, + extension=extension, + language_type_suffix=language_type_suffix, + language_format=language_format, + ) if directory is not None: subtitle_path = os.path.join(directory, os.path.split(subtitle_path)[1]) diff --git a/subliminal/subtitle.py b/subliminal/subtitle.py index be5e410f..7d903e87 100644 --- a/subliminal/subtitle.py +++ b/subliminal/subtitle.py @@ -425,20 +425,41 @@ def guess_encoding(self) -> str | None: return encoding_or_none - def get_path(self, video: Video, *, single: bool = False, extension: str | None = None) -> str: + def get_path( + self, + video: Video, + *, + single: bool = False, + extension: str | None = None, + language_type_suffix: bool = False, + language_format: str = 'alpha2', + ) -> str: """Get the subtitle path using the `video`, `language` and `extension`. :param video: path to the video. :type video: :class:`~subliminal.video.Video` :param bool single: save a single subtitle, default is to save one subtitle per language. :param (str | None) extension: the subtitle extension, default is to match to the subtitle format. + :param bool language_type_suffix: add a suffix 'hi' or 'forced' if needed. Default to False. + :param str language_format: format of the language suffix. Default to 'alpha2'. :return: path of the subtitle. :rtype: str """ if extension is None: extension = FORMAT_TO_EXTENSION.get(self.subtitle_format, '.srt') # type: ignore[arg-type] - return get_subtitle_path(video.name, None if single else self.language, extension=extension) + + suffix = ( + '' + if single + else get_subtitle_suffix( + self.language, + language_format=language_format, + language_type=self.language_type, + language_type_suffix=language_type_suffix, + ) + ) + return get_subtitle_path(video.name, suffix=suffix, extension=extension) def get_matches(self, video: Video) -> set[str]: """Get the matches against the `video`. @@ -513,30 +534,82 @@ def get_subtitle_format( except UnknownFPSError: default_fps = 24 return get_subtitle_format(text, subtitle_format=subtitle_format, fps=default_fps) - except Exception: + except Exception: # pragma: no cover logger.exception('not a valid subtitle.') else: return str(obj.format) - return None + return None # pragma: no cover -def get_subtitle_path(video_path: str | os.PathLike, language: Language | None = None, extension: str = '.srt') -> str: - """Get the subtitle path using the `video_path` and `language`. +def get_subtitle_suffix( + language: Language, + *, + language_format: str = 'alpha2', + language_type: LanguageType = LanguageType.UNKNOWN, + language_type_suffix: bool = False, +) -> str: + """Get the subtitle suffix using the `language` and `language_type`. - :param str video_path: path to the video. :param language: language of the subtitle to put in the path. :type language: :class:`~babelfish.language.Language` + :param str language_format: format of the language suffix. Default to 'alpha2'. + :param LanguageType language_type: the language type of the subtitle (hearing impaired or forced). + :param bool language_type_suffix: add a suffix 'hi' or 'forced' if needed. Default to False. + :return: suffix to the subtitle name. + :rtype: str + + """ + only_language_formats = ('alpha2', 'alpha3', 'alpha3b', 'alpha3t', 'name') + + # Language part + language_part = '' + if language: + # Defined language, not Language('und') + try: + language_str = getattr(language, language_format) + except AttributeError: # pragma: no cover + logger.warning('cannot convert language %s using scheme: %s', language, language_format) + language_str = str(language) + + language_part = f'.{language_str}' + if language_format in only_language_formats: # pragma: no branch + # Add country and script if present + if language.country is not None: + # add country + language_part += f'-{language.country!s}' + if language.script is not None: + # add script + language_part += f'-{language.script!s}' + + # Language type part + language_type_part = '' + if language_type_suffix: + if language_type == LanguageType.HEARING_IMPAIRED: + language_type_part = '.hi' + elif language_type == LanguageType.FORCED: + language_type_part = '.forced' + + return language_type_part + language_part + + +def get_subtitle_path( + video_path: str | os.PathLike, + suffix: str = '', + extension: str = '.srt', +) -> str: + """Get the subtitle path using the `video_path` and `language`. + + :param str video_path: path to the video. + :param str suffix: suffix with the language of the subtitle to put in the path. :param str extension: extension of the subtitle. :return: path of the subtitle. :rtype: str """ + # Full name and path subtitle_root = os.path.splitext(video_path)[0] - if language: - subtitle_root += '.' + str(language) - - return subtitle_root + extension + return subtitle_root + suffix + extension def find_encoding_with_bom(data: bytes) -> list[str]: diff --git a/tests/test_subtitle.py b/tests/test_subtitle.py index db3d3d23..ae759f8e 100644 --- a/tests/test_subtitle.py +++ b/tests/test_subtitle.py @@ -2,7 +2,7 @@ import pytest from babelfish import Language # type: ignore[import-untyped] -from subliminal.subtitle import Subtitle, fix_line_ending, get_subtitle_path +from subliminal.subtitle import LanguageType, Subtitle, fix_line_ending, get_subtitle_path, get_subtitle_suffix def test_subtitle_text(): @@ -84,12 +84,46 @@ def test_get_subtitle_path(movies): def test_get_subtitle_path_language(movies): video = movies['man_of_steel'] - assert get_subtitle_path(video.name, Language('por', 'BR')) == os.path.splitext(video.name)[0] + '.pt-BR.srt' + suffix = get_subtitle_suffix(Language('por', 'BR')) + assert get_subtitle_path(video.name, suffix) == os.path.splitext(video.name)[0] + '.pt-BR.srt' def test_get_subtitle_path_language_undefined(movies): video = movies['man_of_steel'] - assert get_subtitle_path(video.name, Language('und')) == os.path.splitext(video.name)[0] + '.srt' + suffix = get_subtitle_suffix(Language('und')) + assert get_subtitle_path(video.name, suffix) == os.path.splitext(video.name)[0] + '.srt' + + +def test_get_subtitle_path_hearing_impaired(movies): + video = movies['man_of_steel'] + suffix = get_subtitle_suffix( + Language('deu', 'CH', 'Latn'), + language_type=LanguageType.HEARING_IMPAIRED, + language_type_suffix=True, + ) + assert get_subtitle_path(video.name, suffix) == os.path.splitext(video.name)[0] + '.hi.de-CH-Latn.srt' + + +def test_get_subtitle_path_forced(movies): + video = movies['man_of_steel'] + suffix = get_subtitle_suffix( + Language('srp', None, 'Cyrl'), + language_type=LanguageType.FORCED, + language_type_suffix=True, + ) + assert get_subtitle_path(video.name, suffix) == os.path.splitext(video.name)[0] + '.forced.sr-Cyrl.srt' + + +def test_get_subtitle_path_alpha3(movies): + video = movies['man_of_steel'] + suffix = get_subtitle_suffix(Language('fra', 'CA'), language_format='alpha3') + assert get_subtitle_path(video.name, suffix) == os.path.splitext(video.name)[0] + '.fra-CA.srt' + + +def test_get_subtitle_path_extension(movies): + video = movies['man_of_steel'] + suffix = get_subtitle_suffix(Language('zho', 'CN'), language_type_suffix=True) + assert get_subtitle_path(video.name, suffix, extension='.sub') == os.path.splitext(video.name)[0] + '.zh-CN.sub' def test_fix_line_ending():