Skip to content

Commit

Permalink
Add two options for language name style in saved subtitles (#1155)
Browse files Browse the repository at this point in the history
* add an option for language name style in saved subtitles

* fix mypy
  • Loading branch information
getzze committed Sep 9, 2024
1 parent dcbae82 commit 92a4022
Show file tree
Hide file tree
Showing 5 changed files with 156 additions and 16 deletions.
2 changes: 2 additions & 0 deletions changelog.d/1022.change.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
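Add a ``--language-format`` option to change the style of the language suffix of saved subtitles.
Add a ``--language-type-suffix`` option to also append the language type (``hi`` for hearing impaired, or ``forced``) to the saved subtitle name.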
23 changes: 22 additions & 1 deletion subliminal/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,17 @@ def cache(ctx: click.Context, clear_subliminal: bool) -> None:
default=0,
help='Minimum score for a subtitle to be downloaded (0 to 100).',
)
@click.option(
'--language-type-suffix',
is_flag=True,
default=False,
help='Add a suffix to the saved subtitle name to indicate a hearing impaired or foreign part subtitle.',
)
@click.option(
'--language-format',
default='alpha2',
help='Format of the language code in the saved subtitle name. Default is a 2-letter language code.',
)
@click.option('-w', '--max-workers', type=click.IntRange(1, 50), default=None, help='Maximum number of threads to use.')
@click.option(
'-z/-Z',
Expand Down Expand Up @@ -459,6 +470,8 @@ def download(
force: bool,
hearing_impaired: bool,
min_score: int,
language_type_suffix: bool,
language_format: str,
max_workers: int,
archives: bool,
name: str | None,
Expand Down Expand Up @@ -651,7 +664,15 @@ def download(
# save subtitles
total_subtitles = 0
for v, subtitles in downloaded_subtitles.items():
saved_subtitles = save_subtitles(v, subtitles, single=single, directory=directory, encoding=encoding)
saved_subtitles = save_subtitles(
v,
subtitles,
single=single,
directory=directory,
encoding=encoding,
language_type_suffix=language_type_suffix,
language_format=language_format,
)
total_subtitles += len(saved_subtitles)

if verbose > 0:
Expand Down
12 changes: 11 additions & 1 deletion subliminal/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -838,6 +838,8 @@ def save_subtitles(
directory: str | os.PathLike | None = None,
encoding: str | None = None,
extension: str | None = None,
language_type_suffix: bool = False,
language_format: str = 'alpha2',
) -> list[Subtitle]:
"""Save subtitles on filesystem.
Expand All @@ -855,6 +857,8 @@ def save_subtitles(
:param str directory: path to directory where to save the subtitles, default is next to the video.
:param str encoding: encoding in which to save the subtitles, default is to keep original encoding.
:param (str | None) extension: the subtitle extension, default is to match to the subtitle format.
:param bool language_type_suffix: add a suffix 'hi' or 'forced' if needed. Default to False.
:param str language_format: format of the language suffix. Default to 'alpha2'.
:return: the saved subtitles
:rtype: list of :class:`~subliminal.subtitle.Subtitle`
Expand All @@ -872,7 +876,13 @@ def save_subtitles(
continue

# create subtitle path
subtitle_path = subtitle.get_path(video, single=single, extension=extension)
subtitle_path = subtitle.get_path(
video,
single=single,
extension=extension,
language_type_suffix=language_type_suffix,
language_format=language_format,
)
if directory is not None:
subtitle_path = os.path.join(directory, os.path.split(subtitle_path)[1])

Expand Down
95 changes: 84 additions & 11 deletions subliminal/subtitle.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,20 +425,41 @@ def guess_encoding(self) -> str | None:

return encoding_or_none

def get_path(
    self,
    video: Video,
    *,
    single: bool = False,
    extension: str | None = None,
    language_type_suffix: bool = False,
    language_format: str = 'alpha2',
) -> str:
    """Get the subtitle path using the `video`, `language` and `extension`.

    :param video: the video; its ``name`` is used as the base of the subtitle path.
    :type video: :class:`~subliminal.video.Video`
    :param bool single: save a single subtitle, default is to save one subtitle per language.
        When True, no language suffix is added to the path.
    :param (str | None) extension: the subtitle extension, default is to match to the subtitle format.
    :param bool language_type_suffix: add a suffix 'hi' or 'forced' if needed. Defaults to False.
    :param str language_format: format of the language suffix. Defaults to 'alpha2'.
    :return: path of the subtitle.
    :rtype: str

    """
    if extension is None:
        # Unknown subtitle formats fall back to '.srt'
        extension = FORMAT_TO_EXTENSION.get(self.subtitle_format, '.srt')  # type: ignore[arg-type]

    # The suffix must be a string: the language is rendered by get_subtitle_suffix,
    # never passed as a Language object to get_subtitle_path.
    if single:
        suffix = ''
    else:
        suffix = get_subtitle_suffix(
            self.language,
            language_format=language_format,
            language_type=self.language_type,
            language_type_suffix=language_type_suffix,
        )
    return get_subtitle_path(video.name, suffix=suffix, extension=extension)

def get_matches(self, video: Video) -> set[str]:
"""Get the matches against the `video`.
Expand Down Expand Up @@ -513,30 +534,82 @@ def get_subtitle_format(
except UnknownFPSError:
default_fps = 24
return get_subtitle_format(text, subtitle_format=subtitle_format, fps=default_fps)
except Exception:
except Exception: # pragma: no cover
logger.exception('not a valid subtitle.')
else:
return str(obj.format)
return None
return None # pragma: no cover


def get_subtitle_path(video_path: str | os.PathLike, language: Language | None = None, extension: str = '.srt') -> str:
"""Get the subtitle path using the `video_path` and `language`.
def get_subtitle_suffix(
    language: Language,
    *,
    language_format: str = 'alpha2',
    language_type: LanguageType = LanguageType.UNKNOWN,
    language_type_suffix: bool = False,
) -> str:
    """Get the subtitle suffix using the `language` and `language_type`.

    The result is the language type part (if requested) followed by the language part,
    e.g. ``'.hi.de-CH-Latn'``. Each non-empty part starts with a dot.

    :param language: language of the subtitle to put in the path.
    :type language: :class:`~babelfish.language.Language`
    :param str language_format: format of the language suffix. Defaults to 'alpha2'.
    :param LanguageType language_type: the language type of the subtitle (hearing impaired or forced).
    :param bool language_type_suffix: add a suffix 'hi' or 'forced' if needed. Defaults to False.
    :return: suffix to the subtitle name.
    :rtype: str

    """
    # Imported locally so this function does not depend on the module-level import list.
    from babelfish import LanguageConvertError  # type: ignore[import-untyped]

    # Formats that only encode the language; country and script are appended separately.
    only_language_formats = ('alpha2', 'alpha3', 'alpha3b', 'alpha3t', 'name')

    # Language part
    language_part = ''
    if language:
        # Defined language, not Language('und')
        try:
            language_str = getattr(language, language_format)
        except (AttributeError, LanguageConvertError):  # pragma: no cover
            # AttributeError: unknown format name.
            # LanguageConvertError: the language has no code in this format
            # (for instance no 2-letter code).
            # str(language) already contains country and script, so nothing more is appended.
            logger.warning('cannot convert language %s using scheme: %s', language, language_format)
            language_part = f'.{language!s}'
        else:
            language_part = f'.{language_str}'
            if language_format in only_language_formats:  # pragma: no branch
                # Add country and script if present
                if language.country is not None:
                    language_part += f'-{language.country!s}'
                if language.script is not None:
                    language_part += f'-{language.script!s}'

    # Language type part
    language_type_part = ''
    if language_type_suffix:
        if language_type == LanguageType.HEARING_IMPAIRED:
            language_type_part = '.hi'
        elif language_type == LanguageType.FORCED:
            language_type_part = '.forced'

    return language_type_part + language_part


def get_subtitle_path(
    video_path: str | os.PathLike,
    suffix: str = '',
    extension: str = '.srt',
) -> str:
    """Get the subtitle path using the `video_path`, `suffix` and `extension`.

    The extension of `video_path` is removed, then `suffix` and `extension` are appended as given.
    No separator is inserted, so a non-empty `suffix` is expected to start with a dot (e.g. ``'.en'``).

    :param video_path: path to the video.
    :type video_path: str | os.PathLike
    :param str suffix: suffix with the language of the subtitle to put in the path.
    :param str extension: extension of the subtitle.
    :return: path of the subtitle.
    :rtype: str

    """
    # Full name and path, without the video extension
    subtitle_root = os.path.splitext(video_path)[0]

    return subtitle_root + suffix + extension


def find_encoding_with_bom(data: bytes) -> list[str]:
Expand Down
40 changes: 37 additions & 3 deletions tests/test_subtitle.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pytest
from babelfish import Language # type: ignore[import-untyped]
from subliminal.subtitle import Subtitle, fix_line_ending, get_subtitle_path
from subliminal.subtitle import LanguageType, Subtitle, fix_line_ending, get_subtitle_path, get_subtitle_suffix


def test_subtitle_text():
Expand Down Expand Up @@ -84,12 +84,46 @@ def test_get_subtitle_path(movies):

def test_get_subtitle_path_language(movies):
    """A language with a country gives a '.pt-BR' suffix with the default format."""
    video = movies['man_of_steel']
    # get_subtitle_path takes a string suffix, so the Language is rendered first.
    suffix = get_subtitle_suffix(Language('por', 'BR'))
    assert get_subtitle_path(video.name, suffix) == os.path.splitext(video.name)[0] + '.pt-BR.srt'


def test_get_subtitle_path_language_undefined(movies):
    """The undefined language gives no suffix: only the extension is appended."""
    video = movies['man_of_steel']
    # get_subtitle_path takes a string suffix, so the Language is rendered first.
    suffix = get_subtitle_suffix(Language('und'))
    assert get_subtitle_path(video.name, suffix) == os.path.splitext(video.name)[0] + '.srt'


def test_get_subtitle_path_hearing_impaired(movies):
    """The hearing impaired type adds '.hi' before the language, country and script."""
    video = movies['man_of_steel']
    hi_suffix = get_subtitle_suffix(
        Language('deu', 'CH', 'Latn'),
        language_type=LanguageType.HEARING_IMPAIRED,
        language_type_suffix=True,
    )
    subtitle_path = get_subtitle_path(video.name, hi_suffix)
    video_root, _ = os.path.splitext(video.name)
    assert subtitle_path == video_root + '.hi.de-CH-Latn.srt'


def test_get_subtitle_path_forced(movies):
    """The forced type adds '.forced' before the language and script (no country here)."""
    video = movies['man_of_steel']
    forced_suffix = get_subtitle_suffix(
        Language('srp', None, 'Cyrl'),
        language_type=LanguageType.FORCED,
        language_type_suffix=True,
    )
    subtitle_path = get_subtitle_path(video.name, forced_suffix)
    video_root, _ = os.path.splitext(video.name)
    assert subtitle_path == video_root + '.forced.sr-Cyrl.srt'


def test_get_subtitle_path_alpha3(movies):
    """The 'alpha3' format gives a 3-letter language code followed by the country."""
    video = movies['man_of_steel']
    alpha3_suffix = get_subtitle_suffix(Language('fra', 'CA'), language_format='alpha3')
    subtitle_path = get_subtitle_path(video.name, alpha3_suffix)
    video_root, _ = os.path.splitext(video.name)
    assert subtitle_path == video_root + '.fra-CA.srt'


def test_get_subtitle_path_extension(movies):
    """A custom extension replaces '.srt'; no type suffix is added when the type is unknown."""
    video = movies['man_of_steel']
    language_suffix = get_subtitle_suffix(Language('zho', 'CN'), language_type_suffix=True)
    subtitle_path = get_subtitle_path(video.name, language_suffix, extension='.sub')
    video_root, _ = os.path.splitext(video.name)
    assert subtitle_path == video_root + '.zh-CN.sub'


def test_fix_line_ending():
Expand Down

0 comments on commit 92a4022

Please sign in to comment.