From 2c453b8540d5d5d273573bbf5bd42c9ea3e6cafd Mon Sep 17 00:00:00 2001 From: Zach Stultz Date: Wed, 1 May 2024 09:57:00 -0500 Subject: [PATCH] v2.5.10 --- komga_cover_extractor.py | 368 ++++++++++++++++++++++----------------- 1 file changed, 212 insertions(+), 156 deletions(-) diff --git a/komga_cover_extractor.py b/komga_cover_extractor.py index f04d905..1f6d2b2 100644 --- a/komga_cover_extractor.py +++ b/komga_cover_extractor.py @@ -46,7 +46,7 @@ import settings as settings_file # Version of the script -script_version = (2, 5, 9) +script_version = (2, 5, 10) script_version_text = "v{}.{}.{}".format(*script_version) # Paths = existing library @@ -471,6 +471,8 @@ def __str__(self): # instead of jpg format. output_covers_as_webp = False +series_cover_path = "" + # Folder Class class Folder: @@ -1780,6 +1782,16 @@ def remove_hidden_folders(dirs): return [x for x in dirs if not x.startswith(".")] +# Determines if the string starts with a bracket +def starts_with_bracket(s): + return s.startswith(("(", "[", "{")) + + +# Determines if the string ends with a bracket +def ends_with_bracket(s): + return s.endswith((")", "]", "}")) + + volume_year_regex = r"(\(|\[|\{)(\d{4})(\)|\]|\})" @@ -1806,9 +1818,15 @@ def contains_chapter_keywords(file_name): found = False for pattern in chapter_search_patterns_comp: result = pattern.search(file_name_clean) - if result and not re.search(r"^((\(|\{|\[)\d{4}(\]|\}|\)))$", result.group()): - found = True - break + if result: + result = result.group() + if not ( + starts_with_bracket(result) + and ends_with_bracket(result) + and re.search(r"^((\(|\{|\[)\d{4}(\]|\}|\)))$", result) + ): + found = True + break if not found and not contains_volume_keywords(file_name): # Remove volume year @@ -1837,9 +1855,15 @@ def contains_brackets(s): return bool(brackets_pattern.search(s)) -# Determines if the string starts with a bracket -def starts_with_bracket(s): - return s.startswith(("(", "[", "{")) +# Pre-combiled remove_brackets() patterns +bracket_removal_pattern = re.compile( + r"((((? test .cbz + string = ( + bracket_against_extension_pattern.sub(r"\2", string).strip() + if contains_brackets(string) + else string + ) - # Remove ending bracket against the extension - # EX: test (digital).cbz -> test .cbz - string = re.sub(r"(\[[^\]]*\]|\([^\)]*\)|\{[^}]*\})(\.\w+$)", r"\2", string).strip() + # Remove the extension + # EX: test.cbz -> test + string = string.replace(ext, "").strip() - # Remove any space before the extension from having removed bracketed content - # EX: test .cbz -> test.cbz - string = re.sub(r"\s\.(\w+)$", r".\1", string).strip() + # Re-add the extension + # EX: test -> test.cbz + string = f"{string}{ext}" # Return the modified string return string @@ -2505,7 +2541,7 @@ def get_series_name_from_volume(name, root, test_mode=False, second=False): # Remove "- Complete" from the end # "Series Name - Complete" -> "Series Name" # EX File: Series Name - Complete v01 [Premium] [Publisher].epub - if "complete" in name.lower(): + if name.lower().endswith("complete"): name = re.sub(r"(-|:)\s*Complete$", "", name, flags=re.IGNORECASE).strip() # Default to the root folder name if we have nothing left @@ -2553,7 +2589,7 @@ def chapter_file_name_cleaning( # Remove number and dash at the end # EX: "Series Name 54 -" -> "Series Name" - if regex_matched != 0: + if regex_matched != 0 and file_name.endswith("-"): file_name = re.sub( r"(#)?([0-9]+)([-_.][0-9]+)*((x|#)([0-9]+)([-_.][0-9]+)*)*\s*-$", "", @@ -2562,7 +2598,7 @@ def chapter_file_name_cleaning( # Remove - at the end of the file_name # EX: " One Piece -" -> "One Piece" - if "-" in file_name: + if file_name.endswith("-"): file_name = re.sub(r"(? "'. 031 (2023)"" -> "031 (2023)" + if "." in name: + name = re.sub(r"^\s*(\.)", "", name, re.IGNORECASE).strip() + + # Remove any subtitle + # EX: "series_name 179.1 - Epilogue 01 (2023) (Digital) (release_group).cbz" -> + # "" 179.1 - Epilogue 01" -> "179.1" + if ("-" in name or ":" in name) and re.search(r"(^\d+)", name.strip()): + name = re.sub(r"((\s+(-)|:)\s+).*$", "", name, re.IGNORECASE).strip() + + # Removes # from the number + # EX: #001 -> 001 + if "#" in name: + name = re.sub(r"($#)", "", name, re.IGNORECASE).strip() + + # Removes # from bewteen the numbers + # EX: 154#3 -> 154 + if re.search(r"(\d+#\d+)", name): + name = re.sub(r"((#)([0-9]+)(([-_.])([0-9]+)|)+)", "", name).strip() + + # removes part from chapter number + # EX: 053x1 or c053x1 -> 053 or c053 + if "x" in name: + name = re.sub(r"(x[0-9]+)", "", name, re.IGNORECASE).strip() + + # removes the bracketed info from the end of the string, empty or not + if contains_brackets(name): + name = remove_brackets(name).strip() + + # Removes the - characters.extension from the end of the string, with + # the dash and characters being optional + # EX: - prologue.extension or .extension + name = re.sub( + r"(((\s+)?-(\s+)?([A-Za-z]+))?(%s))" % file_extensions_regex, + "", + name, + re.IGNORECASE, + ).strip() + + if "-" in name: + # - #404 - -> #404 + if name.startswith("- "): + name = name[1:].strip() + if name.endswith(" -"): + name = name[:-1].strip() + + # remove # at the beginning of the string + # EX: #001 -> 001 + if name.startswith("#"): + name = name[1:].strip() + + return name + results = [] is_multi_volume = False keywords = volume_regex_keywords if not chapter else chapter_regex_keywords @@ -2778,27 +2899,14 @@ def get_release_number(file, chapter=False): check_for_multi_volume_file(file, chapter=chapter) if "-" in file else False ) - if not chapter: - result = re.search( - r"\b({})((\.)|)(\s+)?([0-9]+)(([-_.])([0-9]+)|)+\b".format(keywords), - file, - re.IGNORECASE, - ) - else: + if not chapter: # Search for a volume number + result = volume_number_search_pattern.search(file) + else: # Prep for a chapter search if has_multiple_numbers(file): - if re.search( - r"((%s)(\.)?(\s+)?(#)?(([0-9]+)(([-_.])([0-9]+)|)+))$" - % exclusion_keywords_joined, - re.sub(r"(%s)" % file_extensions_regex, "", file), - re.IGNORECASE, - ): - file = re.sub( - r"((%s)(\.)?(\s+)?(#)?(([0-9]+)(([-_.])([0-9]+)|)+))$" - % exclusion_keywords_joined, - "", - re.sub(r"(%s)" % file_extensions_regex, "", file), - re.IGNORECASE, - ).strip() + extension_less_file = get_extensionless_name(file) + + if chapter_number_search_pattern.search(extension_less_file): + file = chapter_number_search_pattern.sub("", extension_less_file) # remove - at the end of the string if file.endswith("-") and not re.search( @@ -2806,94 +2914,46 @@ def get_release_number(file, chapter=False): ): file = file[:-1].strip() - # With a chapter keyword, without, but before bracketed info, or without and with a manga extension or a novel exteion after the number - # Series Name c001.extension or Series Name 001 (2021) (Digital) (Release).extension or Series Name 001.extension - for pattern in chapter_search_patterns_comp: - search_result = pattern.search(file) - if search_result: - result = search_result - break + # Search for a chapter match + result = next( + ( + r + for pattern in chapter_search_patterns_comp + if (r := pattern.search(file)) + ), + None, + ) if result: try: file = result.group().strip() if hasattr(result, "group") else "" - if chapter: - # Removes starting period - # EX: "series_name. 031 (2023).cbz" -> "'. 031 (2023)"" -> "031 (2023)" - if "." in file: - file = re.sub(r"^\s*(\.)", "", file, re.IGNORECASE).strip() - - # Remove any subtitle - # EX: "series_name 179.1 - Epilogue 01 (2023) (Digital) (release_group).cbz" -> - # "" 179.1 - Epilogue 01" -> "179.1" - if ("-" in file or ":" in file) and re.search(r"(^\d+)", file.strip()): - file = re.sub( - r"((\s+(-)|:)\s+).*$", "", file, re.IGNORECASE - ).strip() - - # Removes # from the number - # EX: #001 -> 001 - if "#" in file: - file = re.sub(r"($#)", "", file, re.IGNORECASE).strip() - # Removes # from bewteen the numbers - # EX: 154#3 -> 154 - if re.search(r"(\d+#\d+)", file): - file = re.sub( - r"((#)([0-9]+)(([-_.])([0-9]+)|)+)", "", file - ).strip() - - # removes part from chapter number - # EX: 053x1 or c053x1 -> 053 or c053 - if "x" in file: - file = re.sub(r"(x[0-9]+)", "", file, re.IGNORECASE).strip() - - # removes the bracketed info from the end of the string, empty or not - if contains_brackets(file): - file = remove_brackets(file).strip() + # Clean the series name + if chapter: + file = clean_series_name(file) - # Removes the - characters.extension from the end of the string, with - # the dash and characters being optional - # EX: - prologue.extension or .extension + # Remove volume/chapter keywords from the file name + if contains_non_numeric(file): file = re.sub( - r"(((\s+)?-(\s+)?([A-Za-z]+))?(%s))" % file_extensions_regex, + r"\b({})(\.|)([-_. ])?".format(keywords), "", file, - re.IGNORECASE, + flags=re.IGNORECASE, ).strip() - if "-" in file: - # - #404 - -> #404 - if file.startswith("- "): - file = file[1:].strip() - if file.endswith(" -"): - file = file[:-1].strip() - - # remove # at the beginning of the string - # EX: #001 -> 001 - if file.startswith("#"): - file = file[1:].strip() - - file = re.sub( - r"\b({})(\.|)([-_. ])?".format(keywords), - "", - file, - flags=re.IGNORECASE, - ).strip() - - if re.search( - r"\b[0-9]+({})[0-9]+\b".format(keywords), - file, - re.IGNORECASE, - ): - file = ( - re.sub( - r"({})".format(keywords), - ".", - file, - flags=re.IGNORECASE, - ) - ).strip() + if contains_non_numeric(file) and re.search( + r"\b[0-9]+({})[0-9]+\b".format(keywords), + file, + re.IGNORECASE, + ): + file = ( + re.sub( + r"({})".format(keywords), + ".", + file, + flags=re.IGNORECASE, + ) + ).strip() try: if is_multi_volume or ( @@ -2904,6 +2964,7 @@ def get_release_number(file, chapter=False): ): if not is_multi_volume: is_multi_volume = True + multi_numbers = get_min_and_max_numbers(file) if multi_numbers: results.extend( @@ -2921,6 +2982,7 @@ def get_release_number(file, chapter=False): if file.endswith("0") and ".0" in file: file = file.split(".0")[0] results = int(file) if float(file).is_integer() else float(file) + except ValueError as v: send_message(f"Not a float: {file}: ERROR: {v}", error=True) except AttributeError: @@ -2933,10 +2995,8 @@ def get_release_number(file, chapter=False): return results elif results < 2000: return results - else: - return "" - else: - return "" + + return "" # Allows get_release_number() to use a cache @@ -4455,7 +4515,7 @@ def remove_dual_space(s): if " " not in s: return s - return re.sub(dual_space_pattern, " ", s) + return dual_space_pattern.sub(" ", s) # Removes common words to improve string matching accuracy between a series_name @@ -4555,10 +4615,8 @@ def normalize_str( words_to_remove.extend(storefront_keywords) for word in words_to_remove: - if word in type_keywords: - s = re.sub(rf"{word}\s", " ", s, flags=re.IGNORECASE).strip() - else: - s = re.sub(rf"\b{word}\b", " ", s, flags=re.IGNORECASE).strip() + pattern = rf"\b{word}\b" if word not in type_keywords else rf"{word}\s" + s = re.sub(pattern, " ", s, flags=re.IGNORECASE).strip() s = remove_dual_space(s) @@ -5565,10 +5623,7 @@ def organize_by_first_letter(array_list, string, position_to_insert_at, exclude= items_to_move = [] for item in array_list: - if item == exclude: - continue - - if item == array_list[position_to_insert_at]: + if item in [exclude, array_list[position_to_insert_at]]: continue first_letter_of_dir = os.path.basename(item)[0].lower() @@ -7338,7 +7393,16 @@ def rename_files( not result and file.file_type == "chapter" and ( - has_one_set_of_numbers( + ( + file.volume_number + and ( + extract_all_numbers( + file.name, subtitle=file.subtitle + ).count(file.volume_number) + == 1 + ) + ) + or has_one_set_of_numbers( remove_brackets( re.sub( r"((\[[^\]]*\]|\([^\)]*\)|\{[^}]*\})\s?){2,}.*", @@ -7356,15 +7420,6 @@ def rename_files( file=file, subtitle=file.subtitle, ) - or ( - file.volume_number - and ( - extract_all_numbers( - file.name, subtitle=file.subtitle - ).count(file.volume_number) - == 1 - ) - ) ) ): full_chapter_match_attempt_allowed = True @@ -8204,6 +8259,7 @@ def print_execution_time(start_time, function_name): # Extracts the covers out from our manga and novel files. def extract_covers(paths_to_process=paths): global checked_series, root_modification_times + global series_cover_path # Finds the series cover image in the given folder def find_series_cover(folder_accessor, image_extensions): @@ -8446,7 +8502,6 @@ def contains_multiple_volume_ones( has_multiple_volume_ones, highest_index_number, is_chapter_directory, - series_cover_path, volume_paths, clean_basename, same_series_name, @@ -8502,13 +8557,13 @@ def process_cover_extraction( has_multiple_volume_ones, highest_index_number, is_chapter_directory, - series_cover_path, volume_paths, clean_basename, same_series_name, contains_subfolders, ): - global image_count + global image_count, series_cover_path + update_stats(file) # Gets the first word in the string. @@ -9214,7 +9269,8 @@ def get_subtitle_from_title(file, publisher=None): # remove the file extension, using file.extension # EX: series_name c001 (2021) (Digital) - Instincts.cbz -> Instincts.cbz - subtitle = re.sub(rf"\{file.extension}$", "", subtitle).strip() + if subtitle.endswith(file.extension): + subtitle = get_extensionless_name(subtitle) if not publisher_search: # remove everything to the right of the release year/digital