Skip to content

Commit

Permalink
v2.5.12: 更新禁漫APP v1.7.0的最新APP域名; 新增插件【删除重复文件】(#244); 优化代码. (#245)
Browse files Browse the repository at this point in the history
  • Loading branch information
hect0x7 authored May 27, 2024
1 parent e0652a9 commit 88ad684
Show file tree
Hide file tree
Showing 8 changed files with 105 additions and 25 deletions.
11 changes: 11 additions & 0 deletions assets/docs/sources/option_file_syntax.md
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,17 @@ plugins:

zip_dir: D:/jmcomic/zip/ # 压缩文件存放的文件夹
delete_original_file: true # 压缩成功后,删除所有原文件和文件夹

# 删除重复文件插件
# 参考 → [https://github.com/hect0x7/JMComic-Crawler-Python/issues/244]
- plugin: delete_duplicated_files
kwargs:
# limit: 必填,表示对md5出现次数的限制
limit: 3
# 如果文件的md5的出现次数 >= limit,是否要删除
# 如果delete_original_file不配置,此插件只会打印信息,不会执行其他操作
# 如果limit=1, delete_original_file=true 效果会是删除所有文件
delete_original_file: true

- plugin: send_qq_email # 发送qq邮件插件
kwargs:
Expand Down
2 changes: 1 addition & 1 deletion assets/option/option_workflow_download.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,4 @@ plugins:
msg_to: ${EMAIL_TO}
password: ${EMAIL_PASS}
title: ${EMAIL_TITLE}
content: ${EMAIL_CONTENT}
content: ${EMAIL_CONTENT}
2 changes: 1 addition & 1 deletion src/jmcomic/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# 被依赖方 <--- 使用方
# config <--- entity <--- toolkit <--- client <--- option <--- downloader

__version__ = '2.5.11'
__version__ = '2.5.12'

from .api import *
from .jm_plugin import *
Expand Down
2 changes: 1 addition & 1 deletion src/jmcomic/jm_client_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,6 @@ def is_given_type(self, ctype: Type['JmcomicClient']) -> bool:
"""
if isinstance(self, ctype):
return True
if self.client_key == instance.client_key:
if self.client_key == ctype.client_key:
return True
return False
7 changes: 5 additions & 2 deletions src/jmcomic/jm_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,14 +109,17 @@ class JmModuleConfig:
DOMAIN_IMAGE_LIST = str_to_list('''
cdn-msp.jmapinodeudzn.net
cdn-msp2.jmapinodeudzn.net
cdn-msp2.jmapiproxy3.cc
cdn-msp3.jmapinodeudzn.net
''')

# 移动端API域名
DOMAIN_API_LIST = str_to_list('''
www.jmapinodeudzn.xyz
www.jmapinode.vip
www.jmapinode.biz
www.cdn-eldenringproxy.xyz
www.cdn-eldenringproxy.me
www.cdn-eldenringproxy.vip
www.jmapinode.xyz
''')

Expand Down
43 changes: 25 additions & 18 deletions src/jmcomic/jm_option.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,9 @@ class DirRule:

Detail = Union[JmAlbumDetail, JmPhotoDetail, None]
RuleFunc = Callable[[Detail], str]
RuleSolver = Tuple[int, RuleFunc, str]
RuleSolver = Tuple[str, RuleFunc, str]
RuleSolverList = List[RuleSolver]

rule_solver_cache: Dict[str, RuleSolver] = {}

def __init__(self, rule: str, base_dir=None):
base_dir = JmcomicText.parse_to_abspath(base_dir)
self.base_dir = base_dir
Expand All @@ -100,6 +98,25 @@ def decide_image_save_dir(self,

return fix_filepath('/'.join(path_ls), is_dir=True)

def decide_album_root_dir(self, album: JmAlbumDetail) -> str:
    """
    Build the album-level root directory from this rule's solver list.

    Only solvers keyed 'Bd' (base dir) or 'A' (album) contribute a path
    segment; every other solver is skipped.

    :param album: album detail used to resolve the 'A' rules
    :return: the joined segments, passed through fix_filepath(..., is_dir=True)
    :raises BaseException: whatever apply_rule_solver raised, after logging it
    """
    root_keys = ('Bd', 'A')
    segments = []

    for rule_solver in self.solver_list:
        solver_key, _, rule = rule_solver

        # only base-dir and album-level rules belong to the album root
        if solver_key not in root_keys:
            continue

        try:
            # no photo is available at album level, so None is passed for it
            segment = self.apply_rule_solver(album, None, rule_solver)
        except BaseException as e:
            # log which rule failed, then propagate the same exception
            jm_log('dir_rule', f'路径规则"{rule}"的解析出错: {e}, album={album}')
            raise e

        segments.append(str(segment))

    joined = '/'.join(segments)
    return fix_filepath(joined, is_dir=True)

def get_role_solver_list(self, rule_dsl: str, base_dir: str) -> RuleSolverList:
"""
解析下载路径dsl,得到一个路径规则解析列表
Expand All @@ -111,7 +128,7 @@ def get_role_solver_list(self, rule_dsl: str, base_dir: str) -> RuleSolverList:
for rule in rule_list:
rule = rule.strip()
if rule == 'Bd':
solver_ls.append((0, lambda _: base_dir, 'Bd'))
solver_ls.append(('Bd', lambda _: base_dir, 'Bd'))
continue

rule_solver = self.get_rule_solver(rule)
Expand All @@ -137,24 +154,14 @@ def split_rule_dsl(self, rule_dsl: str) -> List[str]:

@classmethod
def get_rule_solver(cls, rule: str) -> Optional[RuleSolver]:
# 查找缓存
if rule in cls.rule_solver_cache:
return cls.rule_solver_cache[rule]

# 检查dsl
if not rule.startswith(('A', 'P')):
return None

# Axxx or Pyyy
key = 1 if rule[0] == 'A' else 2

def solve_func(detail):
return fix_windir_name(str(DetailEntity.get_dirname(detail, rule[1:])))

# 保存缓存
rule_solver = (key, solve_func, rule)
cls.rule_solver_cache[rule] = rule_solver
return rule_solver
return rule[0], solve_func, rule

@classmethod
def apply_rule_solver(cls, album, photo, rule_solver: RuleSolver) -> str:
Expand All @@ -168,11 +175,11 @@ def apply_rule_solver(cls, album, photo, rule_solver: RuleSolver) -> str:
"""

def choose_detail(key):
if key == 0:
if key == 'Bd':
return None
if key == 1:
if key == 'A':
return album
if key == 2:
if key == 'P':
return photo

key, func, _ = rule_solver
Expand Down
59 changes: 59 additions & 0 deletions src/jmcomic/jm_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -1035,3 +1035,62 @@ def try_mark_photo_skip_and_log(self, photo: JmPhotoDetail, at_least_image_count
@field_cache() # 单例
def build(cls, option: JmOption) -> 'JmOptionPlugin':
return super().build(option)


class DeleteDuplicatedFilesPlugin(JmOptionPlugin):
    """
    Plugin that groups the files under an album's root directory by MD5
    digest and passes every group whose size is >= ``limit`` to
    ``execute_deletion``.

    See https://github.com/hect0x7/JMComic-Crawler-Python/issues/244

    NOTE(review): the option docs state that leaving ``delete_original_file``
    unconfigured makes the plugin only print information, but ``invoke``
    defaults it to True. ``execute_deletion`` is defined outside this class,
    so confirm which behavior is intended.
    """
    plugin_key = 'delete_duplicated_files'

    @classmethod
    def calculate_md5(cls, file_path):
        """
        Compute the MD5 hex digest of a file's content.

        :param file_path: path of the file to hash
        :return: hexadecimal MD5 digest string
        """
        import hashlib

        hash_md5 = hashlib.md5()
        with open(file_path, "rb") as f:
            # read fixed-size chunks so the whole file is never held in memory
            for chunk in iter(lambda: f.read(4096), b""):
                hash_md5.update(chunk)
        return hash_md5.hexdigest()

    @classmethod
    def find_duplicate_files(cls, root_folder):
        """
        Recursively hash every file under a folder and group paths by digest.

        :param root_folder: directory to walk
        :return: defaultdict mapping MD5 hex digest -> list of file paths
        """
        import os
        from collections import defaultdict
        md5_dict = defaultdict(list)

        for root, _, files in os.walk(root_folder):
            for file in files:
                file_path = os.path.join(root, file)
                file_md5 = cls.calculate_md5(file_path)
                md5_dict[file_md5].append(file_path)

        return md5_dict

    def invoke(self,
               limit,
               album=None,
               downloader=None,
               delete_original_file=True,
               **kwargs,
               ) -> None:
        """
        Plugin entry point: scan the album's root directory for duplicates.

        :param limit: a digest seen at least this many times is reported
        :param album: album whose download directory is scanned; when None
            the plugin does nothing
        :param downloader: accepted but not used by this plugin
        :param delete_original_file: stored on the instance before the scan
        :param kwargs: extra arguments, ignored
        """
        if album is None:
            return

        # presumably read by execute_deletion to decide whether to delete
        # (that method is not defined in this class) - TODO confirm
        self.delete_original_file = delete_original_file
        # root directory that the album was downloaded into
        root_folder = self.option.dir_rule.decide_album_root_dir(album)
        self.find_duplicated_files_and_delete(limit, root_folder, album)

    def find_duplicated_files_and_delete(self, limit: int, root_folder: str, album: Optional[JmAlbumDetail] = None):
        """
        Log every digest that occurs at least ``limit`` times under
        ``root_folder`` and pass its paths to ``execute_deletion``.

        Every path of a matching group is passed on, so with ``limit=1``
        all files under ``root_folder`` are candidates.

        :param limit: minimum number of occurrences of a digest
        :param root_folder: directory to scan recursively
        :param album: optional album, only used to prefix the log message
        """
        md5_dict = self.find_duplicate_files(root_folder)
        # the prefix does not depend on the digest, so build it once
        prefix = '' if album is None else f'({album.album_id}) '

        for md5, paths in md5_dict.items():
            if len(paths) < limit:
                continue

            message = [prefix + f'MD5: {md5} 出现次数: {len(paths)}'] + \
                      [f'  {path}' for path in paths]
            self.log('\n'.join(message))
            self.execute_deletion(paths)
4 changes: 2 additions & 2 deletions src/jmcomic/jm_toolkit.py
Original file line number Diff line number Diff line change
Expand Up @@ -707,7 +707,7 @@ def save_resp_img(cls, resp: Any, filepath: str, need_convert=True):
如果需要改变图片的文件格式,比如 .jpg → .png,则需要指定参数 need_convert=True.
如果不需要改变图片的文件格式,使用 need_convert=False,可以跳过PIL解析图片,效率更高.
:param resp: HTTP响应对象
:param resp: JmImageResp
:param filepath: 图片文件路径
:param need_convert: 是否转换图片
"""
Expand Down Expand Up @@ -746,7 +746,7 @@ def decode_and_save(cls,

# 无需解密,直接保存
if num == 0:
img_src.save(decoded_save_path)
cls.save_image(img_src, decoded_save_path)
return

import math
Expand Down

0 comments on commit 88ad684

Please sign in to comment.