7
7
import time
8
8
import re
9
9
import os
10
+ import json
10
11
11
12
# 默认配置
12
- template = 'RJ号 [社团] 标题 (声优 )' # 默认命名格式
13
+ template = 'workno [circle] title (cv )' # 默认命名模板
13
14
14
15
RJ_WEBPATH = 'https://www.dlsite.com/maniax/work/=/product_id/'
15
16
RT_WEBPATH = 'https://www.dlsite.com.tw/work/product_id/'
23
24
24
25
25
26
# 避免ERROR: Max retries exceeded with url
26
- requests .adapters .DEFAULT_RETRIES = 5 # 增加重连次数
27
+ requests .adapters .DEFAULT_RETRIES = 5 # 增加重连次数
27
28
s = requests .session ()
28
- s .keep_alive = False # 关闭多余连接
29
- #s.get(url) # 你需要的网址
29
+ s .keep_alive = False # 关闭多余连接
30
+ # s.get(url) # 你需要的网址
30
31
31
32
# 查找母串内所有子串的位置, 查找失败返回-1
32
- def find_all (source ,dest ):
33
- length1 ,length2 = len (source ),len (dest )
33
+
34
+
35
+ def find_all (source , dest ):
36
+ length1 , length2 = len (source ), len (dest )
34
37
dest_list = []
35
38
temp_list = []
36
39
if length1 < length2 :
@@ -47,13 +50,15 @@ def find_all(source,dest):
47
50
if source [x :x + length2 ] != dest :
48
51
#print(" dest != slice")
49
52
temp_list .append (x )
50
- #else:
51
- #print(" dest == slice")
53
+ # else:
54
+ #print(" dest == slice")
52
55
for x in temp_list :
53
56
dest_list .remove (x )
54
57
return dest_list
55
58
56
59
# 从文件夹名称中提取r_code
60
+
61
+
57
62
def get_r_code (originalName , matchCode ):
58
63
index_list = find_all (originalName , matchCode )
59
64
if index_list == - 1 :
@@ -64,49 +69,55 @@ def get_r_code(originalName, matchCode):
64
69
pattern = re .compile ("^" + matchCode + "\d{6}$" )
65
70
if pattern .match (r_code ):
66
71
return r_code .upper ()
67
- return ""
72
+ return ""
73
+
68
74
69
75
def match_rj(rj_code):
    """Fetch title, circle and voice-actor names for an RJ work page.

    The page URL is built as ``RJ_WEBPATH + rj_code`` and requested through
    the module-level session ``s`` with the ``R_COOKIE`` cookies.

    Args:
        rj_code: Work number string appended to ``RJ_WEBPATH``.

    Returns:
        A 4-tuple ``(status, title, circle, cv_list)``:

        * response status is not 200: ``(status_code, "", "", [])``;
        * response status is 200: ``(200, title, circle, cv_list)``, where
          ``title`` / ``circle`` are ``""`` if the page has no matching
          element (previously this case raised an uncaught ``IndexError``);
        * the request raised ``OSError``: ``("", "", "", [])`` after a
          message was written to the ``text`` widget.
    """
    url = RJ_WEBPATH + rj_code
    try:
        # Redirects are not followed, so a redirecting page is reported to
        # the caller through its non-200 status code.
        r = s.get(url, allow_redirects=False, cookies=R_COOKIE)
        if r.status_code != 200:
            return r.status_code, "", "", []

        # Parse the raw response body into an element tree for XPath lookups.
        # NOTE(review): `html` is presumably lxml.html; its import is not
        # visible in this part of the file.
        tree = html.fromstring(r.content)
        titles = tree.xpath('//a[@itemprop="url"]/text()')
        circles = tree.xpath(
            '//span[@itemprop="brand" and @class="maker_name"]/*/text()')
        cvList = tree.xpath(
            '//*[@id="work_outline"]/tr/th[contains(text(), "声優")]/../td/a/text()')

        # An empty XPath result means the page layout did not match; hand
        # back an empty string so the caller's `title and circle` check can
        # report the failure instead of this thread dying on IndexError.
        title = titles[0] if titles else ""
        circle = circles[0] if circles else ""
        return 200, title, circle, cvList

    except os.error:
        # os.error is an alias of OSError.
        # NOTE(review): requests' exceptions are expected to derive from it.
        text.insert(tk.END, "**请求超时!\n ")
        text.insert(tk.END, " 请检查网络连接\n ")
        return "", "", "", []
91
-
100
+
101
+
92
102
def match_rt(rt_code):
    """Fetch title and circle for an RT work page.

    The page URL is built as ``RT_WEBPATH + rt_code + '.html'`` and requested
    through the module-level session ``s`` with the ``R_COOKIE`` cookies.

    Args:
        rt_code: Work number string appended to ``RT_WEBPATH``.

    Returns:
        A 4-tuple ``(status, title, circle, cv_list)`` with the same layout
        as ``match_rj``; ``cv_list`` is always ``[]`` here:

        * response status is not 200: ``(status_code, "", "", [])``;
        * response status is 200: ``(200, title, circle, [])``, where
          ``title`` / ``circle`` are ``""`` if the page has no matching
          element (previously this case raised an uncaught ``IndexError``);
        * the request raised ``OSError``: ``("", "", "", [])`` after a
          message was written to the ``text`` widget.
    """
    url = RT_WEBPATH + rt_code
    try:
        # Redirects are not followed, so a redirecting page is reported to
        # the caller through its non-200 status code.
        r = s.get(url + '.html', allow_redirects=False, cookies=R_COOKIE)
        if r.status_code != 200:
            return r.status_code, "", "", []

        tree = html.fromstring(r.content)
        titles = tree.xpath('//div[@class="works_summary"]/h3/text()')
        circles = tree.xpath('//a[@class="summary_author"]/text()')

        # An empty XPath result means the page layout did not match; hand
        # back an empty string so the caller's `title and circle` check can
        # report the failure instead of this thread dying on IndexError.
        title = titles[0] if titles else ""
        circle = circles[0] if circles else ""
        return 200, title, circle, []

    except os.error:
        # os.error is an alias of OSError.
        # NOTE(review): requests' exceptions are expected to derive from it.
        text.insert(tk.END, "**请求超时!\n ")
        text.insert(tk.END, " 请检查网络连接\n ")
        return "", "", "", []
109
119
120
+
110
121
def nameChange ():
111
122
# askdirectory()文件对话框, 选择目录, 返回目录名
112
123
path = filedialog .askdirectory ()
@@ -121,84 +132,89 @@ def nameChange():
121
132
files = os .listdir (path )
122
133
for file in files :
123
134
# os.path.isdir()用于判断对象是否为一个目录。
124
- if os .path .isdir (os .path .join (path ,file )):
135
+ if os .path .isdir (os .path .join (path , file )):
125
136
# 获取文件夹原始名称
126
137
originalName = file
127
138
# 尝试获取r_code
128
139
r_code = ""
129
- for matchCode in ['RJ' ,'rj' ,'RT' ,'rt' ]:
140
+ for matchCode in ['RJ' , 'rj' , 'RT' , 'rt' ]:
130
141
r_code = get_r_code (originalName , matchCode )
131
142
if r_code :
132
143
break
133
144
# 如果没能提取到r_code
134
145
if r_code == "" :
135
- continue # 跳过该文件夹
146
+ continue # 跳过该文件夹
136
147
else :
137
148
#print('Processing: ' + r_code)
138
149
text .insert (tk .END , 'Processing: ' + r_code + '\n ' )
139
- if r_code [1 ] == "J" :
150
+ if r_code [1 ] == "J" :
140
151
r_status , title , circle , cvList = match_rj (r_code )
141
- elif r_code [1 ] == "T" :
152
+ elif r_code [1 ] == "T" :
142
153
r_status , title , circle , cvList = match_rt (r_code )
143
154
# 如果顺利爬取网页信息
144
155
if r_status == 200 and title and circle :
145
- if var1 .get ():
156
+ if var1 .get ():
146
157
# 删除title中的【.*?】
147
158
title = re .sub (u"\\ 【.*?】" , "" , title )
148
159
149
- new_name = template .replace ("RJ号 " , r_code )
150
- new_name = new_name .replace ("标题 " , title )
151
- new_name = new_name .replace ("社团 " , circle )
152
-
160
+ new_name = template .replace ("workno " , r_code )
161
+ new_name = new_name .replace ("title " , title )
162
+ new_name = new_name .replace ("circle " , circle )
163
+
153
164
cv = ""
154
- if cvList : # 如果cvList非空
155
- for name in cvList :
165
+ if cvList : # 如果cvList非空
166
+ for name in cvList :
156
167
cv += " " + name
157
- new_name = new_name .replace ("声优 " , cv [1 :])
158
- # else:
159
- # new_name = new_name.replace("(声优 )", "")
160
-
161
-
162
- # 将Windows文件名中的非法字符替换
163
- new_name = re .sub (filter , " " , new_name ) # re.sub(pattern, repl, string)
168
+ new_name = new_name .replace ("cv " , cv [1 :])
169
+ else :
170
+ new_name = new_name .replace ("(cv )" , "" )
171
+
172
+ # 将Windows文件名中的非法字符替换
173
+ # re.sub(pattern, repl, string)
174
+ new_name = re .sub (filter , " " , new_name )
164
175
# 尝试重命名
165
176
try :
166
177
# strip() 去掉字符串两边的空格
167
- os .rename (os .path .join (path , originalName ), os .path .join (path , new_name .strip ()))
178
+ os .rename (os .path .join (path , originalName ),
179
+ os .path .join (path , new_name .strip ()))
168
180
except os .error as err :
169
- text .insert (tk .END , "**重命名失败!\n " )
170
- text .insert (tk .END , " " + os .path .join (path , originalName ) + "\n " )
181
+ text .insert (tk .END , "**重命名失败!\n " )
182
+ text .insert (
183
+ tk .END , " " + os .path .join (path , originalName ) + "\n " )
171
184
text .insert (tk .END , " 请检查是否存在重复的名称\n " )
172
185
elif r_status == 404 :
173
186
text .insert (tk .END , "**爬取DLsite过程中出现错误!\n " )
174
187
text .insert (tk .END , " 请检查本作是否已经下架或被收入合集\n " )
175
188
elif r_status != "" :
176
189
text .insert (tk .END , "**爬取DLsite过程中出现错误!\n " )
177
- text .insert (tk .END , " 网页 URL: " + RJ_WEBPATH + r_code + "\n " )
178
- text .insert (tk .END , " HTTP 响应代码: " + str (r_status ) + "\n " )
179
-
180
- time .sleep (0.1 ) #set delay to avoid being blocked from server
181
- #print("~Finished.")
190
+ text .insert (tk .END , " 网页 URL: " +
191
+ RJ_WEBPATH + r_code + "\n " )
192
+ text .insert (tk .END , " HTTP 响应代码: " +
193
+ str (r_status ) + "\n " )
194
+
195
+ # set delay to avoid being blocked from server
196
+ time .sleep (0.1 )
197
+ # print("~Finished.")
182
198
text .insert (tk .END , "*******完成!*******\n \n \n \n " )
183
199
tk .messagebox .showinfo (title = "提示" , message = "完成!" )
184
-
200
+
185
201
cbtn .config (state = tk .NORMAL )
186
202
btn .config (state = tk .NORMAL )
187
203
btn ['text' ] = "选择路径"
188
-
204
+
205
+
189
206
def thread_it(func, *args):
    """Run ``func(*args)`` in a background daemon thread.

    The thread is started immediately and is not joined, so the call returns
    without waiting for ``func`` to finish (joining here would block the
    caller, e.g. a GUI event handler).

    Args:
        func: Callable to execute in the new thread.
        *args: Positional arguments passed to ``func``.
    """
    # daemon=True replaces the deprecated Thread.setDaemon(True): the thread
    # does not keep the interpreter alive once the main thread exits.
    t = threading.Thread(target=func, args=args, daemon=True)
    t.start()
199
216
200
217
201
-
202
218
root = tk .Tk () # 实例化object,建立窗口root
203
219
root .title ('DLsite重命名工具 v1.0' ) # 给窗口的可视化起名字
204
220
root .geometry ('300x375' ) # 设定窗口的大小(横向 * 纵向)
@@ -210,38 +226,37 @@ def thread_it(func, *args):
210
226
# os.path.dirname(__file__) 当前脚本所在路径
211
227
basedir = os .path .abspath (os .path .dirname (__file__ ))
212
228
# Load the user's naming template from config.json next to this script.
# Whatever goes wrong, the built-in default `template` stays in effect.
fname = os.path.join(basedir, 'config.json')
try:
    # Decode explicitly as UTF-8 ('utf-8-sig' also accepts a leading BOM) so
    # a template containing non-ASCII characters does not depend on the
    # platform's default encoding.
    with open(fname, 'r', encoding='utf-8-sig') as f:
        config = json.load(f)
except os.error:
    # The config file is missing or unreadable: try to generate a fresh one
    # holding an empty template. The write is best-effort; a failure here
    # (e.g. read-only directory) must not stop the program from starting.
    try:
        with open(fname, "w", encoding='utf-8') as f:
            json.dump({'template': ''}, f, sort_keys=True,
                      indent=4, separators=(',', ': '))
    except os.error:
        pass
    text.insert(tk.END, "**使用默认命名模板:\n ")
    text.insert(tk.END, " workno [circle] title (cv)\n ")
except ValueError:
    # The file exists but is not valid JSON / not valid UTF-8
    # (json.JSONDecodeError and UnicodeDecodeError are both ValueError
    # subclasses). Leave the user's file untouched and use the default.
    text.insert(tk.END, "**使用默认命名模板:\n ")
    text.insert(tk.END, " workno [circle] title (cv)\n \n ")
else:
    # A missing "template" key or a non-object JSON document is treated the
    # same as an empty template instead of raising KeyError/TypeError.
    custom = config.get("template") if isinstance(config, dict) else ""
    if custom:
        # A usable template must be a string containing the "workno"
        # placeholder.
        if isinstance(custom, str) and "workno" in custom:
            template = custom
            text.insert(tk.END, "**使用自定义命名模板:\n ")
            text.insert(tk.END, " " + template.strip() + "\n \n ")
        else:
            text.insert(tk.END, "**模板格式错误: 模板中必须包含\" workno\" !\n ")
            text.insert(tk.END, " 使用默认命名模板:\n ")
            text.insert(tk.END, " workno [circle] title (cv)\n \n ")
    else:
        # Empty template: keep the default.
        text.insert(tk.END, "**使用默认命名模板:\n ")
        text.insert(tk.END, " workno [circle] title (cv)\n \n ")
238
252
239
253
var1 = tk .IntVar () # 定义var1整型变量用来存放选择行为返回值
240
- cbtn = tk .Checkbutton (root , text = '去除标题中【】之间的内容' , variable = var1 , onvalue = 1 , offvalue = 0 ) # 传值原理类似于radiobutton部件
254
+ cbtn = tk .Checkbutton (root , text = '去除title中【】之间的内容' , variable = var1 ,
255
+ onvalue = 1 , offvalue = 0 ) # 传值原理类似于radiobutton部件
241
256
242
- btn = tk .Button (root , text = '选择路径' , command = lambda : thread_it (nameChange ))
257
+ btn = tk .Button (root , text = '选择路径' , command = lambda : thread_it (nameChange ))
243
258
244
259
btn .pack ()
245
260
cbtn .pack ()
246
261
247
- root .mainloop ()
262
+ root .mainloop ()
0 commit comments