-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmain.py
278 lines (247 loc) · 7.38 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
# -*- encoding: utf-8 -*-
'''
@File : main.py
@Time : 2022/04/21 19:08:04
@Author : Coder-Sakura
@Version : 1.0
@Desc : None
'''
# Standard-library and project imports
import os
import re
import time
import json
from sjk_tool import tool, logger, \
folder, network_connect, default_headers, COURSE_SLEEP, START_PAGE
from sjk_content import SJK_CONTENT
from sjk_video import SJK_VIDEO
from thread_pool import callback
class CourseHandler:
    """Download one course: resolve its class id, walk its chapter tree and
    dispatch the text and video content of every section.

    Meant to be used as a context manager: ``__enter__`` creates the course
    folder and resolves ``self.cid``; ``main`` then performs the download.
    """

    # Value sent in the ``sjk-apikey`` request header of the SKU request.
    _API_KEY = "tSiXjbGTHnzJno0Fa1ucHJ56QQ1Xw90D"
    # How many times get_sku() may refresh the cookie before giving up, so a
    # permanently failing login cannot recurse/loop forever.
    _MAX_LOGIN_RETRIES = 3
    # Characters replaced by "_" when a section title is used as a file name.
    # The backslash is included so a title cannot introduce a path separator.
    _ILLEGAL_NAME_CHARS = re.compile(r'[\\/:*?"<>|]')

    def __init__(self, index, len, course_data) -> None:
        """
        :param index: zero-based position of this course in the current page
        :param len: number of courses in the current page (name kept for
            backward compatibility although it shadows the builtin here)
        :param course_data: course record as produced by Handler.discoverInfo
        """
        self.index = index
        self.len = len
        self.course_data = course_data
        # Filled in by __enter__; the defaults make main() a no-op instead of
        # an AttributeError when the object is used without ``with``.
        self.course_path = None
        self.cid = ""
        logger.debug(self.course_data)

    def __enter__(self):
        # Create the course root directory, then resolve the class id.
        self.course_path = folder(self.course_data["course"])
        self.cid = self.get_sku()
        return self

    def __exit__(self, exc_type, exc_value, exc_trackback):
        # Nothing to release; returning False lets exceptions propagate.
        return False

    def _warn_sku_failure(self):
        """Log the standard "could not obtain sku" warnings."""
        logger.warning(f"{self.course_data['title']} 获取sku失败")
        logger.warning("请使用三节课vip会员账号登录 或 检查网络是否异常")

    def get_sku(self):
        """
        Resolve the class id (cid) of the course from the SKU API.

        When the API answers "请登录" the cookie is refreshed through
        ``tool.user_info_handler()`` and the request is retried, at most
        ``_MAX_LOGIN_RETRIES`` times.

        :return: the cid string, or "" when it could not be determined
        """
        headers = default_headers.copy()
        headers["sjk-apikey"] = self._API_KEY
        data = {"product_id": self.course_data["course_id"]}

        for _ in range(self._MAX_LOGIN_RETRIES + 1):
            resp = network_connect(
                tool.config["sku_api"],
                type="POST",
                **{"data": data, "headers": headers, "cookies": tool.config["cookie"]}
            )
            try:
                res = json.loads(resp.text)
            except Exception:
                # Deliberately broad (as in the original): any failure to read
                # or parse the response means "skip this course".
                self._warn_sku_failure()
                return ""
            logger.debug(res)

            if not isinstance(res, dict):
                self._warn_sku_failure()
                return ""

            payload = res.get("data")
            if payload is None and res.get("msg") == "请登录":
                # Session expired: refresh the cookie and try again.
                tool.config["cookie"] = tool.user_info_handler()
                continue

            return_url = payload.get("return_url") if isinstance(payload, dict) else None
            if not return_url:
                # e.g. ``data`` is null for a reason other than "请登录".
                self._warn_sku_failure()
                return ""
            # The cid is the path segment that follows "cid/" in return_url.
            return return_url.split("cid/")[-1].split("/")[0]

        # Still asked to log in after every retry.
        self._warn_sku_failure()
        return ""

    # Fetch the chapter directory of the course
    def course_tree(self):
        """
        Fetch the chapter tree of the course.

        :return: [] when the API reports an error or returns no data,
            otherwise a list shaped like
            [
                {
                    "node_id": node_id,
                    "title": title,
                    "children": [
                        {"node_id": node_id, "title": title, "type": type},
                        {...}
                    ]
                }
            ]
        """
        resp = network_connect(
            tool.config["course_tree_url"],
            **{"params": {"cid": self.cid}}
        )
        res = json.loads(resp.text)
        if res["info"] != "OK" or not res["data"]:
            return []

        return [
            {
                "node_id": chapter["node_id"],
                "title": chapter["title"],
                "children": [
                    {
                        "node_id": child["node_id"],
                        "title": child["title"],
                        "type": child["type"],
                    }
                    # A missing/null "children" is treated as "no sub-sections".
                    for child in (chapter.get("children") or [])
                ],
            }
            for chapter in res["data"]["tree"]
        ]

    @staticmethod
    def _node_text(node):
        """Return the text content of a section node, "" for other node types.

        content_type 1 stores the text directly in ``content``; content_type 3
        stores it under ``content["content"]``.
        """
        content_type = node["content_type"]
        if content_type == 1:
            return node["content"]
        if content_type == 3:
            return node["content"]["content"]
        return ""

    # Fetch the content of one section
    @logger.catch
    def get_section_data(self, node_path, section_data):
        """
        Download the content of one section into ``node_path``.

        Text nodes are written to ``<section>.docx`` through SJK_CONTENT;
        every video node (content_type 2) is queued on ``tool.pool`` for
        SJK_VIDEO.

        :param node_path: directory of the chapter the section belongs to
        :param section_data: section record with at least "node_id" and "title"
        """
        params = {
            "cid": self.cid,
            "section_id": section_data["node_id"],
        }
        resp = network_connect(
            tool.config["section"],
            **{"params": params}
        )
        res = json.loads(resp.text)
        if res["info"] != "OK" or resp.status_code != 200 or not res["data"]:
            logger.warning(f"小节:{section_data['title']} 获取内容出错")
            logger.debug(resp.text)
            return

        nodes = res["data"]["nodes"]
        # Text per node (raw HTML according to the original author's comment).
        # NOTE(review): non-text nodes contribute "" entries, so this list is
        # non-empty even for video-only sections — confirm that is intended.
        text_list = [self._node_text(node) for node in nodes]
        # Ids of the video nodes.
        video_id_list = [node["content"]["id"] for node in nodes if node["content_type"] == 2]
        logger.debug(f"<len(text_list)> - {len(text_list)} | <len(video_id_list)> - {len(video_id_list)} ")

        # File-system-safe section name, shared by the docx and the videos.
        section_name = self._ILLEGAL_NAME_CHARS.sub('_', section_data["title"]).replace("\t", "")

        if text_list:
            docx_path = os.path.join(node_path, f"{section_name}.docx")
            SJK_CONTENT().main(section_name, docx_path, text_list)

        # With several videos in one section each needs its own file name,
        # otherwise they would all be written to the same path. A single
        # video keeps the plain "<section>.mp4" name.
        numbered = len(video_id_list) > 1
        for position, video_id in enumerate(video_id_list, start=1):
            file_name = f"{section_name}_{position}.mp4" if numbered else f"{section_name}.mp4"
            video_data = {
                "video_path": os.path.join(node_path, file_name),
                "params": {"class_id": self.cid, "video_id": video_id},
            }
            tool.pool.put(SJK_VIDEO().main, (video_data, ), callback)
            time.sleep(0.1)

    @logger.catch
    def main(self):
        """Download every section of the course; does nothing without a cid."""
        if not self.cid:
            return
        logger.debug(f"cid - {self.cid}")

        tree = self.course_tree()
        if not tree:
            logger.warning(f"{self.course_data['course']} - Not Course Tree")
            # Nothing to download; avoid the misleading "0 chapters" log below.
            return

        logger.info(f"({self.index+1}/{self.len})当前下载课程: 《{self.course_data['title']}》 - 共{len(tree)}章")
        for node in tree:
            # Create the chapter directory.
            node_path = folder(node["title"], self.course_path)
            # A chapter without children is itself the only section.
            sections = node["children"] or [
                {"node_id": node["node_id"], "title": node["title"]}
            ]
            for section in sections:
                logger.debug(f"<section> - {section}")
                self.get_section_data(node_path, section)
                time.sleep(0.5)
class Handler:
    """
    Download the courses listed on the "discover courses" page, starting at
    START_PAGE and continuing page by page until a page comes back short.

    Per the original author's note, VIP courses need a VIP account and are
    otherwise skipped.
    """

    # Value sent in the ``sjk-apikey`` request header of the listing request.
    _API_KEY = "tSiXjbGTHnzJno0Fa1ucHJ56QQ1Xw90D"
    # Page size requested from the listing API; a page with fewer entries is
    # treated as the last one.
    PER_PAGE = 20

    def __init__(self):
        pass

    @staticmethod
    def _course_info(item):
        """Build the course record used by CourseHandler from one API entry."""
        course_id = item["id"]
        title = item["title"]
        nums = item["section_count"]
        duration = item["video_duration"]
        # A course without teachers must not abort the whole page.
        teachers = item.get("teachers") or []
        teacher = teachers[0]["name"] if teachers else ""
        course = f"{title}-{course_id}-{teacher}-共{nums}节-{duration}"
        return {
            "course_id": course_id, "title": title, "nums": nums,
            "duration": duration, "teacher": teacher, "course": course
        }

    def discoverInfo(self, page=1):
        """
        Fetch the course list of one "discover courses" page.

        :param page: page number to request
        :return: [] when the response is unusable or empty, otherwise
            [
                {
                    "course_id": course_id,  # course id
                    "title": title,          # course title
                    "nums": nums,            # number of sections
                    "duration": duration,    # video duration
                    "teacher": teacher,      # first teacher's name ("" if none)
                    "course": course         # summary string built from the above
                },
                ...
            ]
        """
        params = {
            "sort": "sold_count",
            "sort_direction": "desc",
            "vip_free_flag": 0,
            "page": page,
            "per_page": self.PER_PAGE,
        }
        headers = default_headers.copy()
        headers["sjk-apikey"] = self._API_KEY
        resp = network_connect(
            tool.config["discover_url"],
            **{"params": params, "headers": headers}
        )
        try:
            data = json.loads(resp.text)
        except (AttributeError, TypeError, ValueError) as e:
            # No response object, or a body that is not valid JSON.
            logger.warning(f"<discoverInfo> - page {page} response could not be parsed - {e}")
            return []

        # Validate the envelope before indexing into it: a failed request may
        # carry ``data: null``.
        payload = data.get("data") if isinstance(data, dict) else None
        course_list = payload.get("list") if isinstance(payload, dict) else None
        if not isinstance(data, dict) or data.get("msg") != "ok" or not course_list:
            logger.warning(f"<course_list> - {course_list} NOT DATA.")
            return []

        return [self._course_info(item) for item in course_list]

    def main(self):
        """Download every course page by page, pausing between courses."""
        page = START_PAGE
        while True:
            course_list = self.discoverInfo(page=page)
            total = len(course_list)
            logger.info(f"当前下载第{page}页, 共{total}门课程.")
            for i, course in enumerate(course_list):
                # The ``with`` is inside the ``try`` so that a failure while
                # entering the handler (folder creation, sku lookup) skips
                # only this course instead of aborting the run.
                try:
                    with CourseHandler(i, total, course) as h:
                        h.main()
                except Exception as e:
                    logger.warning(f"{course['title']} - 下载异常出错 - {e}")
                # Pause between courses, but not after the last one of a page.
                if i + 1 != total:
                    logger.info(f"每下载完一门课程,将休眠{COURSE_SLEEP}秒 zzz....")
                    time.sleep(COURSE_SLEEP)
            # A short (or empty) page means there is nothing further to fetch.
            if total < self.PER_PAGE:
                break
            page += 1
if __name__ == '__main__':
    # Script entry point: build the page handler and start downloading.
    Handler().main()