-
Notifications
You must be signed in to change notification settings - Fork 1
/
CourseSpider2.py
293 lines (270 loc) · 11.5 KB
/
CourseSpider2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
# -*- coding: utf-8 -*-
"""
@Time : 2023/12/8
@Auth : Schwaze Katze
@File :CourseSpider2.py
@IDE :Pycharm(CE)
@Motto:ABC(Always Be Coding)
"""
import json
from splinter import Browser
from selenium.webdriver.chrome.service import Service
# from selenium.webdriver.common.by import By
import time
import re
import os
def read_cookie(path="cookies.json"):
    """
    Load previously saved browser cookies from a JSON file.
    :param path: Location of the cookie file; defaults to ``cookies.json`` in the
                 current working directory (the original hard-coded location).
    :return: The deserialized Python object stored in the file.
    """
    # Explicit UTF-8 so decoding does not depend on the platform's locale.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)  # convert the file's JSON text into a Python object
def save_cookie(cookies, path="cookies.json"):
    """
    Serialize browser cookies to a JSON file, overwriting any existing file.
    :param cookies: JSON-serializable cookie data (e.g. a list of dicts).
    :param path: Destination file; defaults to ``cookies.json`` in the current
                 working directory (the original hard-coded location).
    :return:
    """
    # The ``with`` block closes the file on exit, so no explicit close() is needed.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(cookies, f)
class Spider_Dean_Office:
    """
    Browser automation that signs in to the academic-affairs site at ``self.url``
    and keeps searching for the configured (teacher, lesson) pairs until each one
    is detected as enrolled.
    """

    # Folder that is prepended to ``driver_dir`` for each supported browser.
    _DRIVER_FOLDERS = {
        'edge': './edgedriver/',
        'chrome': './chromedriver/',
    }
    # Driver executable used when no ``driver_dir`` is supplied.
    # NOTE(review): the chrome default file name is an assumption (the standard
    # chromedriver binary name) -- confirm it matches the repository layout.
    _DEFAULT_DRIVER_PATHS = {
        'edge': './edgedriver/msedgedriver.exe',
        'chrome': './chromedriver/chromedriver.exe',
    }
    # Log keys accepted by ``log`` and the message written for each of them.
    _LOG_MESSAGES = {
        "Success": "目标课程选上成功",
        "Lesson_Chosen": "检测到目标课程已选上",
        "Lesson_Repeated": "检测到重复选课行为",
    }

    def __init__(self, driver='edge', driver_dir=None, user_id=None, user_password=None, course=None):
        """
        Start the browser and store the login / course configuration.
        :param driver: Browser to drive, ``'edge'`` or ``'chrome'``.
        :param driver_dir: File name of the driver executable inside the browser's
                           driver folder; a per-browser default is used when omitted.
        :param user_id: Login ID; read from ``PersonInfo.txt`` later when missing.
        :param user_password: Login password; both ID and password must be given,
                              otherwise both are left empty.
        :param course: Optional iterable of ``(teacher_name, lesson_name)`` pairs.
        :raises ValueError: If ``driver`` is not a supported browser name.
        """
        if driver not in self._DEFAULT_DRIVER_PATHS:
            # Previously an unknown driver silently left ``self.browser`` undefined.
            raise ValueError("Unsupported driver: %r (expected 'edge' or 'chrome')" % (driver,))

        # System initiating: path of the driver executable.
        # (The original stored the default under ``self.driver_dir`` and then read
        # ``self.driver``, which raised AttributeError for the default arguments.)
        self.driver = self._resolve_driver_path(driver, driver_dir)
        self.current_path = os.path.dirname(os.path.abspath(__file__))

        # Browser data.
        # Reference: https://splinter.readthedocs.io/en/latest/drivers/chrome.html
        # NOTE(review): ``Service`` is imported from selenium's chrome module and is
        # reused for edge here, exactly as the original code did -- confirm.
        local_service = Service(executable_path=self.driver)
        self.browser = Browser(driver_name=driver, service=local_service)

        # Web data.
        self.url = "http://jwgl.usst.edu.cn/sso/jziotlogin"
        if user_id and user_password:
            self.user_id = user_id
            self.user_password = user_password
        else:
            self.user_id = ""
            self.user_password = ""
        # Copy so that removing finished courses never mutates the caller's list.
        self.courseInfo = list(course) if course else []

    @staticmethod
    def _resolve_driver_path(driver, driver_dir):
        """Return the driver executable path for ``driver`` (default path when ``driver_dir`` is empty)."""
        if driver_dir:
            return Spider_Dean_Office._DRIVER_FOLDERS.get(driver, './') + driver_dir
        defaults = Spider_Dean_Office._DEFAULT_DRIVER_PATHS
        return defaults.get(driver, defaults['edge'])

    def log(self, log, path=None):
        """
        Append one timestamped line to the operations log.
        :param log: Type of log; one of the keys of ``_LOG_MESSAGES``. Any other
                    value is recorded as "Unknown Operation".
        :param path: Log file to append to; defaults to ``operations.log`` next to
                     this source file.
        :return:
        """
        log_content = self._LOG_MESSAGES.get(log, "Unknown Operation")
        if not path:
            # os.path.join instead of ``current_path + "./operations.log"``, which
            # produced "<dir>./operations.log" (a different, usually missing folder).
            path = os.path.join(self.current_path, "operations.log")
        # The messages are Chinese; UTF-8 matches the encoding used for the other
        # text files handled by this class.
        with open(path, "a+", encoding="utf-8") as f:
            f.write(time.strftime('[%Y-%m-%d %H:%M:%S] ') + "Operation: " + log_content + "\n")

    def quit(self):
        """
        Wait five seconds, then close the browser.
        :return:
        """
        time.sleep(5)
        self.browser.quit()

    def visitUrl(self, url=None):
        """
        Entry point: open the site, log in, switch to the course-selection page
        and start the enrolment loop.
        :param url: Page to open; defaults to ``self.url``.
        :return:
        """
        target_url = self.url if url is None else url
        if self.user_id == '':
            # No credentials were passed to the constructor; load them from file.
            self.getPersonInfo()
        self.browser.reload()
        self.browser.visit(target_url)
        self.getLogin()
        self.browser.reload()
        self.getCourseSelection()
        self.concurrent_search()

    def getLogin(self):
        """
        Log in through the portal's unified-authentication page.
        Does nothing when the current page title is not the authentication page.
        :return:
        """
        if self.browser.title != '统一身份认证':
            return
        self.browser.find_by_xpath('//*[@id="username"]').fill(self.user_id)  # input user's ID
        self.browser.find_by_xpath('//*[@id="password"]').fill(self.user_password)  # input password
        time.sleep(1)
        self.browser.find_by_xpath('//*[@id="casLoginForm"]/p[5]/button').first.click()  # press button to login
        time.sleep(1)

    def getCourseSelection(self):
        """
        Switch to the course-selection page.
        Clicks the third navigation entry and its drop-down menu, then makes the
        window following the first one the current window.
        :return:
        """
        self.browser.find_by_css('#cdNav > ul > li:nth-child(3)').click()
        self.browser.find_by_css('#cdNav > ul > li.dropdown.open > ul').click()
        time.sleep(1)
        # switch to the target handle (the next tab)
        first_window = self.browser.windows[0]
        self.browser.windows.current = first_window.next

    def getLogin_2(self):
        """
        Deserted Function
        NOTE(review): uses ``send_keys`` / ``is_selected``, which look like raw
        Selenium element methods rather than splinter ones -- verify before reuse.
        :return:
        """
        self.browser.find_by_xpath('//*[@id="yhm"]').send_keys(self.user_id)  # input user's ID
        self.browser.find_by_xpath('//*[@id="mm"]').send_keys(self.user_password)  # input password
        time.sleep(1)
        if not self.browser.find_by_xpath('//*[@id="agreePolicy"]').is_selected():
            self.browser.find_by_xpath('//*[@id="agreePolicy"]').click()  # agree the policy
        self.browser.find_by_xpath('//*[@id="dl"]').click()  # press button to login
        time.sleep(1)

    def start(self):
        """
        Deserted Function
        NOTE(review): ``refresh`` / ``get`` / ``delete_all_cookies`` / ``add_cookie``
        look like raw Selenium WebDriver calls rather than splinter ones -- verify
        before reuse.
        :return:
        """
        self.browser.refresh()
        self.browser.get(self.url)
        self.browser.delete_all_cookies()  # delete all cookies
        cookies = read_cookie()
        for cookie in cookies:
            self.browser.add_cookie(cookie)  # add cookie

    @staticmethod
    def _value_after_equals(line):
        """Return the text after the first '=' in ``line`` with newlines and spaces removed."""
        compact = line.replace("\n", "").replace(" ", "")
        _, separator, value = compact.partition("=")
        if not separator:
            raise ValueError("Expected a 'key = value' line, got: %r" % (line,))
        return value

    def getPersonInfo(self, path=None):
        """
        Read the login ID and password from a text file and store them on the
        instance. The first line holds the ID and the second the password, each
        written as ``key = value``; spaces are removed before parsing.
        :param path: File to read; defaults to ``./PersonInfo.txt``.
        :return: ``[user_id, user_password]``
        :raises ValueError: If the file has fewer than two lines or a line has no '='.
        """
        if not path:
            path = r"./PersonInfo.txt"
        with open(path, 'r', encoding='utf-8') as fp:
            contents = fp.readlines()
        if len(contents) < 2:
            raise ValueError("%s must contain an ID line and a password line" % (path,))
        self.user_id = self._value_after_equals(contents[0])
        self.user_password = self._value_after_equals(contents[1])
        return [self.user_id, self.user_password]

    def getCourseInfo(self, path=None):
        """
        Read target courses from a text file and append them to ``self.courseInfo``.
        Each non-blank line is ``teacher-lesson``; the line is split at the first
        '-' and surrounding whitespace is stripped from both parts.
        :param path: File to read; defaults to ``./CourseInfo.txt``.
        :return: ``self.courseInfo`` (list of ``(teacher_name, lesson_name)`` tuples)
        :raises ValueError: If a non-blank line contains no '-'.
        """
        if not path:
            path = r"./CourseInfo.txt"
        with open(path, 'r', encoding='utf-8') as fp:
            contents = fp.readlines()
        for raw_line in contents:
            content = raw_line.replace("\n", "")
            if not content.strip():
                continue  # skip blank and whitespace-only lines
            print(content)
            teacher_name, separator, lesson_name = content.partition("-")
            if not separator:
                raise ValueError("Expected 'teacher-lesson', got: %r" % (content,))
            self.courseInfo.append((teacher_name.strip(), lesson_name.strip()))
        return self.courseInfo

    def concurrent_search(self):
        """
        Concurrence with simple loop: keep searching every pending course until
        all of them are reported as enrolled, removing each one once it is.
        :return:
        """
        if not self.courseInfo:
            self.getCourseInfo()  # Load the data
        print(self.courseInfo)
        pending = self.courseInfo
        while pending:
            # Iterate over a snapshot so finished courses can be removed safely.
            for course in list(pending):
                teacher_name, lesson_name = course
                if self.send_search(teacher_name=teacher_name, lesson_name=lesson_name):
                    # Remove the stored item itself so list-typed entries work too.
                    pending.remove(course)

    def send_search(self, teacher_name, lesson_name):
        """
        Search for the teacher, open every result panel whose text contains the
        lesson name and try to enrol in a row taught by that teacher.
        :param teacher_name: Text typed into the search box and matched against rows.
        :param lesson_name: Text matched against each course panel.
        :return: True once the course is detected as enrolled, otherwise False.
        """
        search_input = self.browser.find_by_xpath('//*[@id="searchBox"]/div/div[1]/div/div/div/div/input')
        search_input.clear()
        search_input.fill(teacher_name)
        time.sleep(0.1)
        self.browser.find_by_xpath('//*[@id="searchBox"]/div/div[1]/div/div/div/div/span/button[1]').first.click()
        time.sleep(0.3)
        self.browser.find_by_xpath('//*[@id="nav_tab"]/li[2]').first.click()  # select from general lessons
        time.sleep(0.5)
        # box: one kind of course
        course_boxes = self.browser.find_by_css('#contentBox > div.tjxk_list > div.panel.panel-info')
        for box in course_boxes:
            if lesson_name not in box.text:  # confirm the lesson name
                continue
            box.click()
            time.sleep(0.3)
            course_bars = box.find_by_css('tr[class="body_tr"]')  # bar: one specific course
            time.sleep(0.3)
            for bar in course_bars:
                if self._try_select(bar, teacher_name):
                    # Stop right away; clicking further panels after a hit is pointless.
                    return True
        return False

    def _try_select(self, bar, teacher_name):
        """Try to enrol via one course row; return True when the course is detected as enrolled."""
        bar_text = bar.text  # read once; every access is a browser round trip
        if teacher_name not in bar_text:  # confirm the teacher name
            return False
        if '已满' in bar_text:  # the row is marked as full
            return False
        if '退选' in bar_text:  # a "withdraw" label is shown on this row
            self.log("Success")
            return True
        # search for location of the button and click it
        button_box = bar.find_by_css('td[class="an"]').first
        time.sleep(0.1)
        button_box.find_by_css('.btn').first.click()
        time.sleep(0.1)
        # deal with possible alerts
        try:
            alert = self.browser.find_by_css('.modal-content')
        except Exception:
            # Best effort: no dialog could be looked up, so treat it as "no alert".
            alert = None
        time.sleep(0.1)
        # TODO: handle the "course time conflict" dialog
        if alert and "最多可选" in alert.text:  # the "at most N choices per course" dialog
            print("你已经选了这门课")
            alert.find_by_id('btn_ok').first.click()
            self.log("Lesson_Chosen")
            return True
        return False
if __name__ == '__main__':
    # Script entry point: run the whole login + enrolment flow with the defaults.
    spider = Spider_Dean_Office()
    try:
        spider.visitUrl()
    finally:
        # Always close the browser, even when the flow raises part-way through,
        # so no driver/browser process is left running.
        spider.quit()
# Sturzkampf.quit()
# try:
# Sturzkampf.getUrl()
# except Exception as e:
# print(e)
# finally:
# Sturzkampf.quit()
# pass