-
Notifications
You must be signed in to change notification settings - Fork 0
/
nonSubjectCrawler.py
44 lines (37 loc) · 1.52 KB
/
nonSubjectCrawler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
import psycopg2
import re
from dotenv import load_dotenv
import os
import time
# Load DB credentials (DB_ADDRESS, DB_NAME, DB_USER, DB_PASSWORD, DB_PORT)
# from a local .env file into the process environment at import time.
load_dotenv()
class NonSubjectCrawler:
    """Crawler for Inje University's extracurricular ("non-subject") programs.

    Scrapes https://edu.inje.ac.kr/AllUsers/PreProgramList.aspx with Selenium
    and fully refreshes the ``university_nonsubjects`` Postgres table.
    """

    @staticmethod
    def crawl():
        """Scrape every program card and store it in ``university_nonsubjects``.

        Side effects:
            - Deletes ALL existing rows from ``university_nonsubjects``.
            - Inserts one (id, title, content_url, end_at) row per program card.
            - Opens (and always closes) a Postgres connection and a Chrome session.

        Raises:
            psycopg2.Error / selenium.common.exceptions.WebDriverException on
            DB or browser failures; resources are released via ``finally``.
        """
        # Connection parameters come from the environment (.env via load_dotenv).
        db = psycopg2.connect(
            host=os.environ.get('DB_ADDRESS'),
            dbname=os.environ.get('DB_NAME'),
            user=os.environ.get('DB_USER'),
            password=os.environ.get('DB_PASSWORD'),
            port=os.environ.get('DB_PORT'),
        )
        driver = webdriver.Chrome()
        try:
            cursor = db.cursor()
            try:
                driver.get("https://edu.inje.ac.kr/AllUsers/PreProgramList.aspx")
                tab_list = driver.find_element(By.CLASS_NAME, "tab-list")
                cards = tab_list.find_elements(By.TAG_NAME, "li")

                # Full refresh: wipe the table before re-inserting everything.
                cursor.execute('DELETE FROM university_nonsubjects')
                db.commit()
                time.sleep(1)  # brief pause so the page finishes rendering

                for idx, card in enumerate(cards):
                    # The "tit" element holds newline-separated fields; index 2
                    # is the title and index 3 the "start ~ end" date range
                    # (layout assumed from the original indexing — verify on site).
                    fields = card.find_element(By.CLASS_NAME, "tit").text.split('\n')
                    btn_group = card.find_element(By.CLASS_NAME, "btn-group")
                    onclick = str(
                        btn_group.find_element(By.CLASS_NAME, "btn-info").get_attribute("onclick")
                    )
                    # Extract the first single-quoted argument of the onclick
                    # handler, e.g. "openX('1234', ...)" -> "1234".
                    match = re.search(r"\('(.*?)'", onclick)
                    if match is None:
                        # Skip malformed cards instead of crashing the whole crawl
                        # (the original would raise AttributeError here).
                        continue
                    datas = fields + [match.group(1)]
                    row = {
                        'id': idx,
                        'title': datas[2],
                        'content_url': datas[4],
                        'end_at': datas[3].split(' ~ ')[1],  # keep only the end date
                    }
                    cursor.execute(
                        'INSERT INTO university_nonsubjects (id, title, content_url, end_at) '
                        'VALUES (%s, %s, %s, %s)',
                        (row['id'], row['title'], row['content_url'], row['end_at']),
                    )
                db.commit()
            finally:
                cursor.close()
        finally:
            # Always release resources, even when scraping fails mid-way.
            db.close()
            # quit() (unlike close()) ends the whole WebDriver session and
            # terminates the chromedriver process.
            driver.quit()