# Mainfunc.py
from urllib.parse import urljoin

import bs4
import requests
def get_soup(url, timeout=10):
    """Fetch ``url`` and parse the response body into a BeautifulSoup tree.

    Args:
        url: Address handed to ``requests.get``.
        timeout: Seconds to wait for the server, forwarded to
            ``requests.get``. ``requests`` applies no timeout by default, so
            an unresponsive server would otherwise block the caller forever.

    Returns:
        A ``bs4.BeautifulSoup`` object built from the response text with the
        ``lxml`` parser.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    res = requests.get(url, timeout=timeout)
    res.raise_for_status()  # error check: raise on 4xx/5xx responses
    return bs4.BeautifulSoup(res.text, "lxml")
def rename_url(url):
    """Resolve ``url`` against the ``https://websunday.net/`` site root.

    Plain string concatenation produced ``https://websunday.net//path`` for
    root-relative links (``/path``) and a malformed result for links that
    were already absolute. ``urljoin`` handles both, and returns the same
    value as before for ordinary relative paths such as ``a/b.html``.

    Args:
        url: A relative, root-relative, or absolute URL string.

    Returns:
        The absolute URL as a string.
    """
    return urljoin('https://websunday.net/', url)
"""
def remove_unneslet(s):
replacements = {
'\r\n' :'',
' ' :'',
' ' :''
}
print('{}'.format(''.join(map(re.escape, replacements.keys()))))
return re.sub('{}'.format(''.join(map(re.escape, replacements.keys()))), lambda m: replacements[m.group()], s)
"""
# Translation table that deletes every character stripped from scraped text.
_UNNEEDED_CHARS = str.maketrans('', '', ''.join((
    '\r',          # carriage return
    '\n',          # line feed
    ' ',           # ASCII space
    '\u7c31',      # CJK ideograph U+7C31
    '\U00020bb7',  # CJK ideograph U+20BB7
    '\u2661',      # white heart suit
    '\U000e0100',  # variation selector-17
    '\u3000',      # ideographic (full-width) space
)))


def remove_unneslet(s):
    """Return ``s`` with line breaks, spaces and a few special characters removed.

    The previous chain of ``str.replace`` calls removed ``'\\n\\n'`` before
    ``'\\r\\n'``, so an input such as ``'a\\r\\n\\nb'`` came back as
    ``'a\\rb'`` with a stray carriage return. Deleting each character
    independently in a single ``str.translate`` pass removes that ordering
    dependency and strips ``'\\r'`` as well.

    Args:
        s: The string to clean.

    Returns:
        A new string without any of the characters in ``_UNNEEDED_CHARS``.
    """
    return s.translate(_UNNEEDED_CHARS)
def get_title(case_detail):
    """Return the cleaned text of the first ``<h1>`` found under ``case_detail``.

    Args:
        case_detail: An object exposing ``find`` (a BeautifulSoup node in
            this module's usage).

    Returns:
        The heading text after it has been passed through
        ``remove_unneslet``.
    """
    heading = case_detail.find("h1")
    return remove_unneslet(heading.get_text())
def get_files(case_detail):
    """Parse the ``<li>`` entries of the ``div.file`` block into dicts.

    The original call passed ``attrs={"class", "file"}``, a *set* literal.
    Beautiful Soup treats a non-dict ``attrs`` as a list of acceptable class
    names, so that also matched elements whose class is literally ``class``.
    The filter is now the intended ``{"class": "file"}`` mapping.

    Args:
        case_detail: BeautifulSoup node containing a ``<div class="file">``.

    Returns:
        A list with one dict per ``<li>``, keyed ``'Volume'``, ``'Index'``
        and ``'Title'``; every value is a substring of the cleaned item text.
    """
    file_items = case_detail.find("div", attrs={"class": "file"}).find_all("li")
    parsed = []
    for item in file_items:
        text = remove_unneslet(item.get_text())
        title_start = text.find('「')
        volume_end = text.find('巻')
        # NOTE(review): the offsets below presumably expect entries shaped
        # like "<1 char><volume>巻<5 chars><index>「<title>" - confirm against
        # the live page. If either marker is missing, ``find`` returns -1 and
        # the slices silently produce meaningless fragments.
        parsed.append({
            'Volume': text[1:volume_end],
            'Index': text[volume_end + 6:title_start],
            'Title': text[title_start:],
        })
    return parsed
def get_mchara(case_detail):
    """Return the cleaned text of every ``<li>`` inside the ``div.mchara`` block.

    The original call passed ``attrs={"class", "mchara"}``, a *set* literal,
    which Beautiful Soup interprets as "class is ``class`` or ``mchara``".
    The filter is now the intended ``{"class": "mchara"}`` mapping.

    Args:
        case_detail: BeautifulSoup node containing a ``<div class="mchara">``.

    Returns:
        A list of strings, one per ``<li>``, each passed through
        ``remove_unneslet``.
    """
    items = case_detail.find("div", attrs={"class": "mchara"}).find_all("li")
    return [remove_unneslet(item.get_text()) for item in items]
def get_venue(case_detail):
    """Return the ``/``-separated entries of the ``div.venue`` paragraph.

    The original call passed ``attrs={"class", "venue"}``, a *set* literal,
    which Beautiful Soup interprets as "class is ``class`` or ``venue``".
    The filter is now the intended ``{"class": "venue"}`` mapping.

    Args:
        case_detail: BeautifulSoup node containing a ``<div class="venue">``
            with a ``<p>`` child.

    Returns:
        A list of strings. Text without ``/`` yields a one-element list.
    """
    paragraph = case_detail.find("div", attrs={"class": "venue"}).find("p")
    venue_text = remove_unneslet(paragraph.get_text())
    # str.split already returns [venue_text] when no '/' is present, so the
    # former explicit "'/' in ..." branch was redundant.
    return venue_text.split('/')
def get_gchara(case_detail):
    """Return the ``/``-separated entries of the ``div.gchara`` paragraph.

    The original call passed ``attrs={"class", "gchara"}``, a *set* literal,
    which Beautiful Soup interprets as "class is ``class`` or ``gchara``".
    The filter is now the intended ``{"class": "gchara"}`` mapping.

    Args:
        case_detail: BeautifulSoup node containing a ``<div class="gchara">``
            with a ``<p>`` child.

    Returns:
        A list of strings obtained by splitting the cleaned paragraph text
        on ``/``.
    """
    paragraph = case_detail.find("div", attrs={"class": "gchara"}).find("p")
    gchara_text = remove_unneslet(paragraph.get_text())
    return gchara_text.split('/')
def get_explain(case_detail):
    """Return the cleaned text of the ``<p>`` inside the ``div.naiyo__item`` block.

    The original call passed ``attrs={"class", "naiyo__item"}``, a *set*
    literal, which Beautiful Soup interprets as "class is ``class`` or
    ``naiyo__item``". The filter is now the intended
    ``{"class": "naiyo__item"}`` mapping.

    Args:
        case_detail: BeautifulSoup node containing a
            ``<div class="naiyo__item">`` with a ``<p>`` child.

    Returns:
        The paragraph text after it has been passed through
        ``remove_unneslet``.
    """
    paragraph = case_detail.find("div", attrs={"class": "naiyo__item"}).find("p")
    return remove_unneslet(paragraph.get_text())