-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrejal.py
117 lines (95 loc) · 3.91 KB
/
rejal.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import asyncio
import re
from typing import Any, Dict, List, Optional, Union

import httpx
from bs4 import BeautifulSoup
class Mfeed:
    """Fetch a page over HTTP and normalise the numbered lines of its text div.

    Numbered lines are expected to start with three numbers separated by
    ``' - '`` (``154 - 154 - 154 - text``).  Lines whose three numbers were
    fused into a single digit run (``153153153 - text``) are repaired by
    splitting that run into three equal parts.
    """

    def __init__(self) -> None:
        # The HTTP client is created lazily by the ``Client`` property.
        self.session = None

    @property
    def Client(self) -> "httpx.AsyncClient":
        """Return the shared ``httpx.AsyncClient``, creating it on first use."""
        if self.session is None:
            self.session = httpx.AsyncClient(cookies={'__utmc': '1'})
        return self.session

    async def aclose(self) -> None:
        """Close the underlying HTTP client, if one was ever created."""
        if self.session is not None:
            await self.session.aclose()
            self.session = None

    def get_id_from_loop_text(self, text: str) -> Optional[str]:
        """Return the first third of ``text``.

        Args:
            text: The fused digit run, e.g. ``'123123123'``.

        Returns:
            ``text[:len(text) // 3]`` when the length is divisible by 3
            (``'123123123'`` -> ``'123'``); otherwise a notice is printed and
            ``None`` is returned.
        """
        length = len(text)
        if length % 3 != 0:
            print("The string length is not divisible by 3.")
            return None
        return text[:length // 3]

    async def get(
        self,
        url: str,
    ) -> Union["httpx.Response", int]:
        """GET ``url`` with the shared client.

        The request is retried exactly once when it times out; a timeout on
        the retry propagates to the caller.

        Returns:
            The response when its status code is 200, otherwise the integer
            status code.
        """
        try:
            res = await self.Client.get(url=url)
        except httpx.TimeoutException:
            # Single retry; any error raised here is left to the caller.
            res = await self.Client.get(url=url)
        if res.status_code != 200:
            return res.status_code
        return res

    def op(self, path: str) -> str:
        """Return the whole content of the UTF-8 text file at ``path``."""
        # A file opened in 'r' mode is always readable, so no extra
        # readability check is needed.
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()

    async def GET_PAGE_DATA(
        self,
        url: str,
    ) -> Union[List[str], Dict[str, Any]]:
        """Download ``url`` and return the normalised lines of its text div.

        The parsed document is kept on ``self.soup``.

        Returns:
            The list produced by ``fix_text`` on success, or a dict
            ``{'error': True, 'msg': <code>}`` when the status code is not
            200 (``'SCNOK'``) or the page has no ``<div class="text">``
            (``'TDNF'``).
        """
        res = await self.get(url=url)
        if isinstance(res, int):
            return {'error': True, 'msg': "SCNOK"}  # Status Codes Not OK(200)
        self.soup = BeautifulSoup(res.content, "html.parser")
        text_div = self.soup.find('div', class_='text')
        # Sample of index markup noted by the original author (name elided):
        # <span class="index index-14451" title="..." onclick="indexShow('14451');">...</span>
        if text_div is None:
            # Without this guard a page lacking the div raised AttributeError.
            return {'error': True, 'msg': "TDNF"}  # Text Div Not Found
        lines = text_div.text.strip().split('\n')
        print(len(lines))
        return await self.fix_text(lines)

    async def fix_text(
        self,
        list_: List[str]
    ) -> List[str]:
        """Return the numbered lines of ``list_`` with a normalised prefix.

        * Lines already shaped ``N - N - N - text`` are kept unchanged.
        * Lines shaped ``NNN - text`` whose leading digit run has a length
          divisible by 3 get that run split into three equal parts joined by
          ``' - '``; only the leading run is rewritten, later occurrences of
          the same digits in the line are left alone.
        * Every other line is reported with ``NOT MATCHED!`` and dropped.
        """
        # Compiled once, outside the loop.
        separated = re.compile(r'^(\d+) - (\d+) - (\d+) - (.*?)$')  # 154 - 154 - 154 - (Text)
        fused = re.compile(r'^(\d+) - (.*?)$')  # 153153153 - (Text)
        result_list = []
        for line in list_:
            if separated.match(line):
                result_list.append(line)
                continue
            found = fused.match(line)
            entry_id = None
            if found:
                token = found.group(1)
                entry_id = self.get_id_from_loop_text(token)
            if entry_id is None:
                # TODO: Mark this line so it can be used with the previous page
                print('NOT MATCHED!')
                continue
            size = len(entry_id)
            # 123123123 -> 123 - 123 - 123
            prefix = ' - '.join(
                (token[:size], token[size:2 * size], token[2 * size:])
            )
            print(prefix)
            # The token is the line's leading text, so slicing replaces
            # exactly that occurrence.
            result_list.append(prefix + line[len(token):])
        return result_list
async def main() -> None:
    """Fetch one hard-coded page through ``Mfeed`` and release the HTTP client."""
    c = Mfeed()
    try:
        await c.GET_PAGE_DATA(
            url='http://shiaonlinelibrary.com/%D8%A7%D9%84%D9%83%D8%AA%D8%A8/3021_%D8%A7%D9%84%D9%85%D9%81%D9%8A%D8%AF-%D9%85%D9%86-%D9%85%D8%B9%D8%AC%D9%85-%D8%B1%D8%AC%D8%A7%D9%84-%D8%A7%D9%84%D8%AD%D8%AF%D9%8A%D8%AB-%D9%85%D8%AD%D9%85%D8%AF-%D8%A7%D9%84%D8%AC%D9%88%D8%A7%D9%87%D8%B1%D9%8A/%D8%A7%D9%84%D8%B5%D9%81%D8%AD%D8%A9_764'
        )
    finally:
        # Close the client even when the request or parsing fails, so the
        # connection pool is not leaked.
        if c.session is not None:
            await c.session.aclose()
    # TODO: add code that gets the "raoi" (presumably the narrator entry -
    # confirm) from the previous / next page
if __name__ == '__main__':
    # asyncio.run() creates a fresh event loop, runs main() to completion and
    # closes the loop; get_event_loop() without a running loop is deprecated.
    asyncio.run(main())