# Local_Search.py
# Standard library.
import concurrent
import os
import sys
import time
from concurrent.futures import ThreadPoolExecutor

# Third-party packages: abort with installation instructions if any is missing.
try:
    import requests
    from bs4 import BeautifulSoup
    from sdamgia import SdamGIA
except ModuleNotFoundError:
    sys.exit("Required libraries are missing. Please install them using:\n"
             "pip install -r requirements.txt\n"
             "You can find the requirements.txt file at: https://github.com/zv3zdochka/LAW.git")
# Base URL of the subject site. Assigned in the ``__main__`` block and read by
# ``check_page`` when it builds the page URL.
subject_url = ''
# Strings to look for in each page's text. Assigned in the ``__main__`` block
# and read by ``check_page``.
targets = []
def check_page(page_id: int, timeout: float = 30.0) -> "str | None":
    """
    Check if the given page contains the target text or JavaScript.

    The page is fetched from ``{subject_url}/test?id={page_id}`` and its
    visible text is searched for every entry of the module-level ``targets``.

    Args:
        page_id (int): The ID of the page to check.
        timeout (float): Seconds to wait for the server before the request is
            abandoned and reported as an error.

    Returns:
        str | None: A message describing the finding (page not found, target
        found, JavaScript detected, or an error description). ``None`` when
        the response status is not 200 or nothing noteworthy was found.
    """
    url = f"{subject_url}/test?id={page_id}"
    if page_id % 1000 == 0:
        # Progress marker: only every thousandth ID is announced.
        print(f"Processing page {page_id}...")
    try:
        with requests.Session() as session:
            # requests applies no timeout by default; without one a stalled
            # connection would block this worker thread indefinitely.
            response = session.get(url, timeout=timeout)
            if response.status_code != 200:
                return None
            text = BeautifulSoup(response.text, 'html.parser').get_text()
            # NOTE(review): 118 is presumably the text length of the site's
            # "no such test" page - confirm against a live response.
            if len(text) == 118:
                return f"Page not found: {page_id}"
            for target in targets:
                if target in text:
                    return f"Target {target} found on page {page_id}"
            if "JavaScript" in text:
                return "JavaScript detected on the page"
    except Exception as e:
        # Deliberately broad: one failing page must not stop the whole scan.
        return f"Error processing page {page_id}: {e}"
    return None
def get_current_test_num(subj: str) -> int:
    """
    Get the current test number for the given subject.

    Asks ``SdamGIA.generate_test`` to generate a test for ``subj`` and
    converts the returned ID to an integer.
    NOTE(review): callers treat this ID as the newest existing test ID -
    confirm that the site assigns IDs sequentially.

    Args:
        subj (str): The subject for which to get the current test number.

    Returns:
        int: The ID returned by ``generate_test``.

    Raises:
        SystemExit: If the test cannot be generated or its ID is not numeric.
    """
    try:
        return int(SdamGIA().generate_test(subj, {1: 1}))
    except Exception as exc:
        # Show the underlying error too; the VPN hint alone hides what failed.
        print(f"Could not get the current test number: {exc}", file=sys.stderr)
        # sys.exit instead of the interactive-shell helper exit(), which is
        # only available when the ``site`` module has been loaded.
        sys.exit("Switch off your VPN and try again")
def id_generator_up(start_id: int, end_id: int):
    """
    Generate IDs from start_id up to end_id (both inclusive).

    Once every ID has been yielded, the next request for a value pauses for
    ten seconds and then terminates the process.

    Args:
        start_id (int): The starting ID.
        end_id (int): The ending ID.

    Yields:
        int: Each ID in ascending order.

    Raises:
        SystemExit: When the generator is advanced past ``end_id``.
    """
    yield from range(start_id, end_id + 1)
    # NOTE(review): the pause presumably gives in-flight requests time to
    # finish before the process exits - confirm.
    time.sleep(10)
    # sys.exit rather than the interactive-shell helper exit(); consistent
    # with the other exits in this file.
    sys.exit("All variants have been iterated")
def id_generator_down(start_id: int, end_id: int):
    """
    Generate IDs from start_id down to end_id (both inclusive).

    Once every ID has been yielded, the next request for a value pauses for
    ten seconds and then terminates the process.

    Args:
        start_id (int): The starting (highest) ID.
        end_id (int): The ending (lowest) ID.

    Yields:
        int: Each ID in descending order.

    Raises:
        SystemExit: When the generator is advanced past ``end_id``.
    """
    yield from range(start_id, end_id - 1, -1)
    # NOTE(review): the pause presumably gives in-flight requests time to
    # finish before the process exits - confirm.
    time.sleep(10)
    # sys.exit rather than the interactive-shell helper exit(); consistent
    # with the other exits in this file.
    sys.exit("All variants have been iterated")
def _report_result(future, page_id: int) -> None:
    """Print the outcome of one finished page check, if there is anything to say."""
    try:
        result = future.result()
    except Exception as exc:
        print(f"Exception processing page {page_id}: {exc}")
    else:
        if result:
            print(result)


def main(generator, start_id: int, end_id: int):
    """
    Main function for processing pages.

    Keeps up to 100 ``check_page`` calls in flight on a thread pool: each time
    one finishes, its result is printed and the next ID from the generator is
    submitted in its place.

    Args:
        generator (function): Called as ``generator(start_id, end_id)``; must
            return an iterator of page IDs.
        start_id (int): The starting ID.
        end_id (int): The ending ID.

    Raises:
        SystemExit: Re-raised when the ID iterator terminates the process;
            results of pages still in flight are printed first.
    """
    # os.cpu_count() returns None when the count cannot be determined.
    threads = (os.cpu_count() or 1) * 3
    exhausted = object()  # sentinel, so that no real ID is mistaken for "no more IDs"
    futures = {}  # in-flight future -> page ID it is checking

    with ThreadPoolExecutor(max_workers=threads) as executor:
        ids = generator(start_id, end_id)
        try:
            # Seed the pool; stop early if there are fewer than 100 IDs.
            for _ in range(100):
                page_id = next(ids, exhausted)
                if page_id is exhausted:
                    break
                futures[executor.submit(check_page, page_id)] = page_id

            while futures:
                done, _pending = concurrent.futures.wait(
                    futures, return_when=concurrent.futures.FIRST_COMPLETED
                )
                for future in done:
                    _report_result(future, futures.pop(future))
                    new_id = next(ids, exhausted)
                    if new_id is not exhausted:
                        futures[executor.submit(check_page, new_id)] = new_id
        except SystemExit:
            # An ID generator may end the run by exiting the process. Report
            # the pages that are still being checked before letting it exit,
            # otherwise their results would be lost.
            for future in concurrent.futures.as_completed(list(futures)):
                _report_result(future, futures[future])
            raise
def search_from_to(start: int, end: int):
    """
    Search for targets from start up to end and print the elapsed time.

    Args:
        start (int): The starting ID.
        end (int): The ending ID.
    """
    start_time = time.time()
    try:
        main(id_generator_up, start, end)
    finally:
        # The ID generator ends the run by raising SystemExit, so without
        # ``finally`` this timing line would never be reached.
        elapsed_time = time.time() - start_time
        print(f"Total execution time: {elapsed_time:.2f} seconds")
def search_from_to_last(start: int):
    """
    Search for targets from start up to the current test number and print the
    elapsed time.

    Reads the module-level ``subject_name``, which is assigned in the
    ``__main__`` block, to look up the current test number.

    Args:
        start (int): The starting ID.
    """
    end = get_current_test_num(subject_name)
    start_time = time.time()
    try:
        main(id_generator_up, start, end)
    finally:
        # The ID generator ends the run by raising SystemExit, so without
        # ``finally`` this timing line would never be reached.
        elapsed_time = time.time() - start_time
        print(f"Total execution time: {elapsed_time:.2f} seconds")
def search_from_current_to_end(end: int):
    """
    Search for targets from the current test number down to end and print the
    elapsed time.

    Reads the module-level ``subject_name``, which is assigned in the
    ``__main__`` block, to look up the current test number.

    Args:
        end (int): The ending (lowest) ID.
    """
    start_id = get_current_test_num(subject_name)
    start_time = time.time()
    try:
        main(id_generator_down, start_id, end)
    finally:
        # The ID generator ends the run by raising SystemExit, so without
        # ``finally`` this timing line would never be reached.
        elapsed_time = time.time() - start_time
        print(f"Total execution time: {elapsed_time:.2f} seconds")
def search_from_current_for_first():
    """
    Search for targets from the current test number down to ID 0 and print the
    elapsed time.

    Reads the module-level ``subject_name``, which is assigned in the
    ``__main__`` block, to look up the current test number.
    """
    start_id = get_current_test_num(subject_name)
    start_time = time.time()
    try:
        main(id_generator_down, start_id, 0)
    finally:
        # The ID generator ends the run by raising SystemExit, so without
        # ``finally`` this timing line would never be reached.
        elapsed_time = time.time() - start_time
        print(f"Total execution time: {elapsed_time:.2f} seconds")
def subject_url_by_name(name: str):
    """
    Look up the base URL of a subject's site.

    Args:
        name (str): Short subject code, e.g. ``'math'`` or ``'phys'``.

    Returns:
        str: The base URL for that subject, or ``None`` if the code is unknown.
    """
    base_urls = {
        'math': 'https://math-ege.sdamgia.ru',
        'mathb': 'https://mathb-ege.sdamgia.ru',
        'phys': 'https://phys-ege.sdamgia.ru',
        'inf': 'https://inf-ege.sdamgia.ru',
        'rus': 'https://rus-ege.sdamgia.ru',
        'bio': 'https://bio-ege.sdamgia.ru',
        'en': 'https://en-ege.sdamgia.ru',
        'chem': 'https://chem-ege.sdamgia.ru',
        'geo': 'https://geo-ege.sdamgia.ru',
        'soc': 'https://soc-ege.sdamgia.ru',
        'de': 'https://de-ege.sdamgia.ru',
        'fr': 'https://fr-ege.sdamgia.ru',
        'lit': 'https://lit-ege.sdamgia.ru',
        'sp': 'https://sp-ege.sdamgia.ru',
        'hist': 'https://hist-ege.sdamgia.ru',
    }
    try:
        return base_urls[name]
    except KeyError:
        # Unknown subject code: callers receive None rather than an error.
        return None
if __name__ == "__main__":
    # Don't forget to install requirements.txt from https://github.com/zv3zdochka/LAW.git

    # Run configuration. ``targets``, ``subject_name`` and ``subject_url`` are
    # module-level names read by the functions above.
    targets = ["Щербина", "Смирнова"]
    subject_name = 'math'
    start = 55_120_100
    end = 74_393_000  # same value as before, digits grouped in thousands
    subject_url = subject_url_by_name(subject_name)

    # Enable the search mode(s) you need.
    # NOTE: the ID generators terminate the process once their range is
    # exhausted, so a search placed after another one only runs if the first
    # returns normally.
    search_from_current_for_first()  # Scans from the newest test down to the very first one
    # search_from_current_to_end(end)  # Scans from the newest test down to `end`
    search_from_to(start, end)  # Scans from `start` up to `end`
    # search_from_to_last(start)  # Scans from `start` up to the newest test

    # Reaching this point means every search returned normally: report success.
    sys.exit(0)