Skip to content

Commit

Permalink
Code restructure
Browse files Browse the repository at this point in the history
  • Loading branch information
allala0 committed Feb 3, 2024
1 parent 3227946 commit 098e083
Show file tree
Hide file tree
Showing 16 changed files with 66 additions and 60 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
__pycache__/
chrome_profiles/
venv/
.venv/
Pipfile
Pipfile.lock
ignore/
ignore/
10 changes: 4 additions & 6 deletions example.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
from harvester_manager import HarvesterManger
from harvester import Harvester
import harvester

# Simple example of using captcha harvester with one Harvester and printing captcha responses to console.


def main():
url = 'https://www.google.com/recaptcha/api2/demo'
# Scraping sitekey from url
sitekey = Harvester.get_sitekey(url)
sitekey = harvester.Harvester.get_sitekey(url)

# Creating HarvesterManager object with an additional argument, response_callback, which is a function
# that will be called every time HarvesterManager pulls a captcha response from a Harvester (after the user solves the captcha)
harvester_manager = HarvesterManger(response_callback=lambda x: print(x['response']))
harvester_manager = harvester.HarvesterManger(response_callback=lambda x: print(x['response']))
# Adding Harvester object to HarvesterManager object with url and sitekey as arguments
harvester_manager.add_harvester(Harvester(url, sitekey))
harvester_manager.add_harvester(harvester.Harvester(url, sitekey))
# Launching all harvesters
harvester_manager.start_harvesters()
# Starting main_loop inside HarvesterManager object, that will manage all Harvesters
Expand Down
22 changes: 10 additions & 12 deletions example_bot.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
# Importing local packages
from browser import Browser
from harvester_manager import HarvesterManger
from harvester import Harvester
import harvester
# Importing external packages
from selenium.webdriver.common.by import By
from selenium.common.exceptions import WebDriverException
Expand All @@ -10,11 +8,10 @@
import datetime
from threading import Thread
import random
import zipfile


class Bot(Browser):
def __init__(self, harvester_manager: HarvesterManger, delay: int = 0.1):
class Bot(harvester.Browser):
def __init__(self, harvester_manager: harvester.HarvesterManger, delay: int = 0.1):

super(Bot, self).__init__()

Expand All @@ -32,8 +29,9 @@ def main_loop(self) -> None:
# Try except to be upgraded...
try:
self.tick()
except WebDriverException:
print('Some selenium exception (bot).')
except WebDriverException as e:
print(e)
self.looping = False
time.sleep(self.delay)

def tick(self):
Expand Down Expand Up @@ -126,14 +124,14 @@ def main():

url = 'https://www.google.com/recaptcha/api2/demo'
# Scraping sitekey from url
sitekey = Harvester.get_sitekey(url)
sitekey = harvester.Harvester.get_sitekey(url)

# Creating HarvesterManager object
harvester_manager = HarvesterManger()
harvester_manager = harvester.HarvesterManger()
# Adding Harvester object to HarvesterManager object with url and sitekey as arguments
harvester_manager.add_harvester(Harvester(url=url, sitekey=sitekey))
harvester_manager.add_harvester(harvester.Harvester(url=url, sitekey=sitekey))
# Adding Harvester object to HarvesterManager object with additional arguments to log in to a Google account and open a window with YouTube.
harvester_manager.add_harvester(Harvester(url=url, sitekey=sitekey, log_in=True, open_youtube=True))
harvester_manager.add_harvester(harvester.Harvester(url=url, sitekey=sitekey, log_in=True, open_youtube=True))
# Launching all harvesters
harvester_manager.start_harvesters()
# Creating Bot object with HarvesterManager as argument so it can reach its response_queue
Expand Down
5 changes: 5 additions & 0 deletions harvester/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from .harvester import Harvester
from .harvester_manager import HarvesterManger
from .browser import Browser

__all__ = ['Harvester', 'HarvesterManger', 'Browser']
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
10 changes: 6 additions & 4 deletions harvester.py → harvester/harvester.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Importing local packages
from browser import Browser
from .browser import Browser
# Importing external packages
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
Expand Down Expand Up @@ -74,6 +74,7 @@ def __init__(self, url: str, sitekey: str, proxy: str = None, log_in: bool = Fal
self.control_element = f'controlElement{random.randint(0, 10 ** 10)}'
self.is_youtube_setup = False
self.ticking = False
self.closed = False

Harvester.harvester_count += 1
pathlib.Path(self.profile_path).mkdir(parents=True, exist_ok=True)
Expand Down Expand Up @@ -230,7 +231,7 @@ def pull_response(self) -> dict:
def reset_harvester(self) -> None:
    """Reset the reCAPTCHA widget, but only while the harvester is open.

    Does nothing when ``self.is_open`` is falsy; otherwise runs
    ``grecaptcha.reset();`` through ``self.execute_script``.
    """
    if self.is_open:
        self.execute_script('grecaptcha.reset();')

def window_size_check(self) -> None:
Expand Down Expand Up @@ -275,8 +276,9 @@ def tick(self) -> None:
self.youtube_setup()
self.response_check()
self.window_size_check()
except WebDriverException:
print('Some Selenium error (harvester).')
except WebDriverException as e:
print(e)
self.closed = True

self.ticking = False

Expand Down
7 changes: 6 additions & 1 deletion harvester_manager.py → harvester/harvester_manager.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Importing local packages
from harvester import Harvester
from .harvester import Harvester
# Importing standard packages
import time
from threading import Thread
Expand Down Expand Up @@ -41,12 +41,17 @@ def main_loop(self) -> None:
self.looping = True
while self.looping:
self.tick()
if len(self.harvesters) == 0:
break
time.sleep(self.delay)

def tick(self) -> None:
    """Run one management cycle over every registered harvester.

    Pulls pending responses from the harvesters, runs the response-queue
    check, removes harvesters flagged as ``closed`` from ``self.harvesters``
    and starts a ``tick`` thread for each remaining harvester that is not
    already ticking.
    """
    self.pull_responses_from_harvesters()
    self.response_queue_check()
    # Iterate over a snapshot: removing from the list that is being iterated
    # shifts the remaining items left, which makes the loop skip the
    # harvester that follows each removed one. The original list is still
    # mutated in place so other holders of the reference see the removal.
    for harvester in list(self.harvesters):
        if harvester.closed:
            self.harvesters.remove(harvester)
            continue
        if not harvester.ticking:
            Thread(target=harvester.tick).start()

Expand Down
69 changes: 33 additions & 36 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,42 +1,39 @@
appdirs==1.4.4
async-generator==1.10
attrs==21.4.0
beautifulsoup4==4.10.0
bs4==0.0.1
certifi>=2022.12.07
cffi==1.15.0
charset-normalizer==2.0.10
colorama==0.4.4
configparser==5.2.0
crayons==0.4.0
cryptography==36.0.1
cssselect==1.1.0
fake-useragent==0.1.11
h11==0.13.0
idna==3.3
importlib-metadata==4.10.1
lxml>=4.9.1
outcome==1.1.0
parse==1.19.0
attrs==23.2.0
beautifulsoup4==4.12.3
bs4==0.0.2
certifi==2024.2.2
cffi==1.16.0
charset-normalizer==3.3.2
colorama==0.4.6
cssselect==1.2.0
fake-useragent==1.4.0
h11==0.14.0
idna==3.6
importlib-metadata==7.0.1
lxml==5.1.0
outcome==1.3.0.post0
packaging==23.2
parse==1.20.1
pycparser==2.21
pyee==8.2.2
pyOpenSSL==21.0.0
pyppeteer==1.0.2
pyquery==1.4.3
requests==2.27.1
pyquery==2.0.0
PySocks==1.7.1
python-dotenv==1.0.1
requests==2.31.0
requests-html==0.10.0
selenium==4.1.0
six==1.16.0
sniffio==1.2.0
selenium==4.17.2
sniffio==1.3.0
sortedcontainers==2.4.0
soupsieve==2.3.1
termcolor==1.1.0
tqdm==4.62.3
trio==0.19.0
trio-websocket==0.9.2
urllib3==1.26.8
w3lib==1.22.0
webdriver-manager==3.5.2
websockets==10.1
wsproto==1.0.0
zipp==3.7.0
soupsieve==2.5
tqdm==4.66.1
trio==0.24.0
trio-websocket==0.11.1
typing_extensions==4.9.0
urllib3==1.26.18
w3lib==2.1.2
webdriver-manager==4.0.1
websockets==10.4
wsproto==1.2.0
zipp==3.17.0

0 comments on commit 098e083

Please sign in to comment.