From 098e083018375e6bd050ba56c872dcddbbab3e48 Mon Sep 17 00:00:00 2001 From: allala0 Date: Sat, 3 Feb 2024 21:09:27 +0100 Subject: [PATCH] Code restructure --- .gitignore | 3 +- example.py | 10 +-- example_bot.py | 22 +++--- harvester/__init__.py | 5 ++ browser.py => harvester/browser.py | 0 .../extension}/background.html | 0 .../extension}/background.js | 0 {extension => harvester/extension}/content.js | 0 {extension => harvester/extension}/icon.png | Bin .../extension}/jquery-3.6.0.min.js | 0 .../extension}/manifest.json | 0 {extension => harvester/extension}/popup.html | 0 {extension => harvester/extension}/popup.js | 0 harvester.py => harvester/harvester.py | 10 ++- .../harvester_manager.py | 7 +- requirements.txt | 69 +++++++++--------- 16 files changed, 66 insertions(+), 60 deletions(-) create mode 100644 harvester/__init__.py rename browser.py => harvester/browser.py (100%) rename {extension => harvester/extension}/background.html (100%) rename {extension => harvester/extension}/background.js (100%) rename {extension => harvester/extension}/content.js (100%) rename {extension => harvester/extension}/icon.png (100%) rename {extension => harvester/extension}/jquery-3.6.0.min.js (100%) rename {extension => harvester/extension}/manifest.json (100%) rename {extension => harvester/extension}/popup.html (100%) rename {extension => harvester/extension}/popup.js (100%) rename harvester.py => harvester/harvester.py (98%) rename harvester_manager.py => harvester/harvester_manager.py (91%) diff --git a/.gitignore b/.gitignore index fb35c4d..df5fd2a 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ __pycache__/ chrome_profiles/ venv/ +.venv/ Pipfile Pipfile.lock -ignore/ \ No newline at end of file +ignore/ diff --git a/example.py b/example.py index 8087e82..764c0ae 100644 --- a/example.py +++ b/example.py @@ -1,19 +1,17 @@ -from harvester_manager import HarvesterManger -from harvester import Harvester +import harvester # Simple example of using captcha harvester with one Harvester and printing captcha responses to console. - def main(): url = 'https://www.google.com/recaptcha/api2/demo' # Scraping sitekey from url - sitekey = Harvester.get_sitekey(url) + sitekey = harvester.Harvester.get_sitekey(url) # Creating HarvesterManager object with additional argument response_callback which is function, # that will be called everytime HarvesterManager pull captcha response from Harvseter ( after user solve captcha ) - harvester_manager = HarvesterManger(response_callback=lambda x: print(x['response'])) + harvester_manager = harvester.HarvesterManger(response_callback=lambda x: print(x['response'])) # Adding Harvester object to HarvesterManager object with url and sitekey as arguments - harvester_manager.add_harvester(Harvester(url, sitekey)) + harvester_manager.add_harvester(harvester.Harvester(url, sitekey)) # Launching all harvesters harvester_manager.start_harvesters() # Starting main_loop inside HarvesterManager object, that will manage all Harvesters diff --git a/example_bot.py b/example_bot.py index 3d6f21f..ce24763 100644 --- a/example_bot.py +++ b/example_bot.py @@ -1,7 +1,5 @@ # Importing local packages -from browser import Browser -from harvester_manager import HarvesterManger -from harvester import Harvester +import harvester # Importing external packages from selenium.webdriver.common.by import By from selenium.common.exceptions import WebDriverException @@ -10,11 +8,10 @@ import datetime from threading import Thread import random -import zipfile -class Bot(Browser): - def __init__(self, harvester_manager: HarvesterManger, delay: int = 0.1): +class Bot(harvester.Browser): + def __init__(self, harvester_manager: harvester.HarvesterManger, delay: int = 0.1): super(Bot, self).__init__() @@ -32,8 +29,9 @@ def main_loop(self) -> None: # Try except to be upgraded... try: self.tick() - except WebDriverException: - print('Some selenium exception (bot).') + except WebDriverException as e: + print(e) + self.looping = False time.sleep(self.delay) def tick(self): @@ -126,14 +124,14 @@ def main(): url = 'https://www.google.com/recaptcha/api2/demo' # Scraping sitekey from url - sitekey = Harvester.get_sitekey(url) + sitekey = harvester.Harvester.get_sitekey(url) # Creating HarvesterManager object - harvester_manager = HarvesterManger() + harvester_manager = harvester.HarvesterManger() # Adding Harvester object to HarvesterManager object with url and sitekey as arguments - harvester_manager.add_harvester(Harvester(url=url, sitekey=sitekey)) + harvester_manager.add_harvester(harvester.Harvester(url=url, sitekey=sitekey)) # Adding Harvester object to HarvesterManager object with additional arguments to login to Google account and open window with Youtube. - harvester_manager.add_harvester(Harvester(url=url, sitekey=sitekey, log_in=True, open_youtube=True)) + harvester_manager.add_harvester(harvester.Harvester(url=url, sitekey=sitekey, log_in=True, open_youtube=True)) # Launching all harvesters harvester_manager.start_harvesters() # Creating Bot object with HarvesterManager as argument so it can reach its response_queue diff --git a/harvester/__init__.py b/harvester/__init__.py new file mode 100644 index 0000000..2c9678b --- /dev/null +++ b/harvester/__init__.py @@ -0,0 +1,5 @@ +from .harvester import Harvester +from .harvester_manager import HarvesterManger +from .browser import Browser + +__all__ = ['Harvester', 'HarvesterManger', 'Browser'] diff --git a/browser.py b/harvester/browser.py similarity index 100% rename from browser.py rename to harvester/browser.py diff --git a/extension/background.html b/harvester/extension/background.html similarity index 100% rename from extension/background.html rename to harvester/extension/background.html diff --git a/extension/background.js b/harvester/extension/background.js similarity index 100% rename from extension/background.js rename to harvester/extension/background.js diff --git a/extension/content.js b/harvester/extension/content.js similarity index 100% rename from extension/content.js rename to harvester/extension/content.js diff --git a/extension/icon.png b/harvester/extension/icon.png similarity index 100% rename from extension/icon.png rename to harvester/extension/icon.png diff --git a/extension/jquery-3.6.0.min.js b/harvester/extension/jquery-3.6.0.min.js similarity index 100% rename from extension/jquery-3.6.0.min.js rename to harvester/extension/jquery-3.6.0.min.js diff --git a/extension/manifest.json b/harvester/extension/manifest.json similarity index 100% rename from extension/manifest.json rename to harvester/extension/manifest.json diff --git a/extension/popup.html b/harvester/extension/popup.html similarity index 100% rename from extension/popup.html rename to harvester/extension/popup.html diff --git a/extension/popup.js b/harvester/extension/popup.js similarity index 100% rename from extension/popup.js rename to harvester/extension/popup.js diff --git a/harvester.py b/harvester/harvester.py similarity index 98% rename from harvester.py rename to harvester/harvester.py index 30fdaa3..802dd66 100644 --- a/harvester.py +++ b/harvester/harvester.py @@ -1,5 +1,5 @@ # Importing local packages -from browser import Browser +from .browser import Browser # Importing external packages from selenium.common.exceptions import WebDriverException from selenium.webdriver.common.by import By @@ -74,6 +74,7 @@ def __init__(self, url: str, sitekey: str, proxy: str = None, log_in: bool = Fal self.control_element = f'controlElement{random.randint(0, 10 ** 10)}' self.is_youtube_setup = False self.ticking = False + self.closed = False Harvester.harvester_count += 1 pathlib.Path(self.profile_path).mkdir(parents=True, exist_ok=True) @@ -230,7 +231,7 @@ def pull_response(self) -> dict: def reset_harvester(self) -> None: if not self.is_open: return - + self.execute_script('grecaptcha.reset();') def window_size_check(self) -> None: @@ -275,8 +276,9 @@ def tick(self) -> None: self.youtube_setup() self.response_check() self.window_size_check() - except WebDriverException: - print('Some Selenium error (harvester).') + except WebDriverException as e: + print(e) + self.closed = True self.ticking = False diff --git a/harvester_manager.py b/harvester/harvester_manager.py similarity index 91% rename from harvester_manager.py rename to harvester/harvester_manager.py index 55ec3d3..0a10a51 100644 --- a/harvester_manager.py +++ b/harvester/harvester_manager.py @@ -1,5 +1,5 @@ # Importing local packages -from harvester import Harvester +from .harvester import Harvester # Importing standard packages import time from threading import Thread @@ -41,12 +41,17 @@ def main_loop(self) -> None: self.looping = True while self.looping: self.tick() + if len(self.harvesters) == 0: + break time.sleep(self.delay) def tick(self) -> None: self.pull_responses_from_harvesters() self.response_queue_check() for harvester in self.harvesters: + if harvester.closed: + self.harvesters.remove(harvester) + continue if not harvester.ticking: Thread(target=harvester.tick).start() diff --git a/requirements.txt b/requirements.txt index 23b740d..4c53d06 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,42 +1,39 @@ appdirs==1.4.4 -async-generator==1.10 -attrs==21.4.0 -beautifulsoup4==4.10.0 -bs4==0.0.1 -certifi>=2022.12.07 -cffi==1.15.0 -charset-normalizer==2.0.10 -colorama==0.4.4 -configparser==5.2.0 -crayons==0.4.0 -cryptography==36.0.1 -cssselect==1.1.0 -fake-useragent==0.1.11 -h11==0.13.0 -idna==3.3 -importlib-metadata==4.10.1 -lxml>=4.9.1 -outcome==1.1.0 -parse==1.19.0 +attrs==23.2.0 +beautifulsoup4==4.12.3 +bs4==0.0.2 +certifi==2024.2.2 +cffi==1.16.0 +charset-normalizer==3.3.2 +colorama==0.4.6 +cssselect==1.2.0 +fake-useragent==1.4.0 +h11==0.14.0 +idna==3.6 +importlib-metadata==7.0.1 +lxml==5.1.0 +outcome==1.3.0.post0 +packaging==23.2 +parse==1.20.1 pycparser==2.21 pyee==8.2.2 -pyOpenSSL==21.0.0 pyppeteer==1.0.2 -pyquery==1.4.3 -requests==2.27.1 +pyquery==2.0.0 +PySocks==1.7.1 +python-dotenv==1.0.1 +requests==2.31.0 requests-html==0.10.0 -selenium==4.1.0 -six==1.16.0 -sniffio==1.2.0 +selenium==4.17.2 +sniffio==1.3.0 sortedcontainers==2.4.0 -soupsieve==2.3.1 -termcolor==1.1.0 -tqdm==4.62.3 -trio==0.19.0 -trio-websocket==0.9.2 -urllib3==1.26.8 -w3lib==1.22.0 -webdriver-manager==3.5.2 -websockets==10.1 -wsproto==1.0.0 -zipp==3.7.0 \ No newline at end of file +soupsieve==2.5 +tqdm==4.66.1 +trio==0.24.0 +trio-websocket==0.11.1 +typing_extensions==4.9.0 +urllib3==1.26.18 +w3lib==2.1.2 +webdriver-manager==4.0.1 +websockets==10.4 +wsproto==1.2.0 +zipp==3.17.0