Reviewer notes on this amended patch:
- Line breaks and indentation were rebuilt from a whitespace-collapsed copy.
  The leading whitespace inside the webscraper/main.py hunk is a best guess;
  confirm it against the real file before applying.
- The `index` blob ids are carried over from the original patch and no longer
  match the amended contents of .gitignore, Ebay_API.py, EbayAPI.py,
  test_ebay_api.py and main.py.

diff --git a/.env b/.env
deleted file mode 100644
index e69de29..0000000
diff --git a/.gitignore b/.gitignore
index e69de29..139ac7b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,54 @@
+# === Python build artifacts ===
+*.pyc
+*.pyo
+*.pyd
+__pycache__/
+*.log
+
+# === SQLite & output files ===
+*.sqlite3
+*.db
+output.json
+
+# === Environment variables ===
+.env
+.env.*
+*.env
+
+# === Virtual environments ===
+venv/
+.venv/
+.env/
+
+# === VSCode project settings ===
+.vscode/
+
+# === macOS system files ===
+.DS_Store
+
+# === Pytest and test cache ===
+htmlcov/
+.coverage
+.cache/
+.pytest_cache/
+.tox/
+
+# === Jupyter Notebook ===
+.ipynb_checkpoints/
+
+# === Django migration artifacts (optional to ignore) ===
+# Uncomment the lines below if you want to regenerate migrations often
+# **/migrations/*.py
+# **/migrations/*.pyc
+# !**/migrations/__init__.py
+
+# === FastAPI-specific artifacts ===
+fastapi_email/email_db.sqlite3
+
+# === IDE-specific ===
+.idea/
+*.sublime-project
+*.sublime-workspace
+
+# === GitHub Codespaces or devcontainers ===
+.devcontainer/
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..b2d293c
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,14 @@
+{
+    "python.analysis.extraPaths": [
+        "./webscraper/ABC"
+    ],
+    "python.testing.unittestArgs": [
+        "-v",
+        "-s",
+        "./webscraper",
+        "-p",
+        "*test*.py"
+    ],
+    "python.testing.pytestEnabled": false,
+    "python.testing.unittestEnabled": true
+}
\ No newline at end of file
diff --git a/accounts/__pycache__/__init__.cpython-312.pyc b/accounts/__pycache__/__init__.cpython-312.pyc
deleted file mode 100644
index 228acde..0000000
Binary files a/accounts/__pycache__/__init__.cpython-312.pyc and /dev/null differ
diff --git a/accounts/__pycache__/admin.cpython-312.pyc b/accounts/__pycache__/admin.cpython-312.pyc
deleted file mode 100644
index 5a69a0d..0000000
Binary files a/accounts/__pycache__/admin.cpython-312.pyc and /dev/null differ
diff --git a/accounts/__pycache__/apps.cpython-312.pyc b/accounts/__pycache__/apps.cpython-312.pyc
deleted file mode 100644
index 33636a4..0000000
Binary files a/accounts/__pycache__/apps.cpython-312.pyc and /dev/null differ
diff --git a/accounts/__pycache__/models.cpython-312.pyc b/accounts/__pycache__/models.cpython-312.pyc
deleted file mode 100644
index 8e8be97..0000000
Binary files a/accounts/__pycache__/models.cpython-312.pyc and /dev/null differ
diff --git a/accounts/migrations/__pycache__/0001_initial.cpython-312.pyc b/accounts/migrations/__pycache__/0001_initial.cpython-312.pyc
deleted file mode 100644
index 2a79d5d..0000000
Binary files a/accounts/migrations/__pycache__/0001_initial.cpython-312.pyc and /dev/null differ
diff --git a/accounts/migrations/__pycache__/__init__.cpython-312.pyc b/accounts/migrations/__pycache__/__init__.cpython-312.pyc
deleted file mode 100644
index f9a196d..0000000
Binary files a/accounts/migrations/__pycache__/__init__.cpython-312.pyc and /dev/null differ
diff --git a/cheaper/__pycache__/__init__.cpython-312.pyc b/cheaper/__pycache__/__init__.cpython-312.pyc
deleted file mode 100644
index 65ab433..0000000
Binary files a/cheaper/__pycache__/__init__.cpython-312.pyc and /dev/null differ
diff --git a/cheaper/__pycache__/settings.cpython-312.pyc b/cheaper/__pycache__/settings.cpython-312.pyc
deleted file mode 100644
index 048f033..0000000
Binary files a/cheaper/__pycache__/settings.cpython-312.pyc and /dev/null differ
diff --git a/cheaper/__pycache__/urls.cpython-312.pyc b/cheaper/__pycache__/urls.cpython-312.pyc
deleted file mode 100644
index 8e83205..0000000
Binary files a/cheaper/__pycache__/urls.cpython-312.pyc and /dev/null differ
diff --git a/webscraper/ABC/Ebay_API.py b/webscraper/ABC/Ebay_API.py
new file mode 100644
index 0000000..2be5a07
--- /dev/null
+++ b/webscraper/ABC/Ebay_API.py
@@ -0,0 +1,23 @@
+"""Abstract interface for eBay API clients."""
+from abc import ABC, abstractmethod
+
+
+class EbayApi(ABC):
+    """Contract implemented by concrete eBay API clients."""
+
+    @staticmethod
+    @abstractmethod
+    def retrieve_access_token() -> str:
+        """Return the user access token for the sandbox environment.
+
+        The token is a long string of text, numbers and symbols.
+        """
+
+    @staticmethod
+    @abstractmethod
+    def retrieve_ebay_response(httprequest: str, query: str) -> dict:
+        """Return the JSON response for ``query`` sent to ``httprequest``.
+
+        The response is a large payload with category ids, names and
+        parent category nodes.
+        """
diff --git a/webscraper/ABC/__pycache__/base_scraper.cpython-311.pyc b/webscraper/ABC/__pycache__/base_scraper.cpython-311.pyc
deleted file mode 100644
index 2dc46e3..0000000
Binary files a/webscraper/ABC/__pycache__/base_scraper.cpython-311.pyc and /dev/null differ
diff --git a/webscraper/api/EbayAPI.py b/webscraper/api/EbayAPI.py
new file mode 100644
index 0000000..15f78e3
--- /dev/null
+++ b/webscraper/api/EbayAPI.py
@@ -0,0 +1,80 @@
+"""Client helpers for the eBay sandbox REST API."""
+import os
+
+import requests
+from dotenv import load_dotenv
+from requests.auth import HTTPBasicAuth
+
+load_dotenv()  # load clientid / clientsecret from a local .env file
+
+# requests waits forever unless a timeout is passed explicitly.
+REQUEST_TIMEOUT_SECONDS = 10
+
+
+class EbayAPI:
+    """Static helpers for authenticating against and querying eBay."""
+
+    # Application credentials, read from the environment at import time.
+    client_secret_key = os.getenv("clientsecret")
+    client_id_key = os.getenv("clientid")
+
+    # HTTP Basic credentials presented to the OAuth token endpoint.
+    get_user_key = HTTPBasicAuth(client_id_key, client_secret_key)
+
+    @staticmethod
+    def retrieve_access_token() -> str:
+        """Request an access token using the client-credentials grant.
+
+        Returns:
+            The ``access_token`` value from the token endpoint's response.
+
+        Raises:
+            Exception: If the endpoint answers 404 or the response holds
+                no ``access_token``.
+        """
+        response = requests.post(
+            "https://api.sandbox.ebay.com/identity/v1/oauth2/token",
+            headers={"Content-Type": "application/x-www-form-urlencoded"},
+            data={
+                "grant_type": "client_credentials",
+                "scope": "https://api.ebay.com/oauth/api_scope",
+            },
+            auth=EbayAPI.get_user_key,
+            timeout=REQUEST_TIMEOUT_SECONDS,
+        )
+        # Check the status before decoding: an error body may not be JSON.
+        if response.status_code == 404:
+            raise Exception("404 error here")
+        access_token = response.json().get("access_token")
+        if not access_token:
+            raise Exception("token response contained no access_token")
+        return access_token
+
+    @staticmethod
+    def retrieve_ebay_response(httprequest: str, query: str) -> dict:
+        """Send an authorized GET request and return the decoded JSON body.
+
+        Args:
+            httprequest: URL of the eBay endpoint to call.
+            query: Search text, sent as the ``q`` query parameter.
+
+        Returns:
+            The response body decoded from JSON.
+
+        Raises:
+            Exception: If the endpoint answers 404 or no access token
+                could be obtained.
+        """
+        token = EbayAPI.retrieve_access_token()
+        response = requests.get(
+            httprequest,
+            headers={
+                "Authorization": f"Bearer {token}",
+                "Content-Type": "application/json",
+            },
+            params={"q": query, "category_tree_id": 0},
+            timeout=REQUEST_TIMEOUT_SECONDS,
+        )
+        if response.status_code == 404:
+            raise Exception("not found 404 error")
+        return response.json()
diff --git a/webscraper/api/__pycache__/interface.cpython-311.pyc b/webscraper/api/__pycache__/interface.cpython-311.pyc
deleted file mode 100644
index 1b9240d..0000000
Binary files a/webscraper/api/__pycache__/interface.cpython-311.pyc and /dev/null differ
diff --git a/webscraper/api/__pycache__/routes.cpython-311.pyc b/webscraper/api/__pycache__/routes.cpython-311.pyc
deleted file mode 100644
index 5e18603..0000000
Binary files a/webscraper/api/__pycache__/routes.cpython-311.pyc and /dev/null differ
diff --git a/webscraper/api/tests/__pycache__/test_routes.cpython-311.pyc b/webscraper/api/tests/__pycache__/test_routes.cpython-311.pyc
deleted file mode 100644
index 1ce37af..0000000
Binary files a/webscraper/api/tests/__pycache__/test_routes.cpython-311.pyc and /dev/null differ
diff --git a/webscraper/api/tests/test_ebay_api.py b/webscraper/api/tests/test_ebay_api.py
new file mode 100644
index 0000000..7651c13
--- /dev/null
+++ b/webscraper/api/tests/test_ebay_api.py
@@ -0,0 +1,50 @@
+"""Unit tests for webscraper.api.EbayAPI."""
+import unittest
+from unittest.mock import Mock, patch
+
+import requests
+from webscraper.api.EbayAPI import EbayAPI
+
+
+def _fake_response(status_code, payload):
+    """Return a stand-in response with a fixed status code and JSON body."""
+    response = Mock()
+    response.status_code = status_code
+    response.json.return_value = payload
+    return response
+
+
+class EbayTestApi(unittest.TestCase):
+    """Tests for the token and search helpers on EbayAPI."""
+
+    def setUp(self):
+        self.EbayAPI = EbayAPI
+
+    @unittest.skipUnless(
+        EbayAPI.client_id_key and EbayAPI.client_secret_key,
+        "eBay sandbox credentials are not configured",
+    )
+    def test_retrieve_access_token(self):
+        # Live call against the sandbox; skipped when no credentials exist.
+        self.assertIsInstance(self.EbayAPI.retrieve_access_token(), str)
+
+    @patch("webscraper.api.EbayAPI.requests.post")
+    def test_retrieve_access_token_invalid(self, mock_post):
+        mock_post.return_value = _fake_response(404, {"error": "not found"})
+
+        with self.assertRaises(Exception):
+            self.EbayAPI.retrieve_access_token()
+
+    @patch("webscraper.api.EbayAPI.requests.get")
+    @patch("webscraper.api.EbayAPI.requests.post")
+    def test_retrieve_ebay_response_invalid(self, mock_post, mock_get):
+        # The token request succeeds so the 404 under test comes from GET.
+        mock_post.return_value = _fake_response(200, {"access_token": "token"})
+        mock_get.return_value = _fake_response(404, {"error": "not found"})
+
+        with self.assertRaises(Exception):
+            self.EbayAPI.retrieve_ebay_response("https://test", "item")
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/webscraper/src/main.py b/webscraper/main.py
similarity index 85%
rename from webscraper/src/main.py
rename to webscraper/main.py
index 4a27839..0ae9fe5 100644
--- a/webscraper/src/main.py
+++ b/webscraper/main.py
@@ -1,4 +1,3 @@
-
 import json
 #import time // for testing
 # i added htese imports below becasue when i ran it it wasnt finding the folders
@@ -9,16 +8,19 @@
 
 
 def main():
+
+
+
+
     # Set up the scraper for a simple legal-to-scrape website
     scraper = CheaperScraper("https://books.toscrape.com",
-                             user_agent="CheaperBot/0.1",
-                             delay=2.0)
-
+                            user_agent="CheaperBot/0.1",
+                            delay=2.0)
     # Define which pages you want to scrape (you can use "/" for homepage)
     pages = ["/"]
 
     # Use the scraper to fetch and parse the pages
     results = scraper.scrape(pages)
 
     # Show the output in the terminal
     for path, items in results.items():
diff --git a/webscraper/src/__pycache__/CheaperScraper.cpython-39.pyc b/webscraper/src/__pycache__/CheaperScraper.cpython-39.pyc
deleted file mode 100644
index f60b091..0000000
Binary files a/webscraper/src/__pycache__/CheaperScraper.cpython-39.pyc and /dev/null differ
diff --git a/webscraper/src/__pycache__/Cheaper_Scraper.cpython-311.pyc b/webscraper/src/__pycache__/Cheaper_Scraper.cpython-311.pyc
deleted file mode 100644
index cc3aa5e..0000000
Binary files a/webscraper/src/__pycache__/Cheaper_Scraper.cpython-311.pyc and /dev/null differ
diff --git a/webscraper/src/__pycache__/Cheaper_Scraper.cpython-39.pyc b/webscraper/src/__pycache__/Cheaper_Scraper.cpython-39.pyc
deleted file mode 100644
index 036324a..0000000
Binary files a/webscraper/src/__pycache__/Cheaper_Scraper.cpython-39.pyc and /dev/null differ
diff --git a/webscraper/src/__pycache__/__init__.cpython-311.pyc b/webscraper/src/__pycache__/__init__.cpython-311.pyc
deleted file mode 100644
index c420941..0000000
Binary files a/webscraper/src/__pycache__/__init__.cpython-311.pyc and /dev/null differ
diff --git a/webscraper/src/__pycache__/robot_check.cpython-311.pyc b/webscraper/src/__pycache__/robot_check.cpython-311.pyc
deleted file mode 100644
index e7f181b..0000000
Binary files a/webscraper/src/__pycache__/robot_check.cpython-311.pyc and /dev/null differ
diff --git a/webscraper/src/__pycache__/robot_check.cpython-39.pyc b/webscraper/src/__pycache__/robot_check.cpython-39.pyc
deleted file mode 100644
index d1557b9..0000000
Binary files a/webscraper/src/__pycache__/robot_check.cpython-39.pyc and /dev/null differ
diff --git a/webscraper/src/__pycache__/test_cheaper_scraper.cpython-311.pyc b/webscraper/src/__pycache__/test_cheaper_scraper.cpython-311.pyc
deleted file mode 100644
index ef900b2..0000000
Binary files a/webscraper/src/__pycache__/test_cheaper_scraper.cpython-311.pyc and /dev/null differ