From 96855cce1104d61a27b649da54343e2b67eba770 Mon Sep 17 00:00:00 2001 From: James Cacapit Date: Thu, 1 May 2025 08:38:13 -0700 Subject: [PATCH 1/5] check if git ignore works --- .../__pycache__/base_scraper.cpython-39.pyc | Bin 1964 -> 1925 bytes .../api/__pycache__/interface.cpython-39.pyc | Bin 638 -> 599 bytes webscraper/main.py | 2 -- .../src/__pycache__/__init__.cpython-39.pyc | Bin 158 -> 140 bytes .../__pycache__/fetch_utils.cpython-39.pyc | Bin 782 -> 743 bytes .../tests/__pycache__/__init__.cpython-39.pyc | Bin 164 -> 146 bytes .../test_fetch_and_cache.cpython-39.pyc | Bin 2601 -> 2583 bytes 7 files changed, 2 deletions(-) diff --git a/webscraper/ABC/__pycache__/base_scraper.cpython-39.pyc b/webscraper/ABC/__pycache__/base_scraper.cpython-39.pyc index dd1704d439473037f51337091f46e993989391f5..93f68b7ea1f4d0af94dd192898297982d34cad29 100644 GIT binary patch delta 28 icmZ3(-^$OO$ji&c00gfN@NeW^z{I4WxA_Xw4Hf`m%?GRi delta 67 zcmZqWU&GIx$ji&c00j0+xHfVxVA6DVwu%WYPAw{qaZAk2%&ClV$xklLP0cGQj&aW{ V@hDA-NzO=3EJ!Wdyq4((3jmM{7GnSa diff --git a/webscraper/api/__pycache__/interface.cpython-39.pyc b/webscraper/api/__pycache__/interface.cpython-39.pyc index 4597f1eeb5a3455a46e54e675609dd0afa97dbcf..ecfd58a6382569e0c3931fc3549532fe753a8aa1 100644 GIT binary patch delta 27 hcmeyza-D@Mk(ZZ?0SI0l*vMti$Rw+`xrmX65ddY<29f{( delta 66 zcmcc4@{ffpk(ZZ?0SE$iZsf9O)O2ySiU}=FEh>(2OU%v8sf=;SPcF?(%_}L6anCIA UC{2n<&PYuxNG;mz&B((D0H912bN~PV diff --git a/webscraper/main.py b/webscraper/main.py index 0ae9fe5..81fa95f 100644 --- a/webscraper/main.py +++ b/webscraper/main.py @@ -1,6 +1,4 @@ import json -#import time // for testing -# i added htese imports below becasue when i ran it it wasnt finding the folders import sys import os sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) diff --git a/webscraper/src/__pycache__/__init__.cpython-39.pyc b/webscraper/src/__pycache__/__init__.cpython-39.pyc index d43fde4adb6f131a6895b08baaa604a0dfb64288..eb72cb2fb66814e6b45ed4ee06016c6f598d6516 100644 GIT binary patch delta 46 zcmbQo*u%)3$ji&c00gfN@K5Bn5jRY+igC_JO)N+)iYYEhP0T6D2+k}?omij;03Y=Z AT>t<8 delta 64 zcmeBSoX5zW$ji&c00j0+xF&MjXgWJv#e^2878S?1CFW-4RK~dECzs}?=9Lu3xM!Al SlqSU_XQU<;q!vw#(*poJ*c6oj diff --git a/webscraper/src/__pycache__/fetch_utils.cpython-39.pyc b/webscraper/src/__pycache__/fetch_utils.cpython-39.pyc index 8e69c484bfea4d5ab61881109c7886ea75770c10..c187d480318297de82ea1ef34d6ec16f883aeb71 100644 GIT binary patch delta 128 zcmeBUd(O(8$ji&c00fz*gf?ej&V!O&CIEcami0E%}vcK zDUNZ^Eb%B!ib>8$O)N+)nrzAB#w!BUqRCOj1Eh-hC-*R^iGoDHCLs~RlaDhg004xd BF!BHZ diff --git a/webscraper/src/tests/__pycache__/__init__.cpython-39.pyc b/webscraper/src/tests/__pycache__/__init__.cpython-39.pyc index ba417b4f25823975f9dd8e2fe78a99186cb9ae76..be884a59741c5ab5c5cf3abba483d4f081daa77f 100644 GIT binary patch delta 46 zcmZ3&IEj%vk(ZZ?0SI0l;Gf8CBW|8z72}+dnplup6jNN1nwV3P5u90)I(2OU%v8sf=;SPcF?(%_}L6anCIA SC{2n<&PYuxNG+NeX955~+7zt- diff --git a/webscraper/src/tests/__pycache__/test_fetch_and_cache.cpython-39.pyc b/webscraper/src/tests/__pycache__/test_fetch_and_cache.cpython-39.pyc index c22e0aa30d2f9a06a0926943d85cf34990cd4768..c810c00763f92a89caca5925a827303ec0b66e54 100644 GIT binary patch delta 215 zcmZ1}GF^l_k(ZZ?0SGcr32o$_!6@#WVin_@k(yYLS`<@UlA4%Pk`bI)k~;Ye<4?w0 zn~yOCGcihSmSVlk#3(WO9lLR`Bv5Y=D~R9&61P~3@=NnliljjT%pgJvM96>$Ss=n%K?cf){K(e9K#}cINPX50m!~JxrAens3?$CBn~1JL4*p3(46eb=_GKAqcktG eq$IVtqzGjBt;y3lRTytg-ot6Ys5SX5rwsruwKd!T delta 217 zcmbO(vQmUQk(ZZ?0SF}a^Kayy!KfMPY!wq)oLW>I 
zBBVit43N-dFX8~PWr0K#Yeq?Kj$x4;oNZJj4`NSF;@Bf90#YUhA{0P`GKf&0Y|QB- k5XDiNmswJhT3k{j0Thjz+{&rK7&UnnrvanJ Date: Thu, 5 Jun 2025 08:04:47 -0700 Subject: [PATCH 2/5] refactorin --- .gitignore | 2 +- cheaper_main/ABC/RetailerApi.py | 15 +++++++++ .../ABC/__init__.py | 0 .../ABC/base_scraper.py | 0 .../api => cheaper_main/ABC}/interface.py | 0 .../Scraper}/Cheaper_Scraper.py | 10 +++--- .../src => cheaper_main/Scraper}/__init__.py | 0 .../Scraper}/fetch_utils.py | 0 .../Scraper}/robot_check.py | 0 cheaper_main/api/Etsy/EtsyApi.py | 30 ++++++++++++++++++ cheaper_main/api/Etsy/__init__.py | 0 .../api/Etsy/generate_code_challenge.py | 14 ++++++++ cheaper_main/api/best_buy_api/__init__.py | 0 cheaper_main/api/best_buy_api/best_buy_api.py | 16 ++++++++++ .../api/ebay_api}/EbayAPI.py | 3 +- cheaper_main/api/ebay_api/__init__.py | 0 {webscraper => cheaper_main}/api/routes.py | 0 .../api/tests/test_ebay_api.py | 2 +- .../api/tests/test_routes.py | 0 {webscraper => cheaper_main}/main.py | 15 +++++---- cheaper_main/package.json | 0 .../src/tests/__init__.py | 0 .../src/tests/test_cheaper_scraper.py | 0 .../src/tests/test_fetch_and_cache.py | 0 {webscraper/database => database}/init_db.sql | 0 webscraper/.DS_Store | Bin 6148 -> 0 bytes .../__pycache__/base_scraper.cpython-39.pyc | Bin 1925 -> 0 bytes .../api/__pycache__/interface.cpython-39.pyc | Bin 599 -> 0 bytes webscraper/output.json | 24 -------------- .../src/__pycache__/__init__.cpython-39.pyc | Bin 140 -> 0 bytes .../__pycache__/fetch_utils.cpython-311.pyc | Bin 1254 -> 0 bytes .../__pycache__/fetch_utils.cpython-39.pyc | Bin 743 -> 0 bytes .../src/__pycache__/main.cpython-39.pyc | Bin 862 -> 0 bytes .../tests/__pycache__/__init__.cpython-39.pyc | Bin 146 -> 0 bytes .../test_fetch_and_cache.cpython-39.pyc | Bin 2583 -> 0 bytes 35 files changed, 93 insertions(+), 38 deletions(-) create mode 100644 cheaper_main/ABC/RetailerApi.py rename webscraper/package.json => cheaper_main/ABC/__init__.py (100%) rename {webscraper => cheaper_main}/ABC/base_scraper.py (100%) rename {webscraper/api => cheaper_main/ABC}/interface.py (100%) rename {webscraper/src => cheaper_main/Scraper}/Cheaper_Scraper.py (91%) rename {webscraper/src => cheaper_main/Scraper}/__init__.py (100%) rename {webscraper/src => cheaper_main/Scraper}/fetch_utils.py (100%) rename {webscraper/src => cheaper_main/Scraper}/robot_check.py (100%) create mode 100644 cheaper_main/api/Etsy/EtsyApi.py create mode 100644 cheaper_main/api/Etsy/__init__.py create mode 100644 cheaper_main/api/Etsy/generate_code_challenge.py create mode 100644 cheaper_main/api/best_buy_api/__init__.py create mode 100644 cheaper_main/api/best_buy_api/best_buy_api.py rename {webscraper/api => cheaper_main/api/ebay_api}/EbayAPI.py (96%) create mode 100644 cheaper_main/api/ebay_api/__init__.py rename {webscraper => cheaper_main}/api/routes.py (100%) rename {webscraper => cheaper_main}/api/tests/test_ebay_api.py (95%) rename {webscraper => cheaper_main}/api/tests/test_routes.py (100%) rename {webscraper => cheaper_main}/main.py (80%) create mode 100644 cheaper_main/package.json rename {webscraper => cheaper_main}/src/tests/__init__.py (100%) rename {webscraper => cheaper_main}/src/tests/test_cheaper_scraper.py (100%) rename {webscraper => cheaper_main}/src/tests/test_fetch_and_cache.py (100%) rename {webscraper/database => database}/init_db.sql (100%) delete mode 100644 webscraper/.DS_Store delete mode 100644 webscraper/ABC/__pycache__/base_scraper.cpython-39.pyc delete mode 100644 
webscraper/api/__pycache__/interface.cpython-39.pyc delete mode 100644 webscraper/output.json delete mode 100644 webscraper/src/__pycache__/__init__.cpython-39.pyc delete mode 100644 webscraper/src/__pycache__/fetch_utils.cpython-311.pyc delete mode 100644 webscraper/src/__pycache__/fetch_utils.cpython-39.pyc delete mode 100644 webscraper/src/__pycache__/main.cpython-39.pyc delete mode 100644 webscraper/src/tests/__pycache__/__init__.cpython-39.pyc delete mode 100644 webscraper/src/tests/__pycache__/test_fetch_and_cache.cpython-39.pyc diff --git a/.gitignore b/.gitignore index 139ac7b..75b317f 100644 --- a/.gitignore +++ b/.gitignore @@ -24,7 +24,7 @@ venv/ .vscode/ # === macOS system files === -.DS_Store +*.DS_Store # === Pytest and test cache === htmlcov/ diff --git a/cheaper_main/ABC/RetailerApi.py b/cheaper_main/ABC/RetailerApi.py new file mode 100644 index 0000000..a85cc57 --- /dev/null +++ b/cheaper_main/ABC/RetailerApi.py @@ -0,0 +1,15 @@ +from abc import ABC,abstractmethod + +class RetailerApi(ABC): + + @abstractmethod + def retrieve_access_token() -> str: + """ retrieves the user access token for sandbox environment it's a long line + of text, numbers, symbols + """ + pass + + @abstractmethod + def retrieve_response(httprequest:str,query:str) -> dict: + """ retrieves a json of large data with category ids, names, parentcategorynodes """ + pass \ No newline at end of file diff --git a/webscraper/package.json b/cheaper_main/ABC/__init__.py similarity index 100% rename from webscraper/package.json rename to cheaper_main/ABC/__init__.py diff --git a/webscraper/ABC/base_scraper.py b/cheaper_main/ABC/base_scraper.py similarity index 100% rename from webscraper/ABC/base_scraper.py rename to cheaper_main/ABC/base_scraper.py diff --git a/webscraper/api/interface.py b/cheaper_main/ABC/interface.py similarity index 100% rename from webscraper/api/interface.py rename to cheaper_main/ABC/interface.py diff --git a/webscraper/src/Cheaper_Scraper.py b/cheaper_main/Scraper/Cheaper_Scraper.py similarity index 91% rename from webscraper/src/Cheaper_Scraper.py rename to cheaper_main/Scraper/Cheaper_Scraper.py index 5aa7bc9..22bb722 100644 --- a/webscraper/src/Cheaper_Scraper.py +++ b/cheaper_main/Scraper/Cheaper_Scraper.py @@ -4,17 +4,17 @@ from urllib.parse import urlparse import logging from typing import Dict, List, Optional -from webscraper.ABC.base_scraper import BaseScraper -from webscraper.src.robot_check import RoboCheck -from webscraper.api.interface import ScraperAPIInterface -from webscraper.src.fetch_utils import cached_get +from cheaper_main.ABC.base_scraper import BaseScraper +from cheaper_main.Scraper.robot_check import RoboCheck +from ..ABC import base_scraper +from cheaper_main.Scraper.fetch_utils import cached_get from functools import lru_cache -class CheaperScraper(BaseScraper, ScraperAPIInterface): +class CheaperScraper(BaseScraper, base_scraper): def __init__(self, base_url: str = "", user_agent: str = "CheaperBot/0.1", delay: float = 2.0) -> None: """Initialize the scraper with base parameters. 
diff --git a/webscraper/src/__init__.py b/cheaper_main/Scraper/__init__.py similarity index 100% rename from webscraper/src/__init__.py rename to cheaper_main/Scraper/__init__.py diff --git a/webscraper/src/fetch_utils.py b/cheaper_main/Scraper/fetch_utils.py similarity index 100% rename from webscraper/src/fetch_utils.py rename to cheaper_main/Scraper/fetch_utils.py diff --git a/webscraper/src/robot_check.py b/cheaper_main/Scraper/robot_check.py similarity index 100% rename from webscraper/src/robot_check.py rename to cheaper_main/Scraper/robot_check.py diff --git a/cheaper_main/api/Etsy/EtsyApi.py b/cheaper_main/api/Etsy/EtsyApi.py new file mode 100644 index 0000000..247fd13 --- /dev/null +++ b/cheaper_main/api/Etsy/EtsyApi.py @@ -0,0 +1,30 @@ +from RetailerApi import RetailerApi +import requests +import os +from generate_code_challenge import generate_code_challenge + + +keystring = os.getenv("etsykeystring") +sharedsecret = os.getenv("etsysharedsecret") + +class Etsy(RetailerApi): + def retrieve_access_token(): + try: + response = requests.post("https://api.etsy.com/v3/public/oauth/token", + headers={"Content-Type': 'application/x-www-form-urlencoded"}, + data = {"grant_type":"client_credentials", + "scope":"listings_r", + "client_id":f"{keystring}", + "code_challenge":f"{generate_code_challenge}", + "code_challenge_method":"S256" + } + + ) + if(response.status_code == 200): + data = response.json() + except Exception as e: + raise e + + def retrieve_response(): return + # TODO: when application gets approved + # use the auth token you get from etsy \ No newline at end of file diff --git a/cheaper_main/api/Etsy/__init__.py b/cheaper_main/api/Etsy/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cheaper_main/api/Etsy/generate_code_challenge.py b/cheaper_main/api/Etsy/generate_code_challenge.py new file mode 100644 index 0000000..0904335 --- /dev/null +++ b/cheaper_main/api/Etsy/generate_code_challenge.py @@ -0,0 +1,14 @@ +import secrets +import hashlib +import base64 + + +class generate_code_challenge: + + def generate_code_challenge(): + code_client = secrets.token_urlsafe(64) + code_challenge = base64.urlsafe_b64encode(hashlib.sha256(code_client.encode()) + .digest()).rstrip(b'=').decode() + return code_challenge + + \ No newline at end of file diff --git a/cheaper_main/api/best_buy_api/__init__.py b/cheaper_main/api/best_buy_api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cheaper_main/api/best_buy_api/best_buy_api.py b/cheaper_main/api/best_buy_api/best_buy_api.py new file mode 100644 index 0000000..5d473aa --- /dev/null +++ b/cheaper_main/api/best_buy_api/best_buy_api.py @@ -0,0 +1,16 @@ +import os +from cheaper_main.ABC import RetailerApi + +best_buy = os.getenv("bestbuysecret") + + +class best_buy_api(RetailerApi): + + def retrieve_access_token(): + + return + + + def retrieve_response(): + + return diff --git a/webscraper/api/EbayAPI.py b/cheaper_main/api/ebay_api/EbayAPI.py similarity index 96% rename from webscraper/api/EbayAPI.py rename to cheaper_main/api/ebay_api/EbayAPI.py index 15f78e3..d016abc 100644 --- a/webscraper/api/EbayAPI.py +++ b/cheaper_main/api/ebay_api/EbayAPI.py @@ -2,11 +2,12 @@ from requests.auth import HTTPBasicAuth from dotenv import load_dotenv import os +from ...ABC.RetailerApi import RetailerApi load_dotenv() #initialize -class EbayAPI: +class EbayAPI(RetailerApi): client_secret_key = os.getenv("clientsecret") client_id_key = os.getenv("clientid") diff --git a/cheaper_main/api/ebay_api/__init__.py 
b/cheaper_main/api/ebay_api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/webscraper/api/routes.py b/cheaper_main/api/routes.py similarity index 100% rename from webscraper/api/routes.py rename to cheaper_main/api/routes.py diff --git a/webscraper/api/tests/test_ebay_api.py b/cheaper_main/api/tests/test_ebay_api.py similarity index 95% rename from webscraper/api/tests/test_ebay_api.py rename to cheaper_main/api/tests/test_ebay_api.py index 7651c13..d730c64 100644 --- a/webscraper/api/tests/test_ebay_api.py +++ b/cheaper_main/api/tests/test_ebay_api.py @@ -1,7 +1,7 @@ import unittest from unittest.mock import patch,Mock import requests -from webscraper.api.EbayAPI import EbayAPI +from ebay_api import EbayAPI class EbayTestApi(unittest.TestCase): diff --git a/webscraper/api/tests/test_routes.py b/cheaper_main/api/tests/test_routes.py similarity index 100% rename from webscraper/api/tests/test_routes.py rename to cheaper_main/api/tests/test_routes.py diff --git a/webscraper/main.py b/cheaper_main/main.py similarity index 80% rename from webscraper/main.py rename to cheaper_main/main.py index 81fa95f..e558e26 100644 --- a/webscraper/main.py +++ b/cheaper_main/main.py @@ -2,14 +2,13 @@ import sys import os sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) -from src.Cheaper_Scraper import CheaperScraper +from cheaper_main.Scraper.Cheaper_Scraper import CheaperScraper +import cheaper_main.api.ebay_api def main(): - - - - + EbayAPI = cheaper_main.api.EbayAPI # instantiate because class + # Set up the scraper for a simple legal-to-scrape website scraper = CheaperScraper("https://books.toscrape.com", user_agent="CheaperBot/0.1", @@ -18,7 +17,7 @@ def main(): pages = ["/"] # Use the scraper to fetch and parse the pages - results = CheaperScraper.scraper.scrape(pages) + results = scraper.scrape(pages) # Show the output in the terminal for path, items in results.items(): @@ -30,6 +29,10 @@ def main(): with open("output.json", "w") as f: json.dump(results, f, indent=2) + + + + if __name__ == "__main__": main() diff --git a/cheaper_main/package.json b/cheaper_main/package.json new file mode 100644 index 0000000..e69de29 diff --git a/webscraper/src/tests/__init__.py b/cheaper_main/src/tests/__init__.py similarity index 100% rename from webscraper/src/tests/__init__.py rename to cheaper_main/src/tests/__init__.py diff --git a/webscraper/src/tests/test_cheaper_scraper.py b/cheaper_main/src/tests/test_cheaper_scraper.py similarity index 100% rename from webscraper/src/tests/test_cheaper_scraper.py rename to cheaper_main/src/tests/test_cheaper_scraper.py diff --git a/webscraper/src/tests/test_fetch_and_cache.py b/cheaper_main/src/tests/test_fetch_and_cache.py similarity index 100% rename from webscraper/src/tests/test_fetch_and_cache.py rename to cheaper_main/src/tests/test_fetch_and_cache.py diff --git a/webscraper/database/init_db.sql b/database/init_db.sql similarity index 100% rename from webscraper/database/init_db.sql rename to database/init_db.sql diff --git a/webscraper/.DS_Store b/webscraper/.DS_Store deleted file mode 100644 index 33e2dd0515d5d2244b11ce015677611c1d2b47a9..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKOG*Pl5UtK+F``J8;;KD?|yxEIM3d)jz$iEj`!$~X2p8b zT4#aXRe;}JLIWDo*=o^ze}_+XW?kg@bX80dNl%{dKOWy+2hHsKm+a=cxvNJR_$IWW zWz2VcRcD%f%jINS@rAl|QS-I4G2dADZs~?*G2aFMP7?~5Cg1k*^yT5}e44HK+S!=z zo%zg60aL&f*mVWav)R&pK^sj0Q@|9c6yWcJhcjl1rC|7UUD(FnDbqNYrJBn 
zSPCKob5aVFQm0o8C*_EHU6(1Af>KUSFCR{so!(GfV8{MGEGL%<+Gq-x0)AtkMbdoq`kFuIPSF`K8CaLx>B$z!6dX|tmRgG1P_L|=Ls-V TECmsP*&l%*gAJy@pDOSP5P4jX diff --git a/webscraper/ABC/__pycache__/base_scraper.cpython-39.pyc b/webscraper/ABC/__pycache__/base_scraper.cpython-39.pyc deleted file mode 100644 index 93f68b7ea1f4d0af94dd192898297982d34cad29..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1925 zcma)7&1w`u5T5@`cC%4q6g&uhU67n5h-eglAg+?=We5zt(>>XS`Lnx+b#pb3;R^^p zgl8XNuAcM-1W#7=>`o>UaeJAftGa%=zN)UJqtU>E_4DhsS?ycacVzm@1LhHY>OK%= zF*~zzJF;^pa%|+C%+0;X%l*jDgD9}A=N5CBcWyC{+aKL%gZanSIQRk9*4Sy>#|KZE zZ8}w2(nRN6&q_9S8}Cpk-FQz$qMPt_rA1lLEJ2(83V?Y8pZWlVTanGI$l)&c;O8@& zIiEZe+xuuofst=;?~cX%PYw&%#yQGEBi}Ug5agR|VB`ak8z~=xH?vC}upNTV78@Cz zEoMV5+s*KRDt??uT5&lUdrde6KCWff3~I$?Oiy{C8=vuvE}B4cUCSatAXf|bHhfRu zQ^**{&e^Y*h0ua#;*1kL<3zPF$Xw_d!62lfk^*X6s=WbOu5R;Ko~r$h!_(Ep_Q_jt zXFR++IwZPWy1t}*arb@l4)P!-4+5nU#VOhd26>5e(ZAaz4<=)XC2~$gK@?BQf+;ke zQ=!R}>pABIk^Ftll~$eYVu~C!*#M(UDJ-8^=-bw*F(W|JQ$sy<@MiaGR;r^=9e0#=qG9!?X1`tRp z%bZZ6DxQc`Bv2PUTdN|d*j57zfgHki481qyvmzM;P1_nw~$=4yV+sS%MBo?tMEaD zj$_{*z&C6^b2=G^jZ3GA#2L}X4fFz$X@eO5CNTQCDo7JPXtQ{TBgRbr>-Mhm2dZnq A761SM diff --git a/webscraper/api/__pycache__/interface.cpython-39.pyc b/webscraper/api/__pycache__/interface.cpython-39.pyc deleted file mode 100644 index ecfd58a6382569e0c3931fc3549532fe753a8aa1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 599 zcmZuu%}T>S5Z<4pjm2uglLsM(UILOQ5V7`9dMQG0p|EVTTN_ByxVus8wLXR~;9K|{ zdldwI1W)d6ELQ2jY-VON-}lWd?RGoB*yro<^4)yjY_e;fNv@2`zBw3RFb4%pV1W_@ z!3Z$K@DpH|!wpU_^QIv3Kg^#XLK+XQhx&-lr4lq%1y{=zOYPmR{lVuTfea*wWA1VP z6z~><&&Y9j8FPi;$lHlZutC)s-@H|!Bs8F6z$O17#bfMmzI)2 zqZF;|CDY`9k;xMYOla*I7J!;Pa+I z%xIO(vaPhZdepsJuE-A@VN_AuE}cZY?(dl={@*Z(0v!-i(t;DBJA@P~R_8W8B;>B9 zd2=JM0oIj?XfIX!ank^+9o=%HMB{>Mba2rq LxwH#*BlXZX)W?mM diff --git a/webscraper/output.json b/webscraper/output.json deleted file mode 100644 index e64146a..0000000 --- a/webscraper/output.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "/": [ - "A Light in the Attic", - "Tipping the Velvet", - "Soumission", - "Sharp Objects", - "Sapiens: A Brief History of Humankind", - "The Requiem Red", - "The Dirty Little Secrets of Getting Your Dream Job", - "The Coming Woman: A Novel Based on the Life of the Infamous Feminist, Victoria Woodhull", - "The Boys in the Boat: Nine Americans and Their Epic Quest for Gold at the 1936 Berlin Olympics", - "The Black Maria", - "Starving Hearts (Triangular Trade Trilogy, #1)", - "Shakespeare's Sonnets", - "Set Me Free", - "Scott Pilgrim's Precious Little Life (Scott Pilgrim #1)", - "Rip it Up and Start Again", - "Our Band Could Be Your Life: Scenes from the American Indie Underground, 1981-1991", - "Olio", - "Mesaerion: The Best Science Fiction Stories 1800-1849", - "Libertarianism for Beginners", - "It's Only the Himalayas" - ] -} \ No newline at end of file diff --git a/webscraper/src/__pycache__/__init__.cpython-39.pyc b/webscraper/src/__pycache__/__init__.cpython-39.pyc deleted file mode 100644 index eb72cb2fb66814e6b45ed4ee06016c6f598d6516..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 140 zcmYe~<>g`kf>#IlGpvF1V-N=!FakLaKwQiMBvKfH88jLFRx%WUgb~Cq!xXC+=Zw_E zg4Cjz;*!+FoRW;-%#zfY^30i_MyjK{U5A~AY#F%l7nw5zJ#3mW^KthZ)V=-%-i3adHcuw zd=ije<`!xz2Ef1Cm=ZfvP6R5?!2yT3P&}2SNYFN1qe+XDXdiXWrd6~E7{IAx9$0$> zU?0~hIWZn{;f>I*ka`jU&1UF9tm#spWsXqBw#;y2&Me;?ncXFf3<*f`TR$tG;k`>;uJ z@w=T5e){^}y>GMM@kc(F>OuBPuFAD~tD3C{ubJKC4{v5W$?x%6KQ8ENJ=QIgGs1X{ 
z+YT2pjH`N+d%o(V?g-(DY-M`BW8BQ7T5W6me7R@-JjweWID5Kdo9UB!lk~jtfn`Gyap}v z4X^2v9EKin2q)JiFN7zfgs=M-&_I8+zy3Q#fV4<5< ze^?HX_2HG72$I(a5*f~+xjr)219N?xT7FRsE`8MhYGTB!#iR7{ueWyT;D^5-2I;~u zT^OYcPuZ9)ykuAZVONK2b;MQ!wmME_c6tGO@Ac7KdNg-!0*1AC&W>1WXFhoMvjIh> z<)c$MWNRa~7O=H(D*aSV$7`gCg$Yo~41#^}X(0)R$yDJsJzTm{xIqtZSg7ZiNMTeg zA`250zg1SA=SrcQHjJt5Hr~0=of%T8L{$HuaDq07%gElaTTRdLU4C1vps9Z(xr1y% z2_a)h^`kL{OZ{k!VI{aY$FLk+oRb(BbN!`fYXQj&NoGVc6C*(upM5?76uTRd82tEG W`QSmMp0ocb7?-E}&)wpE1oS`8dpyGc diff --git a/webscraper/src/__pycache__/fetch_utils.cpython-39.pyc b/webscraper/src/__pycache__/fetch_utils.cpython-39.pyc deleted file mode 100644 index c187d480318297de82ea1ef34d6ec16f883aeb71..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 743 zcmYk3&2AGh5XU|1{YunA)kA?3E5v0FR6TI25CWkh4yYhnR1t!rW$h$e?H6OONLCw! zRIYsjKx&V?3Xk9`r@R6u#tEQ$wSRlY_TRs;lg-UOqn7>Lp5AvD`(=Zx&7>m>ZnB|rC){7%>#s(s;pas};KkM`Ieji|=pu)Xv4+41qq=#~7? zNTuIJPo++$SviTuSQXJn`F=lY`>&~_dyj2NON$;mCYZ`ZNK_`!Ss|-Nx7QC4D~!hf z_P5S{6o?uuX|pPXwb z^jZq>v42U|o^t6A@S${8PD*CkZ)awA9^XiDr{f_I_QxB)U=YS3=b5$+OmvLs8q#Mng9$wul09_Wl)S14$SHn-1-{3==t z1YUfM`pzv>LPs;=`+Zz@55^)cgghQebBxU0y*Qw_un%CX?*JMyaKb5P*L04!b&BV> zLN~aA`g2l|8?r=)=*!EH%;}tI%I%8&K-~FB7tmkD;I2Ej!0%Spf?#y9f3c+*-{9VY zeA}Gc+B&lFwuwu9Hhxw>X zp}Y#;C;CG<3`grv8D5IvUH(wXQJBU_<`>hN8KniO)H47?7$%tfwJn=^w1<1x#a-IQ z@-f`iZWNi=?_A3n0a7Cmt%(G0@ACtaf-Jvp5wd{3Mx(C<>lH w2makZ7Oh7sn`!;8V*;Y6;rIX^HC?<;b6!rw8#7+j1#qbgeQx14b)UNUA1pTAegFUf diff --git a/webscraper/src/tests/__pycache__/__init__.cpython-39.pyc b/webscraper/src/tests/__pycache__/__init__.cpython-39.pyc deleted file mode 100644 index be884a59741c5ab5c5cf3abba483d4f081daa77f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 146 zcmYe~<>g`kf>#IlGgN`}V-N=!FakLaKwQiMBvKfH88jLFRx%WUgb~Cq^AxKX=Zw_E zg4Cjz;*!+FoRW;-%#zfY^3+%OniK1US>&ryk0@&Ee@O9{FKt1 MR6CG?pMjVG00NgImjD0& diff --git a/webscraper/src/tests/__pycache__/test_fetch_and_cache.cpython-39.pyc b/webscraper/src/tests/__pycache__/test_fetch_and_cache.cpython-39.pyc deleted file mode 100644 index c810c00763f92a89caca5925a827303ec0b66e54..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2583 zcmai0-EJF26rR~1d*e7!NCi!QC`;7~j2eejfKX^rrKqinkS|C8VMSVPJ>z=Q^{$y2 zw_v$f={*mCknB5p!-Mb$d&^Z{p;vro)~(|tG+ynQpPf19eBYco$MMQajp3L4v)=oA ziLrlaaQx?Ca2Ky`K?o*z%Gzbi+ZnYK&-%n)#B>D)MnB8eVFX+ zlKBpYfj2+uljU8!`U4096i0l_B5s_CYf&-ibXud!J!1xXXKU+Gos_&PFmv!g8yTmjx0e`s@Jv3M={V4; zb6|i&TN!l;Kon;pvTB=y-wIz#^TS{j`ZS#=<263I{x0ZEyJ3L<^4rI23Al;Qm>seS z7ktW}gNBJS=Ej{m9P<-TU^MZkjK+d!aa*tX)@%QE&-BwqMUkSyx1JVjvDQ+Vhx%cr zO`LUP;cBCbU^blZPLs+U1c7%S|aLfQvQ4A3crJM3nf~F{>*OjMwpuHs6W% z z(cu)e67#+qN<|3N1riho$FN?s-X>Xzb`YCh3Hc(H&5pP3j6Odf@M#qbxXCSw7JJ}b zbPSuYDWk(a4*}wj-LW$Zmde;WtO`4tg^H@f+AF$o zjiB&M(wB3N${SPWhbYOq`5V%{lUf(+C4mo=jEz+Hei_Co0wyuK;g`|lBgB*`d{Vky zHn-GeSUSn_hGn_Dn6lfI9g{qj8Ejm|Vois`ctEk`%;o5d^GQEL;{xl1n&RBD{v|qK ziy3hJ`v%ws>qlVyCtNBI&ymYnwzkdmC!Zm!((^N3%c9e2uOiZ-I06@GG*oHBE$Ssb z-^YoTx~Tmar$c#9sa)Btq^dA?l85&!4_Tf?l+~;rta=k?eAGN2^6||JSjiqmnZ^1S zI%Q4;PCntYNJDN#Y;Z)ESI$zb3HQi(?qan!cK<+~`O_s<&e-Lh+T~+cW$e#)*=R&O zis}?>n#!aK%N48&#+9QoouPNQL=}WqSaz3NqZ_q5ckYC{R4Bszq@QGuL*(3W&%{QD zL`sP7Wh7JT9If9^6eexX3k@p6PB;oeEjxK8^ytE#?JR!2xiG1F`7jmXBN=vMR00)> zp@gE4A_~?=_0ksvxY0XmgKXZ|?@1-Y`OescC^B$k+isH~?na|l_yd(>MqR?5`AsW) zl@BwauF$MY!rl{#fAt|mqiU~|x`uw+r>wlKK6=}~r9$*kdB~lUL3PCj6*(>=+(dY9 z>y=%*iEWxJ;eNSvx7PXE+jbQ_)UL1254y9Jw;D@D5Jg$smr+#IqNtyXVM_ga6j8P) zzo=!HSMQUctg1dHaf3vI#5^I}9#tSknUd0$CJ{J+7x;Kq0?&RSqy0F^q6lmU!z?kx 
zfF-=RHZSE(l=9}R+DEg0L**Za%O@6 Date: Sun, 8 Jun 2025 00:51:44 -0700 Subject: [PATCH 3/5] shell for background service --- cheaper_main/start_service.sh | 40 +++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 cheaper_main/start_service.sh diff --git a/cheaper_main/start_service.sh b/cheaper_main/start_service.sh new file mode 100644 index 0000000..8277d63 --- /dev/null +++ b/cheaper_main/start_service.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +APP_NAME="Cheaper" +APP_FILE="main.py" +LOG_DIR="$HOME/CheaperLogs" + +# Allow user to override PYTHON_PATH by setting it externally +if [ -z "$PYTHON_PATH" ]; then + PYTHON_PATH=$(command -v python3) +fi + +if [ -z "$PYTHON_PATH" ]; then + PYTHON_PATH=$(command -v python) +fi + +# Final fallback for Windows users (optional) +if [ -z "$PYTHON_PATH" ] && [ -f "/c/Users/$USERNAME/AppData/Local/Programs/Python/Python39/python.exe" ]; then + PYTHON_PATH="/c/Users/$USERNAME/AppData/Local/Programs/Python/Python39/python.exe" +fi + +# Validate Python path +if ! "$PYTHON_PATH" --version > /dev/null 2>&1; then + echo "❌ Python not found. Please install it or set PYTHON_PATH manually." + exit 1 +fi + +echo "✅ Using Python at: $PYTHON_PATH" + +# Create log directory +mkdir -p "$LOG_DIR" + +# Start with PM2 +pm2 start "$APP_FILE" \ + --name "$APP_NAME" \ + --interpreter="$PYTHON_PATH" \ + --output "$LOG_DIR/out.log" \ + --error "$LOG_DIR/err.log" \ + --watch + +pm2 save From e9cf69751167ce13c62da3d26e29fd2aa20f5da5 Mon Sep 17 00:00:00 2001 From: James Cacapit Date: Fri, 13 Jun 2025 17:33:20 -0700 Subject: [PATCH 4/5] refactoring, editing for best practice, and retrieving ebay item --- cheaper_main/ABC/RetailerApi.py | 4 +-- cheaper_main/Scraper/Cheaper_Scraper.py | 4 +-- cheaper_main/api/__init__.py | 0 cheaper_main/api/ebay_api/EbayAPI.py | 27 +++++++------- cheaper_main/api/routes.py | 2 +- cheaper_main/main.py | 47 +++++++++++++++++++++---- webscraper/ABC/Ebay_API.py | 15 -------- 7 files changed, 58 insertions(+), 41 deletions(-) create mode 100644 cheaper_main/api/__init__.py delete mode 100644 webscraper/ABC/Ebay_API.py diff --git a/cheaper_main/ABC/RetailerApi.py b/cheaper_main/ABC/RetailerApi.py index a85cc57..3484f35 100644 --- a/cheaper_main/ABC/RetailerApi.py +++ b/cheaper_main/ABC/RetailerApi.py @@ -3,13 +3,13 @@ class RetailerApi(ABC): @abstractmethod - def retrieve_access_token() -> str: + def retrieve_access_token(self) -> str: """ retrieves the user access token for sandbox environment it's a long line of text, numbers, symbols """ pass @abstractmethod - def retrieve_response(httprequest:str,query:str) -> dict: + def retrieve_response(self,httprequest:str,query:str) -> dict: """ retrieves a json of large data with category ids, names, parentcategorynodes """ pass \ No newline at end of file diff --git a/cheaper_main/Scraper/Cheaper_Scraper.py b/cheaper_main/Scraper/Cheaper_Scraper.py index 3e3cb3a..3ee4bdf 100644 --- a/cheaper_main/Scraper/Cheaper_Scraper.py +++ b/cheaper_main/Scraper/Cheaper_Scraper.py @@ -6,15 +6,13 @@ from typing import Dict, List, Optional from cheaper_main.ABC.base_scraper import BaseScraper from cheaper_main.Scraper.robot_check import RoboCheck -from ..ABC import base_scraper from cheaper_main.Scraper.fetch_utils import cached_get from functools import lru_cache -from webscraper.api.EbayAPI import EbayItem -class CheaperScraper(BaseScraper, base_scraper): +class CheaperScraper(BaseScraper): def __init__(self, base_url: str = "", user_agent: str = "CheaperBot/0.1", delay: float = 2.0) -> None: 
"""Initialize the scraper with base parameters. diff --git a/cheaper_main/api/__init__.py b/cheaper_main/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cheaper_main/api/ebay_api/EbayAPI.py b/cheaper_main/api/ebay_api/EbayAPI.py index ad5e5d9..0d55e16 100644 --- a/cheaper_main/api/ebay_api/EbayAPI.py +++ b/cheaper_main/api/ebay_api/EbayAPI.py @@ -3,7 +3,7 @@ from dotenv import load_dotenv import os import logging -from ...ABC.RetailerApi import RetailerApi +from ABC.RetailerApi import RetailerApi # Load environment variables and configure logging load_dotenv() @@ -21,21 +21,22 @@ def __init__(self, name, price, currency, url, user_id=None): self.currency = currency self.url = url self.user_id = user_id + pass -class EbayAPI(EbayABC): - client_secret_key = os.getenv("clientsecret") - client_id_key = os.getenv("clientid") - get_user_key = HTTPBasicAuth(client_id_key, client_secret_key) +class EbayAPI(RetailerApi): + def __init__(self): + self.client_secret_key = os.getenv("clientsecret") + self.client_id_key = os.getenv("clientid") + self.auth = HTTPBasicAuth(self.client_id_key, self.client_secret_key) - @staticmethod - def search_item(query: str) -> EbayItem: + def search_item(self,query: str) -> EbayItem: """Search for an item on eBay using the query string.""" if not isinstance(query, str) or not query.strip(): logger.warning("Invalid query input.") raise ValueError("Query must be a non-empty string.") logger.info(f"Searching eBay for: {query}") - response_json = EbayAPI.retrieve_ebay_response( + response_json = self.retrieve_response( "https://api.sandbox.ebay.com/buy/browse/v1/item_summary/search", query ) @@ -53,8 +54,7 @@ def search_item(query: str) -> EbayItem: logger.error(f"Item not found or response invalid: {response_json}") raise Exception("Could not parse item from eBay response.") from e - @staticmethod - def retrieve_access_token() -> str: + def retrieve_access_token(self) -> str: """Fetch access token from eBay API.""" logger.info("Requesting eBay access token...") try: @@ -65,7 +65,7 @@ def retrieve_access_token() -> str: "grant_type": "client_credentials", "scope": "https://api.ebay.com/oauth/api_scope" }, - auth=EbayAPI.get_user_key + auth=self.auth ) response.raise_for_status() token = response.json().get("access_token") @@ -78,10 +78,9 @@ def retrieve_access_token() -> str: logger.exception("Failed to retrieve token.") raise - @staticmethod - def retrieve_ebay_response(httprequest: str, query: str) -> dict: + def retrieve_response(self,httprequest: str, query: str) -> dict: """Perform GET request to eBay API.""" - auth = EbayAPI.retrieve_access_token() + auth = self.retrieve_access_token() logger.info(f"Making GET request to eBay API: {httprequest} with query: {query}") try: response = requests.get( diff --git a/cheaper_main/api/routes.py b/cheaper_main/api/routes.py index e74a287..d9ec0a9 100644 --- a/cheaper_main/api/routes.py +++ b/cheaper_main/api/routes.py @@ -3,7 +3,7 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))) from flask import Flask, jsonify, request -from webscraper.src.Cheaper_Scraper import CheaperScraper +from Scraper.Cheaper_Scraper import CheaperScraper app = Flask(__name__) scraper = CheaperScraper(base_url="https://books.toscrape.com") diff --git a/cheaper_main/main.py b/cheaper_main/main.py index 13ff095..03018e1 100644 --- a/cheaper_main/main.py +++ b/cheaper_main/main.py @@ -1,12 +1,18 @@ +from flask import Flask, request , jsonify import json #import time # for testing # i added 
these imports below because when i ran it it wasnt finding the folders import sys import os sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) -from webscraper.src.Cheaper_Scraper import CheaperScraper +from cheaper_main.Scraper.Cheaper_Scraper import CheaperScraper -def main(): +app = Flask(__name__) + +#python main.py will run it in the background git bash +#to stop put pm2 stop Cheaper in git bash +@app.route('/') +def scrape(): # Set up the scraper for a simple legal-to-scrape website scraper = CheaperScraper("https://books.toscrape.com", @@ -26,12 +32,41 @@ def main(): print("-", item) # Save the output to a JSON file - with open("output.json", "w") as f: - json.dump(results, f, indent=2) + #with open("output.json", "w") as f: + #json.dump(results, f, indent=2) + return jsonify(results) + +@app.route('/api/products/search', methods=['GET']) +def ebay_search(): + try: + from api.ebay_api.EbayAPI import EbayAPI + #instantiate object + ebay_api = EbayAPI() + product = request.args.get('product') + #The route will look like this + # http://127.0.0.1:5000/api/products/search?product= + #after product= type any generic item to receive json like ?product=clothes + #put that in the address bar + + print(f"product = {product}") + if not product: + return jsonify({"error": "missing ?product=parameter"}),400 + response = ebay_api.search_item(product) + return jsonify({ + "name": response.name, + "price": response.price, + "currency": response.currency, + "url": response.url + }) + + except Exception as e: + print("failed to import",e) + return jsonify({"error": str(e)}), 500 + -if __name__ == "__main__": - main() +if __name__ == "__main__":# + app.run(debug=True) diff --git a/webscraper/ABC/Ebay_API.py b/webscraper/ABC/Ebay_API.py deleted file mode 100644 index 2be5a07..0000000 --- a/webscraper/ABC/Ebay_API.py +++ /dev/null @@ -1,15 +0,0 @@ -from abc import ABC,abstractmethod - -class EbayApi(ABC): - - @abstractmethod - def retrieve_access_token() -> str: - """ retrieves the user access token for sandbox environment it's a long line - of text, numbers, symbols - """ - pass - - @abstractmethod - def retrieve_ebay_response(httprequest:str,query:str) -> dict: - """ retrieves a json of large data with category ids, names, parentcategorynodes """ - pass \ No newline at end of file From f9e9d0b430547e708577260f66de720581313496 Mon Sep 17 00:00:00 2001 From: James Cacapit Date: Fri, 4 Jul 2025 21:20:40 -0700 Subject: [PATCH 5/5] resolving Abraham Forz's comments --- cheaper_main/api/Etsy/EtsyApi.py | 13 +++++++------ cheaper_main/api/Etsy/generate_code_challenge.py | 5 +++-- cheaper_main/api/best_buy_api/best_buy_api.py | 4 ++-- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/cheaper_main/api/Etsy/EtsyApi.py b/cheaper_main/api/Etsy/EtsyApi.py index 247fd13..b843b0b 100644 --- a/cheaper_main/api/Etsy/EtsyApi.py +++ b/cheaper_main/api/Etsy/EtsyApi.py @@ -1,4 +1,4 @@ -from RetailerApi import RetailerApi +from cheaper_main.ABC.RetailerApi import RetailerApi import requests import os from generate_code_challenge import generate_code_challenge @@ -8,14 +8,16 @@ sharedsecret = os.getenv("etsysharedsecret") class Etsy(RetailerApi): - def retrieve_access_token(): + def retrieve_access_token(self): + # most likely this url will change and I will have a parameter set for it + # otherwise this default url will be used for testing purposes and development try: response = requests.post("https://api.etsy.com/v3/public/oauth/token", headers={"Content-Type': 
'application/x-www-form-urlencoded"}, data = {"grant_type":"client_credentials", "scope":"listings_r", "client_id":f"{keystring}", - "code_challenge":f"{generate_code_challenge}", + "code_challenge":f"{generate_code_challenge.generate_code_challenge()}", "code_challenge_method":"S256" } @@ -25,6 +27,5 @@ def retrieve_access_token(): except Exception as e: raise e - def retrieve_response(): return - # TODO: when application gets approved - # use the auth token you get from etsy \ No newline at end of file + def retrieve_response(self): + raise NotImplementedError \ No newline at end of file diff --git a/cheaper_main/api/Etsy/generate_code_challenge.py b/cheaper_main/api/Etsy/generate_code_challenge.py index 0904335..62f35b0 100644 --- a/cheaper_main/api/Etsy/generate_code_challenge.py +++ b/cheaper_main/api/Etsy/generate_code_challenge.py @@ -4,8 +4,9 @@ class generate_code_challenge: - - def generate_code_challenge(): + # Will most likely be used only for APIs that require it + # If it gets used more than once I will make an Abstract Base Class + def generate_code_challenge() -> str: code_client = secrets.token_urlsafe(64) code_challenge = base64.urlsafe_b64encode(hashlib.sha256(code_client.encode()) .digest()).rstrip(b'=').decode() diff --git a/cheaper_main/api/best_buy_api/best_buy_api.py b/cheaper_main/api/best_buy_api/best_buy_api.py index 5d473aa..16adcfa 100644 --- a/cheaper_main/api/best_buy_api/best_buy_api.py +++ b/cheaper_main/api/best_buy_api/best_buy_api.py @@ -6,11 +6,11 @@ class best_buy_api(RetailerApi): - def retrieve_access_token(): + def retrieve_access_token(self): return - def retrieve_response(): + def retrieve_response(self): return
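
A minimal sketch of how the RetailerApi contract established in this series can be satisfied by a concrete retailer class, mirroring what EbayAPI does in patch 4. Only the two method signatures come from cheaper_main/ABC/RetailerApi.py; the class name FakeRetailerApi, the placeholder token, and the canned response are illustrative assumptions, not part of any patch.

from cheaper_main.ABC.RetailerApi import RetailerApi


class FakeRetailerApi(RetailerApi):
    """Illustrative subclass; assumes the package layout after patch 2 (cheaper_main/ABC/)."""

    def retrieve_access_token(self) -> str:
        # A real retailer would POST client credentials to its OAuth endpoint here,
        # the way EbayAPI.retrieve_access_token does against the eBay sandbox.
        return "fake-sandbox-token"

    def retrieve_response(self, httprequest: str, query: str) -> dict:
        # A real retailer would GET httprequest with the token from retrieve_access_token()
        # and the query as a parameter; this sketch returns a canned payload instead.
        return {"endpoint": httprequest, "q": query, "itemSummaries": []}


if __name__ == "__main__":
    api = FakeRetailerApi()
    print(api.retrieve_access_token())
    print(api.retrieve_response("https://example.invalid/search", "clothes"))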