diff --git a/.gitignore b/.gitignore index 139ac7b..75b317f 100644 --- a/.gitignore +++ b/.gitignore @@ -24,7 +24,7 @@ venv/ .vscode/ # === macOS system files === -.DS_Store +*.DS_Store # === Pytest and test cache === htmlcov/ diff --git a/webscraper/ABC/Ebay_API.py b/cheaper_main/ABC/RetailerApi.py similarity index 71% rename from webscraper/ABC/Ebay_API.py rename to cheaper_main/ABC/RetailerApi.py index 2be5a07..3484f35 100644 --- a/webscraper/ABC/Ebay_API.py +++ b/cheaper_main/ABC/RetailerApi.py @@ -1,15 +1,15 @@ from abc import ABC,abstractmethod -class EbayApi(ABC): +class RetailerApi(ABC): @abstractmethod - def retrieve_access_token() -> str: + def retrieve_access_token(self) -> str: """ retrieves the user access token for sandbox environment it's a long line of text, numbers, symbols """ pass @abstractmethod - def retrieve_ebay_response(httprequest:str,query:str) -> dict: + def retrieve_response(self,httprequest:str,query:str) -> dict: """ retrieves a json of large data with category ids, names, parentcategorynodes """ pass \ No newline at end of file diff --git a/webscraper/package.json b/cheaper_main/ABC/__init__.py similarity index 100% rename from webscraper/package.json rename to cheaper_main/ABC/__init__.py diff --git a/webscraper/ABC/base_scraper.py b/cheaper_main/ABC/base_scraper.py similarity index 100% rename from webscraper/ABC/base_scraper.py rename to cheaper_main/ABC/base_scraper.py diff --git a/webscraper/api/interface.py b/cheaper_main/ABC/interface.py similarity index 100% rename from webscraper/api/interface.py rename to cheaper_main/ABC/interface.py diff --git a/webscraper/src/Cheaper_Scraper.py b/cheaper_main/Scraper/Cheaper_Scraper.py similarity index 90% rename from webscraper/src/Cheaper_Scraper.py rename to cheaper_main/Scraper/Cheaper_Scraper.py index ce83e2f..3ee4bdf 100644 --- a/webscraper/src/Cheaper_Scraper.py +++ b/cheaper_main/Scraper/Cheaper_Scraper.py @@ -4,17 +4,15 @@ from urllib.parse import urlparse import logging from 
typing import Dict, List, Optional -from webscraper.ABC.base_scraper import BaseScraper -from webscraper.src.robot_check import RoboCheck -from webscraper.api.interface import ScraperAPIInterface -from webscraper.src.fetch_utils import cached_get +from cheaper_main.ABC.base_scraper import BaseScraper +from cheaper_main.Scraper.robot_check import RoboCheck +from cheaper_main.Scraper.fetch_utils import cached_get from functools import lru_cache -from webscraper.api.EbayAPI import EbayItem -class CheaperScraper(BaseScraper, ScraperAPIInterface): +class CheaperScraper(BaseScraper): def __init__(self, base_url: str = "", user_agent: str = "CheaperBot/0.1", delay: float = 2.0) -> None: """Initialize the scraper with base parameters. diff --git a/webscraper/src/__init__.py b/cheaper_main/Scraper/__init__.py similarity index 100% rename from webscraper/src/__init__.py rename to cheaper_main/Scraper/__init__.py diff --git a/webscraper/src/fetch_utils.py b/cheaper_main/Scraper/fetch_utils.py similarity index 100% rename from webscraper/src/fetch_utils.py rename to cheaper_main/Scraper/fetch_utils.py diff --git a/webscraper/src/robot_check.py b/cheaper_main/Scraper/robot_check.py similarity index 100% rename from webscraper/src/robot_check.py rename to cheaper_main/Scraper/robot_check.py diff --git a/cheaper_main/api/Etsy/EtsyApi.py b/cheaper_main/api/Etsy/EtsyApi.py new file mode 100644 index 0000000..b843b0b --- /dev/null +++ b/cheaper_main/api/Etsy/EtsyApi.py @@ -0,0 +1,31 @@ +from cheaper_main.ABC.RetailerApi import RetailerApi +import requests +import os +from generate_code_challenge import generate_code_challenge + + +keystring = os.getenv("etsykeystring") +sharedsecret = os.getenv("etsysharedsecret") + +class Etsy(RetailerApi): + def retrieve_access_token(self): + # most likely this url will change and I will have a parameter set for it + # otherwise this default url will be used for testing purposes and development + try: + response = 
requests.post("https://api.etsy.com/v3/public/oauth/token", + headers={"Content-Type": "application/x-www-form-urlencoded"}, + data = {"grant_type":"client_credentials", + "scope":"listings_r", + "client_id":f"{keystring}", + "code_challenge":f"{generate_code_challenge.generate_code_challenge()}", + "code_challenge_method":"S256" + } + + ) + response.raise_for_status() + return response.json().get("access_token") + except Exception as e: + raise e + + def retrieve_response(self): + raise NotImplementedError \ No newline at end of file diff --git a/cheaper_main/api/Etsy/__init__.py b/cheaper_main/api/Etsy/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cheaper_main/api/Etsy/generate_code_challenge.py b/cheaper_main/api/Etsy/generate_code_challenge.py new file mode 100644 index 0000000..62f35b0 --- /dev/null +++ b/cheaper_main/api/Etsy/generate_code_challenge.py @@ -0,0 +1,15 @@ +import secrets +import hashlib +import base64 + + +class generate_code_challenge: + # Will most likely be used only for APIs that require it + # If it gets used more than once I will make an Abstract Base Class + def generate_code_challenge() -> str: + code_client = secrets.token_urlsafe(64) + code_challenge = base64.urlsafe_b64encode(hashlib.sha256(code_client.encode()) + .digest()).rstrip(b'=').decode() + return code_challenge + + \ No newline at end of file diff --git a/cheaper_main/api/__init__.py b/cheaper_main/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cheaper_main/api/best_buy_api/__init__.py b/cheaper_main/api/best_buy_api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/cheaper_main/api/best_buy_api/best_buy_api.py b/cheaper_main/api/best_buy_api/best_buy_api.py new file mode 100644 index 0000000..16adcfa --- /dev/null +++ b/cheaper_main/api/best_buy_api/best_buy_api.py @@ -0,0 +1,16 @@ +import os +from cheaper_main.ABC.RetailerApi import RetailerApi + +best_buy = os.getenv("bestbuysecret") + + +class best_buy_api(RetailerApi): + + def 
retrieve_access_token(self): + + return + + + def retrieve_response(self): + + return diff --git a/webscraper/api/EbayAPI.py b/cheaper_main/api/ebay_api/EbayAPI.py similarity index 84% rename from webscraper/api/EbayAPI.py rename to cheaper_main/api/ebay_api/EbayAPI.py index 715913b..0ac3424 100644 --- a/webscraper/api/EbayAPI.py +++ b/cheaper_main/api/ebay_api/EbayAPI.py @@ -3,7 +3,7 @@ from dotenv import load_dotenv import os import logging -from webscraper.api.interface import EbayABC +from cheaper_main.ABC.RetailerApi import RetailerApi # Load environment variables and configure logging load_dotenv() @@ -22,21 +22,22 @@ def __init__(self, name, price, currency, url, date, user_id=None): self.url = url self.date = date self.user_id = user_id + pass -class EbayAPI(EbayABC): - client_secret_key = os.getenv("clientsecret") - client_id_key = os.getenv("clientid") - get_user_key = HTTPBasicAuth(client_id_key, client_secret_key) +class EbayAPI(RetailerApi): + def __init__(self): + self.client_secret_key = os.getenv("clientsecret") + self.client_id_key = os.getenv("clientid") + self.auth = HTTPBasicAuth(self.client_id_key, self.client_secret_key) - @staticmethod - def search_item(query: str) -> list[EbayItem]: - """Search for items on eBay and return a list of EbayItem objects.""" + def search_item(self,query: str) -> EbayItem: + """Search for an item on eBay using the query string.""" if not isinstance(query, str) or not query.strip(): logger.warning("Invalid query input.") raise ValueError("Query must be a non-empty string.") logger.info(f"Searching eBay for: {query}") - response_json = EbayAPI.retrieve_ebay_response( + response_json = self.retrieve_response( "https://api.sandbox.ebay.com/buy/browse/v1/item_summary/search", query ) @@ -60,8 +61,7 @@ def search_item(query: str) -> list[EbayItem]: finally: logger.debug(f"Search attempt complete for query: {query}") - @staticmethod - def retrieve_access_token() -> str: + def retrieve_access_token(self) -> str: """Fetch access token 
from eBay API.""" logger.info("Requesting eBay access token...") try: @@ -72,7 +72,7 @@ def retrieve_access_token() -> str: "grant_type": "client_credentials", "scope": "https://api.ebay.com/oauth/api_scope" }, - auth=EbayAPI.get_user_key + auth=self.auth ) response.raise_for_status() token = response.json().get("access_token") @@ -85,10 +85,9 @@ def retrieve_access_token() -> str: logger.exception("Failed to retrieve token.") raise - @staticmethod - def retrieve_ebay_response(httprequest: str, query: str) -> dict: + def retrieve_response(self,httprequest: str, query: str) -> dict: """Perform GET request to eBay API.""" - auth = EbayAPI.retrieve_access_token() + auth = self.retrieve_access_token() logger.info(f"Making GET request to eBay API: {httprequest} with query: {query}") try: response = requests.get( diff --git a/cheaper_main/api/ebay_api/__init__.py b/cheaper_main/api/ebay_api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/webscraper/api/routes.py b/cheaper_main/api/routes.py similarity index 93% rename from webscraper/api/routes.py rename to cheaper_main/api/routes.py index e74a287..d9ec0a9 100644 --- a/webscraper/api/routes.py +++ b/cheaper_main/api/routes.py @@ -3,7 +3,7 @@ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))) from flask import Flask, jsonify, request -from webscraper.src.Cheaper_Scraper import CheaperScraper +from Scraper.Cheaper_Scraper import CheaperScraper app = Flask(__name__) scraper = CheaperScraper(base_url="https://books.toscrape.com") diff --git a/webscraper/api/tests/test_ebay_api.py b/cheaper_main/api/tests/test_ebay_api.py similarity index 99% rename from webscraper/api/tests/test_ebay_api.py rename to cheaper_main/api/tests/test_ebay_api.py index 18e1917..aa397f1 100644 --- a/webscraper/api/tests/test_ebay_api.py +++ b/cheaper_main/api/tests/test_ebay_api.py @@ -8,7 +8,7 @@ import unittest from unittest.mock import patch,Mock import requests -from webscraper.api.EbayAPI 
import EbayAPI +from ...api.ebay_api import EbayAPI from dotenv import load_dotenv load_dotenv() diff --git a/webscraper/api/tests/test_routes.py b/cheaper_main/api/tests/test_routes.py similarity index 100% rename from webscraper/api/tests/test_routes.py rename to cheaper_main/api/tests/test_routes.py diff --git a/cheaper_main/main.py b/cheaper_main/main.py new file mode 100644 index 0000000..03018e1 --- /dev/null +++ b/cheaper_main/main.py @@ -0,0 +1,72 @@ +from flask import Flask, request , jsonify +import json +#import time # for testing +# i added these imports below because when i ran it it wasnt finding the folders +import sys +import os +sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +from cheaper_main.Scraper.Cheaper_Scraper import CheaperScraper + +app = Flask(__name__) + +#python main.py will run it in the background git bash +#to stop put pm2 stop Cheaper in git bash +@app.route('/') +def scrape(): + + # Set up the scraper for a simple legal-to-scrape website + scraper = CheaperScraper("https://books.toscrape.com", + user_agent="CheaperBot/0.1", + delay=2.0) + + # Define which pages you want to scrape (you can use "/" for homepage) + pages = ["/"] + + # Use the scraper to fetch and parse the pages + results = scraper.scrape(pages) + + # Show the output in the terminal + for path, items in results.items(): + print(f"\nScraped from {path}:") + for item in items: + print("-", item) + + # Save the output to a JSON file + #with open("output.json", "w") as f: + #json.dump(results, f, indent=2) + return jsonify(results) + +@app.route('/api/products/search', methods=['GET']) +def ebay_search(): + try: + from api.ebay_api.EbayAPI import EbayAPI + #instantiate object + ebay_api = EbayAPI() + + product = request.args.get('product') + #The route will look like this + # http://127.0.0.1:5000/api/products/search?product= + #after product= type any generic item to receive json like ?product=clothes + #put that in the address bar + + 
print(f"product = {product}") + if not product: + return jsonify({"error": "missing ?product=parameter"}),400 + response = ebay_api.search_item(product) + + return jsonify({ + "name": response.name, + "price": response.price, + "currency": response.currency, + "url": response.url + }) + + except Exception as e: + print("failed to import",e) + return jsonify({"error": str(e)}), 500 + + + + +if __name__ == "__main__":# + app.run(debug=True) diff --git a/cheaper_main/package.json b/cheaper_main/package.json new file mode 100644 index 0000000..e69de29 diff --git a/webscraper/src/tests/__init__.py b/cheaper_main/src/tests/__init__.py similarity index 100% rename from webscraper/src/tests/__init__.py rename to cheaper_main/src/tests/__init__.py diff --git a/webscraper/src/tests/test_cheaper_scraper.py b/cheaper_main/src/tests/test_cheaper_scraper.py similarity index 100% rename from webscraper/src/tests/test_cheaper_scraper.py rename to cheaper_main/src/tests/test_cheaper_scraper.py diff --git a/webscraper/src/tests/test_fetch_and_cache.py b/cheaper_main/src/tests/test_fetch_and_cache.py similarity index 100% rename from webscraper/src/tests/test_fetch_and_cache.py rename to cheaper_main/src/tests/test_fetch_and_cache.py diff --git a/cheaper_main/start_service.sh b/cheaper_main/start_service.sh new file mode 100644 index 0000000..8277d63 --- /dev/null +++ b/cheaper_main/start_service.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +APP_NAME="Cheaper" +APP_FILE="main.py" +LOG_DIR="$HOME/CheaperLogs" + +# Allow user to override PYTHON_PATH by setting it externally +if [ -z "$PYTHON_PATH" ]; then + PYTHON_PATH=$(command -v python3) +fi + +if [ -z "$PYTHON_PATH" ]; then + PYTHON_PATH=$(command -v python) +fi + +# Final fallback for Windows users (optional) +if [ -z "$PYTHON_PATH" ] && [ -f "/c/Users/$USERNAME/AppData/Local/Programs/Python/Python39/python.exe" ]; then + PYTHON_PATH="/c/Users/$USERNAME/AppData/Local/Programs/Python/Python39/python.exe" +fi + +# Validate Python path +if ! 
"$PYTHON_PATH" --version > /dev/null 2>&1; then + echo "❌ Python not found. Please install it or set PYTHON_PATH manually." + exit 1 +fi + +echo "✅ Using Python at: $PYTHON_PATH" + +# Create log directory +mkdir -p "$LOG_DIR" + +# Start with PM2 +pm2 start "$APP_FILE" \ + --name "$APP_NAME" \ + --interpreter="$PYTHON_PATH" \ + --output "$LOG_DIR/out.log" \ + --error "$LOG_DIR/err.log" \ + --watch + +pm2 save diff --git a/webscraper/database/init_db.sql b/database/init_db.sql similarity index 100% rename from webscraper/database/init_db.sql rename to database/init_db.sql diff --git a/webscraper/database/product_sample_data.sql b/database/product_sample_data.sql similarity index 100% rename from webscraper/database/product_sample_data.sql rename to database/product_sample_data.sql diff --git a/webscraper/database/user_sample_data.sql b/database/user_sample_data.sql similarity index 100% rename from webscraper/database/user_sample_data.sql rename to database/user_sample_data.sql diff --git a/webscraper/.DS_Store b/webscraper/.DS_Store deleted file mode 100644 index 33e2dd0..0000000 Binary files a/webscraper/.DS_Store and /dev/null differ diff --git a/webscraper/main.py b/webscraper/main.py deleted file mode 100644 index c05f95c..0000000 --- a/webscraper/main.py +++ /dev/null @@ -1,36 +0,0 @@ -import json -#import time // for testing -# i added htese imports below becasue when i ran it it wasnt finding the folders -import sys -import os -sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) -from src.Cheaper_Scraper import CheaperScraper - - -def main(): - - - - - # Set up the scraper for a simple legal-to-scrape website - scraper = CheaperScraper("https://books.toscrape.com", user_agent="CheaperBot/0.1", delay=2.0) - # Define which pages you want to scrape (you can use "/" for homepage) - pages = ["/"] - - # Use the scraper to fetch and parse the pages - results = scraper.scrape(pages) - - # Show the output in the terminal - for path, items 
in results.items(): - print(f"\nScraped from {path}:") - for item in items: - print("-", item) - - # Save the output to a JSON file - with open("output.json", "w") as f: - json.dump(results, f, indent=2) - -if __name__ == "__main__": - main() - - diff --git a/webscraper/output.json b/webscraper/output.json deleted file mode 100644 index e64146a..0000000 --- a/webscraper/output.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "/": [ - "A Light in the Attic", - "Tipping the Velvet", - "Soumission", - "Sharp Objects", - "Sapiens: A Brief History of Humankind", - "The Requiem Red", - "The Dirty Little Secrets of Getting Your Dream Job", - "The Coming Woman: A Novel Based on the Life of the Infamous Feminist, Victoria Woodhull", - "The Boys in the Boat: Nine Americans and Their Epic Quest for Gold at the 1936 Berlin Olympics", - "The Black Maria", - "Starving Hearts (Triangular Trade Trilogy, #1)", - "Shakespeare's Sonnets", - "Set Me Free", - "Scott Pilgrim's Precious Little Life (Scott Pilgrim #1)", - "Rip it Up and Start Again", - "Our Band Could Be Your Life: Scenes from the American Indie Underground, 1981-1991", - "Olio", - "Mesaerion: The Best Science Fiction Stories 1800-1849", - "Libertarianism for Beginners", - "It's Only the Himalayas" - ] -} \ No newline at end of file