diff --git a/.dockerignore b/.dockerignore
deleted file mode 100644
index 110986a..0000000
--- a/.dockerignore
+++ /dev/null
@@ -1,8 +0,0 @@
-__pycache__/
-*.pyc
-*.pyo
-*.pyd
-.env
-venv/
-.envrc
-.git
diff --git a/.gitignore b/.gitignore
index 7a3168d..139ac7b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,10 +3,6 @@
 *.pyo
 *.pyd
 __pycache__/
-**/__pycache__/
-*.egg-info/
-dist/
-build/
 *.log
 
 # === SQLite & output files ===
diff --git a/Dockerfile b/Dockerfile
deleted file mode 100644
index c181805..0000000
--- a/Dockerfile
+++ /dev/null
@@ -1,18 +0,0 @@
-FROM continuumio/miniconda3
-
-WORKDIR /app
-
-COPY environment.yml .
-
-RUN conda install -n base -c conda-forge mamba && \
-    mamba env update -n base -f environment.yml && \
-    conda clean --all --yes
-
-COPY . .
-
-ENV PYTHONUNBUFFERED=1
-
-EXPOSE 8000
-
-CMD ["gunicorn", "--bind", "0.0.0.0:8000", "wsgi_entry:application"]
-
diff --git a/accounts/__init__.py b/accounts/__init__.py
index 00bcb6e..e69de29 100644
--- a/accounts/__init__.py
+++ b/accounts/__init__.py
@@ -1 +0,0 @@
-# test
\ No newline at end of file
diff --git a/accounts/models.py b/accounts/models.py
index 28e8f2b..c79a3e2 100644
--- a/accounts/models.py
+++ b/accounts/models.py
@@ -14,8 +14,7 @@ def validate_email(value):
 
 class UserAccount(models.Model):
     email = models.EmailField(max_length=50, unique=True)
-    password_hash = models.CharField(max_length=100)
-    # password_hash = models.CharField(max_length=100, default='defaultpass123') # added default
+    password_hash = models.CharField(max_length=100, default='defaultpass123') # added default
 
     def clean(self):
         validate_email(self.email)
@@ -28,7 +27,7 @@ class Product(models.Model):
     product_name = models.CharField(max_length=255, default='Unnamed Product')
     price = models.DecimalField(max_digits=10, decimal_places=2, default=0.00)
     url = models.TextField(default='https://example.com')
-    user = models.ForeignKey(UserAccount, on_delete=models.CASCADE, null=True)
+    user = models.ForeignKey(UserAccount, on_delete=models.CASCADE, null=True) # optional if user not yet created
 
     def __str__(self):
         return self.product_name
diff --git a/dockerREADME.md b/dockerREADME.md
deleted file mode 100644
index 40bfa03..0000000
--- a/dockerREADME.md
+++ /dev/null
@@ -1,77 +0,0 @@
-# Docker Deployment Guide
-
-### 1. Prerequisites
-
-- Install [Docker Desktop](https://www.docker.com/products/docker-desktop)
-- Make sure Docker Engine is running
-
-### 2. Project Structure (Relevant Parts)
-
-```
-cheaper/
-├── cheaper/
-│   └── wsgi.py
-├── environment.yml
-├── Dockerfile
-├── .dockerignore
-├── main.py
-├── setup.py
-└── ...
-```
-
----
-
-### 3. Dockerfile
-
-We're using Miniconda and `environment.yml` (not `requirements.txt`) for dependency management.
-
-```dockerfile
-FROM continuumio/miniconda3:latest
-
-WORKDIR /app
-
-COPY environment.yml .
-
-RUN conda install -n base -c conda-forge mamba && \
-    mamba env update -n base -f environment.yml && \
-    conda clean --all --yes
-
-COPY . .
-
-# ⏱️ Gunicorn timeout is increased to handle long scraping time
-CMD ["gunicorn", "--timeout", "120", "cheaper.wsgi:application", "-b", "0.0.0.0:8000"]
-```
-
----
-
-### 4. .dockerignore
-
-```dockerignore
-__pycache__/
-*.pyc
-*.pyo
-*.pyd
-env/
-venv/
-.git
-```
-
----
-
-### 5. Build and Run
-
-```bash
-# Build the Docker image
-docker build -t cheaper-app .
-
-# Run the container on port 8000
-docker run --rm -p 8000:8000 cheaper-app
-```
-
-Open [http://localhost:8000](http://localhost:8000) — you should see:
-
-```
-Scraping complete.
-```
-
----
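A note on the `accounts/models.py` hunk above: a literal `default='defaultpass123'` writes the same placeholder string into `password_hash` for every new row, so the column no longer holds a real hash. If a default is unavoidable, hashing through Django's `django.contrib.auth.hashers` is safer. A minimal sketch — the helper methods and the 128-character width are illustrative, not part of this PR:

```python
from django.contrib.auth.hashers import check_password, make_password
from django.db import models


class UserAccount(models.Model):
    email = models.EmailField(max_length=50, unique=True)
    # 128 chars leaves room for the algorithm/salt/hash segments that
    # Django's hashers produce.
    password_hash = models.CharField(max_length=128)

    def set_password(self, raw_password):
        # make_password salts and hashes with the configured hasher
        # (PBKDF2 by default), instead of storing a shared literal.
        self.password_hash = make_password(raw_password)

    def check_raw_password(self, raw_password):
        return check_password(raw_password, self.password_hash)
```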
diff --git a/environment.yml b/environment.yml
index d36e8b9..1a6ff6f 100644
--- a/environment.yml
+++ b/environment.yml
@@ -12,4 +12,3 @@ dependencies:
   - pip:
     - beautifulsoup4
     - lxml
-    - gunicorn
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 4b6b39e..0000000
--- a/setup.py
+++ /dev/null
@@ -1,29 +0,0 @@
-from setuptools import setup, find_packages
-
-setup(
-    name='cheaper',
-    version='0.1',
-    packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]),
-    include_package_data=True,
-    install_requires=[
-        "beautifulsoup4",
-        "lxml",
-        "flask",
-        "pandas",
-        "numpy",
-        "requests",
-        "gunicorn",
-    ],
-    entry_points={
-        'console_scripts': [
-            'cheaper=webscraper.main:main',
-        ],
-    },
-
-    description='cheaper for now',
-    classifiers=[
-        'Programming Language :: Python :: 3',
-        'Operating System :: OS Independent',
-    ],
-    python_requires='>=3.10',
-)
diff --git a/webscraper/api/EbayAPI.py b/webscraper/api/EbayAPI.py
index 23f8cde..715913b 100644
--- a/webscraper/api/EbayAPI.py
+++ b/webscraper/api/EbayAPI.py
@@ -15,11 +15,12 @@ logger = logging.getLogger(__name__)
 
 class EbayItem:
-    def __init__(self, name, price, currency, url, user_id=None):
+    def __init__(self, name, price, currency, url, date, user_id=None):
         self.name = name
         self.price = price
         self.currency = currency
         self.url = url
+        self.date = date
         self.user_id = user_id
 
 class EbayAPI(EbayABC):
@@ -28,30 +29,36 @@ class EbayAPI(EbayABC):
     get_user_key = HTTPBasicAuth(client_id_key, client_secret_key)
 
     @staticmethod
-    def search_item(query: str) -> EbayItem:
-        """Search for an item on eBay using the query string."""
+    def search_item(query: str) -> list[EbayItem]:
+        """Search for items on eBay and return a list of EbayItem objects."""
         if not isinstance(query, str) or not query.strip():
             logger.warning("Invalid query input.")
             raise ValueError("Query must be a non-empty string.")
-
+
         logger.info(f"Searching eBay for: {query}")
         response_json = EbayAPI.retrieve_ebay_response(
             "https://api.sandbox.ebay.com/buy/browse/v1/item_summary/search", query
         )
+        results = []
         try:
-            item = response_json["itemSummaries"][0]
-            logger.debug(f"Item found: {item}")
-            return EbayItem(
-                name=item.get("title"),
-                price=float(item["price"]["value"]),
-                currency=item["price"]["currency"],
-                url=item.get("itemWebUrl"),
-                user_id=None
-            )
-        except (KeyError, IndexError) as e:
-            logger.error(f"Item not found or response invalid: {response_json}")
-            raise Exception("Could not parse item from eBay response.") from e
+            item_summaries = response_json["itemSummaries"]
+            for item in item_summaries:
+                ebay_item = EbayItem(
+                    name=item.get("title"),
+                    price=float(item["price"]["value"]),
+                    currency=item["price"]["currency"],
+                    url=item.get("itemWebUrl"),
+                    date=item.get("itemCreationDate"),
+                    user_id=None
+                )
+                results.append(ebay_item)
+            return results
+        except (KeyError, IndexError, TypeError) as e:
+            logger.error(f"Item list not found or response invalid: {response_json}")
+            raise Exception("Could not parse items from eBay response.") from e
+        finally:
+            logger.debug(f"Search attempt complete for query: {query}")
 
     @staticmethod
     def retrieve_access_token() -> str:
@@ -100,4 +107,4 @@ def retrieve_ebay_response(httprequest: str, query: str) -> dict:
         return response.json()
     except requests.exceptions.RequestException as e:
         logger.exception("Error retrieving eBay response.")
-        raise
+        raise Exception(f"Error retrieving eBay response: {str(e)}") from e
\ No newline at end of file
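Since `search_item` now returns `list[EbayItem]` rather than a single item, and each `EbayItem` carries the listing's `itemCreationDate`, call sites have to iterate over the results. A minimal usage sketch against the changed API — the query string and output format are illustrative:

```python
from webscraper.api.EbayAPI import EbayAPI

# search_item raises ValueError on empty queries and Exception on
# transport/parse failures, so both are worth handling at the call site.
try:
    items = EbayAPI.search_item("macbook")
except ValueError as err:
    print(f"Bad query: {err}")
except Exception as err:
    print(f"eBay lookup failed: {err}")
else:
    for item in items:
        print(f"{item.name}: {item.price} {item.currency} "
              f"(listed {item.date}) -> {item.url}")
```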
diff --git a/webscraper/api/tests/test_ebay_api.py b/webscraper/api/tests/test_ebay_api.py
index 764d0c2..18e1917 100644
--- a/webscraper/api/tests/test_ebay_api.py
+++ b/webscraper/api/tests/test_ebay_api.py
@@ -25,12 +25,42 @@ def test_retrieve_access_token_real(self):
         self.assertGreater(len(token), 0)
 
     def test_search_item_real(self):
-        item = self.EbayAPI.search_item("macbook")
-        self.assertIsInstance(item.name, str)
-        self.assertIsInstance(item.price, float)
-        self.assertIsInstance(item.currency, str)
-        self.assertTrue(item.url.startswith("http"))
-
+        items = self.EbayAPI.search_item("macbook")
+        self.assertIsInstance(items, list)
+        self.assertGreater(len(items), 0)
+        self.assertIsInstance(items[0].name, str)
+        self.assertIsInstance(items[0].price, float)
+        self.assertIsInstance(items[0].currency, str)
+        self.assertTrue(items[0].url.startswith("http"))
+
+    @patch("webscraper.api.EbayAPI.requests.get")
+    def test_search_item_http_500(self, mock_get):
+        mock_get.return_value.status_code = 500
+        mock_get.return_value.raise_for_status.side_effect = requests.exceptions.HTTPError("Internal Server Error")
+
+        with self.assertRaises(Exception) as context:
+            self.EbayAPI.search_item("macbook")
+
+        self.assertIn("Error retrieving eBay response", str(context.exception))
+
+    @patch("webscraper.api.EbayAPI.requests.get")
+    def test_search_item_http_404(self, mock_get):
+        mock_get.return_value.status_code = 404
+        mock_get.return_value.raise_for_status.side_effect = requests.exceptions.HTTPError("Not Found")
+
+        with self.assertRaises(Exception):
+            self.EbayAPI.search_item("macbook")
+
+    @patch("webscraper.api.EbayAPI.EbayAPI.retrieve_ebay_response")
+    def test_search_item_no_items_in_response(self, mock_response):
+        mock_response.return_value = {}  # Missing 'itemSummaries'
+
+        with self.assertRaises(Exception) as context:
+            self.EbayAPI.search_item("macbook")
+
+        self.assertIn("Could not parse items", str(context.exception))
+
     def test_search_item_not_found(self):
         with self.assertRaises(Exception) as context:
             self.EbayAPI.search_item("asdkfjasldfjalskdfj")  # nonsense query
diff --git a/webscraper/main.py b/webscraper/main.py
index d6569ad..c05f95c 100644
--- a/webscraper/main.py
+++ b/webscraper/main.py
@@ -1,23 +1,24 @@
 import json
-#import time # for testing
-# i added these imports below because when i ran it it wasnt finding the folders
+#import time  # for testing
+# I added these imports below because, when I ran it, it wasn't finding the folders
 import sys
 import os
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
-from webscraper.src.Cheaper_Scraper import CheaperScraper
+from src.Cheaper_Scraper import CheaperScraper
+
 
 def main():
+
+
+
     # Set up the scraper for a simple legal-to-scrape website
-    scraper = CheaperScraper("https://books.toscrape.com",
-                             user_agent="CheaperBot/0.1",
-                             delay=2.0)
-
+    scraper = CheaperScraper("https://books.toscrape.com", user_agent="CheaperBot/0.1", delay=2.0)
     # Define which pages you want to scrape (you can use "/" for homepage)
     pages = ["/"]
 
     # Use the scraper to fetch and parse the pages
-    results = scraper.scrape(pages)
+    results = scraper.scrape(pages)
 
     # Show the output in the terminal
     for path, items in results.items():
@@ -31,3 +32,5 @@
 
 if __name__ == "__main__":
     main()
+
+
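The `sys.path.append` patch and the switch to `from src.Cheaper_Scraper import ...` in `main.py` work around imports failing when the script is run directly from inside `webscraper/`. Running it as a module from the repository root keeps the fully-qualified import working without path hacks — a sketch, assuming `webscraper/` and `webscraper/src/` are packages with `__init__.py` files:

```python
# webscraper/main.py — no sys.path patching needed when invoked as
#   python -m webscraper.main
# from the repository root.
from webscraper.src.Cheaper_Scraper import CheaperScraper


def main():
    # A simple legal-to-scrape demo site; identify the bot and throttle requests.
    scraper = CheaperScraper(
        "https://books.toscrape.com",
        user_agent="CheaperBot/0.1",
        delay=2.0,
    )
    results = scraper.scrape(["/"])
    for path, items in results.items():
        print(f"{path}: {len(items)} items")


if __name__ == "__main__":
    main()
```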
diff --git a/wsgi_entry.py b/wsgi_entry.py
deleted file mode 100644
index e10a643..0000000
--- a/wsgi_entry.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from webscraper.main import main
-
-def application(environ, start_response):
-
-    main()
-    status = '200 OK'
-    headers = [('Content-type', 'text/plain')]
-    start_response(status, headers)
-    return [b"Scraping complete.\n"]
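For reference, the deleted `wsgi_entry.py` was a bare WSGI callable: it ran the scrape on each request and returned a fixed plain-text body. If that behaviour is ever wanted again without gunicorn (which this PR also drops from `environment.yml`), the standard library's `wsgiref` can serve the same callable — a minimal sketch, not part of this PR:

```python
from wsgiref.simple_server import make_server

from webscraper.main import main


def application(environ, start_response):
    # Same shape as the removed entry point: scrape first, then respond.
    main()
    start_response('200 OK', [('Content-type', 'text/plain')])
    return [b"Scraping complete.\n"]


if __name__ == "__main__":
    # Serve on port 8000, matching the old Dockerfile's EXPOSE 8000.
    with make_server("", 8000, application) as server:
        server.serve_forever()
```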