diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..110986a --- /dev/null +++ b/.dockerignore @@ -0,0 +1,8 @@ +__pycache__/ +*.pyc +*.pyo +*.pyd +.env +venv/ +.envrc +.git diff --git a/.env b/.env deleted file mode 100644 index e69de29..0000000 diff --git a/.gitignore b/.gitignore index e69de29..7a3168d 100644 --- a/.gitignore +++ b/.gitignore @@ -0,0 +1,58 @@ +# === Python build artifacts === +*.pyc +*.pyo +*.pyd +__pycache__/ +**/__pycache__/ +*.egg-info/ +dist/ +build/ +*.log + +# === SQLite & output files === +*.sqlite3 +*.db +output.json + +# === Environment variables === +.env +.env.* +*.env + +# === Virtual environments === +venv/ +.venv/ +.env/ + +# === VSCode project settings === +.vscode/ + +# === macOS system files === +.DS_Store + +# === Pytest and test cache === +htmlcov/ +.coverage +.cache/ +pytest_cache/ +.tox/ + +# === Jupyter Notebook === +.ipynb_checkpoints/ + +# === Django migration artifacts (optional to ignore) === +# Uncomment the lines below if you want to regenerate migrations often +# **/migrations/*.py +# **/migrations/*.pyc +# !**/migrations/__init__.py + +# === FastAPI-specific artifacts === +fastapi_email/email_db.sqlite3 + +# === IDE-specific === +.idea/ +*.sublime-project +*.sublime-workspace + +# === GitHub Codespaces or devcontainers === +.devcontainer/ diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..b2d293c --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,14 @@ +{ + "python.analysis.extraPaths": [ + "./webscraper/ABC" + ], + "python.testing.unittestArgs": [ + "-v", + "-s", + "./webscraper", + "-p", + "*test*.py" + ], + "python.testing.pytestEnabled": false, + "python.testing.unittestEnabled": true +} \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..c181805 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,18 @@ +FROM continuumio/miniconda3 + +WORKDIR /app + +COPY environment.yml . 
+ +RUN conda install -n base -c conda-forge mamba && \ + mamba env update -n base -f environment.yml && \ + conda clean --all --yes + +COPY . . + +ENV PYTHONUNBUFFERED=1 + +EXPOSE 8000 + +CMD ["gunicorn", "--bind", "0.0.0.0:8000", "wsgi_entry:application"] + diff --git a/README.md b/README.md index e4a5606..37c1d57 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Initial Landing page![Initial Landing page](https://github.com/user-attachments/ -To run the scraper, execute the main.py script by running the command -python src/main.py +python main.py -Make sure you are in the webscraper directory when you run the command @@ -25,3 +25,15 @@ if __name__ == "__main__": ##what file needs to be located and what variables would need to be changed if you wanted to scrape another website? -If you wanted to scrape another website, you need to locate the file main.py and change the variables “scraper” and “pages” to whatever website you wanted and the new URl paths. As well ensure the website allows scraping. + + + +Documentation on connecting the database to vscode with the postgres extension + +1. Install the PostgreSQL Extension in VSCode +2. Make sure PostgreSQL is Running Locally +3. click the extension on the left sidebar +4. click the plus button and create a new connection +5. fill in the needed information, server = localhost, database = cheaper_local, User = postgres, port = 5432 (default), password = the password you made when installing PostgreSQL +6. You should be connected now and see a confirmation message and the connected database in the extension now. 
+ \ No newline at end of file diff --git a/accounts/__init__.py b/accounts/__init__.py index e69de29..00bcb6e 100644 --- a/accounts/__init__.py +++ b/accounts/__init__.py @@ -0,0 +1 @@ +# test \ No newline at end of file diff --git a/accounts/__pycache__/__init__.cpython-312.pyc b/accounts/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index 228acde..0000000 Binary files a/accounts/__pycache__/__init__.cpython-312.pyc and /dev/null differ diff --git a/accounts/__pycache__/admin.cpython-312.pyc b/accounts/__pycache__/admin.cpython-312.pyc deleted file mode 100644 index 5a69a0d..0000000 Binary files a/accounts/__pycache__/admin.cpython-312.pyc and /dev/null differ diff --git a/accounts/__pycache__/apps.cpython-312.pyc b/accounts/__pycache__/apps.cpython-312.pyc deleted file mode 100644 index 33636a4..0000000 Binary files a/accounts/__pycache__/apps.cpython-312.pyc and /dev/null differ diff --git a/accounts/__pycache__/models.cpython-312.pyc b/accounts/__pycache__/models.cpython-312.pyc deleted file mode 100644 index 8e8be97..0000000 Binary files a/accounts/__pycache__/models.cpython-312.pyc and /dev/null differ diff --git a/accounts/migrations/0002_remove_product_name_remove_product_source_url_and_more.py b/accounts/migrations/0002_remove_product_name_remove_product_source_url_and_more.py new file mode 100644 index 0000000..e478a27 --- /dev/null +++ b/accounts/migrations/0002_remove_product_name_remove_product_source_url_and_more.py @@ -0,0 +1,55 @@ +# Generated by Django 5.2 on 2025-05-05 19:14 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("accounts", "0001_initial"), + ] + + operations = [ + migrations.RemoveField( + model_name="product", + name="name", + ), + migrations.RemoveField( + model_name="product", + name="source_url", + ), + migrations.RemoveField( + model_name="useraccount", + name="password", + ), + migrations.AddField( + 
model_name="product", + name="product_name", + field=models.CharField(default="Unnamed Product", max_length=255), + ), + migrations.AddField( + model_name="product", + name="url", + field=models.TextField(default="https://example.com"), + ), + migrations.AddField( + model_name="product", + name="user", + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.CASCADE, + to="accounts.useraccount", + ), + ), + migrations.AddField( + model_name="useraccount", + name="password_hash", + field=models.CharField(default="defaultpass123", max_length=100), + ), + migrations.AlterField( + model_name="product", + name="price", + field=models.DecimalField(decimal_places=2, default=0.0, max_digits=10), + ), + ] diff --git a/accounts/migrations/__pycache__/0001_initial.cpython-312.pyc b/accounts/migrations/__pycache__/0001_initial.cpython-312.pyc deleted file mode 100644 index 2a79d5d..0000000 Binary files a/accounts/migrations/__pycache__/0001_initial.cpython-312.pyc and /dev/null differ diff --git a/accounts/migrations/__pycache__/__init__.cpython-312.pyc b/accounts/migrations/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index f9a196d..0000000 Binary files a/accounts/migrations/__pycache__/__init__.cpython-312.pyc and /dev/null differ diff --git a/accounts/models.py b/accounts/models.py index 3ff6537..28e8f2b 100644 --- a/accounts/models.py +++ b/accounts/models.py @@ -14,7 +14,8 @@ def validate_email(value): class UserAccount(models.Model): email = models.EmailField(max_length=50, unique=True) - password = models.CharField(max_length=100) + password_hash = models.CharField(max_length=100) + # password_hash = models.CharField(max_length=100, default='defaultpass123') # added default def clean(self): validate_email(self.email) @@ -22,10 +23,12 @@ def clean(self): def __str__(self): return self.email + class Product(models.Model): - name = models.CharField(max_length=200) - price = models.CharField(max_length=10) - source_url = 
models.URLField(max_length=150) + product_name = models.CharField(max_length=255, default='Unnamed Product') + price = models.DecimalField(max_digits=10, decimal_places=2, default=0.00) + url = models.TextField(default='https://example.com') + user = models.ForeignKey(UserAccount, on_delete=models.CASCADE, null=True) def __str__(self): - return self.name \ No newline at end of file + return self.product_name diff --git a/accounts/views.py b/accounts/views.py index 91ea44a..2ebc3d3 100644 --- a/accounts/views.py +++ b/accounts/views.py @@ -1,3 +1,10 @@ from django.shortcuts import render +from django.http import JsonResponse +from .models import Product + +def product_list(request): + products = Product.objects.all() + data = [{"name": p.product_name, "price": float(p.price), "url": p.url} for p in products] + return JsonResponse(data, safe=False) # Create your views here. diff --git a/cheaper/__pycache__/__init__.cpython-312.pyc b/cheaper/__pycache__/__init__.cpython-312.pyc deleted file mode 100644 index 65ab433..0000000 Binary files a/cheaper/__pycache__/__init__.cpython-312.pyc and /dev/null differ diff --git a/cheaper/__pycache__/settings.cpython-312.pyc b/cheaper/__pycache__/settings.cpython-312.pyc deleted file mode 100644 index 048f033..0000000 Binary files a/cheaper/__pycache__/settings.cpython-312.pyc and /dev/null differ diff --git a/cheaper/__pycache__/urls.cpython-312.pyc b/cheaper/__pycache__/urls.cpython-312.pyc deleted file mode 100644 index 8e83205..0000000 Binary files a/cheaper/__pycache__/urls.cpython-312.pyc and /dev/null differ diff --git a/cheaper/urls.py b/cheaper/urls.py index 49e4fb1..3a5aea7 100644 --- a/cheaper/urls.py +++ b/cheaper/urls.py @@ -16,7 +16,9 @@ """ from django.contrib import admin from django.urls import path +from accounts.views import product_list urlpatterns = [ path('admin/', admin.site.urls), + path('', product_list, name='product_list'), # This sets the homepage ] diff --git a/db.sqlite3 b/db.sqlite3 index 
9a49d22..fc5a943 100644 Binary files a/db.sqlite3 and b/db.sqlite3 differ diff --git a/dockerREADME.md b/dockerREADME.md new file mode 100644 index 0000000..40bfa03 --- /dev/null +++ b/dockerREADME.md @@ -0,0 +1,77 @@ +# Docker Deployment Guide + +### 1. Prerequisites + +- Install [Docker Desktop](https://www.docker.com/products/docker-desktop) +- Make sure Docker Engine is running + +### 2. Project Structure (Relevant Parts) + +``` +cheaper/ +├── cheaper/ +│ └── wsgi.py +├── environment.yml +├── Dockerfile +├── .dockerignore +├── main.py +├── setup.py +└── ... +``` + +--- + +### 3. Dockerfile + +We're using Miniconda and `environment.yml` (not `requirements.txt`) for dependency management. + +```dockerfile +FROM continuumio/miniconda3:latest + +WORKDIR /app + +COPY environment.yml . + +RUN conda install -n base -c conda-forge mamba && \ + mamba env update -n base -f environment.yml && \ + conda clean --all --yes + +COPY . . + +# ⏱️ Gunicorn timeout is increased to handle long scraping time +CMD ["gunicorn", "--timeout", "120", "cheaper.wsgi:application", "-b", "0.0.0.0:8000"] +``` + +--- + +### 4. .dockerignore + +```dockerignore +__pycache__/ +*.pyc +*.pyo +*.pyd +env/ +venv/ +.git +``` + +--- + +### 5. Build and Run + +```bash +# Build the Docker image +docker build -t cheaper-app . + +# Run the container on port 8000 +docker run --rm -p 8000:8000 cheaper-app +``` + +Open [http://localhost:8000](http://localhost:8000) — you should see: + +``` +Scraping complete. 
+``` + +--- diff --git a/environment.yml b/environment.yml index 1a6ff6f..d36e8b9 100644 --- a/environment.yml +++ b/environment.yml @@ -12,3 +12,4 @@ dependencies: - pip: - beautifulsoup4 - lxml + - gunicorn diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..4b6b39e --- /dev/null +++ b/setup.py @@ -0,0 +1,29 @@ +from setuptools import setup, find_packages + +setup( + name='cheaper', + version='0.1', + packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*"]), + include_package_data=True, + install_requires=[ + "beautifulsoup4", + "lxml", + "flask", + "pandas", + "numpy", + "requests", + "gunicorn", + ], + entry_points={ + 'console_scripts': [ + 'cheaper=webscraper.main:main', + ], + }, + + description='cheaper for now', + classifiers=[ + 'Programming Language :: Python :: 3', + 'Operating System :: OS Independent', + ], + python_requires='>=3.10', +) diff --git a/webscraper/ABC/Ebay_API.py b/webscraper/ABC/Ebay_API.py new file mode 100644 index 0000000..2be5a07 --- /dev/null +++ b/webscraper/ABC/Ebay_API.py @@ -0,0 +1,15 @@ +from abc import ABC,abstractmethod + +class EbayApi(ABC): + + @abstractmethod + def retrieve_access_token() -> str: + """ retrieves the user access token for sandbox environment it's a long line + of text, numbers, symbols + """ + pass + + @abstractmethod + def retrieve_ebay_response(httprequest:str,query:str) -> dict: + """ retrieves a json of large data with category ids, names, parentcategorynodes """ + pass \ No newline at end of file diff --git a/webscraper/ABC/__pycache__/base_scraper.cpython-311.pyc b/webscraper/ABC/__pycache__/base_scraper.cpython-311.pyc deleted file mode 100644 index 2dc46e3..0000000 Binary files a/webscraper/ABC/__pycache__/base_scraper.cpython-311.pyc and /dev/null differ diff --git a/webscraper/ABC/__pycache__/base_scraper.cpython-39.pyc b/webscraper/ABC/__pycache__/base_scraper.cpython-39.pyc deleted file mode 100644 index dd1704d..0000000 Binary files 
a/webscraper/ABC/__pycache__/base_scraper.cpython-39.pyc and /dev/null differ diff --git a/webscraper/api/EbayAPI.py b/webscraper/api/EbayAPI.py new file mode 100644 index 0000000..23f8cde --- /dev/null +++ b/webscraper/api/EbayAPI.py @@ -0,0 +1,103 @@ +import requests +from requests.auth import HTTPBasicAuth +from dotenv import load_dotenv +import os +import logging +from webscraper.api.interface import EbayABC + +# Load environment variables and configure logging +load_dotenv() + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s" +) +logger = logging.getLogger(__name__) + +class EbayItem: + def __init__(self, name, price, currency, url, user_id=None): + self.name = name + self.price = price + self.currency = currency + self.url = url + self.user_id = user_id + +class EbayAPI(EbayABC): + client_secret_key = os.getenv("clientsecret") + client_id_key = os.getenv("clientid") + get_user_key = HTTPBasicAuth(client_id_key, client_secret_key) + + @staticmethod + def search_item(query: str) -> EbayItem: + """Search for an item on eBay using the query string.""" + if not isinstance(query, str) or not query.strip(): + logger.warning("Invalid query input.") + raise ValueError("Query must be a non-empty string.") + + logger.info(f"Searching eBay for: {query}") + response_json = EbayAPI.retrieve_ebay_response( + "https://api.sandbox.ebay.com/buy/browse/v1/item_summary/search", query + ) + + try: + item = response_json["itemSummaries"][0] + logger.debug(f"Item found: {item}") + return EbayItem( + name=item.get("title"), + price=float(item["price"]["value"]), + currency=item["price"]["currency"], + url=item.get("itemWebUrl"), + user_id=None + ) + except (KeyError, IndexError) as e: + logger.error(f"Item not found or response invalid: {response_json}") + raise Exception("Could not parse item from eBay response.") from e + + @staticmethod + def retrieve_access_token() -> str: + """Fetch access token from eBay API.""" + 
logger.info("Requesting eBay access token...") + try: + response = requests.post( + "https://api.sandbox.ebay.com/identity/v1/oauth2/token", + headers={"Content-Type": "application/x-www-form-urlencoded"}, + data={ + "grant_type": "client_credentials", + "scope": "https://api.ebay.com/oauth/api_scope" + }, + auth=EbayAPI.get_user_key + ) + response.raise_for_status() + token = response.json().get("access_token") + if not token: + logger.error("Access token missing from response.") + raise Exception("Access token not found in response.") + logger.info("Access token successfully retrieved.") + return token + except requests.exceptions.RequestException as e: + logger.exception("Failed to retrieve token.") + raise + + @staticmethod + def retrieve_ebay_response(httprequest: str, query: str) -> dict: + """Perform GET request to eBay API.""" + auth = EbayAPI.retrieve_access_token() + logger.info(f"Making GET request to eBay API: {httprequest} with query: {query}") + try: + response = requests.get( + httprequest, + headers={ + "Authorization": f"Bearer {auth}", + "Content-Type": "application/json" + }, + params={"q": query, "category_tree_id": 0} + ) + if response.status_code == 429: + logger.warning("Rate limit exceeded.") + raise Exception("Rate limit exceeded.") + response.raise_for_status() + logger.debug(f"Raw eBay API response: {response.text}") + return response.json() + except requests.exceptions.RequestException as e: + logger.exception("Error retrieving eBay response.") + raise diff --git a/webscraper/api/__pycache__/interface.cpython-311.pyc b/webscraper/api/__pycache__/interface.cpython-311.pyc deleted file mode 100644 index 1b9240d..0000000 Binary files a/webscraper/api/__pycache__/interface.cpython-311.pyc and /dev/null differ diff --git a/webscraper/api/__pycache__/interface.cpython-39.pyc b/webscraper/api/__pycache__/interface.cpython-39.pyc deleted file mode 100644 index 4597f1e..0000000 Binary files a/webscraper/api/__pycache__/interface.cpython-39.pyc 
and /dev/null differ diff --git a/webscraper/api/__pycache__/routes.cpython-311.pyc b/webscraper/api/__pycache__/routes.cpython-311.pyc deleted file mode 100644 index 5e18603..0000000 Binary files a/webscraper/api/__pycache__/routes.cpython-311.pyc and /dev/null differ diff --git a/webscraper/api/interface.py b/webscraper/api/interface.py index af2ef9d..72f0cf0 100644 --- a/webscraper/api/interface.py +++ b/webscraper/api/interface.py @@ -1,8 +1,15 @@ from abc import ABC, abstractmethod +from typing import Dict +from accounts.models import Product -class ScraperAPIInterface(ABC): + +class EbayABC(ABC): @abstractmethod - def get_scraped_data(self, paths: list[str]) -> dict: + def get_scraped_data(self, paths: list[str]) -> Product: """Given a list of paths, return scraped results.""" pass + + @abstractmethod + def search_item(self, query: str) -> Product: + pass diff --git a/webscraper/api/tests/__pycache__/test_routes.cpython-311.pyc b/webscraper/api/tests/__pycache__/test_routes.cpython-311.pyc deleted file mode 100644 index 1ce37af..0000000 Binary files a/webscraper/api/tests/__pycache__/test_routes.cpython-311.pyc and /dev/null differ diff --git a/webscraper/api/tests/test_ebay_api.py b/webscraper/api/tests/test_ebay_api.py new file mode 100644 index 0000000..764d0c2 --- /dev/null +++ b/webscraper/api/tests/test_ebay_api.py @@ -0,0 +1,95 @@ +import os +import django + +os.environ.setdefault("DJANGO_SETTINGS_MODULE", "cheaper.settings") # adjust if your settings module is different +django.setup() + + +import unittest +from unittest.mock import patch,Mock +import requests +from webscraper.api.EbayAPI import EbayAPI +from dotenv import load_dotenv +load_dotenv() + + +class EbayTestApi(unittest.TestCase): + + def setUp(self): + self.EbayAPI = EbayAPI + + + def test_retrieve_access_token_real(self): + token = self.EbayAPI.retrieve_access_token() + self.assertIsInstance(token, str) + self.assertGreater(len(token), 0) + + def test_search_item_real(self): + item = 
self.EbayAPI.search_item("macbook") + self.assertIsInstance(item.name, str) + self.assertIsInstance(item.price, float) + self.assertIsInstance(item.currency, str) + self.assertTrue(item.url.startswith("http")) + + def test_search_item_not_found(self): + with self.assertRaises(Exception) as context: + self.EbayAPI.search_item("asdkfjasldfjalskdfj") # nonsense query + + self.assertIn("Could not parse item", str(context.exception)) + + + # @patch("webscraper.api.EbayAPI.requests.post") + # def test_retrieve_access_token(self, mock_post): + # mock_response = Mock() + # mock_response.status_code = 200 + # mock_response.json.return_value = {"access_token": "mock_token"} + # mock_post.return_value = mock_response + + # token = self.EbayAPI.retrieve_access_token() + # self.assertEqual(token, "mock_token") + + @patch("webscraper.api.EbayAPI.requests.post") + def test_retrieve_access_token_invalid(self,mock_post): + mock_response = Mock() + mock_response.status_code = 404 + mock_response.json.return_value ={"error": "not found"} + mock_post.return_value = mock_response + + with self.assertRaises(Exception): + self.EbayAPI.retrieve_access_token() + + + + @patch("webscraper.api.EbayAPI.requests.get") + def test_retrieve_ebay_response_invalid(self, mock_get): + mock_get.side_effect = requests.exceptions.RequestException("Invalid request") + with self.assertRaises(Exception): + self.EbayAPI.retrieve_ebay_response("https://test", "item") + + def test_search_item_empty_query(self): + with self.assertRaises(ValueError): + self.EbayAPI.search_item("") + + # @patch("webscraper.api.EbayAPI.EbayAPI.retrieve_ebay_response") + # def test_search_item(self, mock_response): + # mock_response.return_value = { + # "itemSummaries": [ + # { + # "title": "Test Product", + # "price": { + # "value": "19.99", + # "currency": "USD" + # } + # } + # ] + # } + + # result = self.EbayAPI.search_item("test") + # self.assertEqual(result["name"], "Test Product") + # self.assertEqual(result["price"], 
"19.99") + # self.assertEqual(result["currency"], "USD") + + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/webscraper/database/init_db.sql b/webscraper/database/init_db.sql index 5effbec..069122a 100644 --- a/webscraper/database/init_db.sql +++ b/webscraper/database/init_db.sql @@ -1,3 +1,6 @@ +from sqlalchemy import create_engine +from webscraper.database.models import Base + -- Create users table CREATE TABLE users ( id SERIAL PRIMARY KEY, @@ -14,3 +17,7 @@ CREATE TABLE products ( user_id INTEGER NOT NULL, FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE ); + + +engine = create_engine('postgresql://postgres:your_password@localhost/cheaper_local') +Base.metadata.create_all(engine) \ No newline at end of file diff --git a/webscraper/src/main.py b/webscraper/main.py similarity index 75% rename from webscraper/src/main.py rename to webscraper/main.py index 4a27839..d6569ad 100644 --- a/webscraper/src/main.py +++ b/webscraper/main.py @@ -1,19 +1,18 @@ - import json -#import time // for testing -# i added htese imports below becasue when i ran it it wasnt finding the folders +#import time # for testing +# i added these imports below because when i ran it it wasnt finding the folders import sys import os sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) -from src.Cheaper_Scraper import CheaperScraper - +from webscraper.src.Cheaper_Scraper import CheaperScraper def main(): + # Set up the scraper for a simple legal-to-scrape website scraper = CheaperScraper("https://books.toscrape.com", - user_agent="CheaperBot/0.1", - delay=2.0) - + user_agent="CheaperBot/0.1", + delay=2.0) + # Define which pages you want to scrape (you can use "/" for homepage) pages = ["/"] @@ -32,5 +31,3 @@ def main(): if __name__ == "__main__": main() - - diff --git a/webscraper/src/Cheaper_Scraper.py b/webscraper/src/Cheaper_Scraper.py index 5aa7bc9..ce83e2f 100644 --- a/webscraper/src/Cheaper_Scraper.py +++ 
b/webscraper/src/Cheaper_Scraper.py @@ -9,7 +9,7 @@ from webscraper.api.interface import ScraperAPIInterface from webscraper.src.fetch_utils import cached_get from functools import lru_cache - +from webscraper.api.EbayAPI import EbayItem @@ -107,3 +107,4 @@ def scrape(self, paths: List[str]) -> Dict[str, List[str]]: def get_scraped_data(self, paths: List[str]) -> Dict[str, List[str]]: return self.scrape(paths) + diff --git a/webscraper/src/__pycache__/CheaperScraper.cpython-39.pyc b/webscraper/src/__pycache__/CheaperScraper.cpython-39.pyc deleted file mode 100644 index f60b091..0000000 Binary files a/webscraper/src/__pycache__/CheaperScraper.cpython-39.pyc and /dev/null differ diff --git a/webscraper/src/__pycache__/Cheaper_Scraper.cpython-311.pyc b/webscraper/src/__pycache__/Cheaper_Scraper.cpython-311.pyc deleted file mode 100644 index cc3aa5e..0000000 Binary files a/webscraper/src/__pycache__/Cheaper_Scraper.cpython-311.pyc and /dev/null differ diff --git a/webscraper/src/__pycache__/Cheaper_Scraper.cpython-39.pyc b/webscraper/src/__pycache__/Cheaper_Scraper.cpython-39.pyc deleted file mode 100644 index 036324a..0000000 Binary files a/webscraper/src/__pycache__/Cheaper_Scraper.cpython-39.pyc and /dev/null differ diff --git a/webscraper/src/__pycache__/__init__.cpython-311.pyc b/webscraper/src/__pycache__/__init__.cpython-311.pyc deleted file mode 100644 index c420941..0000000 Binary files a/webscraper/src/__pycache__/__init__.cpython-311.pyc and /dev/null differ diff --git a/webscraper/src/__pycache__/__init__.cpython-39.pyc b/webscraper/src/__pycache__/__init__.cpython-39.pyc deleted file mode 100644 index d43fde4..0000000 Binary files a/webscraper/src/__pycache__/__init__.cpython-39.pyc and /dev/null differ diff --git a/webscraper/src/__pycache__/fetch_utils.cpython-311.pyc b/webscraper/src/__pycache__/fetch_utils.cpython-311.pyc deleted file mode 100644 index 4a1d1ac..0000000 Binary files a/webscraper/src/__pycache__/fetch_utils.cpython-311.pyc and /dev/null 
differ diff --git a/webscraper/src/__pycache__/fetch_utils.cpython-39.pyc b/webscraper/src/__pycache__/fetch_utils.cpython-39.pyc deleted file mode 100644 index 8e69c48..0000000 Binary files a/webscraper/src/__pycache__/fetch_utils.cpython-39.pyc and /dev/null differ diff --git a/webscraper/src/__pycache__/main.cpython-39.pyc b/webscraper/src/__pycache__/main.cpython-39.pyc deleted file mode 100644 index bd33919..0000000 Binary files a/webscraper/src/__pycache__/main.cpython-39.pyc and /dev/null differ diff --git a/webscraper/src/__pycache__/robot_check.cpython-311.pyc b/webscraper/src/__pycache__/robot_check.cpython-311.pyc deleted file mode 100644 index e7f181b..0000000 Binary files a/webscraper/src/__pycache__/robot_check.cpython-311.pyc and /dev/null differ diff --git a/webscraper/src/__pycache__/robot_check.cpython-39.pyc b/webscraper/src/__pycache__/robot_check.cpython-39.pyc deleted file mode 100644 index d1557b9..0000000 Binary files a/webscraper/src/__pycache__/robot_check.cpython-39.pyc and /dev/null differ diff --git a/webscraper/src/__pycache__/test_cheaper_scraper.cpython-311.pyc b/webscraper/src/__pycache__/test_cheaper_scraper.cpython-311.pyc deleted file mode 100644 index ef900b2..0000000 Binary files a/webscraper/src/__pycache__/test_cheaper_scraper.cpython-311.pyc and /dev/null differ diff --git a/webscraper/src/tests/__pycache__/__init__.cpython-39.pyc b/webscraper/src/tests/__pycache__/__init__.cpython-39.pyc deleted file mode 100644 index ba417b4..0000000 Binary files a/webscraper/src/tests/__pycache__/__init__.cpython-39.pyc and /dev/null differ diff --git a/webscraper/src/tests/__pycache__/test_fetch_and_cache.cpython-39.pyc b/webscraper/src/tests/__pycache__/test_fetch_and_cache.cpython-39.pyc deleted file mode 100644 index c22e0aa..0000000 Binary files a/webscraper/src/tests/__pycache__/test_fetch_and_cache.cpython-39.pyc and /dev/null differ diff --git a/wsgi_entry.py b/wsgi_entry.py new file mode 100644 index 0000000..e10a643 --- 
/dev/null +++ b/wsgi_entry.py @@ -0,0 +1,9 @@ +from webscraper.main import main + +def application(environ, start_response): + + main() + status = '200 OK' + headers = [('Content-type', 'text/plain')] + start_response(status, headers) + return [b"Scraping complete.\n"]