"""Scrape SJSU parking garage fullness and store it in PostgreSQL.

Fetches https://sjsuparkingstatus.sjsu.edu, parses each garage's name and
fullness percentage, and inserts one row per garage into the ``garage_info``
table.  The page's "Last updated" stamp is cached in a local file so a run
that sees an unchanged page skips the database insert.
"""

import os
from datetime import datetime  # kept: part of this module's import surface

import requests
from bs4 import BeautifulSoup
from requests.packages.urllib3.exceptions import InsecureRequestWarning
import psycopg2
from dotenv import load_dotenv

# Pull DB credentials (dbname/user/password/host/port) from a .env file.
load_dotenv()

# Remembers the page's "Last updated" stamp between runs.
TIMESTAMP_FILE = 'last_timestamp.txt'


def get_last_stored_timestamp():
    """Return the timestamp saved by the previous run, or None on first run."""
    try:
        with open(TIMESTAMP_FILE, 'r') as file:
            return file.read().strip()
    except FileNotFoundError:
        return None


def update_stored_timestamp(new_timestamp):
    """Persist *new_timestamp* so the next run can detect an unchanged page."""
    with open(TIMESTAMP_FILE, 'w') as file:
        file.write(new_timestamp)


def _connect_db():
    """Open a PostgreSQL connection using the .env-provided credentials."""
    return psycopg2.connect(
        dbname=os.getenv('dbname'),
        user=os.getenv('user'),
        password=os.getenv('password'),
        host=os.getenv('host'),
        port=os.getenv('port'),
    )


def scrape_parking_data_and_insert_into_db():
    """Scrape the parking page and insert fresh garage data into the DB.

    Side effects: writes TIMESTAMP_FILE, inserts rows into garage_info,
    prints progress/diagnostics to stdout.
    """
    # The SJSU host serves a certificate requests cannot verify, so SSL
    # verification is turned off below; silence the resulting warning.
    requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

    url = 'https://sjsuparkingstatus.sjsu.edu'
    # NOTE(review): verify=False disables TLS certificate checking — required
    # for this host today, but worth revisiting if the cert chain is fixed.
    response = requests.get(url, verify=False)

    # Guard clause instead of wrapping everything in `if status == 200:`.
    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return

    soup = BeautifulSoup(response.content.decode('utf-8'), 'html.parser')

    timestamp_tag = soup.find('p', class_='timestamp')
    if not timestamp_tag:
        print("Timestamp not found on the page.")
        return

    # The tag's text looks like "Last updated <stamp> Refresh"; keep <stamp>.
    current_timestamp = (
        timestamp_tag.text.strip().split("Last updated ")[-1].split(" Refresh")[0]
    )
    if get_last_stored_timestamp() == current_timestamp:
        print("Timestamp has not changed; skipping database insert.")
        return

    # NOTE(review): the stamp is persisted before the insert (as in the
    # original), so a failed insert is not retried until the page changes.
    update_stored_timestamp(current_timestamp)

    garage_div = soup.find('div', class_='garage')
    if garage_div is None:
        # Fix: original dereferenced garage_div unconditionally and crashed
        # with AttributeError when the page layout changed.
        print("Garage section not found on the page.")
        return
    garage_names = garage_div.find_all('h2', class_='garage__name')
    garage_fullness = garage_div.find_all('span', class_='garage__fullness')

    # Pair each garage name with its fullness value.
    garage_data = [
        (name.text.strip(), fullness.text.strip())
        for name, fullness in zip(garage_names, garage_fullness)
    ]
    print(garage_data)

    # Fix: conn/cursor must pre-exist so `finally` cannot hit a NameError
    # when psycopg2.connect() itself raises.
    conn = None
    cursor = None
    try:
        print("Inserting to the PostgreSQL database...")
        conn = _connect_db()
        cursor = conn.cursor()

        # Insert one row per garage; the DB supplies the PST timestamp.
        insert_query = '''
        INSERT INTO garage_info (garage_name, fullness_percentage, timestamp)
        VALUES (%s, %s, CURRENT_TIMESTAMP AT TIME ZONE 'America/Los_Angeles')
        '''
        cursor.executemany(insert_query, garage_data)
        conn.commit()

        print(f"Inserted {len(garage_data)} records into the garage_info table.")

    except Exception as e:
        # Best-effort job: report and fall through to cleanup.
        print(f"An error occurred: {e}")

    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()


def display_parking_data_from_db():
    """Print every row currently stored in the garage_info table."""
    conn = None
    cursor = None
    try:
        # Fix: original message read "Selecting from to the ..."
        print("Selecting from the PostgreSQL database...")
        conn = _connect_db()
        cursor = conn.cursor()

        # Read back all stored garage rows.
        select_query = '''
        SELECT * FROM garage_info
        '''
        cursor.execute(select_query)
        for row in cursor.fetchall():
            print(row)
    except Exception as e:
        print(f"An error occurred: {e}")

    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()


if __name__ == '__main__':
    scrape_parking_data_and_insert_into_db()
    # display_parking_data_from_db()
class_='garage__name') - garage_fullness = garage_div.find_all('span', class_='garage__fullness') - garage_addresses = garage_div.find_all('a') - href_links = [link.get('href') for link in garage_addresses] - - garage_data = {} # Reset garage data for each request - for name, fullness, address in zip(garage_names, garage_fullness, href_links): - garage_data[name.text.strip().replace(" ", "_")] = [fullness.text.strip(), address] - garage_addresses = [info[1] for info in garage_data.values()] - - - timestamp = get_time() - for garage in GARAGE_NAMES: - sqlhelper.insert_garage_data(DB_FILE, garage, garage_data[garage][0], timestamp) - sqlhelper.delete_garage_data(DB_FILE, garage) - - return garage_data - -logging.Formatter.converter = time.gmtime -logging.basicConfig( - format="%(asctime)s.%(msecs)03dZ %(levelname)s:%(name)s:%(message)s", - datefmt="%Y-%m-%dT%H:%M:%S", - level= logging.ERROR - (args.verbose*10), -) - -@app.get("/") -async def root(): - return "Welcome to SJSU Parking!" - -def helper_thread(): - print("Helper thread started.") - while True: - current_time = datetime.now(pytz.timezone('US/Pacific')) - print(f"Current time: {current_time}") - if current_time.hour >= 8 and current_time.hour < 14: - try: - # Between 8am-2pm, call endpoint - asyncio.run(get_garage_data()) - except Exception as e: - print(f"An error occurred: {e}") - else: - print("Stopping data retrieval as it's past 2:00 PM PST.") - break - - # Calling endpoint every minute - time.sleep(60) - -if __name__ == 'server': - helper = threading.Thread(target=helper_thread, daemon=True) - helper.start() - -if __name__ == "__main__": - args = get_args() - uvicorn.run("server:app", host=args.host, port=args.port, reload=True, ) diff --git a/sqlhelper.py b/sqlhelper.py deleted file mode 100644 index 461af0e..0000000 --- a/sqlhelper.py +++ /dev/null @@ -1,63 +0,0 @@ -import sqlite3 -from datetime import datetime, timedelta - -#database setup function -def maybe_create_table(dbfile: str, 
garage_names): - conn = sqlite3.connect(dbfile) - c = conn.cursor() - - for garage_name in garage_names: - c.execute(f'''CREATE TABLE IF NOT EXISTS {garage_name} ( - id INTEGER PRIMARY KEY, - garage_fullness TEXT, - time TEXT - )''') - conn.commit() - print("Database setup complete") - return conn - -#insert data function -def insert_garage_data(dbfile: str, garage, fullness, timestamp): - conn = sqlite3.connect(dbfile) - c = conn.cursor() - # delete_garage_data() - print(f"{garage}") - try: - query = f"INSERT INTO {garage} (garage_fullness, time) VALUES (?, ?)" - c.execute(query, [fullness, timestamp]) - conn.commit() - except Exception as e: - print(e) - return False - - conn.commit() - print(f"Data inserted into {garage} at {timestamp}") - c.execute(f"SELECT * FROM {garage}") - print(c.fetchall()) - -def get_garage_data(dbfile: str, garage, time=None): - conn = sqlite3.connect(dbfile) - c = conn.cursor() - try: - query = f"SELECT * FROM {garage}" - c.execute(query) - return c.fetchall() - except Exception as e: - print(e) - -#delete data after two weeks -def delete_garage_data(dbfile: str, garage): - conn = sqlite3.connect(dbfile) - c = conn.cursor() - time_threshold = (datetime.now() - timedelta(weeks=2)).strftime('%Y-%m-%d %H:%M:%S') - print("TIME:",time_threshold) - try: - query = f"DELETE FROM {garage} WHERE time < ?" - c.execute(query, (time_threshold,)) - conn.commit() - print("Old data deleted") - except Exception as e: - print(e) - - -