Skip to content

Commit 91f3c76

Browse files
committed
Fix package structure for correct PyPI entry point
1 parent c124239 commit 91f3c76

File tree

8 files changed

+604
-603
lines changed

8 files changed

+604
-603
lines changed

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
setup(
44
name="nyc_infohub_excel_api_access",
5-
version="1.0.8",
5+
version="1.0.9",
66
author="Dylan Picart",
77
author_email="dylanpicart@mail.adelphi.edu",
88
description="A Python scraper for downloading Excel datasets from NYC InfoHub.",

src/__init__.py

Lines changed: 0 additions & 1 deletion
This file was deleted.

src/excel_scraper/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Initialize excel_api_access package
2+
from .scraper import run_scraper
Lines changed: 61 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -1,61 +1,61 @@
1-
import os
2-
import sys
3-
import logging
4-
import asyncio
5-
from logging.handlers import RotatingFileHandler
6-
from excel_scraper import NYCInfoHubScraper
7-
8-
# Ensure stdout is line-buffered so logs appear in real time
9-
sys.stdout.reconfigure(line_buffering=True)
10-
11-
# -------------------- SCRAPER EXECUTION --------------------
12-
async def main():
    """Run the NYC InfoHub scraping pipeline once.

    Creates a scraper, delegates the entire workflow to ``scrape_data()``,
    and always releases Selenium/httpx resources before returning.

    Returns:
        int: 0 on success, 1 if the scrape raised an exception.
    """
    exit_code = 0
    scraper = NYCInfoHubScraper()
    try:
        # The refactored pipeline lives entirely inside scrape_data().
        await scraper.scrape_data()
    except Exception as e:
        # Top-level boundary: record the traceback and signal failure.
        logging.error(f"Some error occurred: {e}", exc_info=True)
        exit_code = 1
    finally:
        # Clean up Selenium & httpx regardless of outcome.
        await scraper.close()
    return exit_code
30-
# Run scraper process if script is executed directly
if __name__ == "__main__":
    # Logs live in <project root>/logs, one directory above this file.
    project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
    log_dir = os.path.join(project_root, "logs")
    os.makedirs(log_dir, exist_ok=True)

    log_format = "%(asctime)s - %(levelname)s - %(message)s"

    # Rotating file handler: roll over near 5 MB, keep two backups.
    file_handler = RotatingFileHandler(
        os.path.join(log_dir, "excel_fetch.log"),
        maxBytes=5_242_880,  # ~5 MB
        backupCount=2,
        encoding="utf-8",
    )
    file_handler.setFormatter(logging.Formatter(log_format))

    # Emit to both the rotating file and the console; force=True replaces
    # any handlers a library may have installed earlier.
    logging.basicConfig(
        level=logging.INFO,
        format=log_format,
        handlers=[file_handler, logging.StreamHandler()],
        force=True,
    )

    try:
        # Propagate main()'s return value as the process exit code.
        sys.exit(asyncio.run(main()))
    except Exception as e:
        logging.error(f"Script failed: {e}", exc_info=True)
        sys.exit(1)
1+
import os
2+
import sys
3+
import logging
4+
import asyncio
5+
from logging.handlers import RotatingFileHandler
6+
from src.excel_scraper.scraper import NYCInfoHubScraper
7+
8+
# Ensure stdout is line-buffered so logs appear in real time
9+
sys.stdout.reconfigure(line_buffering=True)
10+
11+
# -------------------- SCRAPER EXECUTION --------------------
12+
async def main():
    """Entry point: perform one full scrape and report success via exit code.

    All scraping logic is delegated to NYCInfoHubScraper.scrape_data();
    this wrapper only handles top-level errors and resource cleanup.

    Returns:
        int: 0 on success, 1 when the pipeline raised.
    """
    scraper = NYCInfoHubScraper()
    try:
        # scrape_data() encapsulates the whole refactored pipeline.
        await scraper.scrape_data()
        return 0  # Signals success to the caller
    except Exception as e:
        logging.error(f"Some error occurred: {e}", exc_info=True)
        return 1  # Non-zero exit code indicates an error
    finally:
        # Always release Selenium & httpx resources.
        await scraper.close()
30+
# Run scraper process if script is executed directly
if __name__ == "__main__":
    # Resolve the project root (one level above this file) and ensure
    # a logs/ directory exists there.
    base_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
    logs_dir = os.path.join(base_dir, "logs")
    os.makedirs(logs_dir, exist_ok=True)

    LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"

    # File output rotates at roughly 5 MB with two retained backups.
    rotating_handler = RotatingFileHandler(
        os.path.join(logs_dir, "excel_fetch.log"),
        maxBytes=5_242_880,  # ~5 MB
        backupCount=2,
        encoding="utf-8",
    )
    rotating_handler.setFormatter(logging.Formatter(LOG_FORMAT))

    # Route records to both the rotating file and stderr; force=True
    # discards any previously configured root handlers.
    logging.basicConfig(
        level=logging.INFO,
        format=LOG_FORMAT,
        handlers=[rotating_handler, logging.StreamHandler()],
        force=True,
    )

    try:
        result = asyncio.run(main())
        sys.exit(result)
    except Exception as e:
        logging.error(f"Script failed: {e}", exc_info=True)
        sys.exit(1)

0 commit comments

Comments
 (0)