Skip to content

Commit 42053a0

Browse files
committed
Refactor NYCInfoHubScraper to accept base_dir parameter; update main.py to pass script directory for improved data directory management
1 parent cebcfa5 commit 42053a0

File tree

3 files changed

+5
-4
lines changed

3 files changed

+5
-4
lines changed

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22

33
setup(
44
name="nyc_infohub_excel_api_access",
5-
version="1.0.12",
5+
version="1.0.13",
66
author="Dylan Picart",
77
author_email="dylanpicart@mail.adelphi.edu",
88
description="A Python scraper for downloading Excel datasets from NYC InfoHub.",

src/excel_scraper/main.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,8 @@ async def main():
1414
Main entry point for running the NYCInfoHubScraper.
1515
Delegates the entire scraping workflow to the scraper's scrape_data().
1616
"""
17-
scraper = NYCInfoHubScraper()
17+
script_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
18+
scraper = NYCInfoHubScraper(base_dir=script_dir)
1819
try:
1920
# The new refactored pipeline is entirely within scrape_data()
2021
await scraper.scrape_data()

src/excel_scraper/scraper.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -377,10 +377,10 @@ class NYCInfoHubScraper(BaseScraper):
377377
}
378378

379379
def __init__(self, base_dir=None, data_dir=None, hash_dir=None, log_dir=None, skip_win_scan=True):
380-
super().__init__(security_manager=SecurityManager("/var/run/clamav/clamd.ctl", skip_windows_scan=skip_win_scan), base_dir=base_dir)
381-
382380
script_dir = os.path.abspath(os.path.dirname(__file__)) if "__file__" in globals() else os.getcwd()
383381
self._base_dir = base_dir or os.path.join(script_dir, "..")
382+
super().__init__(security_manager=SecurityManager("/var/run/clamav/clamd.ctl", skip_windows_scan=skip_win_scan), base_dir=self._base_dir)
383+
384384
self._data_dir = data_dir or os.path.join(self._base_dir, "data")
385385
self._hash_dir = hash_dir or os.path.join(self._base_dir, "hashes")
386386
self._log_dir = log_dir or os.path.join(self._base_dir, "logs")

0 commit comments

Comments
 (0)