From d61e30293568edabe02b7cb14417b08f00cef074 Mon Sep 17 00:00:00 2001 From: h0und <79583632+msrezaie@users.noreply.github.com> Date: Fri, 6 Dec 2024 12:30:59 -0500 Subject: [PATCH 1/5] spider for Atlantic City --- city_scrapers/spiders/atconj_Atlantic_City.py | 127 +++ tests/files/atconj_Atlantic_City.json | 842 ++++++++++++++++++ .../atconj_Atlantic_City_meeting_detail.json | 15 + tests/test_atconj_Atlantic_City.py | 98 ++ 4 files changed, 1082 insertions(+) create mode 100644 city_scrapers/spiders/atconj_Atlantic_City.py create mode 100644 tests/files/atconj_Atlantic_City.json create mode 100644 tests/files/atconj_Atlantic_City_meeting_detail.json create mode 100644 tests/test_atconj_Atlantic_City.py diff --git a/city_scrapers/spiders/atconj_Atlantic_City.py b/city_scrapers/spiders/atconj_Atlantic_City.py new file mode 100644 index 0000000..f1dcc58 --- /dev/null +++ b/city_scrapers/spiders/atconj_Atlantic_City.py @@ -0,0 +1,127 @@ +import json +from datetime import datetime +from urllib.parse import urljoin + +import scrapy +from city_scrapers_core.constants import ( + CANCELLED, + CITY_COUNCIL, + CLASSIFICATIONS, + NOT_CLASSIFIED, + PASSED, + TENTATIVE, +) +from city_scrapers_core.items import Meeting +from city_scrapers_core.spiders import CityScrapersSpider +from dateutil.parser import parse + + +class AlanticCitySpider(CityScrapersSpider): + name = "atconj_Atlantic_City" + agency = "Atlantic City" + timezone = "America/New_York" + + custom_settings = { + "ROBOTSTXT_OBEY": False, + } + + """ + The website layout of this agency uses JavaScript to dynamically + load meetings for one month at a time, making it challenging to + scrape data directly from the HTML/CSS structure of the site. + + So instead API endpoints from the agency's URL are used to fetch + the meetings data: + - `meetings_url`: provides a list of all meetings for a given time + period. + - `meeting_detail_url`: retrieves detailed information for each + meeting using its ID. + + Additionally, a third url `calender_source` is used as the source + field of the meeting since it is more user friendly to navigate + than the api endpoints. + """ + meetings_url = "https://www.acnj.gov/api/data/GetCalendarMeetings?end=06%2F30%2F2025+12:00+am&meetingTypeID=all&start=06%2F01%2F2024+12:00+am" # noqa + meeting_detail_url = "https://www.acnj.gov/api/data/GetMeeting?id=" + calender_source = "https://www.acnj.gov/calendar" + + def start_requests(self): + yield scrapy.Request(url=self.meetings_url, method="GET", callback=self.parse) + + def parse(self, response): + data = json.loads(response.text) + for item in data: + meeting_id = item["id"] + meeting_detail_url = self.meeting_detail_url + meeting_id + + yield scrapy.Request( + url=meeting_detail_url, + method="GET", + callback=self.parse_meeting, + cb_kwargs={"item": item}, + ) + + def parse_meeting(self, response, item): + meeting_detail = json.loads(response.text) + + meeting = Meeting( + title=item["title"], + description="", + classification=self._parse_classification(meeting_detail), + start=parse(item["start"]), + end=None, + all_day=item["allDay"], + time_notes="", + location=self._parse_location(meeting_detail), + links=self._parse_links(meeting_detail), + source=self.calender_source, + ) + + meeting["status"] = self._get_status(meeting_detail) + meeting["id"] = int(item["id"]) + + yield meeting + + def _parse_classification(self, item): + for classification in CLASSIFICATIONS: + if classification.lower() in item["Meeting_Type"].lower(): + return classification + elif "council" in item["Meeting_Type"].lower(): + return CITY_COUNCIL + return NOT_CLASSIFIED + + def _parse_location(self, item): + meeting_location = ( + item["Meeting_Location"] + or "1301 Bacharach Boulevard Atlantic City, NJ, 08401" + ) + + if "-" in meeting_location: + return { + "address": meeting_location.split("-")[1].strip(), + "name": meeting_location.split("-")[0].strip(), + } + else: + return { + "address": meeting_location, + "name": "City Hall of Atlantic City", + } + + def _parse_links(self, item): + base_url = "https://www.acnj.gov/" + keys = ["Meeting_AgendaPDF", "Meeting_MinutesPDF", "Meeting_NoticePDF"] + titles = ["Agenda", "Minutes", "Notice"] + + links = [ + {"title": title, "href": urljoin(base_url, item.get(key, ""))} + for title, key in zip(titles, keys) + if item.get(key) + ] + return links + + def _get_status(self, item): + if item["Meeting_IsCanceled"]: + return CANCELLED + if parse(item["Meeting_DateTime"]) < datetime.now(): + return PASSED + return TENTATIVE diff --git a/tests/files/atconj_Atlantic_City.json b/tests/files/atconj_Atlantic_City.json new file mode 100644 index 0000000..0ca3b2d --- /dev/null +++ b/tests/files/atconj_Atlantic_City.json @@ -0,0 +1,842 @@ +[ + { + "id": "429", + "title": "CITISTAT Meeting", + "allDay": false, + "start": "2024-06-26T17:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "431", + "title": "CITISTAT Meeting", + "allDay": false, + "start": "2024-07-24T17:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "433", + "title": "CITISTAT Meeting", + "allDay": false, + "start": "2024-09-04T17:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "435", + "title": "CITISTAT Meeting", + "allDay": false, + "start": "2024-10-02T17:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "438", + "title": "CITISTAT Meeting", + "allDay": false, + "start": "2024-11-12T17:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "440", + "title": "CITISTAT Meeting", + "allDay": false, + "start": "2024-12-11T17:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "441", + "title": "CITISTAT Meeting", + "allDay": false, + "start": "2025-01-08T17:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "447", + "title": "Historic Preservation Commission", + "allDay": false, + "start": "2024-06-12T13:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "448", + "title": "Historic Preservation Commission", + "allDay": false, + "start": "2024-07-10T13:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "449", + "title": "Historic Preservation Commission", + "allDay": false, + "start": "2024-08-14T13:00:00", + "end": null, + "url": null, + "className": "Meeting Canceled", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "450", + "title": "Historic Preservation Commission", + "allDay": false, + "start": "2024-09-11T13:00:00", + "end": null, + "url": null, + "className": "Meeting Canceled", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "451", + "title": "Historic Preservation Commission", + "allDay": false, + "start": "2024-10-09T13:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "452", + "title": "Historic Preservation Commission", + "allDay": false, + "start": "2024-11-13T13:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "453", + "title": "Historic Preservation Commission", + "allDay": false, + "start": "2024-12-11T13:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "454", + "title": "Historic Preservation Commission", + "allDay": false, + "start": "2025-01-08T13:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "455", + "title": "Historic Preservation Commission", + "allDay": false, + "start": "2025-02-12T13:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "461", + "title": "Council", + "allDay": false, + "start": "2024-06-12T17:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "462", + "title": "Council", + "allDay": false, + "start": "2024-07-17T17:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "463", + "title": "Council", + "allDay": false, + "start": "2024-08-14T17:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "464", + "title": "Council", + "allDay": false, + "start": "2024-09-18T17:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "465", + "title": "Council", + "allDay": false, + "start": "2024-10-23T17:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "466", + "title": "Council", + "allDay": false, + "start": "2024-11-13T17:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "467", + "title": "Council", + "allDay": false, + "start": "2024-12-18T17:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "479", + "title": "Planning Board", + "allDay": false, + "start": "2024-06-04T10:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "480", + "title": "Planning Board", + "allDay": false, + "start": "2024-07-02T10:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "481", + "title": "Planning Board", + "allDay": false, + "start": "2024-08-06T10:00:00", + "end": null, + "url": null, + "className": "Meeting Canceled", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "482", + "title": "Planning Board", + "allDay": false, + "start": "2024-09-03T10:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "483", + "title": "Planning Board", + "allDay": false, + "start": "2024-10-01T10:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "484", + "title": "Planning Board", + "allDay": false, + "start": "2024-11-12T10:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "485", + "title": "Planning Board", + "allDay": false, + "start": "2024-12-03T10:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "486", + "title": "Planning Board", + "allDay": false, + "start": "2025-01-07T10:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "487", + "title": "Planning Board", + "allDay": false, + "start": "2025-02-04T10:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "493", + "title": "Zoning Board of Adjustment", + "allDay": false, + "start": "2024-06-27T10:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "494", + "title": "Zoning Board of Adjustment", + "allDay": false, + "start": "2024-07-25T10:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "495", + "title": "Zoning Board of Adjustment", + "allDay": false, + "start": "2024-08-22T10:00:00", + "end": null, + "url": null, + "className": "Meeting Canceled", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "496", + "title": "Zoning Board of Adjustment", + "allDay": false, + "start": "2024-09-26T10:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "497", + "title": "Zoning Board of Adjustment", + "allDay": false, + "start": "2024-10-24T10:00:00", + "end": null, + "url": null, + "className": "Meeting Canceled", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "498", + "title": "Zoning Board of Adjustment", + "allDay": false, + "start": "2024-11-21T10:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "499", + "title": "Zoning Board of Adjustment", + "allDay": false, + "start": "2024-12-19T10:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "500", + "title": "Zoning Board of Adjustment", + "allDay": false, + "start": "2025-01-23T10:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "501", + "title": "Zoning Board of Adjustment", + "allDay": false, + "start": "2025-02-27T10:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + }, + { + "id": "502", + "title": "Council - Special", + "allDay": false, + "start": "2024-12-03T12:00:00", + "end": null, + "url": null, + "className": "Meeting", + "editable": null, + "startEditable": null, + "durationEditable": null, + "rendering": null, + "overlap": null, + "constraint": null, + "source": null, + "color": null, + "backgroundColor": null, + "borderColor": null, + "textColor": null + } +] \ No newline at end of file diff --git a/tests/files/atconj_Atlantic_City_meeting_detail.json b/tests/files/atconj_Atlantic_City_meeting_detail.json new file mode 100644 index 0000000..e77f64f --- /dev/null +++ b/tests/files/atconj_Atlantic_City_meeting_detail.json @@ -0,0 +1,15 @@ +{ + "Meeting_ID": 429, + "Meeting_IsCanceled": false, + "Meeting_Title": null, + "Meeting_Location": "John F. Scarpa Academic Center - 3711 Atlantic Ave., Atlantic City, NJ 08401", + "Meeting_Description": "
\r\n", + "Meeting_AgendaPDF": "/_Content/pdf/agendas/2024-06-26-CITISTAT-Presentations.pdf", + "Meeting_MinutesPDF": "/_Content/pdf/minutes/2024-06-26-CITISTAT-Responses.pdf", + "Meeting_ResolutionPDF": null, + "Meeting_NoticePDF": null, + "Meeting_ExtraPDF": null, + "Meeting_ExtraPDFLabel": "Package", + "Meeting_DateTime": "2024-06-26T17:00:00", + "Meeting_Type": "CITISTAT Meeting" + } \ No newline at end of file diff --git a/tests/test_atconj_Atlantic_City.py b/tests/test_atconj_Atlantic_City.py new file mode 100644 index 0000000..bca1d5e --- /dev/null +++ b/tests/test_atconj_Atlantic_City.py @@ -0,0 +1,98 @@ +from datetime import datetime +from os.path import dirname, join + +import scrapy +from city_scrapers_core.constants import NOT_CLASSIFIED +from city_scrapers_core.utils import file_response +from freezegun import freeze_time + +from city_scrapers.spiders.atconj_Atlantic_City import AlanticCitySpider + +test_response = file_response( + join(dirname(__file__), "files", "atconj_Atlantic_City.json"), + url="https://www.acnj.gov/api/data/GetCalendarMeetings?end=06%2F30%2F2025+12:00+am&meetingTypeID=all&start=06%2F01%2F2024+12:00+am", # noqa +) + +meeting_detail_response = file_response( + join(dirname(__file__), "files", "atconj_Atlantic_City_meeting_detail.json"), + url="https://www.acnj.gov/api/data/GetMeeting?id=429", +) + +spider = AlanticCitySpider() + +freezer = freeze_time("2024-12-06") +freezer.start() + +parsed_items = [] +for req in spider.parse(test_response): + if isinstance(req, scrapy.Request): + meeting_detail_item = spider.parse_meeting( + meeting_detail_response, req.cb_kwargs["item"] + ) + parsed_items.extend(meeting_detail_item) + +freezer.stop() + + +def test_count(): + assert len(parsed_items) == 42 + + +def test_title(): + assert parsed_items[0]["title"] == "CITISTAT Meeting" + + +def test_description(): + assert parsed_items[0]["description"] == "" + + +def test_start(): + assert parsed_items[0]["start"] == datetime(2024, 6, 26, 17, 0) + + +def test_end(): + assert parsed_items[0]["end"] is None + + +def test_time_notes(): + assert parsed_items[0]["time_notes"] == "" + + +def test_all_day(): + assert parsed_items[0]["all_day"] is False + + +def test_id(): + assert parsed_items[0]["id"] == 429 + + +def test_status(): + assert parsed_items[0]["status"] == "passed" + + +def test_location(): + assert parsed_items[0]["location"] == { + "name": "John F. Scarpa Academic Center", + "address": "3711 Atlantic Ave., Atlantic City, NJ 08401", + } + + +def test_source(): + assert parsed_items[0]["source"] == "https://www.acnj.gov/calendar" + + +def test_links(): + assert parsed_items[0]["links"] == [ + { + "href": "https://www.acnj.gov/_Content/pdf/agendas/2024-06-26-CITISTAT-Presentations.pdf", # noqa + "title": "Agenda", + }, + { + "href": "https://www.acnj.gov/_Content/pdf/minutes/2024-06-26-CITISTAT-Responses.pdf", # noqa + "title": "Minutes", + }, + ] + + +def test_classification(): + assert parsed_items[0]["classification"] == NOT_CLASSIFIED From a41e14c48223b122a49a43d232fdbd76d96b26e8 Mon Sep 17 00:00:00 2001 From: h0und <79583632+msrezaie@users.noreply.github.com> Date: Fri, 6 Dec 2024 12:47:37 -0500 Subject: [PATCH 2/5] fixed typo --- city_scrapers/spiders/atconj_Atlantic_City.py | 2 +- tests/test_atconj_Atlantic_City.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/city_scrapers/spiders/atconj_Atlantic_City.py b/city_scrapers/spiders/atconj_Atlantic_City.py index f1dcc58..994c4a1 100644 --- a/city_scrapers/spiders/atconj_Atlantic_City.py +++ b/city_scrapers/spiders/atconj_Atlantic_City.py @@ -16,7 +16,7 @@ from dateutil.parser import parse -class AlanticCitySpider(CityScrapersSpider): +class AtlanticCitySpider(CityScrapersSpider): name = "atconj_Atlantic_City" agency = "Atlantic City" timezone = "America/New_York" diff --git a/tests/test_atconj_Atlantic_City.py b/tests/test_atconj_Atlantic_City.py index bca1d5e..93c782e 100644 --- a/tests/test_atconj_Atlantic_City.py +++ b/tests/test_atconj_Atlantic_City.py @@ -6,7 +6,7 @@ from city_scrapers_core.utils import file_response from freezegun import freeze_time -from city_scrapers.spiders.atconj_Atlantic_City import AlanticCitySpider +from city_scrapers.spiders.atconj_Atlantic_City import AtlanticCitySpider test_response = file_response( join(dirname(__file__), "files", "atconj_Atlantic_City.json"), @@ -18,7 +18,7 @@ url="https://www.acnj.gov/api/data/GetMeeting?id=429", ) -spider = AlanticCitySpider() +spider = AtlanticCitySpider() freezer = freeze_time("2024-12-06") freezer.start() From 0fa8dd3c821b9bd25845c92fb71408aac554384a Mon Sep 17 00:00:00 2001 From: h0und <79583632+msrezaie@users.noreply.github.com> Date: Fri, 6 Dec 2024 12:54:40 -0500 Subject: [PATCH 3/5] refactored a conditional logic - fixed typo --- city_scrapers/spiders/atconj_Atlantic_City.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/city_scrapers/spiders/atconj_Atlantic_City.py b/city_scrapers/spiders/atconj_Atlantic_City.py index 994c4a1..7e3819f 100644 --- a/city_scrapers/spiders/atconj_Atlantic_City.py +++ b/city_scrapers/spiders/atconj_Atlantic_City.py @@ -37,13 +37,13 @@ class AtlanticCitySpider(CityScrapersSpider): - `meeting_detail_url`: retrieves detailed information for each meeting using its ID. - Additionally, a third url `calender_source` is used as the source + Additionally, a third url `calendar_source` is used as the source field of the meeting since it is more user friendly to navigate than the api endpoints. """ meetings_url = "https://www.acnj.gov/api/data/GetCalendarMeetings?end=06%2F30%2F2025+12:00+am&meetingTypeID=all&start=06%2F01%2F2024+12:00+am" # noqa meeting_detail_url = "https://www.acnj.gov/api/data/GetMeeting?id=" - calender_source = "https://www.acnj.gov/calendar" + calendar_source = "https://www.acnj.gov/calendar" def start_requests(self): yield scrapy.Request(url=self.meetings_url, method="GET", callback=self.parse) @@ -74,7 +74,7 @@ def parse_meeting(self, response, item): time_notes="", location=self._parse_location(meeting_detail), links=self._parse_links(meeting_detail), - source=self.calender_source, + source=self.calendar_source, ) meeting["status"] = self._get_status(meeting_detail) @@ -86,8 +86,8 @@ def _parse_classification(self, item): for classification in CLASSIFICATIONS: if classification.lower() in item["Meeting_Type"].lower(): return classification - elif "council" in item["Meeting_Type"].lower(): - return CITY_COUNCIL + if "council" in item["Meeting_Type"].lower(): + return CITY_COUNCIL return NOT_CLASSIFIED def _parse_location(self, item): From 6fca950494c25d00265e9130d039318b27c3b77c Mon Sep 17 00:00:00 2001 From: msrezaie <79583632+msrezaie@users.noreply.github.com> Date: Fri, 13 Dec 2024 13:37:53 -0500 Subject: [PATCH 4/5] made improvements - made the start and end parameters of the API url dynamic --- city_scrapers/spiders/atconj_Atlantic_City.py | 30 +++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/city_scrapers/spiders/atconj_Atlantic_City.py b/city_scrapers/spiders/atconj_Atlantic_City.py index 7e3819f..58464fd 100644 --- a/city_scrapers/spiders/atconj_Atlantic_City.py +++ b/city_scrapers/spiders/atconj_Atlantic_City.py @@ -14,6 +14,8 @@ from city_scrapers_core.items import Meeting from city_scrapers_core.spiders import CityScrapersSpider from dateutil.parser import parse +from dateutil.relativedelta import relativedelta +from pytz import timezone class AtlanticCitySpider(CityScrapersSpider): @@ -41,15 +43,32 @@ class AtlanticCitySpider(CityScrapersSpider): field of the meeting since it is more user friendly to navigate than the api endpoints. """ - meetings_url = "https://www.acnj.gov/api/data/GetCalendarMeetings?end=06%2F30%2F2025+12:00+am&meetingTypeID=all&start=06%2F01%2F2024+12:00+am" # noqa + meetings_url = "https://www.acnj.gov/api/data/GetCalendarMeetings?end={endDT}&meetingTypeID=all&start={startDT}" # noqa meeting_detail_url = "https://www.acnj.gov/api/data/GetMeeting?id=" calendar_source = "https://www.acnj.gov/calendar" def start_requests(self): - yield scrapy.Request(url=self.meetings_url, method="GET", callback=self.parse) + """ + The date parameters for the API endpoint are open ended. So to + include a good chunk of past and future meetings, start and end + dates are calculated based on the current date and a few months + in the past and future. + + The end date is set to 3 months in the future and the start date + is set to 8 months in the past (arbitrarily chosen). + """ + now = datetime.now() + + start_dt = now - relativedelta(months=8) + end_dt = now + relativedelta(months=3) + + start_date = start_dt.strftime("%m%%2F%d%%2F%Y+12:00+am") + end_date = end_dt.strftime("%m%%2F%d%%2F%Y+12:00+am") + url = self.meetings_url.format(startDT=start_date, endDT=end_date) + yield scrapy.Request(url=url, method="GET", callback=self.parse) def parse(self, response): - data = json.loads(response.text) + data = response.json() for item in data: meeting_id = item["id"] meeting_detail_url = self.meeting_detail_url + meeting_id @@ -113,15 +132,16 @@ def _parse_links(self, item): titles = ["Agenda", "Minutes", "Notice"] links = [ - {"title": title, "href": urljoin(base_url, item.get(key, ""))} + {"title": title, "href": urljoin(base_url, item.get(key))} for title, key in zip(titles, keys) if item.get(key) ] return links def _get_status(self, item): + eastern = timezone(self.timezone) if item["Meeting_IsCanceled"]: return CANCELLED - if parse(item["Meeting_DateTime"]) < datetime.now(): + if parse(item["Meeting_DateTime"]).astimezone(eastern) < datetime.now(eastern): return PASSED return TENTATIVE From 68a85bc6d23dab262336fe798e97478199b774c1 Mon Sep 17 00:00:00 2001 From: msrezaie <79583632+msrezaie@users.noreply.github.com> Date: Wed, 18 Dec 2024 11:20:31 -0500 Subject: [PATCH 5/5] removed unused package --- city_scrapers/spiders/atconj_Atlantic_City.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/city_scrapers/spiders/atconj_Atlantic_City.py b/city_scrapers/spiders/atconj_Atlantic_City.py index 58464fd..a5daad3 100644 --- a/city_scrapers/spiders/atconj_Atlantic_City.py +++ b/city_scrapers/spiders/atconj_Atlantic_City.py @@ -1,4 +1,3 @@ -import json from datetime import datetime from urllib.parse import urljoin @@ -81,7 +80,7 @@ def parse(self, response): ) def parse_meeting(self, response, item): - meeting_detail = json.loads(response.text) + meeting_detail = response.json() meeting = Meeting( title=item["title"],