from datetime import datetime, time

from city_scrapers_core.constants import COMMISSION, PASSED, TENTATIVE
from city_scrapers_core.items import Meeting
from city_scrapers_core.spiders import CityScrapersSpider


class CleCpcSpider(CityScrapersSpider):
    """Spider for the Cleveland Community Police Commission event calendar.

    The calendar page is scraped only for links to event detail pages; every
    meeting field is then read from the detail page itself.
    """

    name = "cle_cpc"
    agency = "Cleveland Community Police Commission"
    timezone = "America/Detroit"
    start_urls = ["https://clecpc.org/get-involved/calendar/"]
    # Static link attached to every meeting.
    links = [
        {
            "title": "Meeting agendas and minutes",
            "href": "https://clecpc.org/resources/meeting-agendas-minutes/",
        }
    ]

    def parse(self, response):
        """
        Parses meeting calendar page. Because this website has a variety of
        quirks that can lead to duplicate data we only scrape the first
        unique link for each meeting.

        Yields one request per unique event link, handled by
        ``_parse_detail``.

        Raises:
            ValueError: if the expected calendar containers are not present.
        """
        calendars = response.css(".mec-wrap.mec-skin-grid-container")
        # NOTE(review): the original check requires at least two grid
        # containers even though only the first is read; kept as-is, confirm
        # whether a single container should be accepted.
        if len(calendars) <= 1:
            # A bare string cannot be raised; use a real exception type.
            raise ValueError("Meetings calendar not found")

        hrefs = (
            event.css("h4.mec-event-title a::attr(href)").get()
            for event in calendars[0].css("article.mec-event-article")
        )
        # dict.fromkeys de-duplicates while preserving first-seen order;
        # events without an href are skipped instead of following ``None``.
        for link in dict.fromkeys(href for href in hrefs if href):
            yield response.follow(link, callback=self._parse_detail)

    def _parse_detail(self, response):
        """
        Parse details from the event detail page.

        Yields a single ``Meeting``. Nothing is yielded (and a warning is
        logged) when the page has no parseable date, because a meeting
        without a start cannot be built.
        """
        date = self._parse_date(response)
        if date is None:
            self.logger.warning(f"Skipping {response.url}: no parseable date")
            return

        start_time, end_time = self._parse_start_end_time(response)
        meeting = Meeting(
            title=self._parse_title(response),
            description=self._parse_description(response),
            classification=COMMISSION,
            start=self._gen_datetime(date, start_time),
            # With no end time, leave ``end`` unset rather than defaulting to
            # midnight, which would place the end before the start.
            end=(
                self._gen_datetime(date, end_time)
                if end_time is not None
                else None
            ),
            time_notes="",
            all_day=False,
            location=self._parse_location(response),
            links=self.links,
            source=self._parse_source(response),
        )
        meeting["status"] = self._get_status(meeting)
        meeting["id"] = self._get_id(meeting)
        yield meeting

    def _parse_title(self, response):
        """Parse meeting title; returns None when the title node is missing."""
        title = response.css(".mec-single-title::text").get()
        if not title:
            return None
        return title.strip()

    def _parse_description(self, response):
        """
        Extracts and returns all text within the mec-single-event-description
        element, joined by single spaces with blank fragments dropped.
        """
        description_parts = response.css(
            ".mec-single-event-description *::text"
        ).getall()
        stripped_parts = (part.strip() for part in description_parts)
        return " ".join(part for part in stripped_parts if part)

    def _parse_date(self, response):
        """
        Parse date from calendar element.

        Returns a ``datetime`` at midnight of the event date, or None when
        the date label is missing or not in the "Feb 01 2024" format.
        """
        date_str = response.css(
            ".mec-single-event-date .mec-start-date-label::text"
        ).get()
        if not date_str or not date_str.strip():
            return None
        try:
            return datetime.strptime(date_str.strip(), "%b %d %Y")
        except ValueError:
            self.logger.warning(f"Unexpected date format: {date_str}")
            return None

    def _parse_start_end_time(self, response):
        """
        Parses start and end times from calendar element.

        Returns a ``(start_time, end_time)`` tuple of ``datetime.time``
        objects. A lone time such as "9:00 am" yields ``(start, None)``.
        If times are missing or cannot be parsed, returns None for both.
        """
        time_str = response.css(
            ".mec-single-event-time .mec-events-abbr::text"
        ).get()
        if not time_str or not time_str.strip():
            return None, None

        # The site separates times with either a hyphen or an en dash.
        pieces = time_str.replace("\u2013", "-").split("-")
        time_parts = [piece.strip() for piece in pieces if piece.strip()]
        if len(time_parts) not in (1, 2):
            self.logger.warning(f"Unexpected time format: {time_str}")
            return None, None

        try:
            parsed = [
                datetime.strptime(part, "%I:%M %p").time() for part in time_parts
            ]
        except ValueError:
            self.logger.warning(f"Unexpected time format: {time_str}")
            return None, None

        start_time = parsed[0]
        end_time = parsed[1] if len(parsed) == 2 else None
        return start_time, end_time

    def _gen_datetime(self, date, time_obj):
        """
        Generate a datetime object from a date and a time object.
        If time_obj is None, set the time to midnight.
        """
        if time_obj is None:
            time_obj = time(0, 0)  # Midnight
        return datetime.combine(date, time_obj)

    def _parse_location(self, response):
        """
        Parses location from calendar element.

        Missing name or address nodes produce empty strings instead of
        raising on ``None.strip()``.
        """
        org_name = (response.css(".org::text").get() or "").strip()
        address = (response.css(".mec-address::text").get() or "").strip()
        return {
            "name": org_name,
            "address": address,
        }

    def _parse_source(self, response):
        """Return the URL of the detail page as the meeting source."""
        return response.url

    def _get_status(self, item, text=""):
        """
        Overrides the parent class method because basing a meeting's
        cancellation status on the title and time of the meeting is not
        reliable. Instead, we'll only focus on the meeting's start time.

        ``text`` is unused; it is accepted only so the override stays
        call-compatible with the parent method (presumed to take an optional
        ``text`` argument - TODO confirm against city_scrapers_core).
        """
        if item["start"] < datetime.now():
            return PASSED
        return TENTATIVE
+
+
+
+
+ +
+ +
+
+
+
+
+
+
+
+

CPC & Community Events Calendar

+
+
+
+

Your involvement is needed to shape how policing looks in Cleveland. Participate in CPC meetings, join a CPC work group, take part in events hosted by local community organizations and attend police district meetings to share your thoughts about policing and related issues in your community.

+
+
+
+
+
+
+
+
+
+
+

CPC Meetings and Events

+
+
+
+
+ + +
+
+
+
+
+ +
+
+ No event found!
+ +
Load More
+ +
+
+
+
+
+
+
+
+
+
+
+

Community Events

+
+
+
+
+ + +
+
+
+
+
+ +
+
+ No event found!
+ +
Load More
+ +
+
+
+
+
+
+
+
+
+
+
+

Full Event Calendar

+
+
+
+

+ +
+
+ + +
+
+
+
+
+
+
+
+ Monthly Weekly Daily List
+
+
+
+ +
+
+ + +
+
+
+

January 2024

+
+ +
+
SU
MO
TU
WE
TH
FR
SA
+
31
1
2
3
+
+
Cancelled: Rules Committee Meeting January 3rd
6:00 pm – 8:00 pm
+ + + + +The Rules Committee Meeting for January has been cancelled/ Postponed. A new date will be posted when the committee reschedules. + + + +Rules CommitteeMeeting Dates: Every 1st Wednesday of the MonthTime: 6:00pm – 8:00pmLocation: CPC Office, 3631 Perkins Avenue, 4th Floor, Cleveland, OH 44114Chair: Teri Wang Mem , …
+
+
4
+
+
1st District – District Policing Committee Meeting
6:00 pm – 7:00 pm
+
+
5
6
7
8
+
+
Budget & Grants Committee Meeting
6:30 pm – 8:30 pm
+Budget & Grants CommitteeDate: Monday, January 8, 2024Time: 6:30pm – 8:30pmLocation: CPC Office, 3631 Perkins Avenue, 4th Floor, Cleveland, OH 44114Public Livestream: http://www.youtube.com/@ClevelandCPCJan. 8th Meeting Agenda (pdf)Chair: Dr. John Adams Members: Alana Garrett-Ferguson, Cait Kennedy, Piet van Lier, , …
+
+
9
+
+
Civilian Police Review Board (CPRB) Meeting
9:00 am
+Meeting Dates: Second Tuesday of the month Time: 9:00am until all cases are heardLocation: https://cityclevelandoh.webex.com/cityclevelandoh/j.php?MTID=m1d33399dc7219eb4e936d8c4d2e952c5Meetings are streamed live on YouTube: http://www.youtube.com/@clevelandopsThe CPRB currently meets on the second Tuesday of every month at 9 , …
+
+
+
+
2nd District – District Policing Committee Meeting
6:00 pm – 8:00 pm
+
+
10
+
+
Police Training Committee –
6:00 pm – 8:00 pm
+Meeting Name: CPC Police Training CommitteeDate(s): Every 2nd Wednesday of the MonthTime: 6:00pm – 8:00pmLocation: CPC Office, 3631 Perkins Avenue, 4th Floor, Cleveland, OH 44114Livestream: http://www.youtube.com/@ClevelandCPCChair: John AdamsMembers: James Chura, Charles Donaldson, Gregory Reaves + + + + + , …
+
+
11
+
+
Behavioral Health & Crisis Intervention Work Group- January 11th Cancelled
+CPC Behavioral Health & Crisis Intervention Work Group. Chair: Commissioner Shandra Benito + + + + +January 11th Meeting has been cancelled. Regular meeting schedule: 2nd Thursday of the Month6:00pm – 8:00pm In Person or at the CPC Office 3631 Perkins Avenue, 4th Floor or Virtually on Microsoft Teams. + + + + +Work g , …
+
+
12
13
14
15
16
+
+
3rd District – District Policing Committee Meeting
6:00 pm – 7:00 pm
+
+
+
+
Police Accountability Committee
6:00 pm – 8:00 pm
+Police Accountability CommitteeDate: Tuesday, January 16, 2024Time: 6:00pm – 8:00pmLocation: CPC Office, 3631 Perkins Avenue, 4th Floor, Cleveland, OH 44114Livestream: http://www.youtube.com/@ClevelandCPCChair: Teri WangMembers: John Adams, Shandra Benito, James Chura, Sharena Zayed + + + + + , …
+
+
17
+
+
5th District Policing Committee Meeting
6:00 pm – 7:00 pm
+Meeting Dates: Third Wednesday of the MonthTime: 6:00pm – 7:00pmLocation: Five Pointe Community Center, 813 E. 152nd Street, Cleveland, OH 44110 + + + +Join the Fifth District Cleveland Police Commander, Supervisors and Officers at the 5th District’s monthly community meetings. This is an opportunity , …
+
+
+
+
Police Policy Committee
6:00 pm – 8:00 pm
+Police Policy CommitteeDate(s): Every 3rd Wednesday of the MonthTime: 6:00pm – 8:00pmLocation: CPC Office, 3631 Perkins Avenue, 4th Floor, Cleveland, OH 44114Livestream: http://www.youtube.com/@ClevelandCPC* The November meeting has been moved to Thursday, Nov. 9th * Meeting AgendaChair: Piet van LierMembers: Shandra B , …
+
+
18
19
20
21
22
23
24
+
+
Full Commission Meeting
6:00 pm
+Full Commission Meeting January 24th. + + + +6 p.m. – 8:30 p.m. + + + +3631 Perkins Ave. 4th Floor + + + +Agenda to be Determined + , …
+
+
+
+
4th District Policing Committee Meeting
7:00 pm – 8:00 pm
+
+
25
+
+
Behavioral Health & Crisis Intervention Work Group- January 11th Cancelled
+CPC Behavioral Health & Crisis Intervention Work Group. Chair: Commissioner Shandra Benito + + + + +January 11th Meeting has been cancelled. Regular meeting schedule: 2nd Thursday of the Month6:00pm – 8:00pm In Person or at the CPC Office 3631 Perkins Avenue, 4th Floor or Virtually on Microsoft Teams. + + + + +Work g , …
+
+
+
+
Police Discipline Work Group
6:30 pm – 8:30 pm
Work Group to review the police disciplinary policy, disciplinary matrix and police manual of rules  + , …
+
+
26
27
28
29
30
31
1
2
3
+
+ +
+
+ +
+ +

+
+
+
+
+
+
+
+ +
+
+
+
+ +
+
+
+
+ +
+
+ + + +
+
+
+ + +
+
+
+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/files/cle_cpc_detail.html b/tests/files/cle_cpc_detail.html new file mode 100644 index 0000000..442f798 --- /dev/null +++ b/tests/files/cle_cpc_detail.html @@ -0,0 +1,1075 @@ + + + + + + + + + + + + + Police Discipline Work Group - Cleveland Community Police Commission + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+
+ + + + +
+
+
+
+

Police Discipline Work Group

+

Work Group to review the police disciplinary policy, disciplinary matrix and police manual of rules 

+
+
+ + + + +
+ + + + + + + + + + + +
+
+ +
+ +
+ +
+
+ +

Date

+
+
Feb 01 2024
+
+
+ +
+ +

Time

+ +
+
6:30 pm - 8:30 pm
+
+
+ + + + + + + + +
+ +

Location

+
+
CPC Offices
+
3631 Perkins Ave., Cleveland 4th Fl
+ +
+
+ +
+ +
Category
+
+
+
+
+

Organizer

+
+
+ +
Cleveland Community Police Commission
+
+
+ +
Website
+ http://clecpc.org +
+
+
+ + + +
+ + + + + + + + +
+ +
+ +
+
+ + + +
+
+ + + + + + + +
+
+ + +
+ + +
+ + + + + +
+

Discussion: Leave a Comment or Question

+ + + + +
+
+ + + + + +
+ + +
+
+ + + +
+
+
+ + +
+
+
+ + +
"""Tests for the Cleveland Community Police Commission spider."""
from datetime import datetime
from os.path import dirname, join

import pytest  # noqa
from city_scrapers_core.constants import COMMISSION, TENTATIVE
from city_scrapers_core.utils import file_response
from freezegun import freeze_time

from city_scrapers.spiders.cle_cpc import CleCpcSpider

# Representative detail-page URL on the agency's own site. The fixture was
# previously served under an unrelated county URL copied from another spider.
# NOTE(review): confirm the real event URL if the exact value matters.
DETAIL_URL = "https://clecpc.org/events/police-discipline-work-group/"

test_response = file_response(
    join(dirname(__file__), "files", "cle_cpc.html"),
    url="https://clecpc.org/get-involved/calendar/",
)
test_detail_response = file_response(
    join(dirname(__file__), "files", "cle_cpc_detail.html"),
    url=DETAIL_URL,
)
spider = CleCpcSpider()

# Freeze time before the fixture's Feb 1 2024 meeting so its status is
# TENTATIVE; the context manager unfreezes even if parsing raises.
with freeze_time("2024-01-29"):
    parsed_items = list(spider.parse(test_response))
    parsed_item = next(spider._parse_detail(test_detail_response), None)


def test_count():
    assert len(parsed_items) == 4


def test_title():
    assert parsed_item["title"] == "Police Discipline Work Group"


def test_description():
    assert (
        parsed_item["description"][0:51]
        == "Work Group to review the police disciplinary policy"
    )


def test_start():
    assert parsed_item["start"] == datetime(2024, 2, 1, 18, 30)


def test_end():
    assert parsed_item["end"] == datetime(2024, 2, 1, 20, 30)


def test_time_notes():
    assert parsed_item["time_notes"] == ""


def test_id():
    assert parsed_item["id"] == "cle_cpc/202402011830/x/police_discipline_work_group"


def test_status():
    assert parsed_item["status"] == TENTATIVE


def test_location():
    assert parsed_item["location"] == {
        "name": "CPC Offices",
        "address": "3631 Perkins Ave., Cleveland 4th Fl",
    }


def test_source():
    assert parsed_item["source"] == DETAIL_URL


def test_links():
    assert parsed_item["links"] == [
        {
            "title": "Meeting agendas and minutes",
            "href": "https://clecpc.org/resources/meeting-agendas-minutes/",
        }
    ]


def test_classification():
    assert parsed_item["classification"] == COMMISSION


def test_all_day():
    assert parsed_item["all_day"] is False