Skip to content

Commit

Permalink
Merge pull request #84 from City-Bureau/fix-cle-metro-sd
Browse files: browse the repository at this point in the history
🕷️ Fix spider: Cleveland Metropolitan School District
  • Loading branch information
SimmonsRitchie authored Feb 5, 2024
2 parents: 757a2af + 4eb2f3f; commit: 0452743
Showing 1 changed file with 9 additions and 6 deletions.
15 changes: 9 additions & 6 deletions city_scrapers/spiders/cle_metro_school_district.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -11,6 +11,7 @@ class CleMetroSchoolDistrictSpider(CityScrapersSpider):
agency = "Cleveland Metropolitan School District"
timezone = "America/Detroit"
start_urls = ["https://www.boarddocs.com/oh/cmsd/board.nsf/XML-ActiveMeetings"]
custom_settings = {"ROBOTSTXT_OBEY": False}

def parse(self, response):
"""
Expand Down Expand Up @@ -70,12 +71,14 @@ def _parse_start(self, item):

def _parse_location(self, item):
"""Parse or generate location."""
loc_item = (
item.xpath("./category[@order='1']/agendaitems/item/name/text()")
.extract_first()
.strip()
)
loc_str = re.sub(r"^\d{1,2}\.\d{1,2} ?", "", loc_item)
loc_item = item.xpath("./category[@order='1']/agendaitems/item/name/text()")
if not loc_item:
return {
"address": "",
"name": "TBD",
}
loc_raw_str = loc_item.extract_first().strip()
loc_str = re.sub(r"^\d{1,2}\.\d{1,2} ?", "", loc_raw_str)
loc_parts = re.split(r", ?(?=\d{2})", loc_str, 1)
if len(loc_parts) == 2:
return {
Expand Down

0 comments on commit 0452743

Please sign in to comment.