diff --git a/city_scrapers/spiders/cuya_audit.py b/city_scrapers/spiders/cuya_audit.py index 143afe8..ba672ac 100644 --- a/city_scrapers/spiders/cuya_audit.py +++ b/city_scrapers/spiders/cuya_audit.py @@ -1,27 +1,13 @@ -import re - from city_scrapers_core.constants import ADVISORY_COMMITTEE from city_scrapers_core.spiders import CityScrapersSpider -from city_scrapers.mixins import CuyaCountyMixin +from city_scrapers.mixins import CuyaCountyMixin2 -class CuyaAuditSpider(CuyaCountyMixin, CityScrapersSpider): +class CuyaAuditSpider(CuyaCountyMixin2, CityScrapersSpider): name = "cuya_audit" agency = "Cuyahoga County Audit Committee" - start_urls = ["http://bc.cuyahogacounty.us/en-US/Audit-Committee.aspx"] + start_urls = [ + "https://cuyahogacounty.gov/boards-and-commissions/board-details/external/audit-committee" # noqa + ] classification = ADVISORY_COMMITTEE - location = { - "name": "County Headquarters, 4-407 Conference Room B", - "address": "2079 East 9th St Cleveland, OH 44115", - } - - def _parse_location(self, response): - detail_strs = response.css("blockquote dd::text").extract() - loc_str = None - for detail_str in detail_strs: - if re.search(r"\d{3}", detail_str): - loc_str = re.sub(r"\s+", " ", detail_str).strip() - if not loc_str or "2079" in loc_str: - return self.location - return {"name": "", "address": loc_str} diff --git a/tests/files/cuya_audit.html b/tests/files/cuya_audit.html index 5ad1d79..8eac8b9 100644 --- a/tests/files/cuya_audit.html +++ b/tests/files/cuya_audit.html @@ -1,368 +1,731 @@ - - -