diff --git a/integrations/aew.py b/integrations/aew.py new file mode 100644 index 0000000..bb7c9b7 --- /dev/null +++ b/integrations/aew.py @@ -0,0 +1,254 @@ +from typing import List +from datetime import datetime, timedelta +from zoneinfo import ZoneInfo +import re +import logging + +import requests +from bs4 import BeautifulSoup +from fastapi import HTTPException + +from base import CalendarBase, Event, IntegrationBase + +logger = logging.getLogger(__name__) + + +AEW_EVENTS_URL = "https://www.allelitewrestling.com/aew-events" + +# Timezone mappings for AEW events +TIMEZONE_MAP = { + "PT": "America/Los_Angeles", + "MT": "America/Denver", + "CT": "America/Chicago", + "ET": "America/New_York", + "EST": "America/New_York", + "AEDT": "Australia/Sydney", + "AEST": "Australia/Sydney", + "GMT": "Europe/London", + "BST": "Europe/London", +} + +# Month pattern for date matching +MONTHS = ["JANUARY", "FEBRUARY", "MARCH", "APRIL", "MAY", "JUNE", + "JULY", "AUGUST", "SEPTEMBER", "OCTOBER", "NOVEMBER", "DECEMBER"] + + +def parse_aew_datetime(date_str: str, time_str: str) -> datetime | None: + """ + Parse AEW date and time strings into a UTC datetime. 
+ + Args: + date_str: e.g., "FEBRUARY 11, 2026" + time_str: e.g., "4:30pm PT" or "TBA" + + Returns: + datetime in UTC, or None if parsing fails + """ + try: + # Parse the date + date_str = date_str.strip().upper() + date = datetime.strptime(date_str, "%B %d, %Y") + + # Handle TBA times - default to 8 PM ET + if not time_str or time_str.upper() == "TBA": + time_str = "8:00pm ET" + + time_str = time_str.strip() + + # Extract timezone abbreviation from end of time string + parts = time_str.split() + if len(parts) >= 2: + tz_abbr = parts[-1].upper() + time_part = " ".join(parts[:-1]) + else: + tz_abbr = "ET" + time_part = time_str + + # Parse time (e.g., "4:30pm" or "8:00pm") + time_part = time_part.lower().replace(".", "") + if ":" in time_part: + time_obj = datetime.strptime(time_part, "%I:%M%p") + else: + time_obj = datetime.strptime(time_part, "%I%p") + + # Combine date and time + local_dt = datetime( + date.year, date.month, date.day, + time_obj.hour, time_obj.minute, 0 + ) + + # Convert to UTC + tz_name = TIMEZONE_MAP.get(tz_abbr, "America/New_York") + local_tz = ZoneInfo(tz_name) + local_dt = local_dt.replace(tzinfo=local_tz) + utc_dt = local_dt.astimezone(ZoneInfo("UTC")).replace(tzinfo=None) + + return utc_dt + + except Exception: + return None + + +def get_aew_events() -> List[dict]: + """ + Fetch all events from AEW.com events page. + + The AEW website is Wix-based and JavaScript-rendered. 
+ We fetch the page and parse the text content which follows a pattern: + + DATE + EVENT NAME + City + EVENT DETAILS > + LOCATION + Venue + VENUE NAME + Time + TIME VALUE + BUY TICKETS + """ + try: + response = requests.get(AEW_EVENTS_URL, timeout=30, headers={ + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" + }) + if response.status_code != 200: + return [] + + soup = BeautifulSoup(response.text, "html.parser") + + # Get all text content + text = soup.get_text(separator="\n") + lines = [line.strip() for line in text.split("\n") if line.strip()] + + events = [] + seen_uids = set() + + i = 0 + while i < len(lines): + line = lines[i] + + # Look for date pattern: MONTH DAY, YEAR + date_match = re.match(r"^(" + "|".join(MONTHS) + r")\s+\d{1,2},\s+\d{4}$", line.upper()) + + if date_match: + date_str = line + event_name = None + city = None + venue = None + time_str = None + + # Look ahead for event details (up to 15 lines or next date) + j = i + 1 + while j < min(i + 15, len(lines)): + next_line = lines[j] + + # Stop if we hit another date + if re.match(r"^(" + "|".join(MONTHS) + r")\s+\d{1,2},\s+\d{4}$", next_line.upper()): + break + + # Skip navigation/label lines + if next_line in ["City", "Venue", "Time", "EVENT DETAILS >", "BUY TICKETS", + "ON SALE: TBA", "Empty heading", "UPCOMING EVENTS"]: + j += 1 + continue + + # Event name contains "AEW" + if not event_name and "AEW" in next_line: + event_name = next_line + + # City/location pattern: "City, STATE" or "City, Country" + elif not city and re.match(r"^[A-Za-z\s]+,\s*[A-Za-z\s]+$", next_line) and len(next_line) < 50: + city = next_line + + # Venue names typically contain these words + elif not venue and any(v in next_line for v in + ["Arena", "Center", "Centre", "Stadium", "Auditorium", "Ballroom", + "Civic", "House", "Entertainment", "Bank"]): + venue = next_line + + # Time pattern: "H:MMpm TZ" or "H:MMam TZ" + elif not 
time_str and re.match(r"^\d{1,2}:\d{2}[ap]m\s+[A-Z]+$", next_line, re.I): + time_str = next_line + + j += 1 + + # Create event if we have minimum data + if event_name and date_str: + # Generate UID from event name and date + slug = re.sub(r"[^a-z0-9]+", "-", event_name.lower()).strip("-") + date_slug = re.sub(r"[^a-z0-9]+", "-", date_str.lower()).strip("-") + uid = f"aew-{slug}-{date_slug}" + + if uid not in seen_uids: + seen_uids.add(uid) + + # Parse datetime + start = parse_aew_datetime(date_str, time_str or "TBA") + if start: + location = "" + if venue and city: + location = f"{venue}, {city}" + elif city: + location = city + elif venue: + location = venue + + events.append({ + "uid": uid, + "title": event_name, + "start": start, + "location": location, + "description": "", + }) + + i = j # Skip to where we left off + else: + i += 1 + + return events + + except Exception as e: + logger.warning(f"AEW scraper error: {e}") + return [] + + +class AewCalendar(CalendarBase): + def fetch_events(self) -> List[Event]: + """Fetch AEW events from allelitewrestling.com.""" + try: + event_data = get_aew_events() + events: List[Event] = [] + + for data in event_data: + # AEW shows typically last 2-3 hours, PPVs 4+ hours + is_ppv = any(x in data["title"].lower() for x in + ["revolution", "dynasty", "double or nothing", "forbidden door", + "all in", "all out", "full gear", "worlds end", "grand slam"]) + duration = timedelta(hours=4) if is_ppv else timedelta(hours=3) + + events.append( + Event( + uid=data["uid"], + title=data["title"], + start=data["start"], + end=data["start"] + duration, + all_day=False, + description=data["description"], + location=data["location"], + ) + ) + + # Sort by start time + events.sort(key=lambda e: e.start) + self.events = events + return events + + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) from e + + +class AewIntegration(IntegrationBase): + def fetch_calendars(self, *args, 
**kwargs): + return None diff --git a/integrations/ufc.py b/integrations/ufc.py index e289da4..a1635c6 100644 --- a/integrations/ufc.py +++ b/integrations/ufc.py @@ -120,20 +120,66 @@ def get_event_details(event_url: str) -> dict | None: location = " ".join(venue_el.get_text().split()) break - # Build description with fight card + # Build description with fight card and broadcast info description_parts = [] - # Main card fights - main_card = soup.select_one("#main-card") - if main_card: - fights = main_card.select(".c-listing-fight") - if fights: - description_parts.append("Main Card:") - for fight in fights[:6]: # Limit to 6 fights - red_corner = fight.select_one(".c-listing-fight__corner--red .c-listing-fight__corner-name") - blue_corner = fight.select_one(".c-listing-fight__corner--blue .c-listing-fight__corner-name") - if red_corner and blue_corner: - description_parts.append(f" {red_corner.get_text(strip=True)} vs {blue_corner.get_text(strip=True)}") + # Find fight card sections (Main Card, Prelims, etc.) + broadcaster_containers = soup.select(".c-event-fight-card-broadcaster__container") + + for container in broadcaster_containers: + # Get card title (Main Card, Prelims, etc.) + card_title_el = container.select_one(".c-event-fight-card-broadcaster__card-title strong") + card_title = card_title_el.get_text(strip=True) if card_title_el else "Fight Card" + + # Get broadcaster/streaming info + broadcaster_link = container.select_one(".c-event-fight-card-broadcaster__link a") + broadcaster = "" + if broadcaster_link: + broadcaster = broadcaster_link.get_text(strip=True) + + # Get the fights for this card section + # The fights follow the broadcaster container in the next section element
+ next_section = container.find_next_sibling("section", class_="l-listing--stacked--full-width") + if not next_section: + # Try finding it as next element + next_section = container.find_next("section", class_="l-listing--stacked--full-width") + + fights_text = [] + if next_section: + fights = next_section.select(".c-listing-fight") + for fight in fights[:5]: # Limit to 5 fights per card + red_name = fight.select_one(".c-listing-fight__corner-name--red") + blue_name = fight.select_one(".c-listing-fight__corner-name--blue") + weight_class = fight.select_one(".c-listing-fight__class-text") + + if red_name and blue_name: + # Extract first and last names with proper spacing + red_given = red_name.select_one(".c-listing-fight__corner-given-name") + red_family = red_name.select_one(".c-listing-fight__corner-family-name") + blue_given = blue_name.select_one(".c-listing-fight__corner-given-name") + blue_family = blue_name.select_one(".c-listing-fight__corner-family-name") + + if red_given and red_family and blue_given and blue_family: + red = f"{red_given.get_text(strip=True)} {red_family.get_text(strip=True)}" + blue = f"{blue_given.get_text(strip=True)} {blue_family.get_text(strip=True)}" + else: + # Fallback to full text with space separator + red = " ".join(red_name.get_text().split()) + blue = " ".join(blue_name.get_text().split()) + + fight_str = f"• {red} vs {blue}" + if weight_class: + weight = weight_class.get_text(strip=True).replace(" Bout", "") + fight_str += f" ({weight})" + fights_text.append(fight_str) + + if fights_text: + header = card_title + if broadcaster: + header += f" - {broadcaster}" + description_parts.append(header) + description_parts.extend(fights_text) + description_parts.append("") # Empty line between sections # Generate a unique ID from the URL event_slug = event_url.split("/event/")[-1].split("?")[0] @@ -144,7 +190,7 @@ def get_event_details(event_url: str) -> dict | None: "title": title, "start": start_time, "location": location, - 
"description": "\n".join(description_parts) if description_parts else "", + "description": "\n".join(description_parts).strip() if description_parts else "", } except Exception: diff --git a/main.py b/main.py index 2b99a68..8104d67 100644 --- a/main.py +++ b/main.py @@ -15,6 +15,7 @@ from integrations.thetvdb import TheTvDbIntegration, TheTvDbCalendar from integrations.wwe import WweIntegration, WweCalendar from integrations.ufc import UfcIntegration, UfcCalendar +from integrations.aew import AewIntegration, AewCalendar from integrations.shows import ShowsIntegration, ShowsCalendar from integrations.releases import ReleasesIntegration, ReleasesCalendar from integrations.sportsdb import SportsDbIntegration, SportsDbCalendar @@ -113,6 +114,14 @@ calendar_class=UfcCalendar, multi_calendar=False, ), + AewIntegration( + id="aew", + name="AEW", + description="AEW events scraped directly from allelitewrestling.com", + base_url="https://www.allelitewrestling.com", + calendar_class=AewCalendar, + multi_calendar=False, + ), ShowsIntegration( id="shows", name="TV Shows", diff --git a/tests/integrations/test_aew.py b/tests/integrations/test_aew.py new file mode 100644 index 0000000..3144543 --- /dev/null +++ b/tests/integrations/test_aew.py @@ -0,0 +1,298 @@ +import pytest +from datetime import datetime +from unittest.mock import patch, MagicMock + +from integrations.aew import parse_aew_datetime, get_aew_events, AewCalendar + + +class TestParseAewDatetime: + """Tests for parse_aew_datetime function.""" + + def test_pacific_time(self): + """Test parsing Pacific Time.""" + result = parse_aew_datetime("FEBRUARY 11, 2026", "4:30pm PT") + assert result is not None + # 4:30 PM PT = 12:30 AM UTC next day (8 hour offset) + assert result.hour == 0 + assert result.minute == 30 + assert result.day == 12 # Next day in UTC + assert result.month == 2 + assert result.year == 2026 + + def test_eastern_time(self): + """Test parsing Eastern Time.""" + result = parse_aew_datetime("MARCH 15, 
2026", "8:00pm ET") + assert result is not None + # 8:00 PM ET = 12:00 AM UTC next day (4 hour offset - EDT, DST in effect on March 15) + assert result.hour == 0 # Midnight UTC + assert result.month == 3 + assert result.day == 16 # Next day in UTC + + def test_mountain_time(self): + """Test parsing Mountain Time.""" + result = parse_aew_datetime("MARCH 4, 2026", "5:30pm MT") + assert result is not None + # 5:30 PM MT = 12:30 AM UTC next day (7 hour offset) + assert result.hour == 0 + assert result.minute == 30 + + def test_central_time(self): + """Test parsing Central Time.""" + result = parse_aew_datetime("MARCH 25, 2026", "6:30pm CT") + assert result is not None + # 6:30 PM CT = 11:30 PM UTC same day (5 hour offset in March - DST) + assert result.hour == 23 + assert result.minute == 30 + + def test_australian_time(self): + """Test parsing Australian Eastern Daylight Time.""" + result = parse_aew_datetime("FEBRUARY 14, 2026", "6:30pm AEDT") + assert result is not None + # 6:30 PM AEDT = 7:30 AM UTC same day (AEDT is UTC+11) + assert result.hour == 7 + assert result.minute == 30 + assert result.day == 14 + + def test_tba_defaults_to_8pm_et(self): + """Test that TBA time defaults to 8:00 PM ET.""" + result = parse_aew_datetime("AUGUST 30, 2026", "TBA") + assert result is not None + # 8:00 PM ET in August (EDT) = 12:00 AM UTC next day + assert result.hour == 0 + assert result.day == 31 # Next day + + def test_tba_uppercase(self): + """Test TBA is case-insensitive.""" + result = parse_aew_datetime("JANUARY 15, 2026", "tba") + assert result is not None + + def test_empty_time_defaults_to_8pm_et(self): + """Test that empty time defaults to 8:00 PM ET.""" + result = parse_aew_datetime("JANUARY 15, 2026", "") + assert result is not None + + def test_invalid_date_returns_none(self): + """Test that invalid date returns None.""" + result = parse_aew_datetime("INVALID DATE", "8:00pm ET") + assert result is None + + def test_invalid_time_format_returns_none(self): + """Test that invalid time 
format returns None.""" + result = parse_aew_datetime("FEBRUARY 11, 2026", "invalid") + assert result is None + + def test_lowercase_date(self): + """Test that date parsing handles lowercase.""" + result = parse_aew_datetime("february 11, 2026", "4:30pm PT") + assert result is not None + assert result.month == 2 + assert result.day == 12 # UTC next day + + def test_all_months(self): + """Test parsing all months.""" + months = [ + ("JANUARY", 1), ("FEBRUARY", 2), ("MARCH", 3), ("APRIL", 4), + ("MAY", 5), ("JUNE", 6), ("JULY", 7), ("AUGUST", 8), + ("SEPTEMBER", 9), ("OCTOBER", 10), ("NOVEMBER", 11), ("DECEMBER", 12) + ] + for month_name, month_num in months: + result = parse_aew_datetime(f"{month_name} 15, 2026", "12:00pm ET") + assert result is not None + # Note: month in result may differ due to UTC conversion + + +class TestGetAewEvents: + """Tests for get_aew_events function with mocked responses.""" + + @patch("integrations.aew.requests.get") + def test_success(self, mock_get): + """Test successful parsing of AEW events page.""" + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.text = """ + + + UPCOMING EVENTS + FEBRUARY 11, 2026 + AEW Dynamite: Ontario + City + Ontario, CA + Venue + Toyota Arena + Time + 4:30pm PT + BUY TICKETS + FEBRUARY 14, 2026 + AEW Grand Slam: Australia + City + Sydney, AUS + Venue + Qudos Bank Arena + Time + 6:30pm AEDT + BUY TICKETS + + + """ + mock_get.return_value = mock_response + + events = get_aew_events() + assert len(events) == 2 + assert events[0]["title"] == "AEW Dynamite: Ontario" + assert "Toyota Arena" in events[0]["location"] + assert events[1]["title"] == "AEW Grand Slam: Australia" + + @patch("integrations.aew.requests.get") + def test_api_failure_returns_empty(self, mock_get): + """Test that API failure returns empty list.""" + mock_response = MagicMock() + mock_response.status_code = 500 + mock_get.return_value = mock_response + + events = get_aew_events() + assert events == [] + + 
@patch("integrations.aew.requests.get") + def test_empty_page_returns_empty(self, mock_get): + """Test that empty page returns empty list.""" + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.text = "" + mock_get.return_value = mock_response + + events = get_aew_events() + assert events == [] + + @patch("integrations.aew.requests.get") + def test_request_exception_returns_empty(self, mock_get): + """Test that request exception returns empty list.""" + mock_get.side_effect = Exception("Network error") + + events = get_aew_events() + assert events == [] + + @patch("integrations.aew.requests.get") + def test_duplicate_events_deduplicated(self, mock_get): + """Test that duplicate events are deduplicated by UID.""" + mock_response = MagicMock() + mock_response.status_code = 200 + # Same event appearing twice + mock_response.text = """ + FEBRUARY 11, 2026 + AEW Dynamite: Ontario + Ontario, CA + Toyota Arena + 4:30pm PT + FEBRUARY 11, 2026 + AEW Dynamite: Ontario + Ontario, CA + Toyota Arena + 4:30pm PT + """ + mock_get.return_value = mock_response + + events = get_aew_events() + # Should only have one event due to deduplication + assert len(events) == 1 + + +class TestAewCalendar: + """Tests for AewCalendar class.""" + + @patch("integrations.aew.get_aew_events") + def test_fetch_events_success(self, mock_get_events): + """Test successful event fetching.""" + mock_get_events.return_value = [ + { + "uid": "aew-dynamite-feb-11", + "title": "AEW Dynamite: Ontario", + "start": datetime(2026, 2, 12, 0, 30), + "location": "Toyota Arena, Ontario, CA", + "description": "", + } + ] + + cal = AewCalendar(name="AEW", id="aew", icon="", events=[]) + events = cal.fetch_events() + + assert len(events) == 1 + assert events[0].title == "AEW Dynamite: Ontario" + assert events[0].uid == "aew-dynamite-feb-11" + + @patch("integrations.aew.get_aew_events") + def test_ppv_events_get_longer_duration(self, mock_get_events): + """Test that PPV events get 4 hour 
duration.""" + mock_get_events.return_value = [ + { + "uid": "aew-revolution-2026", + "title": "AEW: Revolution 2026", + "start": datetime(2026, 3, 15, 23, 0), + "location": "Crypto.com Arena, Los Angeles, CA", + "description": "", + } + ] + + cal = AewCalendar(name="AEW", id="aew", icon="", events=[]) + events = cal.fetch_events() + + assert len(events) == 1 + # PPV should be 4 hours + duration = events[0].end - events[0].start + assert duration.total_seconds() == 4 * 3600 # 4 hours + + @patch("integrations.aew.get_aew_events") + def test_regular_show_gets_shorter_duration(self, mock_get_events): + """Test that regular shows get 3 hour duration.""" + mock_get_events.return_value = [ + { + "uid": "aew-dynamite-feb-11", + "title": "AEW Dynamite: Ontario", + "start": datetime(2026, 2, 12, 0, 30), + "location": "Toyota Arena, Ontario, CA", + "description": "", + } + ] + + cal = AewCalendar(name="AEW", id="aew", icon="", events=[]) + events = cal.fetch_events() + + assert len(events) == 1 + # Regular show should be 3 hours + duration = events[0].end - events[0].start + assert duration.total_seconds() == 3 * 3600 # 3 hours + + @patch("integrations.aew.get_aew_events") + def test_events_sorted_by_start_time(self, mock_get_events): + """Test that events are sorted by start time.""" + mock_get_events.return_value = [ + { + "uid": "aew-event-2", + "title": "Event 2", + "start": datetime(2026, 3, 1), + "location": "", + "description": "", + }, + { + "uid": "aew-event-1", + "title": "Event 1", + "start": datetime(2026, 2, 1), + "location": "", + "description": "", + }, + ] + + cal = AewCalendar(name="AEW", id="aew", icon="", events=[]) + events = cal.fetch_events() + + assert events[0].title == "Event 1" + assert events[1].title == "Event 2" + + @patch("integrations.aew.get_aew_events") + def test_empty_events_returns_empty_list(self, mock_get_events): + """Test that empty events returns empty list.""" + mock_get_events.return_value = [] + + cal = AewCalendar(name="AEW", 
id="aew", icon="", events=[]) + events = cal.fetch_events() + + assert events == []