Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
200 changes: 200 additions & 0 deletions integrations/ufc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
import logging
import re
from datetime import datetime, timedelta, timezone
from typing import List
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup
from fastapi import HTTPException

from base import CalendarBase, Event, IntegrationBase


UFC_EVENTS_URL = "https://www.ufc.com/events"
UFC_BASE_URL = "https://www.ufc.com"


def get_event_urls() -> List[str]:
    """Fetch event URLs from the UFC.com events listing pages.

    Requests listing pages 0 and 1 of ``UFC_EVENTS_URL`` and collects every
    anchor that points at an ``/event/<slug>`` path on a ufc.com host.
    Listing pages that do not answer with HTTP 200 are skipped.

    Returns:
        Absolute event URLs without query string, fragment or trailing
        slash, in first-seen order, one per distinct event slug.

    Raises:
        requests.RequestException: If a listing-page request fails at the
            network level; it is not caught here.
    """
    seen_slugs = set()
    event_urls = []

    for page in range(2):  # Pages 0 and 1
        response = requests.get(
            f"{UFC_EVENTS_URL}?page={page}",
            timeout=30,
            headers={"User-Agent": "Mozilla/5.0 (compatible; Sync2Cal/1.0)"},
        )
        if response.status_code != 200:
            continue

        soup = BeautifulSoup(response.text, "html.parser")

        for link in soup.find_all("a", href=True):
            # Resolve relative and protocol-relative hrefs against the site
            # root so every candidate can be checked as an absolute URL.
            try:
                parts = urlsplit(urljoin(UFC_BASE_URL, link["href"].strip()))
                host = (parts.hostname or "").lower()
            except ValueError:
                # Malformed href (e.g. a broken IPv6 literal): not an event.
                continue

            if parts.scheme not in ("http", "https"):
                continue

            # Skip non-UFC links (e.g., ticketmaster). Compare the real
            # hostname: a substring test would accept foreign URLs that
            # merely mention ufc.com in their path or query string.
            if host != "ufc.com" and not host.endswith(".ufc.com"):
                continue

            if "/event/" not in parts.path:
                continue

            # Slug used for deduplication; trailing slashes are ignored so
            # "/event/x" and "/event/x/" count as the same event.
            slug = parts.path.split("/event/")[-1].strip("/")
            if not slug:
                # Bare "/event/" link with nothing after it.
                continue

            if slug not in seen_slugs:
                seen_slugs.add(slug)
                # Rebuild the URL without query string and fragment
                # (e.g., #1295).
                clean_path = parts.path.rstrip("/")
                event_urls.append(
                    f"{parts.scheme}://{parts.netloc}{clean_path}"
                )

    return event_urls


def _extract_title(soup: "BeautifulSoup") -> "str | None":
    """Return the event title from the hero headline, or None if absent."""
    title_el = soup.select_one(".c-hero__headline")
    if title_el is None:
        return None

    title = title_el.get_text(strip=True)

    # A "vs" matchup (e.g., "Bautista vs Oliveira") is rendered as a divider
    # with separate top/bottom fighter names.
    divider = title_el.select_one(".e-divider")
    if divider is not None:
        top = divider.select_one(".e-divider__top")
        bottom = divider.select_one(".e-divider__bottom")
        if top is not None and bottom is not None:
            title = (
                f"{top.get_text(strip=True)} vs "
                f"{bottom.get_text(strip=True)}"
            )

    # Headline prefix (e.g., "UFC Fight Night"); an empty prefix is ignored
    # so the title never starts with a dangling ": ".
    prefix_el = soup.select_one(".c-hero__headline-prefix")
    if prefix_el is not None:
        prefix = prefix_el.get_text(strip=True)
        if prefix:
            title = f"{prefix}: {title}"

    return title


def _extract_start(soup: "BeautifulSoup") -> "datetime | None":
    """Return the start time as a naive UTC datetime, or None if unusable."""
    timestamp_el = soup.select_one(".c-hero__headline-suffix[data-timestamp]")
    if timestamp_el is None:
        # Fall back to the first element carrying a data-timestamp attribute.
        timestamp_el = soup.select_one("[data-timestamp]")
    if timestamp_el is None:
        return None

    timestamp_str = timestamp_el.get("data-timestamp")
    if not timestamp_str:
        return None

    # The attribute is parsed as a Unix timestamp in seconds.
    # datetime.utcfromtimestamp() is deprecated since Python 3.12, so build
    # an aware UTC value and drop tzinfo to keep the naive-UTC result.
    try:
        moment = datetime.fromtimestamp(int(timestamp_str), tz=timezone.utc)
    except (ValueError, TypeError, OverflowError, OSError):
        return None
    return moment.replace(tzinfo=None)


def _extract_location(soup: "BeautifulSoup") -> str:
    """Return the venue text with whitespace collapsed, or "" if not found."""
    for venue_selector in (
        ".field--name-venue",
        ".c-hero__headline-location",
        ".c-event-venue",
    ):
        venue_el = soup.select_one(venue_selector)
        if venue_el is not None:
            return " ".join(venue_el.get_text().split())
    return ""


def _extract_main_card(soup: "BeautifulSoup") -> str:
    """Return a "Main Card:" listing of up to six bouts, or "" if none."""
    main_card = soup.select_one("#main-card")
    if main_card is None:
        return ""

    bouts = []
    for fight in main_card.select(".c-listing-fight")[:6]:  # Limit to 6 fights
        red_corner = fight.select_one(
            ".c-listing-fight__corner--red .c-listing-fight__corner-name"
        )
        blue_corner = fight.select_one(
            ".c-listing-fight__corner--blue .c-listing-fight__corner-name"
        )
        if red_corner is not None and blue_corner is not None:
            bouts.append(
                f" {red_corner.get_text(strip=True)} vs "
                f"{blue_corner.get_text(strip=True)}"
            )

    # Only emit the header when at least one bout has both corner names.
    if not bouts:
        return ""
    return "\n".join(["Main Card:", *bouts])


def get_event_details(event_url: str) -> dict | None:
    """Fetch event details from an individual event page.

    Args:
        event_url: Absolute URL of an event page containing ``/event/<slug>``.

    Returns:
        A dict with keys ``uid`` (``"ufc-<slug>"``), ``title``, ``start``
        (naive datetime in UTC), ``location`` and ``description``; or None
        when the page does not answer with HTTP 200, has no headline or
        usable timestamp, or any error occurs while fetching or parsing.
    """
    try:
        response = requests.get(
            event_url,
            timeout=30,
            headers={"User-Agent": "Mozilla/5.0 (compatible; Sync2Cal/1.0)"},
        )
        if response.status_code != 200:
            return None

        soup = BeautifulSoup(response.text, "html.parser")

        title = _extract_title(soup)
        if title is None:
            return None

        start_time = _extract_start(soup)
        if start_time is None:
            return None

        # Generate a unique ID from the URL slug, ignoring any fragment or
        # query string.
        event_slug = event_url.split("/event/")[-1].split("#")[0].split("?")[0]

        return {
            "uid": f"ufc-{event_slug}",
            "title": title,
            "start": start_time,
            "location": _extract_location(soup),
            "description": _extract_main_card(soup),
        }

    except Exception:
        # Deliberate best-effort: one broken event page must not take the
        # whole calendar down. Log it so the failure is not silent.
        logging.getLogger(__name__).exception(
            "Failed to load UFC event details from %s", event_url
        )
        return None


class UfcCalendar(CalendarBase):
    """Calendar whose events are scraped from UFC.com."""

    def fetch_events(self) -> List[Event]:
        """Build the list of UFC events from UFC.com.

        Each URL returned by ``get_event_urls`` is resolved with
        ``get_event_details``; URLs that yield no details or a UID already
        seen are dropped. The result is sorted by start time, stored on
        ``self.events`` and returned.

        Raises:
            HTTPException: Re-raised unchanged if raised while fetching; any
                other exception is wrapped in a status-500 HTTPException
                whose detail is the original message.
        """
        try:
            collected: List[Event] = []
            known_uids = set()

            for url in get_event_urls():
                info = get_event_details(url)
                # Drop pages without details and repeats of a known UID.
                if info is None or info["uid"] in known_uids:
                    continue
                known_uids.add(info["uid"])

                begins = info["start"]
                # UFC events typically last about 3-4 hours
                finishes = begins + timedelta(hours=4)

                collected.append(
                    Event(
                        uid=info["uid"],
                        title=info["title"],
                        start=begins,
                        end=finishes,
                        all_day=False,
                        description=info["description"],
                        location=info["location"],
                    )
                )

            # Chronological order, earliest first.
            collected.sort(key=lambda item: item.start)
            self.events = collected
            return collected

        except HTTPException:
            raise
        except Exception as exc:
            raise HTTPException(status_code=500, detail=str(exc)) from exc


class UfcIntegration(IntegrationBase):
    """Integration entry for UFC."""

    def fetch_calendars(self, *args, **kwargs):
        """Return None; all positional and keyword arguments are ignored."""
        return None
9 changes: 9 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from integrations.moviedb import MovieDbIntegration, MovieDbCalendar
from integrations.thetvdb import TheTvDbIntegration, TheTvDbCalendar
from integrations.wwe import WweIntegration, WweCalendar
from integrations.ufc import UfcIntegration, UfcCalendar
from integrations.shows import ShowsIntegration, ShowsCalendar
from integrations.releases import ReleasesIntegration, ReleasesCalendar
from integrations.sportsdb import SportsDbIntegration, SportsDbCalendar
Expand Down Expand Up @@ -104,6 +105,14 @@
calendar_class=WweCalendar,
multi_calendar=False,
),
UfcIntegration(
id="ufc",
name="UFC",
description="UFC events scraped directly from UFC.com",
base_url="https://www.ufc.com",
calendar_class=UfcCalendar,
multi_calendar=False,
),
ShowsIntegration(
id="shows",
name="TV Shows",
Expand Down