-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
107 lines (74 loc) · 4.19 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import requests
from bs4 import BeautifulSoup
import re
import csv
def fetch_page_content(url):
    """Download a web page and hand back its raw body.

    Args:
        url: Address of the page to request.

    Returns:
        The response body (``response.content``) when the request succeeds,
        or ``None`` when it fails. Failures are reported on stdout.
    """
    # Browser-like User-Agent sent along with the request.
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36"
    }
    try:
        response = requests.get(url, headers=request_headers, timeout=5)
        # Turn non-successful status codes into an HTTPError.
        response.raise_for_status()
        return response.content
    # Order matters: the specific request errors are checked before the
    # generic RequestException catch-all.
    except requests.exceptions.HTTPError as exc:
        failure = f"HTTP error occurred: {exc}"
    except requests.exceptions.Timeout as exc:
        failure = f"Timeout error occurred: {exc}"
    except requests.exceptions.ConnectionError as exc:
        failure = f"Connection error occurred: {exc}"
    except requests.exceptions.RequestException as exc:
        failure = f"An error occurred: {exc}"

    # Only reached when one of the handlers above ran.
    print(failure)
    return None
def parse_page_content(html_content):
    """Parse the match-center HTML and extract football match data.

    Args:
        html_content: Raw HTML of the page, or ``None``/empty when there is
            nothing to parse (``fetch_page_content`` returns ``None`` on
            failure).

    Returns:
        A list of dictionaries, one per match, keyed by the Arabic column
        names written to the CSV file. The list is empty when there is no
        HTML to parse or the page has no fixtures container.
    """
    # A failed download yields None; bail out instead of crashing the parser.
    if not html_content:
        return []

    soup = BeautifulSoup(html_content, 'lxml')

    # Find the football-fixtures container; it may be absent from the page.
    fixtures = soup.find('section', class_='matchesCenter')
    if fixtures is None:
        return []

    # Find all championships within the fixtures container
    championships = fixtures.find_all('div', class_=re.compile(r'matchCard matchesList'))

    matches_list = []  # List to store all match data

    for championship in championships:
        championship_title = championship.find('div', class_='title').h2.text.strip()

        # Each 'allData' div holds the details of a single match
        for match in championship.find_all('div', class_='allData'):
            result = match.find_all('span', class_='score')
            # NOTE(review): the score is emitted as second span, then first
            # span - presumably to read correctly in the right-to-left
            # layout; confirm against the site before changing the order.
            match_dic = {
                'الفريق الأول': match.find('div', class_='teams teamA').p.text.strip(),
                'نتيجة المباراة': f"{result[1].text.strip()} - {result[0].text.strip()}",
                'الفريق الثاني': match.find('div', class_='teams teamB').p.text.strip(),
                'الحالة': match.find('div', class_='matchStatus').span.text.strip(),
                'موعد المباراة': match.find('span', class_='time').text.strip(),
                'رقم الجولة': match.find('div', class_='date').text.strip(),
                'البطولة': championship_title,
            }
            matches_list.append(match_dic)

    return matches_list
def save_to_csv(data, filename='football_fixtures.csv'):
    """Save match data to a CSV file.

    Args:
        data: Iterable of dictionaries whose keys are the column names
            listed in ``fieldnames`` below.
        filename: Path of the CSV file to write. Defaults to
            ``football_fixtures.csv`` in the current working directory.
    """
    # Column order of the CSV file
    fieldnames = ['الفريق الأول', 'نتيجة المباراة', 'الفريق الثاني', 'الحالة', 'موعد المباراة', 'رقم الجولة',
                  'البطولة']

    # newline='' lets the csv module control line endings itself; without it,
    # platforms that translate newlines on write (e.g. Windows) end up with a
    # blank line after every row.
    with open(filename, 'w', encoding='utf-8', newline='') as f:
        csv_writer = csv.DictWriter(f, fieldnames=fieldnames)
        csv_writer.writeheader()
        csv_writer.writerows(data)
def football_fixtures_scraper():
    """Scrape the football fixtures for a user-provided date and save them to CSV.

    Prompts for a date, downloads the match-center page for that date,
    parses the matches and writes them via ``save_to_csv``. If the page
    cannot be downloaded, nothing is written.
    """
    # Strip stray whitespace so it does not end up inside the query string.
    date = input("Please, enter the date(MM/DD/YYYY): ").strip()
    football_page_url = f"https://www.yallakora.com/match-center/?date={date}"

    # Fetching (requesting) football page
    football_page = fetch_page_content(football_page_url)
    if football_page is None:
        # fetch_page_content has already printed the reason for the failure.
        print("\nCould not retrieve the page; no data was saved.")
        return

    # Parsing (extracting) football data
    football_data = parse_page_content(football_page)

    save_to_csv(football_data)
    print("\nData saved to football_fixtures.csv.")
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    football_fixtures_scraper()