From 34ccd5ad9505e1247f643422d226ce7af21c3767 Mon Sep 17 00:00:00 2001
From: Pratham Gupta <85290399+Prathamg001@users.noreply.github.com>
Date: Tue, 28 May 2024 12:18:13 +0530
Subject: [PATCH] Create Football site webiste table scrap

---
 Football site webiste table scrap | 69 +++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100644 Football site webiste table scrap

diff --git a/Football site webiste table scrap b/Football site webiste table scrap
new file mode 100644
index 00000000..e93c6394
--- /dev/null
+++ b/Football site webiste table scrap
@@ -0,0 +1,69 @@
+#LA LIGA SQUAD STATS TABLE#
+"""Scrape one stats table from an FBref squad page and save it as Excel.
+
+Run as a script: downloads URL, picks the table at TABLE_INDEX among the
+``table.stats_table`` elements, and writes its rows to EXCEL_FILE.
+"""
+
+import requests
+import pandas as pd
+from bs4 import BeautifulSoup
+
+# Define the URL of the webpage you want to scrape
+URL = "https://fbref.com/en/squads/8d6fd021/Alaves-Stats"
+# Zero-based position of the wanted table among the page's stats tables
+TABLE_INDEX = 2
+# Output workbook; name kept exactly as the original script wrote it
+EXCEL_FILE = "Team Score .xlsx"
+# Seconds to wait for the server; without it requests.get can block forever
+REQUEST_TIMEOUT = 30
+
+
+def fetch_html(url, timeout=REQUEST_TIMEOUT):
+    """Return the page body for *url*, or None when the download fails."""
+    try:
+        response = requests.get(url, timeout=timeout)
+    except requests.RequestException as exc:
+        print(f"Failed to retrieve the webpage: {exc}")
+        return None
+    # Only a plain 200 counts as success, as in the original check
+    if response.status_code != 200:
+        print("Failed to retrieve the webpage")
+        return None
+    return response.text
+
+
+def extract_rows(html, table_index=TABLE_INDEX):
+    """Return the cell texts of every row but the first, or None if absent.
+
+    None means the page has no ``table.stats_table`` at *table_index*.
+    """
+    tables = BeautifulSoup(html, "html.parser").select("table.stats_table")
+    # Indexing blindly raised IndexError on pages with fewer tables
+    if len(tables) <= table_index:
+        return None
+    # The first tr row is skipped, so the second row supplies the column names
+    return [
+        [cell.get_text(strip=True) for cell in row.find_all(["td", "th"])]
+        for row in tables[table_index].find_all("tr")[1:]
+    ]
+
+
+def main():
+    """Download the page, build the DataFrame and write the Excel file."""
+    html = fetch_html(URL)
+    if html is None:
+        return
+    rows = extract_rows(html)
+    # No table, or no row left to serve as the header
+    if not rows:
+        print("Squad stats table not found on the page.")
+        return
+    headers, *records = rows
+    df = pd.DataFrame(records, columns=headers)
+    df.to_excel(EXCEL_FILE, index=False)
+    print(f"Data has been scraped and saved to {EXCEL_FILE}")
+
+
+if __name__ == "__main__":
+    main()