diff --git a/.gitignore b/.gitignore index 44ca3011..d71cc262 100644 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,11 @@ __pycache__/ *.py[cod] *$py.class +<<<<<<< HEAD local_test.py +======= +locaL_test.py +>>>>>>> 8f31f07307b5d503598fb062f51376a7b9eac315 # C extensions *.so *.ipynb diff --git a/src/scrape_up/coinmarketcap/__init__.py b/src/scrape_up/coinmarketcap/__init__.py index 3c800c80..4cd82404 100644 --- a/src/scrape_up/coinmarketcap/__init__.py +++ b/src/scrape_up/coinmarketcap/__init__.py @@ -1,3 +1,3 @@ -from .coinmarketcap import Crypto +from .crypto import Crypto __all__ = ["Crypto"] diff --git a/src/scrape_up/coinmarketcap/CoinMarketCap.py b/src/scrape_up/coinmarketcap/crypto.py similarity index 97% rename from src/scrape_up/coinmarketcap/CoinMarketCap.py rename to src/scrape_up/coinmarketcap/crypto.py index 11c67b09..bd529976 100644 --- a/src/scrape_up/coinmarketcap/CoinMarketCap.py +++ b/src/scrape_up/coinmarketcap/crypto.py @@ -1,121 +1,121 @@ -from bs4 import BeautifulSoup -import requests - - -class Crypto: - """ - Create an instance of `Crypto` class - - ```python - crypto = Crypto() - ``` - - | Method | Details | - | ---------------------------- | -------------------------------------------------------- | - | `get_top_cryptocurrencies()` | Fetches and returns data about the top cryptocurrencies. | - """ - - def __init__(self): - """ - Initialize the CoinMarketCap class by fetching data from the CoinMarketCap website. - """ - headers = { - "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36" - } - url = "https://coinmarketcap.com/" - html_text = requests.get(url, headers=headers).text - self.soup = BeautifulSoup(html_text, "lxml") - - def get_top_cryptocurrencies(self): - """ - A list of dictionaries containing details of the top cryptocurrencies.\n - ```python - crypto = Crypto() - ``` - Example output: - ```python - [ - { - "Name": "Bitcoin", - "Symbol": "BTC", - "Link": "https://coinmarketcap.com/...", - "Price": "$65,432.10", - "1h%": "-1.23% (Down)", - "24h%": "+0.45% (Up)", - "7d%": "-2.15% (Down)", - "MarketCap": "$1.23T", - "Volume(24h)": "$12.5B", - "Circulating Supply": "18.7M BTC" - }, - ... - ] - """ - try: - cryptocurrency = [] - container = self.soup.find("div", {"class": "sc-4c520df-2 kGWYlx"}) - i = 0 - tbody = container.find("tbody") - for items in tbody.find_all("tr"): - i += 1 - if i == 11: - break - j = 0 - for item in items.find_all("td"): - j += 1 - if j == 1 or j == 2: - continue - elif j == 3: - name = item.find("p", {"class": "sc-4984dd93-0 kKpPOn"}).text - symbol = item.find( - "p", {"class": "sc-4984dd93-0 iqdbQL coin-item-symbol"} - ).text - link = ( - "https://coinmarketcap.com/" - + item.find("a", href=True)["href"] - ) - elif j == 4: - price = item.text - elif j == 5: - if item.find("span", {"class": "icon-Caret-down"}) is not None: - market = "Down" - else: - market = "Up" - hour = item.text + f" ({market})" - elif j == 6: - if item.find("span", {"class": "icon-Caret-down"}) is not None: - market = "Down" - else: - market = "Up" - hour_24 = item.text + f" ({market})" - elif j == 7: - if item.find("span", {"class": "icon-Caret-down"}) is not None: - market = "Down" - else: - market = "Up" - day = item.text + f" ({market})" - elif j == 8: - marketcap = item.find( - "span", {"class": "sc-f8982b1f-1 bOsKfy"} - ).text - elif j == 9: - volume = item.find( - "p", {"class": "sc-4984dd93-0 jZrMxO font_weight_500"} - ).text - elif j == 10: - supply = item.find("p", {"class": "sc-4984dd93-0 WfVLk"}).text - data = { - "Name": name, - "Symbol": symbol, - "Link": link, - "Price": price, - "1h%": hour, - "24h%": hour_24, - "7d%": day, - "MarketCap": marketcap, - "Volume(24h)": volume, - "Circulating Supply": supply, - } - cryptocurrency.append(data) - return cryptocurrency - except: - return None +from bs4 import BeautifulSoup +import requests + + +class Crypto: + """ + Create an instance of `Crypto` class + + ```python + crypto = Crypto() + ``` + + | Method | Details | + | ---------------------------- | -------------------------------------------------------- | + | `get_top_cryptocurrencies()` | Fetches and returns data about the top cryptocurrencies. | + """ + + def __init__(self): + """ + Initialize the CoinMarketCap class by fetching data from the CoinMarketCap website. + """ + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36" + } + url = "https://coinmarketcap.com/" + html_text = requests.get(url, headers=headers).text + self.soup = BeautifulSoup(html_text, "lxml") + + def get_top_cryptocurrencies(self): + """ + A list of dictionaries containing details of the top cryptocurrencies.\n + ```python + crypto = Crypto() + ``` + Example output: + ```python + [ + { + "Name": "Bitcoin", + "Symbol": "BTC", + "Link": "https://coinmarketcap.com/...", + "Price": "$65,432.10", + "1h%": "-1.23% (Down)", + "24h%": "+0.45% (Up)", + "7d%": "-2.15% (Down)", + "MarketCap": "$1.23T", + "Volume(24h)": "$12.5B", + "Circulating Supply": "18.7M BTC" + }, + ... + ] + """ + try: + cryptocurrency = [] + container = self.soup.find("div", {"class": "sc-4c520df-2 kGWYlx"}) + i = 0 + tbody = container.find("tbody") + for items in tbody.find_all("tr"): + i += 1 + if i == 11: + break + j = 0 + for item in items.find_all("td"): + j += 1 + if j == 1 or j == 2: + continue + elif j == 3: + name = item.find("p", {"class": "sc-4984dd93-0 kKpPOn"}).text + symbol = item.find( + "p", {"class": "sc-4984dd93-0 iqdbQL coin-item-symbol"} + ).text + link = ( + "https://coinmarketcap.com/" + + item.find("a", href=True)["href"] + ) + elif j == 4: + price = item.text + elif j == 5: + if item.find("span", {"class": "icon-Caret-down"}) is not None: + market = "Down" + else: + market = "Up" + hour = item.text + f" ({market})" + elif j == 6: + if item.find("span", {"class": "icon-Caret-down"}) is not None: + market = "Down" + else: + market = "Up" + hour_24 = item.text + f" ({market})" + elif j == 7: + if item.find("span", {"class": "icon-Caret-down"}) is not None: + market = "Down" + else: + market = "Up" + day = item.text + f" ({market})" + elif j == 8: + marketcap = item.find( + "span", {"class": "sc-f8982b1f-1 bOsKfy"} + ).text + elif j == 9: + volume = item.find( + "p", {"class": "sc-4984dd93-0 jZrMxO font_weight_500"} + ).text + elif j == 10: + supply = item.find("p", {"class": "sc-4984dd93-0 WfVLk"}).text + data = { + "Name": name, + "Symbol": symbol, + "Link": link, + "Price": price, + "1h%": hour, + "24h%": hour_24, + "7d%": day, + "MarketCap": marketcap, + "Volume(24h)": volume, + "Circulating Supply": supply, + } + cryptocurrency.append(data) + return cryptocurrency + except: + return None diff --git a/src/scrape_up/espn/__init__.py b/src/scrape_up/espn/__init__.py index 5a4cfa71..5b4db792 100644 --- a/src/scrape_up/espn/__init__.py +++ b/src/scrape_up/espn/__init__.py @@ -1,3 +1,3 @@ -from .espn import ESPN +from .espnmodule import ESPN __all__ = ["ESPN"] diff --git a/src/scrape_up/espn/ESPN.py b/src/scrape_up/espn/espnmodule.py similarity index 97% rename from src/scrape_up/espn/ESPN.py rename to src/scrape_up/espn/espnmodule.py index 5455c097..38c71466 100644 --- a/src/scrape_up/espn/ESPN.py +++ b/src/scrape_up/espn/espnmodule.py @@ -1,192 +1,192 @@ -from bs4 import BeautifulSoup -import requests - - -class ESPN: - """ - Create an instance of `ESPN` class - ```python - espn = espn.ESPN() - ``` - - | Method | Details | - | ------------------- | ----------------------------------------------------------------- | - | `get_scoreboard()` | Fetches and returns the football scoreboards for a given date. | - | `get_tournaments()` | Fetches and returns information about ongoing football tournaments. | - | `get_teams()` | Fetches and returns information about football teams. | - """ - - def __init__(self): - self.headers = { - "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36" - } - - def get_scoreboard(self, date): - """ - Create an instance of the `ESPN` class to fetch football scoreboards.\n - Example: - ```python - espn = ESPN() - scores = espn.get_scoreboard(date="20230721") - ``` - Example output: - ```python - [ - { - "Game":"Mexican Liga de Expansión MX", - "Game Link":"https://www.espn.in/football/scoreboard/_/league/mex.2", - "Teams":[ - { - "Name":"Venados FC", - "Score":"3" - }, - { - "Name":"Tlaxcala FC", - "Score":"1" - } - ], - "Location":"Estadio Carlos Iturralde Rivero,Mérida, Mexico" - }, - ... - ] - ``` - """ - try: - url = f"https://www.espn.in/football/scoreboard/_/date/{date}" - html_text = requests.get(url, headers=self.headers).text - soup = BeautifulSoup(html_text, "lxml") - - scores = [] - container = soup.find("div", {"class": "PageLayout__Main"}) - for items in container.find_all("section", {"class": "Card gameModules"}): - title = items.find( - "h3", {"class": "Card__Header__Title Card__Header__Title--no-theme"} - ) - link = ( - "https://www.espn.in" - + items.find( - "a", {"class": "AnchorLink Card__Header__Link"}, href=True - )["href"] - ) - for item in items.find_all( - "section", - {"class": "Scoreboard bg-clr-white flex flex-auto justify-between"}, - ): - teams = [] - for team in item.find( - "ul", {"class": "ScoreboardScoreCell__Competitors"} - ).find_all("li"): - name = team.find( - "div", - { - "class": "ScoreCell__TeamName ScoreCell__TeamName--shortDisplayName truncate db" - }, - ) - score = team.find( - "div", - { - "class": "ScoreCell__Score h4 clr-gray-01 fw-heavy tar ScoreCell_Score--scoreboard pl2" - }, - ) - if score: - score = score.text - teams.append({"Name": name.text, "Score": score}) - place = item.find( - "div", - { - "class": "LocationDetail__Item LocationDetail__Item--headline" - }, - ) - if place: - country = place.next_sibling - location = place.text + "," + country.text - data = { - "Game": title.text, - "Game Link": link, - "Teams": teams, - "Location": location, - } - else: - data = { - "Game": title.text, - "Game Link": link, - "Teams": teams, - } - scores.append(data) - return scores - except: - return None - - def get_tournaments(self): - """ - Fetches and returns information about ongoing football tournaments. - ```py - espn = ESPN() - tournaments = espn.get_tournaments() - ``` - Example output: - ```python - [ - { - "Premier League": [ - ["https://www.espn.in/football/competition/_/id/eng.1", "English Premier League"] - ] - }, - ... - ] - ``` - """ - try: - url = "https://www.espn.in/football/competitions" - html_text = requests.get(url, headers=self.headers).text - soup = BeautifulSoup(html_text, "lxml") - - container = soup.find("div", {"class": "Wrapper bg-clr-white br-5 mb3 pa5"}) - data = [] - for items in container.find_all("h3"): - heading = items.text - div = items.next_sibling - li = [] - for item in div.find_all("div", {"class": "ContentList__Item"}): - link = item.find("a", href=True)["href"] - title = item.find("h2") - li.append([link, title.text]) - data.append({heading: li}) - return data - except: - return None - - def get_teams(self): - """ - Fetches and returns information about football teams. - ```py - espn = ESPN() - teams = espn.get_teams() - ``` - - Example output: - ```python - [ - { - "Name": "Manchester United", - "Link": "https://www.espn.in/football/team/_/id/360/manchester-united" - }, - ... - ] - ``` - """ - try: - url = "https://www.espn.in/football/teams" - html_text = requests.get(url, headers=self.headers).text - soup = BeautifulSoup(html_text, "lxml") - - container = soup.find("div", {"class": "Wrapper TeamsWrapper br-5 mb3 pa5"}) - teams = [] - for items in container.find_all("div", {"class": "ContentList__Item"}): - title = items.find("h2") - link = items.find("a", href=True)["href"] - data = {"Name": title.text, "Link": "https://www.espn.in" + link} - teams.append(data) - return teams - except: - return None +from bs4 import BeautifulSoup +import requests + + +class ESPN: + """ + Create an instance of `ESPN` class + ```python + espn = espn.ESPN() + ``` + + | Method | Details | + | ------------------- | ----------------------------------------------------------------- | + | `get_scoreboard()` | Fetches and returns the football scoreboards for a given date. | + | `get_tournaments()` | Fetches and returns information about ongoing football tournaments. | + | `get_teams()` | Fetches and returns information about football teams. | + """ + + def __init__(self): + self.headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36" + } + + def get_scoreboard(self, date): + """ + Create an instance of the `ESPN` class to fetch football scoreboards.\n + Example: + ```python + espn = ESPN() + scores = espn.get_scoreboard(date="20230721") + ``` + Example output: + ```python + [ + { + "Game":"Mexican Liga de Expansión MX", + "Game Link":"https://www.espn.in/football/scoreboard/_/league/mex.2", + "Teams":[ + { + "Name":"Venados FC", + "Score":"3" + }, + { + "Name":"Tlaxcala FC", + "Score":"1" + } + ], + "Location":"Estadio Carlos Iturralde Rivero,Mérida, Mexico" + }, + ... + ] + ``` + """ + try: + url = f"https://www.espn.in/football/scoreboard/_/date/{date}" + html_text = requests.get(url, headers=self.headers).text + soup = BeautifulSoup(html_text, "lxml") + + scores = [] + container = soup.find("div", {"class": "PageLayout__Main"}) + for items in container.find_all("section", {"class": "Card gameModules"}): + title = items.find( + "h3", {"class": "Card__Header__Title Card__Header__Title--no-theme"} + ) + link = ( + "https://www.espn.in" + + items.find( + "a", {"class": "AnchorLink Card__Header__Link"}, href=True + )["href"] + ) + for item in items.find_all( + "section", + {"class": "Scoreboard bg-clr-white flex flex-auto justify-between"}, + ): + teams = [] + for team in item.find( + "ul", {"class": "ScoreboardScoreCell__Competitors"} + ).find_all("li"): + name = team.find( + "div", + { + "class": "ScoreCell__TeamName ScoreCell__TeamName--shortDisplayName truncate db" + }, + ) + score = team.find( + "div", + { + "class": "ScoreCell__Score h4 clr-gray-01 fw-heavy tar ScoreCell_Score--scoreboard pl2" + }, + ) + if score: + score = score.text + teams.append({"Name": name.text, "Score": score}) + place = item.find( + "div", + { + "class": "LocationDetail__Item LocationDetail__Item--headline" + }, + ) + if place: + country = place.next_sibling + location = place.text + "," + country.text + data = { + "Game": title.text, + "Game Link": link, + "Teams": teams, + "Location": location, + } + else: + data = { + "Game": title.text, + "Game Link": link, + "Teams": teams, + } + scores.append(data) + return scores + except: + return None + + def get_tournaments(self): + """ + Fetches and returns information about ongoing football tournaments. + ```py + espn = ESPN() + tournaments = espn.get_tournaments() + ``` + Example output: + ```python + [ + { + "Premier League": [ + ["https://www.espn.in/football/competition/_/id/eng.1", "English Premier League"] + ] + }, + ... + ] + ``` + """ + try: + url = "https://www.espn.in/football/competitions" + html_text = requests.get(url, headers=self.headers).text + soup = BeautifulSoup(html_text, "lxml") + + container = soup.find("div", {"class": "Wrapper bg-clr-white br-5 mb3 pa5"}) + data = [] + for items in container.find_all("h3"): + heading = items.text + div = items.next_sibling + li = [] + for item in div.find_all("div", {"class": "ContentList__Item"}): + link = item.find("a", href=True)["href"] + title = item.find("h2") + li.append([link, title.text]) + data.append({heading: li}) + return data + except: + return None + + def get_teams(self): + """ + Fetches and returns information about football teams. + ```py + espn = ESPN() + teams = espn.get_teams() + ``` + + Example output: + ```python + [ + { + "Name": "Manchester United", + "Link": "https://www.espn.in/football/team/_/id/360/manchester-united" + }, + ... + ] + ``` + """ + try: + url = "https://www.espn.in/football/teams" + html_text = requests.get(url, headers=self.headers).text + soup = BeautifulSoup(html_text, "lxml") + + container = soup.find("div", {"class": "Wrapper TeamsWrapper br-5 mb3 pa5"}) + teams = [] + for items in container.find_all("div", {"class": "ContentList__Item"}): + title = items.find("h2") + link = items.find("a", href=True)["href"] + data = {"Name": title.text, "Link": "https://www.espn.in" + link} + teams.append(data) + return teams + except: + return None diff --git a/src/scrape_up/healthgrades/__init__.py b/src/scrape_up/healthgrades/__init__.py index 1c324fc4..d4fc7c1a 100644 --- a/src/scrape_up/healthgrades/__init__.py +++ b/src/scrape_up/healthgrades/__init__.py @@ -1,3 +1,3 @@ -from .healthgrades import HealthGrades +from .healthgradesmodule import HealthGrades __all__ = ["HealthGrades"] diff --git a/src/scrape_up/healthgrades/HealthGrades.py b/src/scrape_up/healthgrades/healthgradesmodule.py similarity index 97% rename from src/scrape_up/healthgrades/HealthGrades.py rename to src/scrape_up/healthgrades/healthgradesmodule.py index f6be283f..7b16fed5 100644 --- a/src/scrape_up/healthgrades/HealthGrades.py +++ b/src/scrape_up/healthgrades/healthgradesmodule.py @@ -1,78 +1,78 @@ -from bs4 import BeautifulSoup -import requests - - -class HealthGrades: - """ - Create an instance of `HealthGrades` class - - ```python - hc = HealthGrades() - ``` - - | Method | Details | - | --------------------------- | -------------------------------------------------------------------- | - | `get_best_hospitals(state)` | Fetches and returns information about the best hospitals in a state. | - - """ - - def __init__(self): - self.headers = { - "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36" - } - - def get_best_hospitals(self, state): - """ - Fetches and returns information about the best hospitals in a state.\n - ```python - hc = HealthGrades() - hc.get_best_hospitals(state="bihar") - ``` - - Example output: - ```python - [ - { - "Name": "ABC Hospital", - "Location": "123 Main St, Philadelphia, PA", - "Link": "https://www.healthgrades.com/hospital/abc-hospital", - "Awards": ["America's 100 Best Hospitals", "Patient Safety Excellence Award"] - }, - ... - ] - ``` - """ - try: - state = state.replace(" ", "-") - url = ( - f"https://www.healthgrades.com/quality/americas-best-hospitals/{state}" - ) - html_text = requests.get(url, headers=self.headers).text - soup = BeautifulSoup(html_text, "lxml") - - hospitals = [] - container = soup.find("ul", {"class": "quality-results-group"}) - - for items in container.find_all("div", {"class": "quality-card"}): - award = [] - title = items.find("h3") - location = items.find("div", {"class": "location-info"}) - link = ( - "https://www.healthgrades.com" - + items.find("div", {"class": "hospital-info__hospital-link"}).find( - "a", href=True - )["href"] - ) - awards = items.find("ul", {"class": "awards-list__quality-award"}) - for item in awards.find_all("li"): - award.append(item.text) - data = { - "Name": title.text, - "Location": location.text, - "Link": link, - "Awards": award[:-2], - } - hospitals.append(data) - return hospitals - except: - return None +from bs4 import BeautifulSoup +import requests + + +class HealthGrades: + """ + Create an instance of `HealthGrades` class + + ```python + hc = HealthGrades() + ``` + + | Method | Details | + | --------------------------- | -------------------------------------------------------------------- | + | `get_best_hospitals(state)` | Fetches and returns information about the best hospitals in a state. | + + """ + + def __init__(self): + self.headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36" + } + + def get_best_hospitals(self, state): + """ + Fetches and returns information about the best hospitals in a state.\n + ```python + hc = HealthGrades() + hc.get_best_hospitals(state="bihar") + ``` + + Example output: + ```python + [ + { + "Name": "ABC Hospital", + "Location": "123 Main St, Philadelphia, PA", + "Link": "https://www.healthgrades.com/hospital/abc-hospital", + "Awards": ["America's 100 Best Hospitals", "Patient Safety Excellence Award"] + }, + ... + ] + ``` + """ + try: + state = state.replace(" ", "-") + url = ( + f"https://www.healthgrades.com/quality/americas-best-hospitals/{state}" + ) + html_text = requests.get(url, headers=self.headers).text + soup = BeautifulSoup(html_text, "lxml") + + hospitals = [] + container = soup.find("ul", {"class": "quality-results-group"}) + + for items in container.find_all("div", {"class": "quality-card"}): + award = [] + title = items.find("h3") + location = items.find("div", {"class": "location-info"}) + link = ( + "https://www.healthgrades.com" + + items.find("div", {"class": "hospital-info__hospital-link"}).find( + "a", href=True + )["href"] + ) + awards = items.find("ul", {"class": "awards-list__quality-award"}) + for item in awards.find_all("li"): + award.append(item.text) + data = { + "Name": title.text, + "Location": location.text, + "Link": link, + "Awards": award[:-2], + } + hospitals.append(data) + return hospitals + except: + return None