From 0733b44c51141ca63daccd3a8c6c0d156a226978 Mon Sep 17 00:00:00 2001 From: wrieddude <59405252+PlanetDestroyyer@users.noreply.github.com> Date: Mon, 13 May 2024 12:48:54 +0530 Subject: [PATCH 1/6] Added Ambitionbox --- src/scrape_up/ambitionBox/company.py | 69 +++++++++++++++++++ src/scrape_up/ambitionBox/company_ratings.txt | 0 2 files changed, 69 insertions(+) create mode 100644 src/scrape_up/ambitionBox/company.py create mode 100644 src/scrape_up/ambitionBox/company_ratings.txt diff --git a/src/scrape_up/ambitionBox/company.py b/src/scrape_up/ambitionBox/company.py new file mode 100644 index 00000000..6ac69435 --- /dev/null +++ b/src/scrape_up/ambitionBox/company.py @@ -0,0 +1,69 @@ +import requests +from bs4 import BeautifulSoup + + +class Comapiens: # Class name with proper capitalization + def __init__(self,num_pages: int=1): + self.num_pages = num_pages + + def write_sorted_list(self, file, company_list): + + company_list.sort(key=lambda x: x[1], reverse=True) + for company_name, rating in company_list: + file.write(f"{company_name.strip()} {rating}\n") + + def scrape_companies(self): + + + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36" + } + + for page in range(1, self.num_pages + 1): + print(f"Scraping webpage number: {page} of {self.num_pages}") + + url = f"https://www.ambitionbox.com/list-of-companies?page={page}" + response = requests.get(url, headers=headers) + + if response.status_code == 200: + soup = BeautifulSoup(response.text, 'lxml') + + companies = soup.find_all('div', class_="companyCardWrapper") + + company_ratings = [] + + for company in companies: + company_name = company.find('h2', class_="companyCardWrapper__companyName").text.strip() + company_star = company.find('span', class_="companyCardWrapper__companyRatingValue") + + if company_name and company_star: + try: + rating = float(company_star.text) + company_ratings.append((company_name, rating)) + except ValueError: + print(f"Error parsing rating for company: {company_name}") + + with open("src/scrape_up/ambitionBox/company_ratings.txt", "a") as f: + f.write(f"\nPAGE: {url}\n") + f.write("COMPANY UNDER 5 STAR\n") + self.write_sorted_list(f, [r for r in company_ratings if 4 < r[1] <= 5]) + + f.write("\nCOMPANY UNDER 4 STAR\n") + self.write_sorted_list(f, [r for r in company_ratings if 3 < r[1] <= 4]) + + # Corrected indentation for following lines + f.write("\nCOMPANY UNDER 3 STAR\n") + self.write_sorted_list(f, [r for r in company_ratings if 2 < r[1] <= 3]) + + f.write("\nCOMPANY UNDER 2 STAR\n") + self.write_sorted_list(f, [r for r in company_ratings if 1 < r[1] <= 2]) + + f.write("\nCOMPANY UNDER 1 STAR\n") + self.write_sorted_list(f, [r for r in company_ratings if 0 < r[1] <= 1]) + else: + print(f"Error scraping page {page}: {response.status_code}") + + +if __name__ == "__main__": + c = Comapiens(10) + c.scrape_companies() diff --git a/src/scrape_up/ambitionBox/company_ratings.txt b/src/scrape_up/ambitionBox/company_ratings.txt new file mode 100644 index 00000000..e69de29b From bda061df79967ee2e6f1f6b571dcc58707368153 Mon Sep 17 00:00:00 2001 From: wrieddude <59405252+PlanetDestroyyer@users.noreply.github.com> Date: Mon, 13 May 2024 12:51:18 +0530 Subject: [PATCH 2/6] ambitionboxupdated --- src/scrape_up/ambitionBox/company.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scrape_up/ambitionBox/company.py b/src/scrape_up/ambitionBox/company.py index 6ac69435..49f30251 100644 --- a/src/scrape_up/ambitionBox/company.py +++ b/src/scrape_up/ambitionBox/company.py @@ -2,7 +2,7 @@ from bs4 import BeautifulSoup -class Comapiens: # Class name with proper capitalization +class Comapiens: def __init__(self,num_pages: int=1): self.num_pages = num_pages From 1ac758fb1511e2ed441e1c93cf5786905145aee3 Mon Sep 17 00:00:00 2001 From: wrieddude <59405252+PlanetDestroyyer@users.noreply.github.com> Date: Mon, 13 May 2024 13:00:55 +0530 Subject: [PATCH 3/6] update ambitionBox --- documentation.md | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/documentation.md b/documentation.md index dd03b34a..f28e3de0 100644 --- a/documentation.md +++ b/documentation.md @@ -733,3 +733,25 @@ boxoffice = imdb.BoxOffice() | Methods | Details | | --------------- | ----------------------------------------------------------------------------- | | `.top_movies()` | Returns the top box office movies, weekend and total gross and weeks released | + + +#### AmbitonBx + +Create an directory with name ambitonbox +created a python which consist the code for scarping the website + +```python +# Example usage +from scrape_up import ambitionBox + +num_pages_to_scrape = 2 + +scraper = ambitionBox.Comapiens(num_pages_to_scrape) + +scraper.scrape_companies() + +``` + +| Methods | Details | +| --------------- | ----------------------------------------------------------------------------- | +| `.scrape_companies()` | Returns the company name with the rating | From e134eae9eb012deb05e2f6f34dd473113a36dff5 Mon Sep 17 00:00:00 2001 From: wrieddude <59405252+PlanetDestroyyer@users.noreply.github.com> Date: Tue, 14 May 2024 08:48:41 +0530 Subject: [PATCH 4/6] updated ambitionbox dev-doc.md file --- dev-documentation.md | 22 ++++++++++++++++++++++ documentation.md | 20 -------------------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/dev-documentation.md b/dev-documentation.md index 624696a8..f2fe0e3f 100644 --- a/dev-documentation.md +++ b/dev-documentation.md @@ -1616,3 +1616,25 @@ First create an object of class `Dictionary`. | `.get_word_of_the_day()` | Returns the word of the day. | | `.word_of_the_day_definition()` | Returns the definition of the word of the day. -------- + + +#### AmbitonBx + +Create an directory with name ambitonbox +created a python which consist the code for scarping the website + +```python +# Example usage +from scrape_up import ambitionBox + +num_pages_to_scrape = 2 + +scraper = ambitionBox.Comapiens(num_pages_to_scrape) + +scraper.scrape_companies() + +``` + +| Methods | Details | +| --------------- | ----------------------------------------------------------------------------- | +| `.scrape_companies()` | Returns the company name with the rating | diff --git a/documentation.md b/documentation.md index f28e3de0..ef501f78 100644 --- a/documentation.md +++ b/documentation.md @@ -735,23 +735,3 @@ boxoffice = imdb.BoxOffice() | `.top_movies()` | Returns the top box office movies, weekend and total gross and weeks released | -#### AmbitonBx - -Create an directory with name ambitonbox -created a python which consist the code for scarping the website - -```python -# Example usage -from scrape_up import ambitionBox - -num_pages_to_scrape = 2 - -scraper = ambitionBox.Comapiens(num_pages_to_scrape) - -scraper.scrape_companies() - -``` - -| Methods | Details | -| --------------- | ----------------------------------------------------------------------------- | -| `.scrape_companies()` | Returns the company name with the rating | From 4093a2f903f7617c36ef8321b0a9b2328d824bdc Mon Sep 17 00:00:00 2001 From: wrieddude <59405252+PlanetDestroyyer@users.noreply.github.com> Date: Tue, 14 May 2024 08:53:14 +0530 Subject: [PATCH 5/6] update amibitonbox dev-doc.md --- dev-documentation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-documentation.md b/dev-documentation.md index f2fe0e3f..94e9a59b 100644 --- a/dev-documentation.md +++ b/dev-documentation.md @@ -1637,4 +1637,4 @@ scraper.scrape_companies() | Methods | Details | | --------------- | ----------------------------------------------------------------------------- | -| `.scrape_companies()` | Returns the company name with the rating | +| `.scrape_companies()` | Returns the company name with the rating. | From 0687f13ea634267a01efec37afb1fc9546a14480 Mon Sep 17 00:00:00 2001 From: wrieddude <59405252+PlanetDestroyyer@users.noreply.github.com> Date: Tue, 14 May 2024 08:55:01 +0530 Subject: [PATCH 6/6] update amibitonbox dev-doc.md new file --- dev-documentation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-documentation.md b/dev-documentation.md index 94e9a59b..58c30076 100644 --- a/dev-documentation.md +++ b/dev-documentation.md @@ -1618,7 +1618,7 @@ First create an object of class `Dictionary`. -------- -#### AmbitonBx +#### AmbitionBx Create an directory with name ambitonbox created a python which consist the code for scarping the website