
Commit

Merge pull request #933 from PlanetDestroyyer/ambitionbox
Able to scrape the AmbitionBox website and get companies with their ratings
nikhil25803 authored May 14, 2024
2 parents e99fbef + 46dc125 commit 12ee113
Showing 4 changed files with 94 additions and 0 deletions.
24 changes: 24 additions & 0 deletions dev-documentation.md
@@ -1615,6 +1615,30 @@ First create an object of class `Dictionary`.
| `.get_word_of_the_day()` | Returns the word of the day. |
| `.word_of_the_day_definition()` | Returns the definition of the word of the day. |

--------


#### AmbitionBox

Create a directory named `ambitionBox` and add a Python file containing the code for scraping the website.

```python
# Example usage
from scrape_up import ambitionBox

num_pages_to_scrape = 2

scraper = ambitionBox.Comapiens(num_pages_to_scrape)

scraper.scrape_companies()

```

| Methods | Details |
| --------------- | ----------------------------------------------------------------------------- |
| `.scrape_companies()` | Scrapes company names and ratings and writes them, grouped by star rating, to `company_ratings.txt`. |
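
Note that `scrape_companies()` does not return the data; it writes it to `src/scrape_up/ambitionBox/company_ratings.txt`. A minimal sketch (not part of the library) of reading that file back, assuming the `name rating` line format produced by `company.py`:

```python
# Read back the ratings file written by scrape_companies().
# The path is the one hard-coded in company.py; adjust it if you run from elsewhere.
path = "src/scrape_up/ambitionBox/company_ratings.txt"

with open(path) as f:
    for line in f:
        line = line.strip()
        # Skip blank lines, the "PAGE: <url>" markers, and the "COMPANY UNDER N STAR" headings.
        if not line or line.startswith(("PAGE:", "COMPANY UNDER")):
            continue
        name, rating = line.rsplit(" ", 1)  # each data line is "<company name> <rating>"
        print(name, float(rating))
```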

---

## Geeksforgeeks
1 change: 1 addition & 0 deletions documentation.md
@@ -733,3 +733,4 @@ boxoffice = imdb.BoxOffice()
| Methods | Details |
| --------------- | ------------------------------------------------------------------------------ |
| `.top_movies()` | Returns the top box office movies, weekend and total gross, and weeks released.|

69 changes: 69 additions & 0 deletions src/scrape_up/ambitionBox/company.py
@@ -0,0 +1,69 @@
import requests
from bs4 import BeautifulSoup


class Comapiens:
    def __init__(self, num_pages: int = 1):
self.num_pages = num_pages

    def write_sorted_list(self, file, company_list):
        # Sort by rating, highest first, and write one "name rating" line per company.
company_list.sort(key=lambda x: x[1], reverse=True)
for company_name, rating in company_list:
file.write(f"{company_name.strip()} {rating}\n")

    def scrape_companies(self):
        # Send a desktop-browser User-Agent, since the site may not serve requests with a default client UA.
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"
}

for page in range(1, self.num_pages + 1):
print(f"Scraping webpage number: {page} of {self.num_pages}")

url = f"https://www.ambitionbox.com/list-of-companies?page={page}"
response = requests.get(url, headers=headers)

if response.status_code == 200:
soup = BeautifulSoup(response.text, 'lxml')

                # Each company listing on the page is rendered as a "companyCardWrapper" card.
                companies = soup.find_all('div', class_="companyCardWrapper")

company_ratings = []

for company in companies:
company_name = company.find('h2', class_="companyCardWrapper__companyName").text.strip()
company_star = company.find('span', class_="companyCardWrapper__companyRatingValue")

if company_name and company_star:
try:
rating = float(company_star.text)
company_ratings.append((company_name, rating))
except ValueError:
print(f"Error parsing rating for company: {company_name}")

                # Append this page's results to the output file, grouped into one-star-wide rating buckets.
                with open("src/scrape_up/ambitionBox/company_ratings.txt", "a") as f:
f.write(f"\nPAGE: {url}\n")
f.write("COMPANY UNDER 5 STAR\n")
self.write_sorted_list(f, [r for r in company_ratings if 4 < r[1] <= 5])

f.write("\nCOMPANY UNDER 4 STAR\n")
self.write_sorted_list(f, [r for r in company_ratings if 3 < r[1] <= 4])

                    f.write("\nCOMPANY UNDER 3 STAR\n")
self.write_sorted_list(f, [r for r in company_ratings if 2 < r[1] <= 3])

f.write("\nCOMPANY UNDER 2 STAR\n")
self.write_sorted_list(f, [r for r in company_ratings if 1 < r[1] <= 2])

f.write("\nCOMPANY UNDER 1 STAR\n")
self.write_sorted_list(f, [r for r in company_ratings if 0 < r[1] <= 1])
else:
print(f"Error scraping page {page}: {response.status_code}")


if __name__ == "__main__":
c = Comapiens(10)
c.scrape_companies()
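
For reference, a small sketch (not part of this commit) of how `write_sorted_list` behaves on its own, using an in-memory buffer and hypothetical company names:

```python
import io

# Assumes the package layout added in this commit (src/scrape_up/ambitionBox/company.py).
from scrape_up.ambitionBox.company import Comapiens

buf = io.StringIO()
Comapiens().write_sorted_list(buf, [("Acme Corp", 3.9), ("Globex", 4.2)])  # hypothetical data
print(buf.getvalue())
# Globex 4.2
# Acme Corp 3.9
```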
Empty file.
