Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions linkedin_scraper/person.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

class Person(Scraper):

__TOP_CARD = "pv-top-card"
__TOP_CARD = "scaffold-layout__main"
__WAIT_FOR_ELEMENT_TIMEOUT = 5

def __init__(
Expand Down Expand Up @@ -115,7 +115,7 @@ def get_experiences(self):
self.scroll_to_bottom()
main_list = self.wait_for_element_to_load(name="pvs-list", base=main)
for position in main_list.find_elements(By.XPATH,"li"):
position = position.find_element(By.CLASS_NAME,"pvs-entity")
position = position.find_element(By.CLASS_NAME,"pvs-entity--padded")

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The get_experiences() function needs further rework: for some experiences certain data is repeated, while for others some data is omitted.

company_logo_elem, position_details = position.find_elements(By.XPATH,"*")

# company elem
Expand Down Expand Up @@ -240,9 +240,9 @@ def get_educations(self):
self.add_education(education)

def get_name_and_location(self):
# Sets self.name and self.location from elements looked up through self.driver.
# This span appears to be a flattened diff hunk: the first three statements
# look like the pre-change lookup and the last three like its replacement.
top_panels = self.driver.find_elements(By.CLASS_NAME,"pv-text-details__left-panel")
self.name = top_panels[0].find_elements(By.XPATH,"*")[0].text
self.location = top_panels[1].find_element(By.TAG_NAME,"span").text
# Replacement lookup: a single container matched by its exact class attribute.
top_panel = self.driver.find_element(By.XPATH, "//*[@class='mt2 relative']")
self.name = top_panel.find_element(By.TAG_NAME, "h1").text
# NOTE(review): unlike self.name above, this stores the object returned by
# find_element rather than its .text. The XPath also starts with "//", which
# presumably matches from the document root rather than under top_panel;
# ".//" would be the relative form. Confirm against the Selenium docs.
self.location = top_panel.find_element(By.XPATH, "//*[@class='text-body-small inline t-black--light break-words']")
Copy link

@Eaglesight02 Eaglesight02 Jan 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are only getting the location element, not its text.

Also, I believe it is better to search within the __TOP_CARD element instead of searching the entire page.

You can do something like:

top_panel = self.driver.find_element(By.CLASS_NAME, self.__TOP_CARD).find_element(By.CSS_SELECTOR, ".mt2.relative")

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks for this

Note that self.location is missing a '.text' at the end.



def get_about(self):
Expand Down