Skip to content

Commit

Permalink
enh:connection contact info
Browse files Browse the repository at this point in the history
  • Loading branch information
cullenwatson committed Jan 1, 2025
1 parent 47c90eb commit 35eb363
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 28 deletions.
29 changes: 15 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ _why pay $100/mo for LSN when you could do it for free and get a nice csv to go
- Scrapes staff from a company on **LinkedIn**
- Obtains skills, experiences, certifications & more
- Or fetch individual users / comments on posts
- Scrape your own LinkedIn connections with details
- Aggregates the employees in a Pandas DataFrame

[Video Guide for StaffSpy](https://youtu.be/DNFmjvpZBTs) - _updated for release v0.2.18_
Expand Down Expand Up @@ -62,17 +61,10 @@ companies = account.scrape_companies(
company_names=['openai', 'microsoft']
)

# fetch connections
connections = account.scrape_connections(
extra_profile_data=True,
max_results=50
)

staff.to_csv("staff.csv", index=False)
users.to_csv("users.csv", index=False)
comments.to_csv("comments.csv", index=False)
companies.to_csv("companies.csv", index=False)
connections.to_csv("connections.csv", index=False)
```

#### Browser login
Expand Down Expand Up @@ -181,10 +173,11 @@ Optional

```plaintext
├── max_results (int):
| maximum number of connections to fetch (default is all)
| maximum number of connections to fetch (default is very high)
| e.g. 50 to fetch first 50 connections
|
├── extra_profile_data (bool):
| gets all profile info
| fetches education, experiences, skills, and certifications for each connection (default is false)
```

### LinkedIn notes
Expand Down Expand Up @@ -282,8 +275,16 @@ Staff
│ ├── cert_id
│ └── cert_link
└── Educational Background
├── years
├── school
└── degree
├── Educational Background
| ├── years
| ├── school
| └── degree
└── Connection Info (only when the user is a connection and has enabled it on their profile)
├── email_address
├── address
├── birthday
├── websites
├── phone_numbers
└── created_at
```
32 changes: 18 additions & 14 deletions staffspy/linkedin/linkedin.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import requests

import staffspy.utils.utils as utils
# Absolute package path, matching the other staffspy.linkedin fetcher imports;
# a bare `linkedin.` root only resolves when staffspy/ itself is on sys.path.
from staffspy.linkedin.contact_info import ContactInfoFetcher
from staffspy.utils.exceptions import TooManyRequests, BadCookies, GeoUrnNotFound
from staffspy.linkedin.certifications import CertificationFetcher
from staffspy.linkedin.employee import EmployeeFetcher
Expand Down Expand Up @@ -57,6 +58,7 @@ def __init__(self, session: requests.Session):
self.experiences = ExperiencesFetcher(self.session)
self.bio = EmployeeBioFetcher(self.session)
self.languages = LanguagesFetcher(self.session)
self.contact = ContactInfoFetcher(self.session)

def search_companies(self, company_name: str):
"""Get the company id and staff count from the company name."""
Expand Down Expand Up @@ -429,26 +431,28 @@ def fetch_all_info_for_employee(self, employee: Staff, index: int):
f"Fetching data for account {employee.id} {index:>4} / {self.num_staff} - {employee.profile_link}"
)

with ThreadPoolExecutor(max_workers=7) as executor:
task_functions = [
(self.employees.fetch_employee, (employee, self.domain), "employee"),
(self.skills.fetch_skills, (employee,), "skills"),
(self.experiences.fetch_experiences, (employee,), "experiences"),
(self.certs.fetch_certifications, (employee,), "certifications"),
(self.schools.fetch_schools, (employee,), "schools"),
(self.bio.fetch_employee_bio, (employee,), "bio"),
(self.languages.fetch_languages, (employee,), "languages"),
]

with ThreadPoolExecutor(max_workers=len(task_functions)) as executor:
tasks = {
executor.submit(
self.employees.fetch_employee, employee, self.domain
): "employee",
executor.submit(self.skills.fetch_skills, employee): "skills",
executor.submit(self.experiences.fetch_experiences, employee): (
"experiences"
),
executor.submit(self.certs.fetch_certifications, employee): (
"certifications"
),
executor.submit(self.schools.fetch_schools, employee): "schools",
executor.submit(self.bio.fetch_employee_bio, employee): "bio",
executor.submit(self.languages.fetch_languages, employee): "lanaguages",
executor.submit(func, *args): name
for func, args, name in task_functions
}

for future in as_completed(tasks):
result = future.result()

if employee.is_connection:
self.contact.fetch_contact_info(employee)

def fetch_user_profile_data_from_public_id(self, user_id: str, key: str):
"""Fetches data given the public LinkedIn user id"""
endpoint = self.public_user_id_ep.format(user_id=user_id)
Expand Down
28 changes: 28 additions & 0 deletions staffspy/utils/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,25 @@ def to_dict(self):
}


# Contact details attached to a Staff record; every field is optional and
# defaults to None when nothing was provided.
class ContactInfo(BaseModel):
    email_address: str | None = None
    websites: list | None = None
    phone_numbers: list | None = None
    address: str | None = None
    birthday: str | None = None
    created_at: str | None = None

    def to_dict(self):
        """Return the six contact fields as a plain dict keyed by field name."""
        # Explicit tuple keeps the output key order fixed and independent of
        # anything else defined on the model.
        exported_fields = (
            "email_address",
            "websites",
            "phone_numbers",
            "address",
            "birthday",
            "created_at",
        )
        return {field: getattr(self, field) for field in exported_fields}


class Certification(BaseModel):
title: str | None = None
issuer: str | None = None
Expand Down Expand Up @@ -127,6 +146,7 @@ class Staff(BaseModel):
skills: list[Skill] | None = None
experiences: list[Experience] | None = None
certifications: list[Certification] | None = None
contact_info: ContactInfo | None = None
schools: list[School] | None = None
languages: list[str] | None = None

Expand Down Expand Up @@ -182,6 +202,8 @@ def to_dict(self):
if len(sorted_experiences) > 0 and sorted_experiences[0].end_date is None
else None
)

contact_info = self.contact_info.to_dict() if self.contact_info else {}
return {
"search_term": self.search_term,
"id": self.id,
Expand Down Expand Up @@ -236,6 +258,12 @@ def to_dict(self):
"potential_emails": self.potential_emails,
"profile_photo": self.profile_photo,
"banner_photo": self.banner_photo,
"connection_created_at": contact_info.get("created_at"),
"connection_email": contact_info.get("email_address"),
"connection_phone_numbers": contact_info.get("phone_numbers"),
"connection_websites": contact_info.get("websites"),
"connection_street_address": contact_info.get("address"),
"connection_birthday": contact_info.get("birthday"),
}

def estimate_age_based_on_education(self):
Expand Down

0 comments on commit 35eb363

Please sign in to comment.