From 0bc11b002314cfca441def6e89949f4905a21b60 Mon Sep 17 00:00:00 2001
From: Daniel Sticker
Date: Sat, 28 Jun 2025 17:56:57 -0400
Subject: [PATCH 01/14] feat(project): updated instructions to use uv instead
 of pip, added ruff and pre-commit hooks for linting & formatting on commit.
 Added vscode settings for formatting on save and vscode tasks to run the
 pre-commit hooks manually. Formatted codebase

---
 .claude/settings.local.json | 9 +
 .env.example | 5 +
 .gitignore | 222 +++++++++++-
 .pre-commit-config.yaml | 17 +
 .python-version | 1 +
 .vscode/settings.json | 16 +
 .vscode/tasks.json | 39 +++
 CLAUDE.md | 124 +++++++
 README.md | 182 +++++++---
 README.rst | 365 --------------------
 linkedin_scraper/__init__.py | 25 +-
 linkedin_scraper/actions.py | 55 +--
 linkedin_scraper/company.py | 371 +++++++++++++-------
 linkedin_scraper/constants.py | 2 +-
 linkedin_scraper/job_search.py | 75 ++--
 linkedin_scraper/jobs.py | 43 ++-
 linkedin_scraper/objects.py | 42 +--
 linkedin_scraper/person.py | 312 +++++++++++------
 linkedin_scraper/selectors.py | 2 +-
 main.py | 18 +
 pyproject.toml | 75 ++++
 requirements.txt | 3 -
 samples/scrape_person.py | 9 +-
 samples/scrape_person_contacts.py | 12 +-
 setup.py | 47 +--
 uv.lock | 553 ++++++++++++++++++++++++++++++
 26 files changed, 1823 insertions(+), 801 deletions(-)
 create mode 100644 .claude/settings.local.json
 create mode 100644 .env.example
 create mode 100644 .pre-commit-config.yaml
 create mode 100644 .python-version
 create mode 100644 .vscode/settings.json
 create mode 100644 .vscode/tasks.json
 create mode 100644 CLAUDE.md
 delete mode 100644 README.rst
 create mode 100644 main.py
 create mode 100644 pyproject.toml
 delete mode 100644 requirements.txt
 create mode 100644 uv.lock

diff --git a/.claude/settings.local.json b/.claude/settings.local.json
new file mode 100644
index 0000000..44b1176
--- /dev/null
+++ b/.claude/settings.local.json
+{
+  "permissions": {
+    "allow": [
+      "WebFetch(domain:docs.astral.sh)",
+      "WebFetch(domain:pre-commit.com)"
+    ],
+    "deny": []
+  }
+}
diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..b91e175
--- /dev/null
+++ b/.env.example
+# LinkedIn credentials for scraping
+# Copy this file to .env and fill in your actual credentials
+
+LINKEDIN_EMAIL=your-email@example.com
+LINKEDIN_PASSWORD=your-password-here
diff --git a/.gitignore b/.gitignore
index 1d04e8b..09be0ef 100644
--- a/.gitignore
+++ b/.gitignore
-*.swp
-linkedin_user_scraper.egg-info
-linkedin_scraper.egg-info
-dist
-build
-__pycache__
-*.pyc
-chromedriver
-.DS_Store
-.vscode/
-.env/
-scrape.py
-creds.json
-venv
-*.zip
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[codz]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py.cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# UV
+# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+#uv.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+#poetry.toml
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+#pdm.lock
+#pdm.toml
+.pdm-python
+.pdm-build/
+
+# pixi
+# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+#pixi.lock
+# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+# in the .venv directory. It is recommended not to include this directory in version control.
+.pixi
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.envrc
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# Abstra
+# Abstra is an AI-powered process automation framework.
+# Ignore directories containing user credentials, local state, and settings.
+# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. However, if you prefer, +# you could uncomment the following to ignore the entire vscode folder +# .vscode/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Cursor +# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to +# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data +# refer to https://docs.cursor.com/context/ignore-files +.cursorignore +.cursorindexingignore + +# Marimo +marimo/_static/ +marimo/_lsp/ +__marimo__/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..3dc49f5 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,17 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + - id: check-merge-conflict + - id: debug-statements + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.12.1 + hooks: + - id: ruff + args: [--fix, --exit-non-zero-on-fix] + - id: ruff-format diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..e4fba21 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.12 diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..aeb3c32 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,16 @@ +{ + "editor.formatOnSave": true, + "editor.codeActionsOnSave": { + "source.fixAll": "explicit", + "source.organizeImports": "explicit" + }, + "editor.defaultFormatter": "charliermarsh.ruff", + "[python]": { + "editor.defaultFormatter": "charliermarsh.ruff", + "editor.formatOnSave": true, + "editor.codeActionsOnSave": { + "source.fixAll": "explicit", + "source.organizeImports.ruff": "explicit" + } + } +} diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..cd2ad15 --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,39 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "uv run pre-commit run --all-files", + "detail": "Run pre-commit hooks on all files", + "type": "shell", + "command": "uv", + "args": ["run", "pre-commit", "run", "--all-files"], + "group": { + "kind": "test", + "isDefault": true + }, + "presentation": { + "reveal": "never", + "panel": "new", + "focus": true + }, + "problemMatcher": [] + }, + { + "label": "uv run main.py", + "detail": "Run main.py", + "type": "shell", + "command": "uv", + "args": ["run", "main.py"], + "group": { + "kind": "build", + "isDefault": true + }, + "presentation": { + "reveal": "always", + "panel": "new", + "focus": true + }, + "problemMatcher": [] + }, + ] +} diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..b3d90c0 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,124 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Setup and Installation + +This is a Python package that scrapes LinkedIn profiles, companies, and job listings using Selenium WebDriver. + +### Installation with uv (Recommended) +```bash +# Install dependencies +uv sync + +# Install development dependencies (pre-commit hooks, ruff, etc.) 
+uv sync --dev +``` + +### Code Quality Tools +This project uses Ruff for linting and formatting, with pre-commit hooks for automation: + +```bash +# Install pre-commit hooks (run once) +uv run pre-commit install + +# Run pre-commit on all files +uv run pre-commit run --all-files + +# Manual ruff commands +uv run ruff check # Check for linting issues +uv run ruff check --fix # Fix auto-fixable issues +uv run ruff format # Format code +uv run ruff format --check # Check if code is properly formatted +``` + +Pre-commit will automatically run on every commit, ensuring code quality. + +### Environment Setup +1. Copy environment template: +```bash +cp .env.example .env +``` + +2. Edit `.env` with your LinkedIn credentials: +``` +LINKEDIN_EMAIL=your-email@example.com +LINKEDIN_PASSWORD=your-password +``` + +3. ChromeDriver is auto-detected from PATH, or set manually: +```bash +export CHROMEDRIVER=~/chromedriver +``` + +## Architecture Overview + +This is a LinkedIn scraping library with the following core components: + +### Core Classes +- **Person** (`linkedin_scraper/person.py`): Scrapes individual LinkedIn profiles including experiences, education, interests, and accomplishments +- **Company** (`linkedin_scraper/company.py`): Scrapes company profiles including about, employees, headquarters, and company details +- **Job** (`linkedin_scraper/jobs.py`): Scrapes individual job postings +- **JobSearch** (`linkedin_scraper/job_search.py`): Performs job searches and returns collections of jobs + +### Supporting Modules +- **objects.py**: Contains data model classes (Experience, Education, Institution, Contact, etc.) and base Scraper class +- **selectors.py**: CSS/XPath selectors for different LinkedIn page elements +- **actions.py**: Utility functions including login automation +- **constants.py**: Configuration constants + +### Project Structure +``` +linkedin_scraper/ +├── __init__.py # Package exports +├── person.py # Person profile scraping +├── company.py # Company profile scraping +├── jobs.py # Job posting scraping +├── job_search.py # Job search functionality +├── objects.py # Data models and base classes +├── selectors.py # Web element selectors +├── actions.py # Helper functions +└── constants.py # Configuration constants +``` + +## Usage Patterns + +### WebDriver Management +- Uses Chrome WebDriver by default +- Can accept custom driver instances via `driver` parameter +- Supports `close_on_complete` parameter to control browser cleanup +- WebDriver location configured via `CHROMEDRIVER` environment variable + +### Authentication +- Login handled via `actions.login(driver, email, password)` +- Supports both prompted and programmatic credential entry +- Can scrape with or without authentication depending on profile visibility + +### Scraping Modes +- **Automatic**: Set `scrape=True` (default) to scrape immediately on instantiation +- **Manual**: Set `scrape=False` to create object first, then call `.scrape()` method later +- **Batch**: Keep `close_on_complete=False` to reuse driver across multiple scrapes + +## Development Notes + +- This is a setuptools-based Python package (setup.py) +- Version defined in `linkedin_scraper/__init__.py` +- Uses Selenium for browser automation, requests for HTTP, lxml for HTML parsing +- No formal test framework detected - basic test scripts in `test/` directory +- Package distributed via pip as `linkedin_scraper` + +## Sample Usage +```python +from linkedin_scraper import Person, Company, actions +from selenium import webdriver + +# Setup driver and 
login +driver = webdriver.Chrome() +actions.login(driver, email, password) + +# Scrape person profile +person = Person("https://www.linkedin.com/in/stickerdaniel", driver=driver) + +# Scrape company +company = Company("https://www.linkedin.com/company/google", driver=driver) +``` diff --git a/README.md b/README.md index 19201ee..56cafb9 100644 --- a/README.md +++ b/README.md @@ -3,57 +3,115 @@ Scrapes Linkedin User Data [Linkedin Scraper](#linkedin-scraper) -* [Installation](#installation) -* [Setup](#setup) -* [Usage](#usage) - + [Sample Usage](#sample-usage) - + [User Scraping](#user-scraping) - + [Company Scraping](#company-scraping) - + [Job Scraping](#job-scraping) - + [Job Search Scraping](#job-search-scraping) - + [Scraping sites where login is required first](#scraping-sites-where-login-is-required-first) - + [Scraping sites and login automatically](#scraping-sites-and-login-automatically) -* [API](#api) - + [Person](#person) - - [`linkedin_url`](#linkedin_url) - - [`name`](#name) - - [`about`](#about) - - [`experiences`](#experiences) - - [`educations`](#educations) - - [`interests`](#interests) - - [`accomplishment`](#accomplishment) - - [`company`](#company) - - [`job_title`](#job_title) - - [`driver`](#driver) - - [`scrape`](#scrape) - - [`scrape(close_on_complete=True)`](#scrapeclose_on_completetrue) - + [Company](#company) - - [`linkedin_url`](#linkedin_url-1) - - [`name`](#name-1) - - [`about_us`](#about_us) - - [`website`](#website) - - [`headquarters`](#headquarters) - - [`founded`](#founded) - - [`company_type`](#company_type) - - [`company_size`](#company_size) - - [`specialties`](#specialties) - - [`showcase_pages`](#showcase_pages) - - [`affiliated_companies`](#affiliated_companies) - - [`driver`](#driver-1) - - [`get_employees`](#get_employees) - - [`scrape(close_on_complete=True)`](#scrapeclose_on_completetrue-1) -* [Contribution](#contribution) +- [Linkedin Scraper](#linkedin-scraper) + - [Installation](#installation) + - [Development](#development) + - [Running Scripts](#running-scripts) + - [Contributing](#contributing) + - [Development Workflow](#development-workflow) + - [Setup](#setup) + - [Environment Variables](#environment-variables) + - [ChromeDriver](#chromedriver) + - [Sponsor](#sponsor) + - [Usage](#usage) + - [Sample Usage](#sample-usage) + - [User Scraping](#user-scraping) + - [Company Scraping](#company-scraping) + - [Job Scraping](#job-scraping) + - [Job Search Scraping](#job-search-scraping) + - [Scraping sites where login is required first](#scraping-sites-where-login-is-required-first) + - [Scraping sites and login automatically](#scraping-sites-and-login-automatically) + - [API](#api) + - [Person](#person) + - [`linkedin_url`](#linkedin_url) + - [`name`](#name) + - [`about`](#about) + - [`experiences`](#experiences) + - [`educations`](#educations) + - [`interests`](#interests) + - [`accomplishment`](#accomplishment) + - [`company`](#company) + - [`job_title`](#job_title) + - [`driver`](#driver) + - [`scrape`](#scrape) + - [`scrape(close_on_complete=True)`](#scrapeclose_on_completetrue) + - [Company](#company-1) + - [`linkedin_url`](#linkedin_url-1) + - [`name`](#name-1) + - [`about_us`](#about_us) + - [`website`](#website) + - [`phone`](#phone) + - [`headquarters`](#headquarters) + - [`founded`](#founded) + - [`company_type`](#company_type) + - [`company_size`](#company_size) + - [`specialties`](#specialties) + - [`showcase_pages`](#showcase_pages) + - [`affiliated_companies`](#affiliated_companies) + - [`driver`](#driver-1) + - 
[`get_employees`](#get_employees) + - [`scrape(close_on_complete=True)`](#scrapeclose_on_completetrue-1) + - [Contribution](#contribution) ## Installation ```bash -pip3 install --user linkedin_scraper +# Clone the repository +git clone https://github.com/joeyism/linkedin_scraper.git +cd linkedin_scraper + +# Install dependencies +uv sync +``` + +## Development + +### Running Scripts + +```bash +# Run a Python script with the project dependencies +uv run your_script.py + +# Or activate the virtual environment +uv shell +python your_script.py ``` -Version **2.0.0** and before is called `linkedin_user_scraper` and can be installed via `pip3 install --user linkedin_user_scraper` +## Contributing + +If you want to contribute to this project: + +1. **Install development dependencies**: + ```bash + uv sync --dev + ``` + +2. **Set up pre-commit hooks** (automatically runs linting/formatting on commit): + ```bash + uv run pre-commit install + ``` + +3. **Environment setup for testing**: + ```bash + cp .env.example .env + # Edit .env with your LinkedIn credentials for login + ``` + +### Development Workflow + +With pre-commit installed, your code will be automatically linted and formatted on every commit. ## Setup -First, you must set your chromedriver location by + +### Environment Variables +Create a `.env` file (copy from `.env.example`) with your LinkedIn credentials: +```bash +cp .env.example .env +# Edit .env with your credentials +``` + +### ChromeDriver +The project will automatically use chromedriver from your system PATH. If you need to specify a custom location: ```bash export CHROMEDRIVER=~/chromedriver @@ -67,16 +125,24 @@ To use it, just create the class. ### Sample Usage ```python +import os +from dotenv import load_dotenv from linkedin_scraper import Person, actions from selenium import webdriver + +# Load environment variables +load_dotenv() + driver = webdriver.Chrome() -email = "some-email@email.address" -password = "password123" -actions.login(driver, email, password) # if email and password isnt given, it'll prompt in terminal +# Use credentials from .env file +email = os.getenv("LINKEDIN_EMAIL") +password = os.getenv("LINKEDIN_PASSWORD") +actions.login(driver, email, password) person = Person("https://www.linkedin.com/in/joey-sham-aa2a50122", driver=driver) ``` + **NOTE**: The account used to log-in should have it's language set English to make sure everything works as expected. ### User Scraping @@ -126,7 +192,7 @@ job_listings = job_search.search("Machine Learning Engineer") # returns the list ### Scraping sites where login is required first 1. Run `ipython` or `python` 2. In `ipython`/`python`, run the following code (you can modify it if you need to specify your driver) -3. +3. ```python from linkedin_scraper import Person from selenium import webdriver @@ -140,25 +206,31 @@ person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5", driver = person.scrape() ``` -The reason is that LinkedIn has recently blocked people from viewing certain profiles without having previously signed in. So by setting `scrape=False`, it doesn't automatically scrape the profile, but Chrome will open the linkedin page anyways. You can login and logout, and the cookie will stay in the browser and it won't affect your profile views. Then when you run `person.scrape()`, it'll scrape and close the browser. 
If you want to keep the browser on so you can scrape others, run it as +The reason is that LinkedIn has recently blocked people from viewing certain profiles without having previously signed in. So by setting `scrape=False`, it doesn't automatically scrape the profile, but Chrome will open the linkedin page anyways. You can login and logout, and the cookie will stay in the browser and it won't affect your profile views. Then when you run `person.scrape()`, it'll scrape and close the browser. If you want to keep the browser on so you can scrape others, run it as **NOTE**: For version >= `2.1.0`, scraping can also occur while logged in. Beware that users will be able to see that you viewed their profile. ```python person.scrape(close_on_complete=False) -``` +``` so it doesn't close. ### Scraping sites and login automatically From verison **2.4.0** on, `actions` is a part of the library that allows signing into Linkedin first. The email and password can be provided as a variable into the function. If not provided, both will be prompted in terminal. ```python +import os +from dotenv import load_dotenv from linkedin_scraper import Person, actions from selenium import webdriver + +# Load environment variables +load_dotenv() + driver = webdriver.Chrome() -email = "some-email@email.address" -password = "password123" -actions.login(driver, email, password) # if email and password isnt given, it'll prompt in terminal +email = os.getenv("LINKEDIN_EMAIL") +password = os.getenv("LINKEDIN_PASSWORD") +actions.login(driver, email, password) person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5", driver=driver) ``` @@ -193,10 +265,10 @@ This is the interests they have. A list of `linkedin_scraper.scraper.Interest` This is the accomplishments they have. A list of `linkedin_scraper.scraper.Accomplishment` #### `company` -This the most recent company or institution they have worked at. +This the most recent company or institution they have worked at. #### `job_title` -This the most recent job title they have. +This the most recent job title they have. #### `driver` This is the driver from which to scraper the Linkedin profile. A driver using Chrome is created by default. However, if a driver is passed in, that will be used instead. @@ -214,7 +286,7 @@ When this is **True**, the scraping happens automatically. To scrape afterwards, #### `scrape(close_on_complete=True)` This is the meat of the code, where execution of this function scrapes the profile. If *close_on_complete* is True (which it is by default), then the browser will close upon completion. If scraping of other profiles are desired, then you might want to set that to false so you can keep using the same driver. - + ### Company diff --git a/README.rst b/README.rst deleted file mode 100644 index 51601d5..0000000 --- a/README.rst +++ /dev/null @@ -1,365 +0,0 @@ -.. 
role:: raw-html-m2r(raw) - :format: html - - -Linkedin Scraper -================ - -Scrapes Linkedin User Data - -`Linkedin Scraper <#linkedin-scraper>`_ - - -* `Installation <#installation>`_ -* `Setup <#setup>`_ -* `Usage <#usage>`_ - - * `Sample Usage <#sample-usage>`_ - * `User Scraping <#user-scraping>`_ - * `Company Scraping <#company-scraping>`_ - * `Job Scraping <#job-scraping>`_ - * `Job Search Scraping <#job-search-scraping>`_ - * `Scraping sites where login is required first <#scraping-sites-where-login-is-required-first>`_ - * `Scraping sites and login automatically <#scraping-sites-and-login-automatically>`_ - -* `API <#api>`_ - - * `Person <#person>`_ - - * `\ ``linkedin_url`` <#linkedin_url>`_ - * `\ ``name`` <#name>`_ - * `\ ``about`` <#about>`_ - * `\ ``experiences`` <#experiences>`_ - * `\ ``educations`` <#educations>`_ - * `\ ``interests`` <#interests>`_ - * `\ ``accomplishment`` <#accomplishment>`_ - * `\ ``company`` <#company>`_ - * `\ ``job_title`` <#job_title>`_ - * `\ ``driver`` <#driver>`_ - * `\ ``scrape`` <#scrape>`_ - * `\ ``scrape(close_on_complete=True)`` <#scrapeclose_on_completetrue>`_ - - * `Company <#company>`_ - - * `\ ``linkedin_url`` <#linkedin_url-1>`_ - * `\ ``name`` <#name-1>`_ - * `\ ``about_us`` <#about_us>`_ - * `\ ``website`` <#website>`_ - * `\ ``headquarters`` <#headquarters>`_ - * `\ ``founded`` <#founded>`_ - * `\ ``company_type`` <#company_type>`_ - * `\ ``company_size`` <#company_size>`_ - * `\ ``specialties`` <#specialties>`_ - * `\ ``showcase_pages`` <#showcase_pages>`_ - * `\ ``affiliated_companies`` <#affiliated_companies>`_ - * `\ ``driver`` <#driver-1>`_ - * `\ ``get_employees`` <#get_employees>`_ - * `\ ``scrape(close_on_complete=True)`` <#scrapeclose_on_completetrue-1>`_ - -* `Contribution <#contribution>`_ - -Installation ------------- - -.. code-block:: bash - - pip3 install --user linkedin_scraper - -Version **2.0.0** and before is called ``linkedin_user_scraper`` and can be installed via ``pip3 install --user linkedin_user_scraper`` - -Setup ------ - -First, you must set your chromedriver location by - -.. code-block:: bash - - export CHROMEDRIVER=~/chromedriver - - -Usage ------ - -To use it, just create the class. - -Sample Usage -^^^^^^^^^^^^ - -.. code-block:: python - - from linkedin_scraper import Person, actions - from selenium import webdriver - driver = webdriver.Chrome() - - email = "some-email@email.address" - password = "password123" - actions.login(driver, email, password) # if email and password isnt given, it'll prompt in terminal - person = Person("https://www.linkedin.com/in/joey-sham-aa2a50122", driver=driver) - -**NOTE**\ : The account used to log-in should have it's language set English to make sure everything works as expected. - -User Scraping -^^^^^^^^^^^^^ - -.. code-block:: python - - from linkedin_scraper import Person - person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5") - -Company Scraping -^^^^^^^^^^^^^^^^ - -.. code-block:: python - - from linkedin_scraper import Company - company = Company("https://ca.linkedin.com/company/google") - -Job Scraping -^^^^^^^^^^^^ - -.. 
code-block:: python - - from linkedin_scraper import JobSearch, actions - from selenium import webdriver - - driver = webdriver.Chrome() - email = "some-email@email.address" - password = "password123" - actions.login(driver, email, password) # if email and password isnt given, it'll prompt in terminal - input("Press Enter") - job = Job("https://www.linkedin.com/jobs/collections/recommended/?currentJobId=3456898261", driver=driver, close_on_complete=False) - -Job Search Scraping -^^^^^^^^^^^^^^^^^^^ - -.. code-block:: python - - from linkedin_scraper import JobSearch, actions - from selenium import webdriver - - driver = webdriver.Chrome() - email = "some-email@email.address" - password = "password123" - actions.login(driver, email, password) # if email and password isnt given, it'll prompt in terminal - input("Press Enter") - job_search = JobSearch(driver=driver, close_on_complete=False, scrape=False) - # job_search contains jobs from your logged in front page: - # - job_search.recommended_jobs - # - job_search.still_hiring - # - job_search.more_jobs - - job_listings = job_search.search("Machine Learning Engineer") # returns the list of `Job` from the first page - -Scraping sites where login is required first -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - - -#. Run ``ipython`` or ``python`` -#. In ``ipython``\ /\ ``python``\ , run the following code (you can modify it if you need to specify your driver) -#. - .. code-block:: python - - from linkedin_scraper import Person - from selenium import webdriver - driver = webdriver.Chrome() - person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5", driver = driver, scrape=False) - -#. Login to Linkedin -#. [OPTIONAL] Logout of Linkedin -#. In the same ``ipython``\ /\ ``python`` code, run - .. code-block:: python - - person.scrape() - -The reason is that LinkedIn has recently blocked people from viewing certain profiles without having previously signed in. So by setting ``scrape=False``\ , it doesn't automatically scrape the profile, but Chrome will open the linkedin page anyways. You can login and logout, and the cookie will stay in the browser and it won't affect your profile views. Then when you run ``person.scrape()``\ , it'll scrape and close the browser. If you want to keep the browser on so you can scrape others, run it as - -**NOTE**\ : For version >= ``2.1.0``\ , scraping can also occur while logged in. Beware that users will be able to see that you viewed their profile. - -.. code-block:: python - - person.scrape(close_on_complete=False) - -so it doesn't close. - -Scraping sites and login automatically -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -From verison **2.4.0** on, ``actions`` is a part of the library that allows signing into Linkedin first. The email and password can be provided as a variable into the function. If not provided, both will be prompted in terminal. - -.. code-block:: python - - from linkedin_scraper import Person, actions - from selenium import webdriver - driver = webdriver.Chrome() - email = "some-email@email.address" - password = "password123" - actions.login(driver, email, password) # if email and password isnt given, it'll prompt in terminal - person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5", driver=driver) - -API ---- - -Person -^^^^^^ - -A Person object can be created with the following inputs: - -.. 
code-block:: python - - Person(linkedin_url=None, name=None, about=[], experiences=[], educations=[], interests=[], accomplishments=[], company=None, job_title=None, driver=None, scrape=True) - -``linkedin_url`` -~~~~~~~~~~~~~~~~~~~~ - -This is the linkedin url of their profile - -``name`` -~~~~~~~~~~~~ - -This is the name of the person - -``about`` -~~~~~~~~~~~~~ - -This is the small paragraph about the person - -``experiences`` -~~~~~~~~~~~~~~~~~~~ - -This is the past experiences they have. A list of ``linkedin_scraper.scraper.Experience`` - -``educations`` -~~~~~~~~~~~~~~~~~~ - -This is the past educations they have. A list of ``linkedin_scraper.scraper.Education`` - -``interests`` -~~~~~~~~~~~~~~~~~ - -This is the interests they have. A list of ``linkedin_scraper.scraper.Interest`` - -``accomplishment`` -~~~~~~~~~~~~~~~~~~~~~~ - -This is the accomplishments they have. A list of ``linkedin_scraper.scraper.Accomplishment`` - -``company`` -~~~~~~~~~~~~~~~ - -This the most recent company or institution they have worked at. - -``job_title`` -~~~~~~~~~~~~~~~~~ - -This the most recent job title they have. - -``driver`` -~~~~~~~~~~~~~~ - -This is the driver from which to scraper the Linkedin profile. A driver using Chrome is created by default. However, if a driver is passed in, that will be used instead. - -For example - -.. code-block:: python - - driver = webdriver.Chrome() - person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5", driver = driver) - -``scrape`` -~~~~~~~~~~~~~~ - -When this is **True**\ , the scraping happens automatically. To scrape afterwards, that can be run by the ``scrape()`` function from the ``Person`` object. - -``scrape(close_on_complete=True)`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is the meat of the code, where execution of this function scrapes the profile. If *close_on_complete* is True (which it is by default), then the browser will close upon completion. If scraping of other profiles are desired, then you might want to set that to false so you can keep using the same driver. - -Company -^^^^^^^ - -.. code-block:: python - - Company(linkedin_url=None, name=None, about_us=None, website=None, headquarters=None, founded=None, company_type=None, company_size=None, specialties=None, showcase_pages=[], affiliated_companies=[], driver=None, scrape=True, get_employees=True) - -``linkedin_url`` -~~~~~~~~~~~~~~~~~~~~ - -This is the linkedin url of their profile - -``name`` -~~~~~~~~~~~~ - -This is the name of the company - -``about_us`` -~~~~~~~~~~~~~~~~ - -The description of the company - -``website`` -~~~~~~~~~~~~~~~ - -The website of the company - -``headquarters`` -~~~~~~~~~~~~~~~~~~~~ - -The headquarters location of the company - -``founded`` -~~~~~~~~~~~~~~~ - -When the company was founded - -``company_type`` -~~~~~~~~~~~~~~~~~~~~ - -The type of the company - -``company_size`` -~~~~~~~~~~~~~~~~~~~~ - -How many people are employeed at the company - -``specialties`` -~~~~~~~~~~~~~~~~~~~ - -What the company specializes in - -``showcase_pages`` -~~~~~~~~~~~~~~~~~~~~~~ - -Pages that the company owns to showcase their products - -``affiliated_companies`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Other companies that are affiliated with this one - -``driver`` -~~~~~~~~~~~~~~ - -This is the driver from which to scraper the Linkedin profile. A driver using Chrome is created by default. However, if a driver is passed in, that will be used instead. - -``get_employees`` -~~~~~~~~~~~~~~~~~~~~~ - -Whether to get all the employees of company - -For example - -.. 
code-block:: python - - driver = webdriver.Chrome() - company = Company("https://ca.linkedin.com/company/google", driver=driver) - -``scrape(close_on_complete=True)`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -This is the meat of the code, where execution of this function scrapes the company. If *close_on_complete* is True (which it is by default), then the browser will close upon completion. If scraping of other companies are desired, then you might want to set that to false so you can keep using the same driver. - -Contribution ------------- - -:raw-html-m2r:`Buy Me A Coffee` diff --git a/linkedin_scraper/__init__.py b/linkedin_scraper/__init__.py index 181d4d4..ff00620 100644 --- a/linkedin_scraper/__init__.py +++ b/linkedin_scraper/__init__.py @@ -1,12 +1,25 @@ -from os.path import dirname, basename, isfile -from .person import Person -from .objects import Institution, Experience, Education, Contact +from os.path import basename, dirname, isfile + from .company import Company -from .jobs import Job from .job_search import JobSearch +from .jobs import Job +from .objects import Contact, Education, Experience, Institution +from .person import Person __version__ = "2.11.5" import glob -modules = glob.glob(dirname(__file__)+"/*.py") -__all__ = [ basename(f)[:-3] for f in modules if isfile(f) and not f.endswith('__init__.py')] + +modules = glob.glob(dirname(__file__) + "/*.py") +__all__ = [ + basename(f)[:-3] for f in modules if isfile(f) and not f.endswith("__init__.py") +] + [ + "Company", + "JobSearch", + "Job", + "Contact", + "Education", + "Experience", + "Institution", + "Person", +] diff --git a/linkedin_scraper/actions.py b/linkedin_scraper/actions.py index 136ffd1..d533a35 100644 --- a/linkedin_scraper/actions.py +++ b/linkedin_scraper/actions.py @@ -1,45 +1,50 @@ import getpass -from . import constants as c -from selenium.webdriver.support.wait import WebDriverWait + from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.wait import WebDriverWait + +from . 
import constants as c + def __prompt_email_password(): - u = input("Email: ") - p = getpass.getpass(prompt="Password: ") - return (u, p) + u = input("Email: ") + p = getpass.getpass(prompt="Password: ") + return (u, p) + def page_has_loaded(driver): - page_state = driver.execute_script('return document.readyState;') - return page_state == 'complete' + page_state = driver.execute_script("return document.readyState;") + return page_state == "complete" + -def login(driver, email=None, password=None, cookie = None, timeout=10): +def login(driver, email=None, password=None, cookie=None, timeout=10): if cookie is not None: return _login_with_cookie(driver, cookie) - + if not email or not password: email, password = __prompt_email_password() - + driver.get("https://www.linkedin.com/login") - element = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "username"))) - - email_elem = driver.find_element(By.ID,"username") + WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "username"))) + + email_elem = driver.find_element(By.ID, "username") email_elem.send_keys(email) - - password_elem = driver.find_element(By.ID,"password") + + password_elem = driver.find_element(By.ID, "password") password_elem.send_keys(password) password_elem.submit() - - if driver.current_url == 'https://www.linkedin.com/checkpoint/lg/login-submit': - remember = driver.find_element(By.ID,c.REMEMBER_PROMPT) + + if driver.current_url == "https://www.linkedin.com/checkpoint/lg/login-submit": + remember = driver.find_element(By.ID, c.REMEMBER_PROMPT) if remember: remember.submit() - - element = WebDriverWait(driver, timeout).until(EC.presence_of_element_located((By.CLASS_NAME, c.VERIFY_LOGIN_ID))) - + + WebDriverWait(driver, timeout).until( + EC.presence_of_element_located((By.CLASS_NAME, c.VERIFY_LOGIN_ID)) + ) + + def _login_with_cookie(driver, cookie): driver.get("https://www.linkedin.com/login") - driver.add_cookie({ - "name": "li_at", - "value": cookie - }) + driver.add_cookie({"name": "li_at", "value": cookie}) diff --git a/linkedin_scraper/company.py b/linkedin_scraper/company.py index f731272..fd9e07d 100644 --- a/linkedin_scraper/company.py +++ b/linkedin_scraper/company.py @@ -1,41 +1,43 @@ -import requests -from lxml import html +import json +import os +import time + from selenium import webdriver +from selenium.common.exceptions import NoSuchElementException from selenium.webdriver.common.by import By -from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC -from selenium.common.exceptions import NoSuchElementException +from selenium.webdriver.support.ui import WebDriverWait + from .objects import Scraper -from .person import Person -import time -import os -import json -AD_BANNER_CLASSNAME = ('ad-banner-container', '__ad') +AD_BANNER_CLASSNAME = ("ad-banner-container", "__ad") + def getchildren(elem): return elem.find_elements(By.XPATH, ".//*") -class CompanySummary(object): + +class CompanySummary: linkedin_url = None name = None followers = None - def __init__(self, linkedin_url = None, name = None, followers = None): + def __init__(self, linkedin_url=None, name=None, followers=None): self.linkedin_url = linkedin_url self.name = name self.followers = followers def __repr__(self): - if self.followers == None: - return """ {name} """.format(name = self.name) + if self.followers is None: + return f""" {self.name} """ else: - return """ {name} {followers} """.format(name = self.name, followers = self.followers) + 
return f""" {self.name} {self.followers} """ + class Company(Scraper): linkedin_url = None name = None - about_us =None + about_us = None website = None phone = None headquarters = None @@ -49,7 +51,26 @@ class Company(Scraper): employees = [] headcount = None - def __init__(self, linkedin_url = None, name = None, about_us =None, website = None, phone = None, headquarters = None, founded = None, industry = None, company_type = None, company_size = None, specialties = None, showcase_pages =[], affiliated_companies = [], driver = None, scrape = True, get_employees = True, close_on_complete = True): + def __init__( + self, + linkedin_url=None, + name=None, + about_us=None, + website=None, + phone=None, + headquarters=None, + founded=None, + industry=None, + company_type=None, + company_size=None, + specialties=None, + showcase_pages=None, + affiliated_companies=None, + driver=None, + scrape=True, + get_employees=True, + close_on_complete=True, + ): self.linkedin_url = linkedin_url self.name = name self.about_us = about_us @@ -61,13 +82,15 @@ def __init__(self, linkedin_url = None, name = None, about_us =None, website = N self.company_type = company_type self.company_size = company_size self.specialties = specialties - self.showcase_pages = showcase_pages - self.affiliated_companies = affiliated_companies + self.showcase_pages = showcase_pages or [] + self.affiliated_companies = affiliated_companies or [] if driver is None: try: - if os.getenv("CHROMEDRIVER") == None: - driver_path = os.path.join(os.path.dirname(__file__), 'drivers/chromedriver') + if os.getenv("CHROMEDRIVER") is None: + driver_path = os.path.join( + os.path.dirname(__file__), "drivers/chromedriver" + ) else: driver_path = os.getenv("CHROMEDRIVER") @@ -79,28 +102,39 @@ def __init__(self, linkedin_url = None, name = None, about_us =None, website = N self.driver = driver if scrape: - self.scrape(get_employees=get_employees, close_on_complete=close_on_complete) + self.scrape( + get_employees=get_employees, close_on_complete=close_on_complete + ) def __get_text_under_subtitle(self, elem): return "\n".join(elem.text.split("\n")[1:]) def __get_text_under_subtitle_by_class(self, driver, class_name): - return self.__get_text_under_subtitle(driver.find_element(By.CLASS_NAME, class_name)) + return self.__get_text_under_subtitle( + driver.find_element(By.CLASS_NAME, class_name) + ) def scrape(self, get_employees=True, close_on_complete=True): if self.is_signed_in(): - self.scrape_logged_in(get_employees = get_employees, close_on_complete = close_on_complete) + self.scrape_logged_in( + get_employees=get_employees, close_on_complete=close_on_complete + ) else: - self.scrape_not_logged_in(get_employees = get_employees, close_on_complete = close_on_complete) + self.scrape_not_logged_in( + get_employees=get_employees, close_on_complete=close_on_complete + ) def __parse_employee__(self, employee_raw): - try: # print() employee_object = {} - employee_object['name'] = (employee_raw.text.split("\n") or [""])[0].strip() - employee_object['designation'] = (employee_raw.text.split("\n") or [""])[3].strip() - employee_object['linkedin_url'] = employee_raw.find_element(By.TAG_NAME, "a").get_attribute("href") + employee_object["name"] = (employee_raw.text.split("\n") or [""])[0].strip() + employee_object["designation"] = (employee_raw.text.split("\n") or [""])[ + 3 + ].strip() + employee_object["linkedin_url"] = employee_raw.find_element( + By.TAG_NAME, "a" + ).get_attribute("href") # print(employee_raw.text, employee_object) # _person = Person( # # 
linkedin_url = employee_raw.find_element_by_tag_name("a").get_attribute("href"), @@ -114,7 +148,7 @@ def __parse_employee__(self, employee_raw): # print(_person, employee_object) # return _person return employee_object - except Exception as e: + except Exception: # print(e) return None @@ -125,16 +159,24 @@ def get_employees(self, wait_time=10): driver = self.driver try: - see_all_employees = driver.find_element(By.XPATH,'//a[@data-control-name="topcard_see_all_employees"]') + driver.find_element( + By.XPATH, '//a[@data-control-name="topcard_see_all_employees"]' + ) except: pass driver.get(os.path.join(self.linkedin_url, "people")) - _ = WebDriverWait(driver, 3).until(EC.presence_of_all_elements_located((By.XPATH, '//span[@dir="ltr"]'))) + _ = WebDriverWait(driver, 3).until( + EC.presence_of_all_elements_located((By.XPATH, '//span[@dir="ltr"]')) + ) - driver.execute_script("window.scrollTo(0, Math.ceil(document.body.scrollHeight/2));") + driver.execute_script( + "window.scrollTo(0, Math.ceil(document.body.scrollHeight/2));" + ) time.sleep(1) - driver.execute_script("window.scrollTo(0, Math.ceil(document.body.scrollHeight*3/4));") + driver.execute_script( + "window.scrollTo(0, Math.ceil(document.body.scrollHeight*3/4));" + ) time.sleep(1) results_list = driver.find_element(By.CLASS_NAME, list_css) @@ -143,15 +185,19 @@ def get_employees(self, wait_time=10): total.append(self.__parse_employee__(res)) def is_loaded(previous_results): - loop = 0 - driver.execute_script("window.scrollTo(0, Math.ceil(document.body.scrollHeight));") - results_li = results_list.find_elements(By.TAG_NAME, "li") - while len(results_li) == previous_results and loop <= 5: - time.sleep(1) - driver.execute_script("window.scrollTo(0, Math.ceil(document.body.scrollHeight));") + loop = 0 + driver.execute_script( + "window.scrollTo(0, Math.ceil(document.body.scrollHeight));" + ) results_li = results_list.find_elements(By.TAG_NAME, "li") - loop += 1 - return loop <= 5 + while len(results_li) == previous_results and loop <= 5: + time.sleep(1) + driver.execute_script( + "window.scrollTo(0, Math.ceil(document.body.scrollHeight));" + ) + results_li = results_list.find_elements(By.TAG_NAME, "li") + loop += 1 + return loop <= 5 def get_data(previous_results): results_li = results_list.find_elements(By.TAG_NAME, "li") @@ -161,56 +207,74 @@ def get_data(previous_results): results_li_len = len(results_li) while is_loaded(results_li_len): try: - driver.find_element(By.XPATH,next_xpath).click() + driver.find_element(By.XPATH, next_xpath).click() except: pass - _ = WebDriverWait(driver, wait_time).until(EC.presence_of_element_located((By.CLASS_NAME, list_css))) + _ = WebDriverWait(driver, wait_time).until( + EC.presence_of_element_located((By.CLASS_NAME, list_css)) + ) - driver.execute_script("window.scrollTo(0, Math.ceil(document.body.scrollHeight/2));") + driver.execute_script( + "window.scrollTo(0, Math.ceil(document.body.scrollHeight/2));" + ) time.sleep(1) - driver.execute_script("window.scrollTo(0, Math.ceil(document.body.scrollHeight*2/3));") + driver.execute_script( + "window.scrollTo(0, Math.ceil(document.body.scrollHeight*2/3));" + ) time.sleep(1) - driver.execute_script("window.scrollTo(0, Math.ceil(document.body.scrollHeight*3/4));") + driver.execute_script( + "window.scrollTo(0, Math.ceil(document.body.scrollHeight*3/4));" + ) time.sleep(1) - driver.execute_script("window.scrollTo(0, Math.ceil(document.body.scrollHeight));") + driver.execute_script( + "window.scrollTo(0, Math.ceil(document.body.scrollHeight));" + ) 
time.sleep(1) get_data(results_li_len) results_li_len = len(total) return total - - - def scrape_logged_in(self, get_employees = True, close_on_complete = True): + def scrape_logged_in(self, get_employees=True, close_on_complete=True): driver = self.driver driver.get(self.linkedin_url) - _ = WebDriverWait(driver, 3).until(EC.presence_of_all_elements_located((By.XPATH, '//div[@dir="ltr"]'))) + _ = WebDriverWait(driver, 3).until( + EC.presence_of_all_elements_located((By.XPATH, '//div[@dir="ltr"]')) + ) navigation = driver.find_element(By.CLASS_NAME, "org-page-navigation__items ") - self.name = driver.find_element(By.CLASS_NAME,"org-top-card-summary__title").text.strip() + self.name = driver.find_element( + By.CLASS_NAME, "org-top-card-summary__title" + ).text.strip() # Click About Tab or View All Link try: - self.__find_first_available_element__( - navigation.find_elements(By.XPATH, "//a[@data-control-name='page_member_main_nav_about_tab']"), - navigation.find_elements(By.XPATH, "//a[@data-control-name='org_about_module_see_all_view_link']"), - ).click() + self.__find_first_available_element__( + navigation.find_elements( + By.XPATH, "//a[@data-control-name='page_member_main_nav_about_tab']" + ), + navigation.find_elements( + By.XPATH, + "//a[@data-control-name='org_about_module_see_all_view_link']", + ), + ).click() except: - driver.get(os.path.join(self.linkedin_url, "about")) + driver.get(os.path.join(self.linkedin_url, "about")) - _ = WebDriverWait(driver, 3).until(EC.presence_of_all_elements_located((By.TAG_NAME, 'section'))) + _ = WebDriverWait(driver, 3).until( + EC.presence_of_all_elements_located((By.TAG_NAME, "section")) + ) time.sleep(3) - if 'Cookie Policy' in driver.find_elements(By.TAG_NAME, "section")[1].text or any(classname in driver.find_elements(By.TAG_NAME, "section")[1].get_attribute('class') for classname in AD_BANNER_CLASSNAME): - section_id = 4 - else: - section_id = 3 - #section ID is no longer needed, we are using class name now. - #grid = driver.find_elements_by_tag_name("section")[section_id] - grid = driver.find_element(By.CLASS_NAME, "artdeco-card.org-page-details-module__card-spacing.artdeco-card.org-about-module__margin-bottom") + # section ID is no longer needed, we are using class name now. 
+ # grid = driver.find_elements_by_tag_name("section")[section_id] + grid = driver.find_element( + By.CLASS_NAME, + "artdeco-card.org-page-details-module__card-spacing.artdeco-card.org-about-module__margin-bottom", + ) print(grid) descWrapper = grid.find_elements(By.TAG_NAME, "p") if len(descWrapper) > 0: @@ -218,30 +282,30 @@ def scrape_logged_in(self, get_employees = True, close_on_complete = True): labels = grid.find_elements(By.TAG_NAME, "dt") values = grid.find_elements(By.TAG_NAME, "dd") num_attributes = min(len(labels), len(values)) - #print("The length of the labels is " + str(len(labels)), "The length of the values is " + str(len(values))) + # print("The length of the labels is " + str(len(labels)), "The length of the values is " + str(len(values))) # if num_attributes == 0: # exit() x_off = 0 for i in range(num_attributes): txt = labels[i].text.strip() - if txt == 'Website': - self.website = values[i+x_off].text.strip() - if txt == 'Phone': - self.phone = values[i+x_off].text.strip() - elif txt == 'Industry': - self.industry = values[i+x_off].text.strip() - elif txt == 'Company size': - self.company_size = values[i+x_off].text.strip() + if txt == "Website": + self.website = values[i + x_off].text.strip() + if txt == "Phone": + self.phone = values[i + x_off].text.strip() + elif txt == "Industry": + self.industry = values[i + x_off].text.strip() + elif txt == "Company size": + self.company_size = values[i + x_off].text.strip() if len(values) > len(labels): x_off = 1 - elif txt == 'Headquarters': - self.headquarters = values[i+x_off].text.strip() - elif txt == 'Type': - self.company_type = values[i+x_off].text.strip() - elif txt == 'Founded': - self.founded = values[i+x_off].text.strip() - elif txt == 'Specialties': - self.specialties = "\n".join(values[i+x_off].text.strip().split(", ")) + elif txt == "Headquarters": + self.headquarters = values[i + x_off].text.strip() + elif txt == "Type": + self.company_type = values[i + x_off].text.strip() + elif txt == "Founded": + self.founded = values[i + x_off].text.strip() + elif txt == "Specialties": + self.specialties = "\n".join(values[i + x_off].text.strip().split(", ")) try: grid = driver.find_element(By.CLASS_NAME, "mt1") @@ -249,35 +313,60 @@ def scrape_logged_in(self, get_employees = True, close_on_complete = True): for span in spans: txt = span.text.strip() if "See all" in txt and "employees on LinkedIn" in txt: - self.headcount = int(txt.replace("See all", "").replace("employees on LinkedIn", "").strip()) - except NoSuchElementException: # Does not exist in page, skip it + self.headcount = int( + txt.replace("See all", "") + .replace("employees on LinkedIn", "") + .strip() + ) + except NoSuchElementException: # Does not exist in page, skip it pass - driver.execute_script("window.scrollTo(0, Math.ceil(document.body.scrollHeight/2));") - + driver.execute_script( + "window.scrollTo(0, Math.ceil(document.body.scrollHeight/2));" + ) try: - _ = WebDriverWait(driver, 3).until(EC.presence_of_element_located((By.CLASS_NAME, 'company-list'))) + _ = WebDriverWait(driver, 3).until( + EC.presence_of_element_located((By.CLASS_NAME, "company-list")) + ) showcase, affiliated = driver.find_elements(By.CLASS_NAME, "company-list") - driver.find_element(By.ID,"org-related-companies-module__show-more-btn").click() + driver.find_element( + By.ID, "org-related-companies-module__show-more-btn" + ).click() # get showcase - for showcase_company in showcase.find_elements(By.CLASS_NAME, "org-company-card"): + for showcase_company in showcase.find_elements( + 
By.CLASS_NAME, "org-company-card" + ): companySummary = CompanySummary( - linkedin_url = showcase_company.find_element(By.CLASS_NAME, "company-name-link").get_attribute("href"), - name = showcase_company.find_element(By.CLASS_NAME, "company-name-link").text.strip(), - followers = showcase_company.find_element(By.CLASS_NAME, "company-followers-count").text.strip() - ) + linkedin_url=showcase_company.find_element( + By.CLASS_NAME, "company-name-link" + ).get_attribute("href"), + name=showcase_company.find_element( + By.CLASS_NAME, "company-name-link" + ).text.strip(), + followers=showcase_company.find_element( + By.CLASS_NAME, "company-followers-count" + ).text.strip(), + ) self.showcase_pages.append(companySummary) # affiliated company - for affiliated_company in showcase.find_element(By.CLASS_NAME, "org-company-card"): + for affiliated_company in showcase.find_element( + By.CLASS_NAME, "org-company-card" + ): companySummary = CompanySummary( - linkedin_url = affiliated_company.find_element(By.CLASS_NAME, "company-name-link").get_attribute("href"), - name = affiliated_company.find_element(By.CLASS_NAME, "company-name-link").text.strip(), - followers = affiliated_company.find_element(By.CLASS_NAME, "company-followers-count").text.strip() - ) + linkedin_url=affiliated_company.find_element( + By.CLASS_NAME, "company-name-link" + ).get_attribute("href"), + name=affiliated_company.find_element( + By.CLASS_NAME, "company-name-link" + ).text.strip(), + followers=affiliated_company.find_element( + By.CLASS_NAME, "company-followers-count" + ).text.strip(), + ) self.affiliated_companies.append(companySummary) except: @@ -291,37 +380,51 @@ def scrape_logged_in(self, get_employees = True, close_on_complete = True): if close_on_complete: driver.close() - def scrape_not_logged_in(self, close_on_complete = True, retry_limit = 10, get_employees = True): + def scrape_not_logged_in( + self, close_on_complete=True, retry_limit=10, get_employees=True + ): driver = self.driver retry_times = 0 while self.is_signed_in() and retry_times <= retry_limit: - page = driver.get(self.linkedin_url) + driver.get(self.linkedin_url) retry_times = retry_times + 1 self.name = driver.find_element(By.CLASS_NAME, "name").text.strip() - self.about_us = driver.find_element(By.CLASS_NAME, "basic-info-description").text.strip() - self.specialties = self.__get_text_under_subtitle_by_class(driver, "specialties") + self.about_us = driver.find_element( + By.CLASS_NAME, "basic-info-description" + ).text.strip() + self.specialties = self.__get_text_under_subtitle_by_class( + driver, "specialties" + ) self.website = self.__get_text_under_subtitle_by_class(driver, "website") self.phone = self.__get_text_under_subtitle_by_class(driver, "phone") self.headquarters = driver.find_element(By.CLASS_NAME, "adr").text.strip() self.industry = driver.find_element(By.CLASS_NAME, "industry").text.strip() - self.company_size = driver.find_element(By.CLASS_NAME, "company-size").text.strip() + self.company_size = driver.find_element( + By.CLASS_NAME, "company-size" + ).text.strip() self.company_type = self.__get_text_under_subtitle_by_class(driver, "type") self.founded = self.__get_text_under_subtitle_by_class(driver, "founded") # get showcase try: - driver.find_element(By.ID,"view-other-showcase-pages-dialog").click() - WebDriverWait(driver, 3).until(EC.presence_of_element_located((By.ID, 'dialog'))) - - showcase_pages = driver.find_elements(By.CLASS_NAME, "company-showcase-pages")[1] + driver.find_element(By.ID, "view-other-showcase-pages-dialog").click() 
+ WebDriverWait(driver, 3).until( + EC.presence_of_element_located((By.ID, "dialog")) + ) + + showcase_pages = driver.find_elements( + By.CLASS_NAME, "company-showcase-pages" + )[1] for showcase_company in showcase_pages.find_elements(By.TAG_NAME, "li"): name_elem = showcase_company.find_element(By.CLASS_NAME, "name") companySummary = CompanySummary( - linkedin_url = name_elem.find_element(By.TAG_NAME, "a").get_attribute("href"), - name = name_elem.text.strip(), - followers = showcase_company.text.strip().split("\n")[1] + linkedin_url=name_elem.find_element(By.TAG_NAME, "a").get_attribute( + "href" + ), + name=name_elem.text.strip(), + followers=showcase_company.text.strip().split("\n")[1], ) self.showcase_pages.append(companySummary) driver.find_element(By.CLASS_NAME, "dialog-close").click() @@ -330,14 +433,22 @@ def scrape_not_logged_in(self, close_on_complete = True, retry_limit = 10, get_e # affiliated company try: - affiliated_pages = driver.find_element(By.CLASS_NAME, "affiliated-companies") - for i, affiliated_page in enumerate(affiliated_pages.find_elements(By.CLASS_NAME, "affiliated-company-name")): + affiliated_pages = driver.find_element( + By.CLASS_NAME, "affiliated-companies" + ) + for i, affiliated_page in enumerate( + affiliated_pages.find_elements(By.CLASS_NAME, "affiliated-company-name") + ): if i % 3 == 0: - affiliated_pages.find_element(By.CLASS_NAME, "carousel-control-next").click() + affiliated_pages.find_element( + By.CLASS_NAME, "carousel-control-next" + ).click() companySummary = CompanySummary( - linkedin_url = affiliated_page.find_element(By.TAG_NAME, "a").get_attribute("href"), - name = affiliated_page.text.strip() + linkedin_url=affiliated_page.find_element( + By.TAG_NAME, "a" + ).get_attribute("href"), + name=affiliated_page.text.strip(), ) self.affiliated_companies.append(companySummary) except: @@ -353,18 +464,18 @@ def scrape_not_logged_in(self, close_on_complete = True, retry_limit = 10, get_e def __repr__(self): _output = {} - _output['name'] = self.name - _output['about_us'] = self.about_us - _output['specialties'] = self.specialties - _output['website'] = self.website - _output['phone'] = self.phone - _output['industry'] = self.industry - _output['company_type'] = self.name - _output['headquarters'] = self.headquarters - _output['company_size'] = self.company_size - _output['founded'] = self.founded - _output['affiliated_companies'] = self.affiliated_companies - _output['employees'] = self.employees - _output['headcount'] = self.headcount - - return json.dumps(_output).replace('\n', '') + _output["name"] = self.name + _output["about_us"] = self.about_us + _output["specialties"] = self.specialties + _output["website"] = self.website + _output["phone"] = self.phone + _output["industry"] = self.industry + _output["company_type"] = self.name + _output["headquarters"] = self.headquarters + _output["company_size"] = self.company_size + _output["founded"] = self.founded + _output["affiliated_companies"] = self.affiliated_companies + _output["employees"] = self.employees + _output["headcount"] = self.headcount + + return json.dumps(_output).replace("\n", "") diff --git a/linkedin_scraper/constants.py b/linkedin_scraper/constants.py index fc31b04..56cbc5a 100644 --- a/linkedin_scraper/constants.py +++ b/linkedin_scraper/constants.py @@ -1,2 +1,2 @@ VERIFY_LOGIN_ID = "global-nav__primary-link" -REMEMBER_PROMPT = 'remember-me-prompt__form-primary' +REMEMBER_PROMPT = "remember-me-prompt__form-primary" diff --git a/linkedin_scraper/job_search.py 
b/linkedin_scraper/job_search.py index c77a5ed..ea7f1b8 100644 --- a/linkedin_scraper/job_search.py +++ b/linkedin_scraper/job_search.py @@ -1,22 +1,22 @@ import os -from typing import List -from time import sleep import urllib.parse +from time import sleep -from .objects import Scraper -from . import constants as c from .jobs import Job - -from selenium.webdriver.common.by import By -from selenium.webdriver.support.wait import WebDriverWait -from selenium.webdriver.support import expected_conditions as EC -from selenium.webdriver.common.keys import Keys +from .objects import Scraper class JobSearch(Scraper): AREAS = ["recommended_jobs", None, "still_hiring", "more_jobs"] - def __init__(self, driver, base_url="https://www.linkedin.com/jobs/", close_on_complete=False, scrape=True, scrape_recommended_jobs=True): + def __init__( + self, + driver, + base_url="https://www.linkedin.com/jobs/", + close_on_complete=False, + scrape=True, + scrape_recommended_jobs=True, + ): super().__init__() self.driver = driver self.base_url = base_url @@ -24,46 +24,67 @@ def __init__(self, driver, base_url="https://www.linkedin.com/jobs/", close_on_c if scrape: self.scrape(close_on_complete, scrape_recommended_jobs) - def scrape(self, close_on_complete=True, scrape_recommended_jobs=True): if self.is_signed_in(): - self.scrape_logged_in(close_on_complete=close_on_complete, scrape_recommended_jobs=scrape_recommended_jobs) + self.scrape_logged_in( + close_on_complete=close_on_complete, + scrape_recommended_jobs=scrape_recommended_jobs, + ) else: - raise NotImplemented("This part is not implemented yet") - + raise NotImplementedError("This part is not implemented yet") def scrape_job_card(self, base_element) -> Job: - job_div = self.wait_for_element_to_load(name="job-card-list__title", base=base_element) + job_div = self.wait_for_element_to_load( + name="job-card-list__title", base=base_element + ) job_title = job_div.text.strip() linkedin_url = job_div.get_attribute("href") - company = base_element.find_element_by_class_name("artdeco-entity-lockup__subtitle").text - location = base_element.find_element_by_class_name("job-card-container__metadata-wrapper").text - job = Job(linkedin_url=linkedin_url, job_title=job_title, company=company, location=location, scrape=False, driver=self.driver) + company = base_element.find_element_by_class_name( + "artdeco-entity-lockup__subtitle" + ).text + location = base_element.find_element_by_class_name( + "job-card-container__metadata-wrapper" + ).text + job = Job( + linkedin_url=linkedin_url, + job_title=job_title, + company=company, + location=location, + scrape=False, + driver=self.driver, + ) return job - def scrape_logged_in(self, close_on_complete=True, scrape_recommended_jobs=True): driver = self.driver driver.get(self.base_url) if scrape_recommended_jobs: self.focus() sleep(self.WAIT_FOR_ELEMENT_TIMEOUT) - job_area = self.wait_for_element_to_load(name="scaffold-finite-scroll__content") - areas = self.wait_for_all_elements_to_load(name="artdeco-card", base=job_area) + job_area = self.wait_for_element_to_load( + name="scaffold-finite-scroll__content" + ) + areas = self.wait_for_all_elements_to_load( + name="artdeco-card", base=job_area + ) for i, area in enumerate(areas): area_name = self.AREAS[i] if not area_name: continue area_results = [] - for job_posting in area.find_elements_by_class_name("jobs-job-board-list__item"): + for job_posting in area.find_elements_by_class_name( + "jobs-job-board-list__item" + ): job = self.scrape_job_card(job_posting) area_results.append(job) 
setattr(self, area_name, area_results) return - - def search(self, search_term: str) -> List[Job]: - url = os.path.join(self.base_url, "search") + f"?keywords={urllib.parse.quote(search_term)}&refresh=true" + def search(self, search_term: str) -> list[Job]: + url = ( + os.path.join(self.base_url, "search") + + f"?keywords={urllib.parse.quote(search_term)}&refresh=true" + ) self.driver.get(url) self.scroll_to_bottom() self.focus() @@ -85,7 +106,9 @@ def search(self, search_term: str) -> List[Job]: sleep(self.WAIT_FOR_ELEMENT_TIMEOUT) job_results = [] - for job_card in self.wait_for_all_elements_to_load(name="job-card-list", base=job_listing): + for job_card in self.wait_for_all_elements_to_load( + name="job-card-list", base=job_listing + ): job = self.scrape_job_card(job_card) job_results.append(job) return job_results diff --git a/linkedin_scraper/jobs.py b/linkedin_scraper/jobs.py index 110f2bb..be8fea5 100644 --- a/linkedin_scraper/jobs.py +++ b/linkedin_scraper/jobs.py @@ -1,14 +1,10 @@ from selenium.common.exceptions import TimeoutException +from selenium.webdriver.common.by import By from .objects import Scraper -from . import constants as c -from selenium.webdriver.common.by import By -from selenium.webdriver.support.wait import WebDriverWait -from selenium.webdriver.support import expected_conditions as EC class Job(Scraper): - def __init__( self, linkedin_url=None, @@ -46,7 +42,7 @@ def scrape(self, close_on_complete=True): if self.is_signed_in(): self.scrape_logged_in(close_on_complete=close_on_complete) else: - raise NotImplemented("This part is not implemented yet") + raise NotImplementedError("This part is not implemented yet") def to_dict(self): return { @@ -58,25 +54,38 @@ def to_dict(self): "posted_date": self.posted_date, "applicant_count": self.applicant_count, "job_description": self.job_description, - "benefits": self.benefits + "benefits": self.benefits, } - def scrape_logged_in(self, close_on_complete=True): driver = self.driver - + driver.get(self.linkedin_url) self.focus() - self.job_title = self.wait_for_element_to_load(name="job-details-jobs-unified-top-card__job-title").text.strip() - self.company = self.wait_for_element_to_load(name="job-details-jobs-unified-top-card__company-name").text.strip() - self.company_linkedin_url = self.wait_for_element_to_load(name="job-details-jobs-unified-top-card__company-name").find_element(By.TAG_NAME,"a").get_attribute("href") - primary_descriptions = self.wait_for_element_to_load(name="job-details-jobs-unified-top-card__primary-description-container").find_elements(By.TAG_NAME, "span") + self.job_title = self.wait_for_element_to_load( + name="job-details-jobs-unified-top-card__job-title" + ).text.strip() + self.company = self.wait_for_element_to_load( + name="job-details-jobs-unified-top-card__company-name" + ).text.strip() + self.company_linkedin_url = ( + self.wait_for_element_to_load( + name="job-details-jobs-unified-top-card__company-name" + ) + .find_element(By.TAG_NAME, "a") + .get_attribute("href") + ) + primary_descriptions = self.wait_for_element_to_load( + name="job-details-jobs-unified-top-card__primary-description-container" + ).find_elements(By.TAG_NAME, "span") texts = [span.text for span in primary_descriptions if span.text.strip() != ""] self.location = texts[0] self.posted_date = texts[3] - + try: - self.applicant_count = self.wait_for_element_to_load(name="jobs-unified-top-card__applicant-count").text.strip() + self.applicant_count = self.wait_for_element_to_load( + name="jobs-unified-top-card__applicant-count" 
+ ).text.strip() except TimeoutException: self.applicant_count = 0 job_description_elem = self.wait_for_element_to_load(name="jobs-description") @@ -85,7 +94,9 @@ def scrape_logged_in(self, close_on_complete=True): job_description_elem.find_element(By.TAG_NAME, "button").click() self.job_description = job_description_elem.text.strip() try: - self.benefits = self.wait_for_element_to_load(name="jobs-unified-description__salary-main-rail-card").text.strip() + self.benefits = self.wait_for_element_to_load( + name="jobs-unified-description__salary-main-rail-card" + ).text.strip() except TimeoutException: self.benefits = None diff --git a/linkedin_scraper/objects.py b/linkedin_scraper/objects.py index b807804..6721725 100644 --- a/linkedin_scraper/objects.py +++ b/linkedin_scraper/objects.py @@ -1,14 +1,13 @@ from dataclasses import dataclass from time import sleep -from selenium.webdriver import Chrome - -from . import constants as c - from selenium import webdriver +from selenium.webdriver import Chrome from selenium.webdriver.common.by import By -from selenium.webdriver.support.wait import WebDriverWait from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.wait import WebDriverWait + +from . import constants as c @dataclass @@ -80,26 +79,17 @@ def mouse_click(self, elem): def wait_for_element_to_load(self, by=By.CLASS_NAME, name="pv-top-card", base=None): base = base or self.driver return WebDriverWait(base, self.WAIT_FOR_ELEMENT_TIMEOUT).until( - EC.presence_of_element_located( - ( - by, - name - ) - ) + EC.presence_of_element_located((by, name)) ) - def wait_for_all_elements_to_load(self, by=By.CLASS_NAME, name="pv-top-card", base=None): + def wait_for_all_elements_to_load( + self, by=By.CLASS_NAME, name="pv-top-card", base=None + ): base = base or self.driver return WebDriverWait(base, self.WAIT_FOR_ELEMENT_TIMEOUT).until( - EC.presence_of_all_elements_located( - ( - by, - name - ) - ) + EC.presence_of_all_elements_located((by, name)) ) - def is_signed_in(self): try: WebDriverWait(self.driver, self.WAIT_FOR_ELEMENT_TIMEOUT).until( @@ -113,7 +103,7 @@ def is_signed_in(self): self.driver.find_element(By.CLASS_NAME, c.VERIFY_LOGIN_ID) return True - except Exception as e: + except Exception: pass return False @@ -123,11 +113,11 @@ def scroll_to_half(self): ) def scroll_to_bottom(self): - self.driver.execute_script( - "window.scrollTo(0, document.body.scrollHeight);" - ) + self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);") - def scroll_class_name_element_to_page_percent(self, class_name:str, page_percent:float): + def scroll_class_name_element_to_page_percent( + self, class_name: str, page_percent: float + ): self.driver.execute_script( f'elem = document.getElementsByClassName("{class_name}")[0]; elem.scrollTo(0, elem.scrollHeight*{str(page_percent)});' ) @@ -142,7 +132,7 @@ def __find_element_by_class_name__(self, class_name): def __find_element_by_xpath__(self, tag_name): try: - self.driver.find_element(By.XPATH,tag_name) + self.driver.find_element(By.XPATH, tag_name) return True except: pass @@ -150,7 +140,7 @@ def __find_element_by_xpath__(self, tag_name): def __find_enabled_element_by_xpath__(self, tag_name): try: - elem = self.driver.find_element(By.XPATH,tag_name) + elem = self.driver.find_element(By.XPATH, tag_name) return elem.is_enabled() except: pass diff --git a/linkedin_scraper/person.py b/linkedin_scraper/person.py index 3ab7194..7d738cb 100644 --- a/linkedin_scraper/person.py +++ b/linkedin_scraper/person.py 
@@ -1,16 +1,15 @@ -import requests +import os + from selenium import webdriver +from selenium.common.exceptions import NoSuchElementException from selenium.webdriver.common.by import By -from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC -from selenium.common.exceptions import NoSuchElementException -from .objects import Experience, Education, Scraper, Interest, Accomplishment, Contact -import os -from linkedin_scraper import selectors +from selenium.webdriver.support.ui import WebDriverWait +from .objects import Accomplishment, Contact, Education, Experience, Interest, Scraper -class Person(Scraper): +class Person(Scraper): __TOP_CARD = "main" __WAIT_FOR_ELEMENT_TIMEOUT = 5 @@ -44,7 +43,7 @@ def __init__( if driver is None: try: - if os.getenv("CHROMEDRIVER") == None: + if os.getenv("CHROMEDRIVER") is None: driver_path = os.path.join( os.path.dirname(__file__), "drivers/chromedriver" ) @@ -97,12 +96,14 @@ def _click_see_more_by_class_name(self, class_name): ) div = self.driver.find_element(By.CLASS_NAME, class_name) div.find_element(By.TAG_NAME, "button").click() - except Exception as e: + except Exception: pass def is_open_to_work(self): try: - return "#OPEN_TO_WORK" in self.driver.find_element(By.CLASS_NAME,"pv-top-card-profile-picture").find_element(By.TAG_NAME,"img").get_attribute("title") + return "#OPEN_TO_WORK" in self.driver.find_element( + By.CLASS_NAME, "pv-top-card-profile-picture" + ).find_element(By.TAG_NAME, "img").get_attribute("title") except: return False @@ -114,55 +115,83 @@ def get_experiences(self): self.scroll_to_half() self.scroll_to_bottom() main_list = self.wait_for_element_to_load(name="pvs-list__container", base=main) - for position in main_list.find_elements(By.CLASS_NAME, "pvs-list__paged-list-item"): - position = position.find_element(By.CSS_SELECTOR, "div[data-view-name='profile-component-entity']") - + for position in main_list.find_elements( + By.CLASS_NAME, "pvs-list__paged-list-item" + ): + position = position.find_element( + By.CSS_SELECTOR, "div[data-view-name='profile-component-entity']" + ) + # Fix: Handle case where more than 2 elements are returned elements = position.find_elements(By.XPATH, "*") if len(elements) < 2: continue # Skip if we don't have enough elements - + company_logo_elem = elements[0] position_details = elements[1] # company elem try: - company_linkedin_url = company_logo_elem.find_element(By.XPATH,"*").get_attribute("href") + company_linkedin_url = company_logo_elem.find_element( + By.XPATH, "*" + ).get_attribute("href") if not company_linkedin_url: continue except NoSuchElementException: continue # position details - position_details_list = position_details.find_elements(By.XPATH,"*") - position_summary_details = position_details_list[0] if len(position_details_list) > 0 else None - position_summary_text = position_details_list[1] if len(position_details_list) > 1 else None - + position_details_list = position_details.find_elements(By.XPATH, "*") + position_summary_details = ( + position_details_list[0] if len(position_details_list) > 0 else None + ) + position_summary_text = ( + position_details_list[1] if len(position_details_list) > 1 else None + ) + if not position_summary_details: continue - - outer_positions = position_summary_details.find_element(By.XPATH,"*").find_elements(By.XPATH,"*") + + outer_positions = position_summary_details.find_element( + By.XPATH, "*" + ).find_elements(By.XPATH, "*") if len(outer_positions) == 4: - position_title = 
outer_positions[0].find_element(By.TAG_NAME,"span").text - company = outer_positions[1].find_element(By.TAG_NAME,"span").text - work_times = outer_positions[2].find_element(By.TAG_NAME,"span").text - location = outer_positions[3].find_element(By.TAG_NAME,"span").text + position_title = ( + outer_positions[0].find_element(By.TAG_NAME, "span").text + ) + company = outer_positions[1].find_element(By.TAG_NAME, "span").text + work_times = outer_positions[2].find_element(By.TAG_NAME, "span").text + location = outer_positions[3].find_element(By.TAG_NAME, "span").text elif len(outer_positions) == 3: if "·" in outer_positions[2].text: - position_title = outer_positions[0].find_element(By.TAG_NAME,"span").text - company = outer_positions[1].find_element(By.TAG_NAME,"span").text - work_times = outer_positions[2].find_element(By.TAG_NAME,"span").text + position_title = ( + outer_positions[0].find_element(By.TAG_NAME, "span").text + ) + company = outer_positions[1].find_element(By.TAG_NAME, "span").text + work_times = ( + outer_positions[2].find_element(By.TAG_NAME, "span").text + ) location = "" else: position_title = "" - company = outer_positions[0].find_element(By.TAG_NAME,"span").text - work_times = outer_positions[1].find_element(By.TAG_NAME,"span").text - location = outer_positions[2].find_element(By.TAG_NAME,"span").text + company = outer_positions[0].find_element(By.TAG_NAME, "span").text + work_times = ( + outer_positions[1].find_element(By.TAG_NAME, "span").text + ) + location = outer_positions[2].find_element(By.TAG_NAME, "span").text else: position_title = "" - company = outer_positions[0].find_element(By.TAG_NAME,"span").text if outer_positions else "" - work_times = outer_positions[1].find_element(By.TAG_NAME,"span").text if len(outer_positions) > 1 else "" + company = ( + outer_positions[0].find_element(By.TAG_NAME, "span").text + if outer_positions + else "" + ) + work_times = ( + outer_positions[1].find_element(By.TAG_NAME, "span").text + if len(outer_positions) > 1 + else "" + ) location = "" # Safely extract times and duration @@ -175,31 +204,60 @@ def get_experiences(self): duration = None from_date = " ".join(times.split(" ")[:2]) if times else "" - to_date = " ".join(times.split(" ")[3:]) if times and len(times.split(" ")) > 3 else "" - - if position_summary_text and any(element.get_attribute("class") == "pvs-list__container" for element in position_summary_text.find_elements(By.XPATH, "*")): + to_date = ( + " ".join(times.split(" ")[3:]) + if times and len(times.split(" ")) > 3 + else "" + ) + + if position_summary_text and any( + element.get_attribute("class") == "pvs-list__container" + for element in position_summary_text.find_elements(By.XPATH, "*") + ): try: - inner_positions = (position_summary_text.find_element(By.CLASS_NAME,"pvs-list__container") - .find_element(By.XPATH,"*").find_element(By.XPATH,"*").find_element(By.XPATH,"*") - .find_elements(By.CLASS_NAME,"pvs-list__paged-list-item")) + inner_positions = ( + position_summary_text.find_element( + By.CLASS_NAME, "pvs-list__container" + ) + .find_element(By.XPATH, "*") + .find_element(By.XPATH, "*") + .find_element(By.XPATH, "*") + .find_elements(By.CLASS_NAME, "pvs-list__paged-list-item") + ) except NoSuchElementException: inner_positions = [] else: inner_positions = [] - + if len(inner_positions) > 1: descriptions = inner_positions for description in descriptions: try: - res = description.find_element(By.TAG_NAME,"a").find_elements(By.XPATH,"*") + res = description.find_element(By.TAG_NAME, "a").find_elements( + 
By.XPATH, "*" + ) position_title_elem = res[0] if len(res) > 0 else None work_times_elem = res[1] if len(res) > 1 else None location_elem = res[2] if len(res) > 2 else None - location = location_elem.find_element(By.XPATH,"*").text if location_elem else None - position_title = position_title_elem.find_element(By.XPATH,"*").find_element(By.TAG_NAME,"*").text if position_title_elem else "" - work_times = work_times_elem.find_element(By.XPATH,"*").text if work_times_elem else "" - + location = ( + location_elem.find_element(By.XPATH, "*").text + if location_elem + else None + ) + position_title = ( + position_title_elem.find_element(By.XPATH, "*") + .find_element(By.TAG_NAME, "*") + .text + if position_title_elem + else "" + ) + work_times = ( + work_times_elem.find_element(By.XPATH, "*").text + if work_times_elem + else "" + ) + # Safely extract times and duration if work_times: parts = work_times.split("·") @@ -208,9 +266,13 @@ def get_experiences(self): else: times = "" duration = None - + from_date = " ".join(times.split(" ")[:2]) if times else "" - to_date = " ".join(times.split(" ")[3:]) if times and len(times.split(" ")) > 3 else "" + to_date = ( + " ".join(times.split(" ")[3:]) + if times and len(times.split(" ")) > 3 + else "" + ) experience = Experience( position_title=position_title, @@ -220,14 +282,16 @@ def get_experiences(self): location=location, description=description, institution_name=company, - linkedin_url=company_linkedin_url + linkedin_url=company_linkedin_url, ) self.add_experience(experience) - except (NoSuchElementException, IndexError) as e: + except (NoSuchElementException, IndexError): # Skip this description if elements are missing continue else: - description = position_summary_text.text if position_summary_text else "" + description = ( + position_summary_text.text if position_summary_text else "" + ) experience = Experience( position_title=position_title, @@ -237,7 +301,7 @@ def get_experiences(self): location=location, description=description, institution_name=company, - linkedin_url=company_linkedin_url + linkedin_url=company_linkedin_url, ) self.add_experience(experience) @@ -249,57 +313,83 @@ def get_educations(self): self.scroll_to_half() self.scroll_to_bottom() main_list = self.wait_for_element_to_load(name="pvs-list__container", base=main) - for position in main_list.find_elements(By.CLASS_NAME,"pvs-list__paged-list-item"): + for position in main_list.find_elements( + By.CLASS_NAME, "pvs-list__paged-list-item" + ): try: - position = position.find_element(By.CSS_SELECTOR, "div[data-view-name='profile-component-entity']") - + position = position.find_element( + By.CSS_SELECTOR, "div[data-view-name='profile-component-entity']" + ) + # Fix: Handle case where more than 2 elements are returned - elements = position.find_elements(By.XPATH,"*") + elements = position.find_elements(By.XPATH, "*") if len(elements) < 2: continue # Skip if we don't have enough elements - + institution_logo_elem = elements[0] position_details = elements[1] # institution elem try: - institution_linkedin_url = institution_logo_elem.find_element(By.XPATH,"*").get_attribute("href") + institution_linkedin_url = institution_logo_elem.find_element( + By.XPATH, "*" + ).get_attribute("href") except NoSuchElementException: institution_linkedin_url = None # position details - position_details_list = position_details.find_elements(By.XPATH,"*") - position_summary_details = position_details_list[0] if len(position_details_list) > 0 else None - position_summary_text = position_details_list[1] if 
len(position_details_list) > 1 else None - + position_details_list = position_details.find_elements(By.XPATH, "*") + position_summary_details = ( + position_details_list[0] if len(position_details_list) > 0 else None + ) + position_summary_text = ( + position_details_list[1] if len(position_details_list) > 1 else None + ) + if not position_summary_details: continue - - outer_positions = position_summary_details.find_element(By.XPATH,"*").find_elements(By.XPATH,"*") - institution_name = outer_positions[0].find_element(By.TAG_NAME,"span").text if outer_positions else "" - degree = outer_positions[1].find_element(By.TAG_NAME,"span").text if len(outer_positions) > 1 else None + outer_positions = position_summary_details.find_element( + By.XPATH, "*" + ).find_elements(By.XPATH, "*") + + institution_name = ( + outer_positions[0].find_element(By.TAG_NAME, "span").text + if outer_positions + else "" + ) + degree = ( + outer_positions[1].find_element(By.TAG_NAME, "span").text + if len(outer_positions) > 1 + else None + ) from_date = None to_date = None - + if len(outer_positions) > 2: try: - times = outer_positions[2].find_element(By.TAG_NAME,"span").text + times = ( + outer_positions[2].find_element(By.TAG_NAME, "span").text + ) if times and "-" in times: split_times = times.split(" ") - dash_index = split_times.index("-") if "-" in split_times else -1 - + dash_index = ( + split_times.index("-") if "-" in split_times else -1 + ) + if dash_index > 0: - from_date = split_times[dash_index-1] + from_date = split_times[dash_index - 1] if dash_index < len(split_times) - 1: to_date = split_times[-1] except (NoSuchElementException, ValueError): from_date = None to_date = None - description = position_summary_text.text if position_summary_text else "" + description = ( + position_summary_text.text if position_summary_text else "" + ) education = Education( from_date=from_date, @@ -307,30 +397,35 @@ def get_educations(self): description=description, degree=degree, institution_name=institution_name, - linkedin_url=institution_linkedin_url + linkedin_url=institution_linkedin_url, ) self.add_education(education) - except (NoSuchElementException, IndexError) as e: + except (NoSuchElementException, IndexError): # Skip this education entry if elements are missing continue def get_name_and_location(self): top_panel = self.driver.find_element(By.XPATH, "//*[@class='mt2 relative']") self.name = top_panel.find_element(By.TAG_NAME, "h1").text - self.location = top_panel.find_element(By.XPATH, "//*[@class='text-body-small inline t-black--light break-words']").text + self.location = top_panel.find_element( + By.XPATH, "//*[@class='text-body-small inline t-black--light break-words']" + ).text def get_about(self): try: - about = self.driver.find_element(By.ID,"about").find_element(By.XPATH,"..").find_element(By.CLASS_NAME,"display-flex").text - except NoSuchElementException : - about=None + about = ( + self.driver.find_element(By.ID, "about") + .find_element(By.XPATH, "..") + .find_element(By.CLASS_NAME, "display-flex") + .text + ) + except NoSuchElementException: + about = None self.about = about def scrape_logged_in(self, close_on_complete=True): driver = self.driver - duration = None - - root = WebDriverWait(driver, self.__WAIT_FOR_ELEMENT_TIMEOUT).until( + WebDriverWait(driver, self.__WAIT_FOR_ELEMENT_TIMEOUT).until( EC.presence_of_element_located( ( By.TAG_NAME, @@ -365,7 +460,6 @@ def scrape_logged_in(self, close_on_complete=True): # get interest try: - _ = WebDriverWait(driver, 
self.__WAIT_FOR_ELEMENT_TIMEOUT).until( EC.presence_of_element_located( ( @@ -374,11 +468,13 @@ def scrape_logged_in(self, close_on_complete=True): ) ) ) - interestContainer = driver.find_element(By.XPATH, - "//*[@class='pv-profile-section pv-interests-section artdeco-container-card artdeco-card ember-view']" + interestContainer = driver.find_element( + By.XPATH, + "//*[@class='pv-profile-section pv-interests-section artdeco-container-card artdeco-card ember-view']", ) - for interestElement in interestContainer.find_elements(By.XPATH, - "//*[@class='pv-interest-entity pv-profile-section__card-item ember-view']" + for interestElement in interestContainer.find_elements( + By.XPATH, + "//*[@class='pv-interest-entity pv-profile-section__card-item ember-view']", ): interest = Interest( interestElement.find_element(By.TAG_NAME, "h3").text.strip() @@ -397,16 +493,18 @@ def scrape_logged_in(self, close_on_complete=True): ) ) ) - acc = driver.find_element(By.XPATH, - "//*[@class='pv-profile-section pv-accomplishments-section artdeco-container-card artdeco-card ember-view']" + acc = driver.find_element( + By.XPATH, + "//*[@class='pv-profile-section pv-accomplishments-section artdeco-container-card artdeco-card ember-view']", ) - for block in acc.find_elements(By.XPATH, - "//div[@class='pv-accomplishments-block__content break-words']" + for block in acc.find_elements( + By.XPATH, + "//div[@class='pv-accomplishments-block__content break-words']", ): category = block.find_element(By.TAG_NAME, "h3") - for title in block.find_element(By.TAG_NAME, - "ul" - ).find_elements(By.TAG_NAME, "li"): + for title in block.find_element(By.TAG_NAME, "ul").find_elements( + By.TAG_NAME, "li" + ): accomplishment = Accomplishment(category.text, title.text) self.add_accomplishment(accomplishment) except: @@ -420,11 +518,23 @@ def scrape_logged_in(self, close_on_complete=True): ) connections = driver.find_element(By.CLASS_NAME, "mn-connections") if connections is not None: - for conn in connections.find_elements(By.CLASS_NAME, "mn-connection-card"): - anchor = conn.find_element(By.CLASS_NAME, "mn-connection-card__link") + for conn in connections.find_elements( + By.CLASS_NAME, "mn-connection-card" + ): + anchor = conn.find_element( + By.CLASS_NAME, "mn-connection-card__link" + ) url = anchor.get_attribute("href") - name = conn.find_element(By.CLASS_NAME, "mn-connection-card__details").find_element(By.CLASS_NAME, "mn-connection-card__name").text.strip() - occupation = conn.find_element(By.CLASS_NAME, "mn-connection-card__details").find_element(By.CLASS_NAME, "mn-connection-card__occupation").text.strip() + name = ( + conn.find_element(By.CLASS_NAME, "mn-connection-card__details") + .find_element(By.CLASS_NAME, "mn-connection-card__name") + .text.strip() + ) + occupation = ( + conn.find_element(By.CLASS_NAME, "mn-connection-card__details") + .find_element(By.CLASS_NAME, "mn-connection-card__occupation") + .text.strip() + ) contact = Contact(name=name, occupation=occupation, url=url) self.add_contact(contact) @@ -457,12 +567,4 @@ def job_title(self): return None def __repr__(self): - return "".format( - name=self.name, - about=self.about, - exp=self.experiences, - edu=self.educations, - int=self.interests, - acc=self.accomplishments, - conn=self.contacts, - ) + return f"" diff --git a/linkedin_scraper/selectors.py b/linkedin_scraper/selectors.py index 2770a7a..963f102 100644 --- a/linkedin_scraper/selectors.py +++ b/linkedin_scraper/selectors.py @@ -1 +1 @@ -NAME = 'text-heading-xlarge' +NAME = "text-heading-xlarge" diff 
--git a/main.py b/main.py new file mode 100644 index 0000000..37985ab --- /dev/null +++ b/main.py @@ -0,0 +1,18 @@ +import os + +from dotenv import load_dotenv +from selenium import webdriver + +from linkedin_scraper import Person, actions + + +def main(): + load_dotenv() + driver = webdriver.Chrome() + actions.login(driver, os.getenv("LINKEDIN_EMAIL"), os.getenv("LINKEDIN_PASSWORD")) + person = Person("https://www.linkedin.com/in/stickerdaniel/", driver=driver) + print(person) + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..39591a5 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,75 @@ +[project] +name = "linkedin-scraper" +version = "2.11.5" +description = "Scrapes user data from LinkedIn" +readme = "README.md" +requires-python = ">=3.10" +authors = [ + { name = "Joey Sham", email = "sham.joey@gmail.com" } +] +keywords = ["linkedin", "scraping", "scraper"] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", +] +dependencies = [ + "lxml>=6.0.0", + "python-dotenv>=1.1.1", + "requests>=2.32.4", + "selenium>=4.33.0", +] + +[project.urls] +Homepage = "https://github.com/joeyism/linkedin_scraper" +Repository = "https://github.com/joeyism/linkedin_scraper" +Issues = "https://github.com/joeyism/linkedin_scraper/issues" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.uv] +dev-dependencies = [ + "pre-commit>=4.2.0", + "python-dotenv>=1.0.0", + "ruff>=0.12.1", +] + +[tool.ruff] +# Same as Black. +line-length = 88 +indent-width = 4 + +# Assume Python 3.10+ +target-version = "py310" + +[tool.ruff.lint] +# Enable pycodestyle (`E`) and Pyflakes (`F`) codes by default. +# Also enable pyupgrade (`UP`), flake8-bugbear (`B`), and isort (`I`) +select = ["E4", "E7", "E9", "F", "UP", "B", "I"] +ignore = ["E722"] # Ignore bare except clauses for now + +# Allow fix for all enabled rules (when `--fix`) is provided. +fixable = ["ALL"] +unfixable = [] + +# Allow unused variables when underscore-prefixed. +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +[tool.ruff.format] +# Like Black, use double quotes for strings. +quote-style = "double" + +# Like Black, indent with spaces, rather than tabs. +indent-style = "space" + +# Like Black, respect magic trailing commas. +skip-magic-trailing-comma = false + +# Like Black, automatically detect the appropriate line ending. 
+line-ending = "auto" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 9ce88ac..0000000 --- a/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -selenium -requests -lxml diff --git a/samples/scrape_person.py b/samples/scrape_person.py index 7d4e93f..2bae31a 100644 --- a/samples/scrape_person.py +++ b/samples/scrape_person.py @@ -1,9 +1,14 @@ import os -from linkedin_scraper import Person, actions + from selenium import webdriver + +from linkedin_scraper import Person, actions + driver = webdriver.Chrome("./chromedriver") email = os.getenv("LINKEDIN_USER") password = os.getenv("LINKEDIN_PASSWORD") -actions.login(driver, email, password) # if email and password isnt given, it'll prompt in terminal +actions.login( + driver, email, password +) # if email and password isnt given, it'll prompt in terminal person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5", driver=driver) diff --git a/samples/scrape_person_contacts.py b/samples/scrape_person_contacts.py index 6c72b6c..a6e0feb 100644 --- a/samples/scrape_person_contacts.py +++ b/samples/scrape_person_contacts.py @@ -1,19 +1,25 @@ import os -from linkedin_scraper import Person, actions + from selenium import webdriver from selenium.webdriver.chrome.options import Options +from linkedin_scraper import Person, actions + chrome_options = Options() chrome_options.add_argument("--headless") driver = webdriver.Chrome("./chromedriver", options=chrome_options) email = os.getenv("LINKEDIN_USER") password = os.getenv("LINKEDIN_PASSWORD") -actions.login(driver, email, password) # if email and password isnt given, it'll prompt in terminal +actions.login( + driver, email, password +) # if email and password isnt given, it'll prompt in terminal person = Person("https://www.linkedin.com/in/adrian0350", contacts=[], driver=driver) print("Person: " + person.name) print("Person contacts: ") for contact in person.contacts: - print("Contact: " + contact.name + " - " + contact.occupation + " -> " + contact.url) + print( + "Contact: " + contact.name + " - " + contact.occupation + " -> " + contact.url + ) diff --git a/setup.py b/setup.py index 5362a4f..95ce104 100644 --- a/setup.py +++ b/setup.py @@ -1,34 +1,37 @@ -from setuptools import setup, find_packages +import re + # To use a consistent encoding from codecs import open from os import path -import re + +from setuptools import setup here = path.abspath(path.dirname(__file__)) version = re.search( - '^__version__\s*=\s*"(.*)"', - open('linkedin_scraper/__init__.py').read(), - re.M - ).group(1) + '^__version__\s*=\s*"(.*)"', open("linkedin_scraper/__init__.py").read(), re.M +).group(1) # Get the long description from the README file -with open(path.join(here, 'README.md'), encoding='utf-8') as f: +with open(path.join(here, "README.md"), encoding="utf-8") as f: long_description = f.read() -setup( - name = 'linkedin_scraper', - packages = ['linkedin_scraper'], # this must be the same as the name above - version = version, - description = 'Scrapes user data from Linkedin', - long_description = long_description, - long_description_content_type='text/markdown', - author = 'Joey Sham', - author_email = 'sham.joey@gmail.com', - url = 'https://github.com/joeyism/linkedin_scraper', # use the URL to the github repo - download_url = 'https://github.com/joeyism/linkedin_scraper/dist/' + version + '.tar.gz', - keywords = ['linkedin', 'scraping', 'scraper'], - classifiers = [], - install_requires=[package.split("\n")[0] for package in open("requirements.txt", "r").readlines()] +setup( + 
name="linkedin_scraper", + packages=["linkedin_scraper"], # this must be the same as the name above + version=version, + description="Scrapes user data from Linkedin", + long_description=long_description, + long_description_content_type="text/markdown", + author="Joey Sham", + author_email="sham.joey@gmail.com", + url="https://github.com/joeyism/linkedin_scraper", # use the URL to the github repo + download_url="https://github.com/joeyism/linkedin_scraper/dist/" + + version + + ".tar.gz", + keywords=["linkedin", "scraping", "scraper"], + classifiers=[], + install_requires=[ + package.split("\n")[0] for package in open("requirements.txt", "r").readlines() + ], ) - diff --git a/uv.lock b/uv.lock new file mode 100644 index 0000000..afb12c7 --- /dev/null +++ b/uv.lock @@ -0,0 +1,553 @@ +version = 1 +revision = 2 +requires-python = ">=3.10" + +[[package]] +name = "attrs" +version = "25.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5a/b0/1367933a8532ee6ff8d63537de4f1177af4bff9f3e829baf7331f595bb24/attrs-25.3.0.tar.gz", hash = "sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b", size = 812032, upload-time = "2025-03-13T11:10:22.779Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/77/06/bb80f5f86020c4551da315d78b3ab75e8228f89f0162f2c3a819e407941a/attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3", size = 63815, upload-time = "2025-03-13T11:10:21.14Z" }, +] + +[[package]] +name = "certifi" +version = "2025.6.15" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/73/f7/f14b46d4bcd21092d7d3ccef689615220d8a08fb25e564b65d20738e672e/certifi-2025.6.15.tar.gz", hash = "sha256:d747aa5a8b9bbbb1bb8c22bb13e22bd1f18e9796defa16bab421f7f7a317323b", size = 158753, upload-time = "2025-06-15T02:45:51.329Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/84/ae/320161bd181fc06471eed047ecce67b693fd7515b16d495d8932db763426/certifi-2025.6.15-py3-none-any.whl", hash = "sha256:2e0c7ce7cb5d8f8634ca55d2ba7e6ec2689a2fd6537d8dec1296a477a4910057", size = 157650, upload-time = "2025-06-15T02:45:49.977Z" }, +] + +[[package]] +name = "cffi" +version = "1.17.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pycparser" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fc/97/c783634659c2920c3fc70419e3af40972dbaf758daa229a7d6ea6135c90d/cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824", size = 516621, upload-time = "2024-09-04T20:45:21.852Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f8/fe/4d41c2f200c4a457933dbd98d3cf4e911870877bd94d9656cc0fcb390681/cffi-1.17.1-cp310-cp310-win32.whl", hash = "sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c", size = 171804, upload-time = "2024-09-04T20:43:48.186Z" }, + { url = "https://files.pythonhosted.org/packages/d1/b6/0b0f5ab93b0df4acc49cae758c81fe4e5ef26c3ae2e10cc69249dfd8b3ab/cffi-1.17.1-cp310-cp310-win_amd64.whl", hash = "sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15", size = 181299, upload-time = "2024-09-04T20:43:49.812Z" }, + { url = "https://files.pythonhosted.org/packages/34/33/e1b8a1ba29025adbdcda5fb3a36f94c03d771c1b7b12f726ff7fef2ebe36/cffi-1.17.1-cp311-cp311-win32.whl", hash = "sha256:85a950a4ac9c359340d5963966e3e0a94a676bd6245a4b55bc43949eee26a655", size = 171727, upload-time 
= "2024-09-04T20:44:09.481Z" }, + { url = "https://files.pythonhosted.org/packages/3d/97/50228be003bb2802627d28ec0627837ac0bf35c90cf769812056f235b2d1/cffi-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:caaf0640ef5f5517f49bc275eca1406b0ffa6aa184892812030f04c2abf589a0", size = 181400, upload-time = "2024-09-04T20:44:10.873Z" }, + { url = "https://files.pythonhosted.org/packages/86/c5/28b2d6f799ec0bdecf44dced2ec5ed43e0eb63097b0f58c293583b406582/cffi-1.17.1-cp312-cp312-win32.whl", hash = "sha256:a08d7e755f8ed21095a310a693525137cfe756ce62d066e53f502a83dc550f65", size = 172448, upload-time = "2024-09-04T20:44:26.208Z" }, + { url = "https://files.pythonhosted.org/packages/50/b9/db34c4755a7bd1cb2d1603ac3863f22bcecbd1ba29e5ee841a4bc510b294/cffi-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:51392eae71afec0d0c8fb1a53b204dbb3bcabcb3c9b807eedf3e1e6ccf2de903", size = 181976, upload-time = "2024-09-04T20:44:27.578Z" }, + { url = "https://files.pythonhosted.org/packages/bf/ee/f94057fa6426481d663b88637a9a10e859e492c73d0384514a17d78ee205/cffi-1.17.1-cp313-cp313-win32.whl", hash = "sha256:e03eab0a8677fa80d646b5ddece1cbeaf556c313dcfac435ba11f107ba117b5d", size = 172475, upload-time = "2024-09-04T20:44:43.733Z" }, + { url = "https://files.pythonhosted.org/packages/7c/fc/6a8cb64e5f0324877d503c854da15d76c1e50eb722e320b15345c4d0c6de/cffi-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:f6a16c31041f09ead72d69f583767292f750d24913dadacf5756b966aacb3f1a", size = 182009, upload-time = "2024-09-04T20:44:45.309Z" }, +] + +[[package]] +name = "cfgv" +version = "3.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/11/74/539e56497d9bd1d484fd863dd69cbbfa653cd2aa27abfe35653494d85e94/cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560", size = 7114, upload-time = "2023-08-12T20:38:17.776Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c5/55/51844dd50c4fc7a33b653bfaba4c2456f06955289ca770a5dbd5fd267374/cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9", size = 7249, upload-time = "2023-08-12T20:38:16.269Z" }, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e4/33/89c2ced2b67d1c2a61c19c6751aa8902d46ce3dacb23600a283619f5a12d/charset_normalizer-3.4.2.tar.gz", hash = "sha256:5baececa9ecba31eff645232d59845c07aa030f0c81ee70184a90d35099a0e63", size = 126367, upload-time = "2025-05-02T08:34:42.01Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/95/28/9901804da60055b406e1a1c5ba7aac1276fb77f1dde635aabfc7fd84b8ab/charset_normalizer-3.4.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7c48ed483eb946e6c04ccbe02c6b4d1d48e51944b6db70f697e089c193404941", size = 201818, upload-time = "2025-05-02T08:31:46.725Z" }, + { url = "https://files.pythonhosted.org/packages/d9/9b/892a8c8af9110935e5adcbb06d9c6fe741b6bb02608c6513983048ba1a18/charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2d318c11350e10662026ad0eb71bb51c7812fc8590825304ae0bdd4ac283acd", size = 144649, upload-time = "2025-05-02T08:31:48.889Z" }, + { url = "https://files.pythonhosted.org/packages/7b/a5/4179abd063ff6414223575e008593861d62abfc22455b5d1a44995b7c101/charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:9cbfacf36cb0ec2897ce0ebc5d08ca44213af24265bd56eca54bee7923c48fd6", size = 155045, upload-time = "2025-05-02T08:31:50.757Z" }, + { url = "https://files.pythonhosted.org/packages/3b/95/bc08c7dfeddd26b4be8c8287b9bb055716f31077c8b0ea1cd09553794665/charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:18dd2e350387c87dabe711b86f83c9c78af772c748904d372ade190b5c7c9d4d", size = 147356, upload-time = "2025-05-02T08:31:52.634Z" }, + { url = "https://files.pythonhosted.org/packages/a8/2d/7a5b635aa65284bf3eab7653e8b4151ab420ecbae918d3e359d1947b4d61/charset_normalizer-3.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8075c35cd58273fee266c58c0c9b670947c19df5fb98e7b66710e04ad4e9ff86", size = 149471, upload-time = "2025-05-02T08:31:56.207Z" }, + { url = "https://files.pythonhosted.org/packages/ae/38/51fc6ac74251fd331a8cfdb7ec57beba8c23fd5493f1050f71c87ef77ed0/charset_normalizer-3.4.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5bf4545e3b962767e5c06fe1738f951f77d27967cb2caa64c28be7c4563e162c", size = 151317, upload-time = "2025-05-02T08:31:57.613Z" }, + { url = "https://files.pythonhosted.org/packages/b7/17/edee1e32215ee6e9e46c3e482645b46575a44a2d72c7dfd49e49f60ce6bf/charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7a6ab32f7210554a96cd9e33abe3ddd86732beeafc7a28e9955cdf22ffadbab0", size = 146368, upload-time = "2025-05-02T08:31:59.468Z" }, + { url = "https://files.pythonhosted.org/packages/26/2c/ea3e66f2b5f21fd00b2825c94cafb8c326ea6240cd80a91eb09e4a285830/charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:b33de11b92e9f75a2b545d6e9b6f37e398d86c3e9e9653c4864eb7e89c5773ef", size = 154491, upload-time = "2025-05-02T08:32:01.219Z" }, + { url = "https://files.pythonhosted.org/packages/52/47/7be7fa972422ad062e909fd62460d45c3ef4c141805b7078dbab15904ff7/charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8755483f3c00d6c9a77f490c17e6ab0c8729e39e6390328e42521ef175380ae6", size = 157695, upload-time = "2025-05-02T08:32:03.045Z" }, + { url = "https://files.pythonhosted.org/packages/2f/42/9f02c194da282b2b340f28e5fb60762de1151387a36842a92b533685c61e/charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:68a328e5f55ec37c57f19ebb1fdc56a248db2e3e9ad769919a58672958e8f366", size = 154849, upload-time = "2025-05-02T08:32:04.651Z" }, + { url = "https://files.pythonhosted.org/packages/67/44/89cacd6628f31fb0b63201a618049be4be2a7435a31b55b5eb1c3674547a/charset_normalizer-3.4.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:21b2899062867b0e1fde9b724f8aecb1af14f2778d69aacd1a5a1853a597a5db", size = 150091, upload-time = "2025-05-02T08:32:06.719Z" }, + { url = "https://files.pythonhosted.org/packages/1f/79/4b8da9f712bc079c0f16b6d67b099b0b8d808c2292c937f267d816ec5ecc/charset_normalizer-3.4.2-cp310-cp310-win32.whl", hash = "sha256:e8082b26888e2f8b36a042a58307d5b917ef2b1cacab921ad3323ef91901c71a", size = 98445, upload-time = "2025-05-02T08:32:08.66Z" }, + { url = "https://files.pythonhosted.org/packages/7d/d7/96970afb4fb66497a40761cdf7bd4f6fca0fc7bafde3a84f836c1f57a926/charset_normalizer-3.4.2-cp310-cp310-win_amd64.whl", hash = "sha256:f69a27e45c43520f5487f27627059b64aaf160415589230992cec34c5e18a509", size = 105782, upload-time = "2025-05-02T08:32:10.46Z" }, + { url = 
"https://files.pythonhosted.org/packages/05/85/4c40d00dcc6284a1c1ad5de5e0996b06f39d8232f1031cd23c2f5c07ee86/charset_normalizer-3.4.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:be1e352acbe3c78727a16a455126d9ff83ea2dfdcbc83148d2982305a04714c2", size = 198794, upload-time = "2025-05-02T08:32:11.945Z" }, + { url = "https://files.pythonhosted.org/packages/41/d9/7a6c0b9db952598e97e93cbdfcb91bacd89b9b88c7c983250a77c008703c/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:aa88ca0b1932e93f2d961bf3addbb2db902198dca337d88c89e1559e066e7645", size = 142846, upload-time = "2025-05-02T08:32:13.946Z" }, + { url = "https://files.pythonhosted.org/packages/66/82/a37989cda2ace7e37f36c1a8ed16c58cf48965a79c2142713244bf945c89/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d524ba3f1581b35c03cb42beebab4a13e6cdad7b36246bd22541fa585a56cccd", size = 153350, upload-time = "2025-05-02T08:32:15.873Z" }, + { url = "https://files.pythonhosted.org/packages/df/68/a576b31b694d07b53807269d05ec3f6f1093e9545e8607121995ba7a8313/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28a1005facc94196e1fb3e82a3d442a9d9110b8434fc1ded7a24a2983c9888d8", size = 145657, upload-time = "2025-05-02T08:32:17.283Z" }, + { url = "https://files.pythonhosted.org/packages/92/9b/ad67f03d74554bed3aefd56fe836e1623a50780f7c998d00ca128924a499/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fdb20a30fe1175ecabed17cbf7812f7b804b8a315a25f24678bcdf120a90077f", size = 147260, upload-time = "2025-05-02T08:32:18.807Z" }, + { url = "https://files.pythonhosted.org/packages/a6/e6/8aebae25e328160b20e31a7e9929b1578bbdc7f42e66f46595a432f8539e/charset_normalizer-3.4.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0f5d9ed7f254402c9e7d35d2f5972c9bbea9040e99cd2861bd77dc68263277c7", size = 149164, upload-time = "2025-05-02T08:32:20.333Z" }, + { url = "https://files.pythonhosted.org/packages/8b/f2/b3c2f07dbcc248805f10e67a0262c93308cfa149a4cd3d1fe01f593e5fd2/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:efd387a49825780ff861998cd959767800d54f8308936b21025326de4b5a42b9", size = 144571, upload-time = "2025-05-02T08:32:21.86Z" }, + { url = "https://files.pythonhosted.org/packages/60/5b/c3f3a94bc345bc211622ea59b4bed9ae63c00920e2e8f11824aa5708e8b7/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:f0aa37f3c979cf2546b73e8222bbfa3dc07a641585340179d768068e3455e544", size = 151952, upload-time = "2025-05-02T08:32:23.434Z" }, + { url = "https://files.pythonhosted.org/packages/e2/4d/ff460c8b474122334c2fa394a3f99a04cf11c646da895f81402ae54f5c42/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:e70e990b2137b29dc5564715de1e12701815dacc1d056308e2b17e9095372a82", size = 155959, upload-time = "2025-05-02T08:32:24.993Z" }, + { url = "https://files.pythonhosted.org/packages/a2/2b/b964c6a2fda88611a1fe3d4c400d39c66a42d6c169c924818c848f922415/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:0c8c57f84ccfc871a48a47321cfa49ae1df56cd1d965a09abe84066f6853b9c0", size = 153030, upload-time = "2025-05-02T08:32:26.435Z" }, + { url = "https://files.pythonhosted.org/packages/59/2e/d3b9811db26a5ebf444bc0fa4f4be5aa6d76fc6e1c0fd537b16c14e849b6/charset_normalizer-3.4.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:6b66f92b17849b85cad91259efc341dce9c1af48e2173bf38a85c6329f1033e5", size = 148015, upload-time = "2025-05-02T08:32:28.376Z" }, + { url = "https://files.pythonhosted.org/packages/90/07/c5fd7c11eafd561bb51220d600a788f1c8d77c5eef37ee49454cc5c35575/charset_normalizer-3.4.2-cp311-cp311-win32.whl", hash = "sha256:daac4765328a919a805fa5e2720f3e94767abd632ae410a9062dff5412bae65a", size = 98106, upload-time = "2025-05-02T08:32:30.281Z" }, + { url = "https://files.pythonhosted.org/packages/a8/05/5e33dbef7e2f773d672b6d79f10ec633d4a71cd96db6673625838a4fd532/charset_normalizer-3.4.2-cp311-cp311-win_amd64.whl", hash = "sha256:e53efc7c7cee4c1e70661e2e112ca46a575f90ed9ae3fef200f2a25e954f4b28", size = 105402, upload-time = "2025-05-02T08:32:32.191Z" }, + { url = "https://files.pythonhosted.org/packages/d7/a4/37f4d6035c89cac7930395a35cc0f1b872e652eaafb76a6075943754f095/charset_normalizer-3.4.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0c29de6a1a95f24b9a1aa7aefd27d2487263f00dfd55a77719b530788f75cff7", size = 199936, upload-time = "2025-05-02T08:32:33.712Z" }, + { url = "https://files.pythonhosted.org/packages/ee/8a/1a5e33b73e0d9287274f899d967907cd0bf9c343e651755d9307e0dbf2b3/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cddf7bd982eaa998934a91f69d182aec997c6c468898efe6679af88283b498d3", size = 143790, upload-time = "2025-05-02T08:32:35.768Z" }, + { url = "https://files.pythonhosted.org/packages/66/52/59521f1d8e6ab1482164fa21409c5ef44da3e9f653c13ba71becdd98dec3/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fcbe676a55d7445b22c10967bceaaf0ee69407fbe0ece4d032b6eb8d4565982a", size = 153924, upload-time = "2025-05-02T08:32:37.284Z" }, + { url = "https://files.pythonhosted.org/packages/86/2d/fb55fdf41964ec782febbf33cb64be480a6b8f16ded2dbe8db27a405c09f/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d41c4d287cfc69060fa91cae9683eacffad989f1a10811995fa309df656ec214", size = 146626, upload-time = "2025-05-02T08:32:38.803Z" }, + { url = "https://files.pythonhosted.org/packages/8c/73/6ede2ec59bce19b3edf4209d70004253ec5f4e319f9a2e3f2f15601ed5f7/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e594135de17ab3866138f496755f302b72157d115086d100c3f19370839dd3a", size = 148567, upload-time = "2025-05-02T08:32:40.251Z" }, + { url = "https://files.pythonhosted.org/packages/09/14/957d03c6dc343c04904530b6bef4e5efae5ec7d7990a7cbb868e4595ee30/charset_normalizer-3.4.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cf713fe9a71ef6fd5adf7a79670135081cd4431c2943864757f0fa3a65b1fafd", size = 150957, upload-time = "2025-05-02T08:32:41.705Z" }, + { url = "https://files.pythonhosted.org/packages/0d/c8/8174d0e5c10ccebdcb1b53cc959591c4c722a3ad92461a273e86b9f5a302/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a370b3e078e418187da8c3674eddb9d983ec09445c99a3a263c2011993522981", size = 145408, upload-time = "2025-05-02T08:32:43.709Z" }, + { url = "https://files.pythonhosted.org/packages/58/aa/8904b84bc8084ac19dc52feb4f5952c6df03ffb460a887b42615ee1382e8/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a955b438e62efdf7e0b7b52a64dc5c3396e2634baa62471768a64bc2adb73d5c", size = 153399, upload-time = "2025-05-02T08:32:46.197Z" }, + { url = 
"https://files.pythonhosted.org/packages/c2/26/89ee1f0e264d201cb65cf054aca6038c03b1a0c6b4ae998070392a3ce605/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:7222ffd5e4de8e57e03ce2cef95a4c43c98fcb72ad86909abdfc2c17d227fc1b", size = 156815, upload-time = "2025-05-02T08:32:48.105Z" }, + { url = "https://files.pythonhosted.org/packages/fd/07/68e95b4b345bad3dbbd3a8681737b4338ff2c9df29856a6d6d23ac4c73cb/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:bee093bf902e1d8fc0ac143c88902c3dfc8941f7ea1d6a8dd2bcb786d33db03d", size = 154537, upload-time = "2025-05-02T08:32:49.719Z" }, + { url = "https://files.pythonhosted.org/packages/77/1a/5eefc0ce04affb98af07bc05f3bac9094513c0e23b0562d64af46a06aae4/charset_normalizer-3.4.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dedb8adb91d11846ee08bec4c8236c8549ac721c245678282dcb06b221aab59f", size = 149565, upload-time = "2025-05-02T08:32:51.404Z" }, + { url = "https://files.pythonhosted.org/packages/37/a0/2410e5e6032a174c95e0806b1a6585eb21e12f445ebe239fac441995226a/charset_normalizer-3.4.2-cp312-cp312-win32.whl", hash = "sha256:db4c7bf0e07fc3b7d89ac2a5880a6a8062056801b83ff56d8464b70f65482b6c", size = 98357, upload-time = "2025-05-02T08:32:53.079Z" }, + { url = "https://files.pythonhosted.org/packages/6c/4f/c02d5c493967af3eda9c771ad4d2bbc8df6f99ddbeb37ceea6e8716a32bc/charset_normalizer-3.4.2-cp312-cp312-win_amd64.whl", hash = "sha256:5a9979887252a82fefd3d3ed2a8e3b937a7a809f65dcb1e068b090e165bbe99e", size = 105776, upload-time = "2025-05-02T08:32:54.573Z" }, + { url = "https://files.pythonhosted.org/packages/ea/12/a93df3366ed32db1d907d7593a94f1fe6293903e3e92967bebd6950ed12c/charset_normalizer-3.4.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:926ca93accd5d36ccdabd803392ddc3e03e6d4cd1cf17deff3b989ab8e9dbcf0", size = 199622, upload-time = "2025-05-02T08:32:56.363Z" }, + { url = "https://files.pythonhosted.org/packages/04/93/bf204e6f344c39d9937d3c13c8cd5bbfc266472e51fc8c07cb7f64fcd2de/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eba9904b0f38a143592d9fc0e19e2df0fa2e41c3c3745554761c5f6447eedabf", size = 143435, upload-time = "2025-05-02T08:32:58.551Z" }, + { url = "https://files.pythonhosted.org/packages/22/2a/ea8a2095b0bafa6c5b5a55ffdc2f924455233ee7b91c69b7edfcc9e02284/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3fddb7e2c84ac87ac3a947cb4e66d143ca5863ef48e4a5ecb83bd48619e4634e", size = 153653, upload-time = "2025-05-02T08:33:00.342Z" }, + { url = "https://files.pythonhosted.org/packages/b6/57/1b090ff183d13cef485dfbe272e2fe57622a76694061353c59da52c9a659/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98f862da73774290f251b9df8d11161b6cf25b599a66baf087c1ffe340e9bfd1", size = 146231, upload-time = "2025-05-02T08:33:02.081Z" }, + { url = "https://files.pythonhosted.org/packages/e2/28/ffc026b26f441fc67bd21ab7f03b313ab3fe46714a14b516f931abe1a2d8/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c9379d65defcab82d07b2a9dfbfc2e95bc8fe0ebb1b176a3190230a3ef0e07c", size = 148243, upload-time = "2025-05-02T08:33:04.063Z" }, + { url = "https://files.pythonhosted.org/packages/c0/0f/9abe9bd191629c33e69e47c6ef45ef99773320e9ad8e9cb08b8ab4a8d4cb/charset_normalizer-3.4.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:e635b87f01ebc977342e2697d05b56632f5f879a4f15955dfe8cef2448b51691", size = 150442, upload-time = "2025-05-02T08:33:06.418Z" }, + { url = "https://files.pythonhosted.org/packages/67/7c/a123bbcedca91d5916c056407f89a7f5e8fdfce12ba825d7d6b9954a1a3c/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1c95a1e2902a8b722868587c0e1184ad5c55631de5afc0eb96bc4b0d738092c0", size = 145147, upload-time = "2025-05-02T08:33:08.183Z" }, + { url = "https://files.pythonhosted.org/packages/ec/fe/1ac556fa4899d967b83e9893788e86b6af4d83e4726511eaaad035e36595/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ef8de666d6179b009dce7bcb2ad4c4a779f113f12caf8dc77f0162c29d20490b", size = 153057, upload-time = "2025-05-02T08:33:09.986Z" }, + { url = "https://files.pythonhosted.org/packages/2b/ff/acfc0b0a70b19e3e54febdd5301a98b72fa07635e56f24f60502e954c461/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:32fc0341d72e0f73f80acb0a2c94216bd704f4f0bce10aedea38f30502b271ff", size = 156454, upload-time = "2025-05-02T08:33:11.814Z" }, + { url = "https://files.pythonhosted.org/packages/92/08/95b458ce9c740d0645feb0e96cea1f5ec946ea9c580a94adfe0b617f3573/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:289200a18fa698949d2b39c671c2cc7a24d44096784e76614899a7ccf2574b7b", size = 154174, upload-time = "2025-05-02T08:33:13.707Z" }, + { url = "https://files.pythonhosted.org/packages/78/be/8392efc43487ac051eee6c36d5fbd63032d78f7728cb37aebcc98191f1ff/charset_normalizer-3.4.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4a476b06fbcf359ad25d34a057b7219281286ae2477cc5ff5e3f70a246971148", size = 149166, upload-time = "2025-05-02T08:33:15.458Z" }, + { url = "https://files.pythonhosted.org/packages/44/96/392abd49b094d30b91d9fbda6a69519e95802250b777841cf3bda8fe136c/charset_normalizer-3.4.2-cp313-cp313-win32.whl", hash = "sha256:aaeeb6a479c7667fbe1099af9617c83aaca22182d6cf8c53966491a0f1b7ffb7", size = 98064, upload-time = "2025-05-02T08:33:17.06Z" }, + { url = "https://files.pythonhosted.org/packages/e9/b0/0200da600134e001d91851ddc797809e2fe0ea72de90e09bec5a2fbdaccb/charset_normalizer-3.4.2-cp313-cp313-win_amd64.whl", hash = "sha256:aa6af9e7d59f9c12b33ae4e9450619cf2488e2bbe9b44030905877f0b2324980", size = 105641, upload-time = "2025-05-02T08:33:18.753Z" }, + { url = "https://files.pythonhosted.org/packages/20/94/c5790835a017658cbfabd07f3bfb549140c3ac458cfc196323996b10095a/charset_normalizer-3.4.2-py3-none-any.whl", hash = "sha256:7f56930ab0abd1c45cd15be65cc741c28b1c9a34876ce8c17a2fa107810c0af0", size = 52626, upload-time = "2025-05-02T08:34:40.053Z" }, +] + +[[package]] +name = "distlib" +version = "0.3.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0d/dd/1bec4c5ddb504ca60fc29472f3d27e8d4da1257a854e1d96742f15c1d02d/distlib-0.3.9.tar.gz", hash = "sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403", size = 613923, upload-time = "2024-10-09T18:35:47.551Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/91/a1/cf2472db20f7ce4a6be1253a81cfdf85ad9c7885ffbed7047fb72c24cf87/distlib-0.3.9-py2.py3-none-any.whl", hash = "sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87", size = 468973, upload-time = "2024-10-09T18:35:44.272Z" }, +] + +[[package]] +name = "exceptiongroup" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = 
"python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0b/9f/a65090624ecf468cdca03533906e7c69ed7588582240cfe7cc9e770b50eb/exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88", size = 29749, upload-time = "2025-05-10T17:42:51.123Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/36/f4/c6e662dade71f56cd2f3735141b265c3c79293c109549c1e6933b0651ffc/exceptiongroup-1.3.0-py3-none-any.whl", hash = "sha256:4d111e6e0c13d0644cad6ddaa7ed0261a0b36971f6d23e7ec9b4b9097da78a10", size = 16674, upload-time = "2025-05-10T17:42:49.33Z" }, +] + +[[package]] +name = "filelock" +version = "3.18.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0a/10/c23352565a6544bdc5353e0b15fc1c563352101f30e24bf500207a54df9a/filelock-3.18.0.tar.gz", hash = "sha256:adbc88eabb99d2fec8c9c1b229b171f18afa655400173ddc653d5d01501fb9f2", size = 18075, upload-time = "2025-03-14T07:11:40.47Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4d/36/2a115987e2d8c300a974597416d9de88f2444426de9571f4b59b2cca3acc/filelock-3.18.0-py3-none-any.whl", hash = "sha256:c401f4f8377c4464e6db25fff06205fd89bdd83b65eb0488ed1b160f780e21de", size = 16215, upload-time = "2025-03-14T07:11:39.145Z" }, +] + +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, +] + +[[package]] +name = "identify" +version = "2.6.12" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/88/d193a27416618628a5eea64e3223acd800b40749a96ffb322a9b55a49ed1/identify-2.6.12.tar.gz", hash = "sha256:d8de45749f1efb108badef65ee8386f0f7bb19a7f26185f74de6367bffbaf0e6", size = 99254, upload-time = "2025-05-23T20:37:53.3Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7a/cd/18f8da995b658420625f7ef13f037be53ae04ec5ad33f9b718240dcfd48c/identify-2.6.12-py2.py3-none-any.whl", hash = "sha256:ad9672d5a72e0d2ff7c5c8809b62dfa60458626352fb0eb7b55e69bdc45334a2", size = 99145, upload-time = "2025-05-23T20:37:51.495Z" }, +] + +[[package]] +name = "idna" +version = "3.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490, upload-time = "2024-09-15T18:07:39.745Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442, upload-time = "2024-09-15T18:07:37.964Z" }, +] + +[[package]] +name = "linkedin-scraper" +version = "2.11.5" +source = { editable = "." 
} +dependencies = [ + { name = "lxml" }, + { name = "python-dotenv" }, + { name = "requests" }, + { name = "selenium" }, +] + +[package.dev-dependencies] +dev = [ + { name = "pre-commit" }, + { name = "python-dotenv" }, + { name = "ruff" }, +] + +[package.metadata] +requires-dist = [ + { name = "lxml", specifier = ">=6.0.0" }, + { name = "python-dotenv", specifier = ">=1.1.1" }, + { name = "requests", specifier = ">=2.32.4" }, + { name = "selenium", specifier = ">=4.33.0" }, +] + +[package.metadata.requires-dev] +dev = [ + { name = "pre-commit", specifier = ">=4.2.0" }, + { name = "python-dotenv", specifier = ">=1.0.0" }, + { name = "ruff", specifier = ">=0.12.1" }, +] + +[[package]] +name = "lxml" +version = "6.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c5/ed/60eb6fa2923602fba988d9ca7c5cdbd7cf25faa795162ed538b527a35411/lxml-6.0.0.tar.gz", hash = "sha256:032e65120339d44cdc3efc326c9f660f5f7205f3a535c1fdbf898b29ea01fb72", size = 4096938, upload-time = "2025-06-26T16:28:19.373Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4b/e9/9c3ca02fbbb7585116c2e274b354a2d92b5c70561687dd733ec7b2018490/lxml-6.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:35bc626eec405f745199200ccb5c6b36f202675d204aa29bb52e27ba2b71dea8", size = 8399057, upload-time = "2025-06-26T16:25:02.169Z" }, + { url = "https://files.pythonhosted.org/packages/86/25/10a6e9001191854bf283515020f3633b1b1f96fd1b39aa30bf8fff7aa666/lxml-6.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:246b40f8a4aec341cbbf52617cad8ab7c888d944bfe12a6abd2b1f6cfb6f6082", size = 4569676, upload-time = "2025-06-26T16:25:05.431Z" }, + { url = "https://files.pythonhosted.org/packages/f5/a5/378033415ff61d9175c81de23e7ad20a3ffb614df4ffc2ffc86bc6746ffd/lxml-6.0.0-cp310-cp310-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:2793a627e95d119e9f1e19720730472f5543a6d84c50ea33313ce328d870f2dd", size = 5291361, upload-time = "2025-06-26T16:25:07.901Z" }, + { url = "https://files.pythonhosted.org/packages/5a/a6/19c87c4f3b9362b08dc5452a3c3bce528130ac9105fc8fff97ce895ce62e/lxml-6.0.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:46b9ed911f36bfeb6338e0b482e7fe7c27d362c52fde29f221fddbc9ee2227e7", size = 5008290, upload-time = "2025-06-28T18:47:13.196Z" }, + { url = "https://files.pythonhosted.org/packages/09/d1/e9b7ad4b4164d359c4d87ed8c49cb69b443225cb495777e75be0478da5d5/lxml-6.0.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2b4790b558bee331a933e08883c423f65bbcd07e278f91b2272489e31ab1e2b4", size = 5163192, upload-time = "2025-06-28T18:47:17.279Z" }, + { url = "https://files.pythonhosted.org/packages/56/d6/b3eba234dc1584744b0b374a7f6c26ceee5dc2147369a7e7526e25a72332/lxml-6.0.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e2030956cf4886b10be9a0285c6802e078ec2391e1dd7ff3eb509c2c95a69b76", size = 5076973, upload-time = "2025-06-26T16:25:10.936Z" }, + { url = "https://files.pythonhosted.org/packages/8e/47/897142dd9385dcc1925acec0c4afe14cc16d310ce02c41fcd9010ac5d15d/lxml-6.0.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4d23854ecf381ab1facc8f353dcd9adeddef3652268ee75297c1164c987c11dc", size = 5297795, upload-time = "2025-06-26T16:25:14.282Z" }, + { url = 
"https://files.pythonhosted.org/packages/fb/db/551ad84515c6f415cea70193a0ff11d70210174dc0563219f4ce711655c6/lxml-6.0.0-cp310-cp310-manylinux_2_31_armv7l.whl", hash = "sha256:43fe5af2d590bf4691531b1d9a2495d7aab2090547eaacd224a3afec95706d76", size = 4776547, upload-time = "2025-06-26T16:25:17.123Z" }, + { url = "https://files.pythonhosted.org/packages/e0/14/c4a77ab4f89aaf35037a03c472f1ccc54147191888626079bd05babd6808/lxml-6.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:74e748012f8c19b47f7d6321ac929a9a94ee92ef12bc4298c47e8b7219b26541", size = 5124904, upload-time = "2025-06-26T16:25:19.485Z" }, + { url = "https://files.pythonhosted.org/packages/70/b4/12ae6a51b8da106adec6a2e9c60f532350a24ce954622367f39269e509b1/lxml-6.0.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:43cfbb7db02b30ad3926e8fceaef260ba2fb7df787e38fa2df890c1ca7966c3b", size = 4805804, upload-time = "2025-06-26T16:25:21.949Z" }, + { url = "https://files.pythonhosted.org/packages/a9/b6/2e82d34d49f6219cdcb6e3e03837ca5fb8b7f86c2f35106fb8610ac7f5b8/lxml-6.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:34190a1ec4f1e84af256495436b2d196529c3f2094f0af80202947567fdbf2e7", size = 5323477, upload-time = "2025-06-26T16:25:24.475Z" }, + { url = "https://files.pythonhosted.org/packages/a1/e6/b83ddc903b05cd08a5723fefd528eee84b0edd07bdf87f6c53a1fda841fd/lxml-6.0.0-cp310-cp310-win32.whl", hash = "sha256:5967fe415b1920a3877a4195e9a2b779249630ee49ece22021c690320ff07452", size = 3613840, upload-time = "2025-06-26T16:25:27.345Z" }, + { url = "https://files.pythonhosted.org/packages/40/af/874fb368dd0c663c030acb92612341005e52e281a102b72a4c96f42942e1/lxml-6.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:f3389924581d9a770c6caa4df4e74b606180869043b9073e2cec324bad6e306e", size = 3993584, upload-time = "2025-06-26T16:25:29.391Z" }, + { url = "https://files.pythonhosted.org/packages/4a/f4/d296bc22c17d5607653008f6dd7b46afdfda12efd31021705b507df652bb/lxml-6.0.0-cp310-cp310-win_arm64.whl", hash = "sha256:522fe7abb41309e9543b0d9b8b434f2b630c5fdaf6482bee642b34c8c70079c8", size = 3681400, upload-time = "2025-06-26T16:25:31.421Z" }, + { url = "https://files.pythonhosted.org/packages/7c/23/828d4cc7da96c611ec0ce6147bbcea2fdbde023dc995a165afa512399bbf/lxml-6.0.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4ee56288d0df919e4aac43b539dd0e34bb55d6a12a6562038e8d6f3ed07f9e36", size = 8438217, upload-time = "2025-06-26T16:25:34.349Z" }, + { url = "https://files.pythonhosted.org/packages/f1/33/5ac521212c5bcb097d573145d54b2b4a3c9766cda88af5a0e91f66037c6e/lxml-6.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:b8dd6dd0e9c1992613ccda2bcb74fc9d49159dbe0f0ca4753f37527749885c25", size = 4590317, upload-time = "2025-06-26T16:25:38.103Z" }, + { url = "https://files.pythonhosted.org/packages/2b/2e/45b7ca8bee304c07f54933c37afe7dd4d39ff61ba2757f519dcc71bc5d44/lxml-6.0.0-cp311-cp311-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:d7ae472f74afcc47320238b5dbfd363aba111a525943c8a34a1b657c6be934c3", size = 5221628, upload-time = "2025-06-26T16:25:40.878Z" }, + { url = "https://files.pythonhosted.org/packages/32/23/526d19f7eb2b85da1f62cffb2556f647b049ebe2a5aa8d4d41b1fb2c7d36/lxml-6.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5592401cdf3dc682194727c1ddaa8aa0f3ddc57ca64fd03226a430b955eab6f6", size = 4949429, upload-time = "2025-06-28T18:47:20.046Z" }, + { url = 
"https://files.pythonhosted.org/packages/ac/cc/f6be27a5c656a43a5344e064d9ae004d4dcb1d3c9d4f323c8189ddfe4d13/lxml-6.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:58ffd35bd5425c3c3b9692d078bf7ab851441434531a7e517c4984d5634cd65b", size = 5087909, upload-time = "2025-06-28T18:47:22.834Z" }, + { url = "https://files.pythonhosted.org/packages/3b/e6/8ec91b5bfbe6972458bc105aeb42088e50e4b23777170404aab5dfb0c62d/lxml-6.0.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f720a14aa102a38907c6d5030e3d66b3b680c3e6f6bc95473931ea3c00c59967", size = 5031713, upload-time = "2025-06-26T16:25:43.226Z" }, + { url = "https://files.pythonhosted.org/packages/33/cf/05e78e613840a40e5be3e40d892c48ad3e475804db23d4bad751b8cadb9b/lxml-6.0.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c2a5e8d207311a0170aca0eb6b160af91adc29ec121832e4ac151a57743a1e1e", size = 5232417, upload-time = "2025-06-26T16:25:46.111Z" }, + { url = "https://files.pythonhosted.org/packages/ac/8c/6b306b3e35c59d5f0b32e3b9b6b3b0739b32c0dc42a295415ba111e76495/lxml-6.0.0-cp311-cp311-manylinux_2_31_armv7l.whl", hash = "sha256:2dd1cc3ea7e60bfb31ff32cafe07e24839df573a5e7c2d33304082a5019bcd58", size = 4681443, upload-time = "2025-06-26T16:25:48.837Z" }, + { url = "https://files.pythonhosted.org/packages/59/43/0bd96bece5f7eea14b7220476835a60d2b27f8e9ca99c175f37c085cb154/lxml-6.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2cfcf84f1defed7e5798ef4f88aa25fcc52d279be731ce904789aa7ccfb7e8d2", size = 5074542, upload-time = "2025-06-26T16:25:51.65Z" }, + { url = "https://files.pythonhosted.org/packages/e2/3d/32103036287a8ca012d8518071f8852c68f2b3bfe048cef2a0202eb05910/lxml-6.0.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:a52a4704811e2623b0324a18d41ad4b9fabf43ce5ff99b14e40a520e2190c851", size = 4729471, upload-time = "2025-06-26T16:25:54.571Z" }, + { url = "https://files.pythonhosted.org/packages/ca/a8/7be5d17df12d637d81854bd8648cd329f29640a61e9a72a3f77add4a311b/lxml-6.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c16304bba98f48a28ae10e32a8e75c349dd742c45156f297e16eeb1ba9287a1f", size = 5256285, upload-time = "2025-06-26T16:25:56.997Z" }, + { url = "https://files.pythonhosted.org/packages/cd/d0/6cb96174c25e0d749932557c8d51d60c6e292c877b46fae616afa23ed31a/lxml-6.0.0-cp311-cp311-win32.whl", hash = "sha256:f8d19565ae3eb956d84da3ef367aa7def14a2735d05bd275cd54c0301f0d0d6c", size = 3612004, upload-time = "2025-06-26T16:25:59.11Z" }, + { url = "https://files.pythonhosted.org/packages/ca/77/6ad43b165dfc6dead001410adeb45e88597b25185f4479b7ca3b16a5808f/lxml-6.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:b2d71cdefda9424adff9a3607ba5bbfc60ee972d73c21c7e3c19e71037574816", size = 4003470, upload-time = "2025-06-26T16:26:01.655Z" }, + { url = "https://files.pythonhosted.org/packages/a0/bc/4c50ec0eb14f932a18efc34fc86ee936a66c0eb5f2fe065744a2da8a68b2/lxml-6.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:8a2e76efbf8772add72d002d67a4c3d0958638696f541734304c7f28217a9cab", size = 3682477, upload-time = "2025-06-26T16:26:03.808Z" }, + { url = "https://files.pythonhosted.org/packages/89/c3/d01d735c298d7e0ddcedf6f028bf556577e5ab4f4da45175ecd909c79378/lxml-6.0.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:78718d8454a6e928470d511bf8ac93f469283a45c354995f7d19e77292f26108", size = 8429515, upload-time = "2025-06-26T16:26:06.776Z" }, + { url = 
"https://files.pythonhosted.org/packages/06/37/0e3eae3043d366b73da55a86274a590bae76dc45aa004b7042e6f97803b1/lxml-6.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:84ef591495ffd3f9dcabffd6391db7bb70d7230b5c35ef5148354a134f56f2be", size = 4601387, upload-time = "2025-06-26T16:26:09.511Z" }, + { url = "https://files.pythonhosted.org/packages/a3/28/e1a9a881e6d6e29dda13d633885d13acb0058f65e95da67841c8dd02b4a8/lxml-6.0.0-cp312-cp312-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:2930aa001a3776c3e2601cb8e0a15d21b8270528d89cc308be4843ade546b9ab", size = 5228928, upload-time = "2025-06-26T16:26:12.337Z" }, + { url = "https://files.pythonhosted.org/packages/9a/55/2cb24ea48aa30c99f805921c1c7860c1f45c0e811e44ee4e6a155668de06/lxml-6.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:219e0431ea8006e15005767f0351e3f7f9143e793e58519dc97fe9e07fae5563", size = 4952289, upload-time = "2025-06-28T18:47:25.602Z" }, + { url = "https://files.pythonhosted.org/packages/31/c0/b25d9528df296b9a3306ba21ff982fc5b698c45ab78b94d18c2d6ae71fd9/lxml-6.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:bd5913b4972681ffc9718bc2d4c53cde39ef81415e1671ff93e9aa30b46595e7", size = 5111310, upload-time = "2025-06-28T18:47:28.136Z" }, + { url = "https://files.pythonhosted.org/packages/e9/af/681a8b3e4f668bea6e6514cbcb297beb6de2b641e70f09d3d78655f4f44c/lxml-6.0.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:390240baeb9f415a82eefc2e13285016f9c8b5ad71ec80574ae8fa9605093cd7", size = 5025457, upload-time = "2025-06-26T16:26:15.068Z" }, + { url = "https://files.pythonhosted.org/packages/69/f8/693b1a10a891197143c0673fcce5b75fc69132afa81a36e4568c12c8faba/lxml-6.0.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ca50bd612438258a91b5b3788c6621c1f05c8c478e7951899f492be42defc0da", size = 5257565, upload-time = "2025-06-26T16:26:17.906Z" }, + { url = "https://files.pythonhosted.org/packages/a8/96/e08ff98f2c6426c98c8964513c5dab8d6eb81dadcd0af6f0c538ada78d33/lxml-6.0.0-cp312-cp312-manylinux_2_31_armv7l.whl", hash = "sha256:c24b8efd9c0f62bad0439283c2c795ef916c5a6b75f03c17799775c7ae3c0c9e", size = 4713390, upload-time = "2025-06-26T16:26:20.292Z" }, + { url = "https://files.pythonhosted.org/packages/a8/83/6184aba6cc94d7413959f6f8f54807dc318fdcd4985c347fe3ea6937f772/lxml-6.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:afd27d8629ae94c5d863e32ab0e1d5590371d296b87dae0a751fb22bf3685741", size = 5066103, upload-time = "2025-06-26T16:26:22.765Z" }, + { url = "https://files.pythonhosted.org/packages/ee/01/8bf1f4035852d0ff2e36a4d9aacdbcc57e93a6cd35a54e05fa984cdf73ab/lxml-6.0.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:54c4855eabd9fc29707d30141be99e5cd1102e7d2258d2892314cf4c110726c3", size = 4791428, upload-time = "2025-06-26T16:26:26.461Z" }, + { url = "https://files.pythonhosted.org/packages/5c/f7/5495829a864bc5f8b0798d2b52a807c89966523140f3d6fa3a58ab6720ea/lxml-6.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:36531f81c8214e293097cd2b7873f178997dae33d3667caaae8bdfb9666b76c0", size = 5281290, upload-time = "2025-06-26T16:26:29.406Z" }, + { url = "https://files.pythonhosted.org/packages/79/56/6b8edb79d9ed294ccc4e881f4db1023af56ba451909b9ce79f2a2cd7c532/lxml-6.0.0-cp312-cp312-win32.whl", hash = "sha256:690b20e3388a7ec98e899fd54c924e50ba6693874aa65ef9cb53de7f7de9d64a", size = 3613495, upload-time = "2025-06-26T16:26:31.588Z" }, + { url = 
"https://files.pythonhosted.org/packages/0b/1e/cc32034b40ad6af80b6fd9b66301fc0f180f300002e5c3eb5a6110a93317/lxml-6.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:310b719b695b3dd442cdfbbe64936b2f2e231bb91d998e99e6f0daf991a3eba3", size = 4014711, upload-time = "2025-06-26T16:26:33.723Z" }, + { url = "https://files.pythonhosted.org/packages/55/10/dc8e5290ae4c94bdc1a4c55865be7e1f31dfd857a88b21cbba68b5fea61b/lxml-6.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:8cb26f51c82d77483cdcd2b4a53cda55bbee29b3c2f3ddeb47182a2a9064e4eb", size = 3674431, upload-time = "2025-06-26T16:26:35.959Z" }, + { url = "https://files.pythonhosted.org/packages/79/21/6e7c060822a3c954ff085e5e1b94b4a25757c06529eac91e550f3f5cd8b8/lxml-6.0.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6da7cd4f405fd7db56e51e96bff0865b9853ae70df0e6720624049da76bde2da", size = 8414372, upload-time = "2025-06-26T16:26:39.079Z" }, + { url = "https://files.pythonhosted.org/packages/a4/f6/051b1607a459db670fc3a244fa4f06f101a8adf86cda263d1a56b3a4f9d5/lxml-6.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:b34339898bb556a2351a1830f88f751679f343eabf9cf05841c95b165152c9e7", size = 4593940, upload-time = "2025-06-26T16:26:41.891Z" }, + { url = "https://files.pythonhosted.org/packages/8e/74/dd595d92a40bda3c687d70d4487b2c7eff93fd63b568acd64fedd2ba00fe/lxml-6.0.0-cp313-cp313-manylinux2010_i686.manylinux2014_i686.manylinux_2_12_i686.manylinux_2_17_i686.whl", hash = "sha256:51a5e4c61a4541bd1cd3ba74766d0c9b6c12d6a1a4964ef60026832aac8e79b3", size = 5214329, upload-time = "2025-06-26T16:26:44.669Z" }, + { url = "https://files.pythonhosted.org/packages/52/46/3572761efc1bd45fcafb44a63b3b0feeb5b3f0066886821e94b0254f9253/lxml-6.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d18a25b19ca7307045581b18b3ec9ead2b1db5ccd8719c291f0cd0a5cec6cb81", size = 4947559, upload-time = "2025-06-28T18:47:31.091Z" }, + { url = "https://files.pythonhosted.org/packages/94/8a/5e40de920e67c4f2eef9151097deb9b52d86c95762d8ee238134aff2125d/lxml-6.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d4f0c66df4386b75d2ab1e20a489f30dc7fd9a06a896d64980541506086be1f1", size = 5102143, upload-time = "2025-06-28T18:47:33.612Z" }, + { url = "https://files.pythonhosted.org/packages/7c/4b/20555bdd75d57945bdabfbc45fdb1a36a1a0ff9eae4653e951b2b79c9209/lxml-6.0.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9f4b481b6cc3a897adb4279216695150bbe7a44c03daba3c894f49d2037e0a24", size = 5021931, upload-time = "2025-06-26T16:26:47.503Z" }, + { url = "https://files.pythonhosted.org/packages/d4/dd/39c8507c16db6031f8c1ddf70ed95dbb0a6d466a40002a3522c128aba472/lxml-6.0.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2ae06fbab4f1bb7db4f7c8ca9897dc8db4447d1a2b9bee78474ad403437bcc29", size = 5247467, upload-time = "2025-06-26T16:26:49.998Z" }, + { url = "https://files.pythonhosted.org/packages/4d/56/732d49def0631ad633844cfb2664563c830173a98d5efd9b172e89a4800d/lxml-6.0.0-cp313-cp313-manylinux_2_31_armv7l.whl", hash = "sha256:1fa377b827ca2023244a06554c6e7dc6828a10aaf74ca41965c5d8a4925aebb4", size = 4720601, upload-time = "2025-06-26T16:26:52.564Z" }, + { url = "https://files.pythonhosted.org/packages/8f/7f/6b956fab95fa73462bca25d1ea7fc8274ddf68fb8e60b78d56c03b65278e/lxml-6.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:1676b56d48048a62ef77a250428d1f31f610763636e0784ba67a9740823988ca", size = 5060227, upload-time = "2025-06-26T16:26:55.054Z" }, + { url = 
"https://files.pythonhosted.org/packages/97/06/e851ac2924447e8b15a294855caf3d543424364a143c001014d22c8ca94c/lxml-6.0.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:0e32698462aacc5c1cf6bdfebc9c781821b7e74c79f13e5ffc8bfe27c42b1abf", size = 4790637, upload-time = "2025-06-26T16:26:57.384Z" }, + { url = "https://files.pythonhosted.org/packages/52/03/0e764ce00b95e008d76b99d432f1807f3574fb2945b496a17807a1645dbd/lxml-6.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:7488a43033c958637b1a08cddc9188eb06d3ad36582cebc7d4815980b47e27ef", size = 5272430, upload-time = "2025-06-26T16:27:00.031Z" }, + { url = "https://files.pythonhosted.org/packages/5f/01/d48cc141bc47bc1644d20fe97bbd5e8afb30415ec94f146f2f76d0d9d098/lxml-6.0.0-cp313-cp313-win32.whl", hash = "sha256:5fcd7d3b1d8ecb91445bd71b9c88bdbeae528fefee4f379895becfc72298d181", size = 3612896, upload-time = "2025-06-26T16:27:04.251Z" }, + { url = "https://files.pythonhosted.org/packages/f4/87/6456b9541d186ee7d4cb53bf1b9a0d7f3b1068532676940fdd594ac90865/lxml-6.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:2f34687222b78fff795feeb799a7d44eca2477c3d9d3a46ce17d51a4f383e32e", size = 4013132, upload-time = "2025-06-26T16:27:06.415Z" }, + { url = "https://files.pythonhosted.org/packages/b7/42/85b3aa8f06ca0d24962f8100f001828e1f1f1a38c954c16e71154ed7d53a/lxml-6.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:21db1ec5525780fd07251636eb5f7acb84003e9382c72c18c542a87c416ade03", size = 3672642, upload-time = "2025-06-26T16:27:09.888Z" }, + { url = "https://files.pythonhosted.org/packages/66/e1/2c22a3cff9e16e1d717014a1e6ec2bf671bf56ea8716bb64466fcf820247/lxml-6.0.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:dbdd7679a6f4f08152818043dbb39491d1af3332128b3752c3ec5cebc0011a72", size = 3898804, upload-time = "2025-06-26T16:27:59.751Z" }, + { url = "https://files.pythonhosted.org/packages/2b/3a/d68cbcb4393a2a0a867528741fafb7ce92dac5c9f4a1680df98e5e53e8f5/lxml-6.0.0-pp310-pypy310_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:40442e2a4456e9910875ac12951476d36c0870dcb38a68719f8c4686609897c4", size = 4216406, upload-time = "2025-06-28T18:47:45.518Z" }, + { url = "https://files.pythonhosted.org/packages/15/8f/d9bfb13dff715ee3b2a1ec2f4a021347ea3caf9aba93dea0cfe54c01969b/lxml-6.0.0-pp310-pypy310_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:db0efd6bae1c4730b9c863fc4f5f3c0fa3e8f05cae2c44ae141cb9dfc7d091dc", size = 4326455, upload-time = "2025-06-28T18:47:48.411Z" }, + { url = "https://files.pythonhosted.org/packages/01/8b/fde194529ee8a27e6f5966d7eef05fa16f0567e4a8e8abc3b855ef6b3400/lxml-6.0.0-pp310-pypy310_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9ab542c91f5a47aaa58abdd8ea84b498e8e49fe4b883d67800017757a3eb78e8", size = 4268788, upload-time = "2025-06-26T16:28:02.776Z" }, + { url = "https://files.pythonhosted.org/packages/99/a8/3b8e2581b4f8370fc9e8dc343af4abdfadd9b9229970fc71e67bd31c7df1/lxml-6.0.0-pp310-pypy310_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:013090383863b72c62a702d07678b658fa2567aa58d373d963cca245b017e065", size = 4411394, upload-time = "2025-06-26T16:28:05.179Z" }, + { url = "https://files.pythonhosted.org/packages/e7/a5/899a4719e02ff4383f3f96e5d1878f882f734377f10dfb69e73b5f223e44/lxml-6.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c86df1c9af35d903d2b52d22ea3e66db8058d21dc0f59842ca5deb0595921141", size = 3517946, upload-time = "2025-06-26T16:28:07.665Z" }, +] + +[[package]] +name = "nodeenv" +version = "1.9.1" +source = { 
registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/43/16/fc88b08840de0e0a72a2f9d8c6bae36be573e475a6326ae854bcc549fc45/nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f", size = 47437, upload-time = "2024-06-04T18:44:11.171Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/1d/1b658dbd2b9fa9c4c9f32accbfc0205d532c8c6194dc0f2a4c0428e7128a/nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9", size = 22314, upload-time = "2024-06-04T18:44:08.352Z" }, +] + +[[package]] +name = "outcome" +version = "1.3.0.post0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/98/df/77698abfac98571e65ffeb0c1fba8ffd692ab8458d617a0eed7d9a8d38f2/outcome-1.3.0.post0.tar.gz", hash = "sha256:9dcf02e65f2971b80047b377468e72a268e15c0af3cf1238e6ff14f7f91143b8", size = 21060, upload-time = "2023-10-26T04:26:04.361Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/55/8b/5ab7257531a5d830fc8000c476e63c935488d74609b50f9384a643ec0a62/outcome-1.3.0.post0-py2.py3-none-any.whl", hash = "sha256:e771c5ce06d1415e356078d3bdd68523f284b4ce5419828922b6871e65eda82b", size = 10692, upload-time = "2023-10-26T04:26:02.532Z" }, +] + +[[package]] +name = "platformdirs" +version = "4.3.8" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fe/8b/3c73abc9c759ecd3f1f7ceff6685840859e8070c4d947c93fae71f6a0bf2/platformdirs-4.3.8.tar.gz", hash = "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc", size = 21362, upload-time = "2025-05-07T22:47:42.121Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fe/39/979e8e21520d4e47a0bbe349e2713c0aac6f3d853d0e5b34d76206c439aa/platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4", size = 18567, upload-time = "2025-05-07T22:47:40.376Z" }, +] + +[[package]] +name = "pre-commit" +version = "4.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cfgv" }, + { name = "identify" }, + { name = "nodeenv" }, + { name = "pyyaml" }, + { name = "virtualenv" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/08/39/679ca9b26c7bb2999ff122d50faa301e49af82ca9c066ec061cfbc0c6784/pre_commit-4.2.0.tar.gz", hash = "sha256:601283b9757afd87d40c4c4a9b2b5de9637a8ea02eaff7adc2d0fb4e04841146", size = 193424, upload-time = "2025-03-18T21:35:20.987Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/88/74/a88bf1b1efeae488a0c0b7bdf71429c313722d1fc0f377537fbe554e6180/pre_commit-4.2.0-py2.py3-none-any.whl", hash = "sha256:a009ca7205f1eb497d10b845e52c838a98b6cdd2102a6c8e4540e94ee75c58bd", size = 220707, upload-time = "2025-03-18T21:35:19.343Z" }, +] + +[[package]] +name = "pycparser" +version = "2.22" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1d/b2/31537cf4b1ca988837256c910a668b553fceb8f069bedc4b1c826024b52c/pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6", size = 172736, upload-time = "2024-03-30T13:22:22.564Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/13/a3/a812df4e2dd5696d1f351d58b8fe16a405b234ad2886a0dab9183fb78109/pycparser-2.22-py3-none-any.whl", hash = 
"sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc", size = 117552, upload-time = "2024-03-30T13:22:20.476Z" }, +] + +[[package]] +name = "pysocks" +version = "1.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bd/11/293dd436aea955d45fc4e8a35b6ae7270f5b8e00b53cf6c024c83b657a11/PySocks-1.7.1.tar.gz", hash = "sha256:3f8804571ebe159c380ac6de37643bb4685970655d3bba243530d6558b799aa0", size = 284429, upload-time = "2019-09-20T02:07:35.714Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8d/59/b4572118e098ac8e46e399a1dd0f2d85403ce8bbaad9ec79373ed6badaf9/PySocks-1.7.1-py3-none-any.whl", hash = "sha256:2725bd0a9925919b9b51739eea5f9e2bae91e83288108a9ad338b2e3a4435ee5", size = 16725, upload-time = "2019-09-20T02:06:22.938Z" }, +] + +[[package]] +name = "python-dotenv" +version = "1.1.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f6/b0/4bc07ccd3572a2f9df7e6782f52b0c6c90dcbb803ac4a167702d7d0dfe1e/python_dotenv-1.1.1.tar.gz", hash = "sha256:a8a6399716257f45be6a007360200409fce5cda2661e3dec71d23dc15f6189ab", size = 41978, upload-time = "2025-06-24T04:21:07.341Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5f/ed/539768cf28c661b5b068d66d96a2f155c4971a5d55684a514c1a0e0dec2f/python_dotenv-1.1.1-py3-none-any.whl", hash = "sha256:31f23644fe2602f88ff55e1f5c79ba497e01224ee7737937930c448e4d0e24dc", size = 20556, upload-time = "2025-06-24T04:21:06.073Z" }, +] + +[[package]] +name = "pyyaml" +version = "6.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/54/ed/79a089b6be93607fa5cdaedf301d7dfb23af5f25c398d5ead2525b063e17/pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e", size = 130631, upload-time = "2024-08-06T20:33:50.674Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9b/95/a3fac87cb7158e231b5a6012e438c647e1a87f09f8e0d123acec8ab8bf71/PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086", size = 184199, upload-time = "2024-08-06T20:31:40.178Z" }, + { url = "https://files.pythonhosted.org/packages/c7/7a/68bd47624dab8fd4afbfd3c48e3b79efe09098ae941de5b58abcbadff5cb/PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf", size = 171758, upload-time = "2024-08-06T20:31:42.173Z" }, + { url = "https://files.pythonhosted.org/packages/49/ee/14c54df452143b9ee9f0f29074d7ca5516a36edb0b4cc40c3f280131656f/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237", size = 718463, upload-time = "2024-08-06T20:31:44.263Z" }, + { url = "https://files.pythonhosted.org/packages/4d/61/de363a97476e766574650d742205be468921a7b532aa2499fcd886b62530/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b", size = 719280, upload-time = "2024-08-06T20:31:50.199Z" }, + { url = "https://files.pythonhosted.org/packages/6b/4e/1523cb902fd98355e2e9ea5e5eb237cbc5f3ad5f3075fa65087aa0ecb669/PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed", size = 751239, upload-time = 
"2024-08-06T20:31:52.292Z" }, + { url = "https://files.pythonhosted.org/packages/b7/33/5504b3a9a4464893c32f118a9cc045190a91637b119a9c881da1cf6b7a72/PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180", size = 695802, upload-time = "2024-08-06T20:31:53.836Z" }, + { url = "https://files.pythonhosted.org/packages/5c/20/8347dcabd41ef3a3cdc4f7b7a2aff3d06598c8779faa189cdbf878b626a4/PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68", size = 720527, upload-time = "2024-08-06T20:31:55.565Z" }, + { url = "https://files.pythonhosted.org/packages/be/aa/5afe99233fb360d0ff37377145a949ae258aaab831bde4792b32650a4378/PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99", size = 144052, upload-time = "2024-08-06T20:31:56.914Z" }, + { url = "https://files.pythonhosted.org/packages/b5/84/0fa4b06f6d6c958d207620fc60005e241ecedceee58931bb20138e1e5776/PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e", size = 161774, upload-time = "2024-08-06T20:31:58.304Z" }, + { url = "https://files.pythonhosted.org/packages/f8/aa/7af4e81f7acba21a4c6be026da38fd2b872ca46226673c89a758ebdc4fd2/PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774", size = 184612, upload-time = "2024-08-06T20:32:03.408Z" }, + { url = "https://files.pythonhosted.org/packages/8b/62/b9faa998fd185f65c1371643678e4d58254add437edb764a08c5a98fb986/PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee", size = 172040, upload-time = "2024-08-06T20:32:04.926Z" }, + { url = "https://files.pythonhosted.org/packages/ad/0c/c804f5f922a9a6563bab712d8dcc70251e8af811fce4524d57c2c0fd49a4/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c", size = 736829, upload-time = "2024-08-06T20:32:06.459Z" }, + { url = "https://files.pythonhosted.org/packages/51/16/6af8d6a6b210c8e54f1406a6b9481febf9c64a3109c541567e35a49aa2e7/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317", size = 764167, upload-time = "2024-08-06T20:32:08.338Z" }, + { url = "https://files.pythonhosted.org/packages/75/e4/2c27590dfc9992f73aabbeb9241ae20220bd9452df27483b6e56d3975cc5/PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85", size = 762952, upload-time = "2024-08-06T20:32:14.124Z" }, + { url = "https://files.pythonhosted.org/packages/9b/97/ecc1abf4a823f5ac61941a9c00fe501b02ac3ab0e373c3857f7d4b83e2b6/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4", size = 735301, upload-time = "2024-08-06T20:32:16.17Z" }, + { url = "https://files.pythonhosted.org/packages/45/73/0f49dacd6e82c9430e46f4a027baa4ca205e8b0a9dce1397f44edc23559d/PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e", size = 756638, upload-time = "2024-08-06T20:32:18.555Z" }, + { url = 
"https://files.pythonhosted.org/packages/22/5f/956f0f9fc65223a58fbc14459bf34b4cc48dec52e00535c79b8db361aabd/PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5", size = 143850, upload-time = "2024-08-06T20:32:19.889Z" }, + { url = "https://files.pythonhosted.org/packages/ed/23/8da0bbe2ab9dcdd11f4f4557ccaf95c10b9811b13ecced089d43ce59c3c8/PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44", size = 161980, upload-time = "2024-08-06T20:32:21.273Z" }, + { url = "https://files.pythonhosted.org/packages/86/0c/c581167fc46d6d6d7ddcfb8c843a4de25bdd27e4466938109ca68492292c/PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab", size = 183873, upload-time = "2024-08-06T20:32:25.131Z" }, + { url = "https://files.pythonhosted.org/packages/a8/0c/38374f5bb272c051e2a69281d71cba6fdb983413e6758b84482905e29a5d/PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725", size = 173302, upload-time = "2024-08-06T20:32:26.511Z" }, + { url = "https://files.pythonhosted.org/packages/c3/93/9916574aa8c00aa06bbac729972eb1071d002b8e158bd0e83a3b9a20a1f7/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5", size = 739154, upload-time = "2024-08-06T20:32:28.363Z" }, + { url = "https://files.pythonhosted.org/packages/95/0f/b8938f1cbd09739c6da569d172531567dbcc9789e0029aa070856f123984/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425", size = 766223, upload-time = "2024-08-06T20:32:30.058Z" }, + { url = "https://files.pythonhosted.org/packages/b9/2b/614b4752f2e127db5cc206abc23a8c19678e92b23c3db30fc86ab731d3bd/PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476", size = 767542, upload-time = "2024-08-06T20:32:31.881Z" }, + { url = "https://files.pythonhosted.org/packages/d4/00/dd137d5bcc7efea1836d6264f049359861cf548469d18da90cd8216cf05f/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48", size = 731164, upload-time = "2024-08-06T20:32:37.083Z" }, + { url = "https://files.pythonhosted.org/packages/c9/1f/4f998c900485e5c0ef43838363ba4a9723ac0ad73a9dc42068b12aaba4e4/PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b", size = 756611, upload-time = "2024-08-06T20:32:38.898Z" }, + { url = "https://files.pythonhosted.org/packages/df/d1/f5a275fdb252768b7a11ec63585bc38d0e87c9e05668a139fea92b80634c/PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4", size = 140591, upload-time = "2024-08-06T20:32:40.241Z" }, + { url = "https://files.pythonhosted.org/packages/0c/e8/4f648c598b17c3d06e8753d7d13d57542b30d56e6c2dedf9c331ae56312e/PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8", size = 156338, upload-time = "2024-08-06T20:32:41.93Z" }, + { url = 
"https://files.pythonhosted.org/packages/ef/e3/3af305b830494fa85d95f6d95ef7fa73f2ee1cc8ef5b495c7c3269fb835f/PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba", size = 181309, upload-time = "2024-08-06T20:32:43.4Z" }, + { url = "https://files.pythonhosted.org/packages/45/9f/3b1c20a0b7a3200524eb0076cc027a970d320bd3a6592873c85c92a08731/PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1", size = 171679, upload-time = "2024-08-06T20:32:44.801Z" }, + { url = "https://files.pythonhosted.org/packages/7c/9a/337322f27005c33bcb656c655fa78325b730324c78620e8328ae28b64d0c/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133", size = 733428, upload-time = "2024-08-06T20:32:46.432Z" }, + { url = "https://files.pythonhosted.org/packages/a3/69/864fbe19e6c18ea3cc196cbe5d392175b4cf3d5d0ac1403ec3f2d237ebb5/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484", size = 763361, upload-time = "2024-08-06T20:32:51.188Z" }, + { url = "https://files.pythonhosted.org/packages/04/24/b7721e4845c2f162d26f50521b825fb061bc0a5afcf9a386840f23ea19fa/PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5", size = 759523, upload-time = "2024-08-06T20:32:53.019Z" }, + { url = "https://files.pythonhosted.org/packages/2b/b2/e3234f59ba06559c6ff63c4e10baea10e5e7df868092bf9ab40e5b9c56b6/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc", size = 726660, upload-time = "2024-08-06T20:32:54.708Z" }, + { url = "https://files.pythonhosted.org/packages/fe/0f/25911a9f080464c59fab9027482f822b86bf0608957a5fcc6eaac85aa515/PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652", size = 751597, upload-time = "2024-08-06T20:32:56.985Z" }, + { url = "https://files.pythonhosted.org/packages/14/0d/e2c3b43bbce3cf6bd97c840b46088a3031085179e596d4929729d8d68270/PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183", size = 140527, upload-time = "2024-08-06T20:33:03.001Z" }, + { url = "https://files.pythonhosted.org/packages/fa/de/02b54f42487e3d3c6efb3f89428677074ca7bf43aae402517bc7cca949f3/PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563", size = 156446, upload-time = "2024-08-06T20:33:04.33Z" }, +] + +[[package]] +name = "requests" +version = "2.32.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "charset-normalizer" }, + { name = "idna" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e1/0a/929373653770d8a0d7ea76c37de6e41f11eb07559b103b1c02cafb3f7cf8/requests-2.32.4.tar.gz", hash = "sha256:27d0316682c8a29834d3264820024b62a36942083d52caf2f14c0591336d3422", size = 135258, upload-time = "2025-06-09T16:43:07.34Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7c/e4/56027c4a6b4ae70ca9de302488c5ca95ad4a39e190093d6c1a8ace08341b/requests-2.32.4-py3-none-any.whl", hash = 
"sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c", size = 64847, upload-time = "2025-06-09T16:43:05.728Z" }, +] + +[[package]] +name = "ruff" +version = "0.12.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/97/38/796a101608a90494440856ccfb52b1edae90de0b817e76bfade66b12d320/ruff-0.12.1.tar.gz", hash = "sha256:806bbc17f1104fd57451a98a58df35388ee3ab422e029e8f5cf30aa4af2c138c", size = 4413426, upload-time = "2025-06-26T20:34:14.784Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/06/bf/3dba52c1d12ab5e78d75bd78ad52fb85a6a1f29cc447c2423037b82bed0d/ruff-0.12.1-py3-none-linux_armv6l.whl", hash = "sha256:6013a46d865111e2edb71ad692fbb8262e6c172587a57c0669332a449384a36b", size = 10305649, upload-time = "2025-06-26T20:33:39.242Z" }, + { url = "https://files.pythonhosted.org/packages/8c/65/dab1ba90269bc8c81ce1d499a6517e28fe6f87b2119ec449257d0983cceb/ruff-0.12.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:b3f75a19e03a4b0757d1412edb7f27cffb0c700365e9d6b60bc1b68d35bc89e0", size = 11120201, upload-time = "2025-06-26T20:33:42.207Z" }, + { url = "https://files.pythonhosted.org/packages/3f/3e/2d819ffda01defe857fa2dd4cba4d19109713df4034cc36f06bbf582d62a/ruff-0.12.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:9a256522893cb7e92bb1e1153283927f842dea2e48619c803243dccc8437b8be", size = 10466769, upload-time = "2025-06-26T20:33:44.102Z" }, + { url = "https://files.pythonhosted.org/packages/63/37/bde4cf84dbd7821c8de56ec4ccc2816bce8125684f7b9e22fe4ad92364de/ruff-0.12.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:069052605fe74c765a5b4272eb89880e0ff7a31e6c0dbf8767203c1fbd31c7ff", size = 10660902, upload-time = "2025-06-26T20:33:45.98Z" }, + { url = "https://files.pythonhosted.org/packages/0e/3a/390782a9ed1358c95e78ccc745eed1a9d657a537e5c4c4812fce06c8d1a0/ruff-0.12.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a684f125a4fec2d5a6501a466be3841113ba6847827be4573fddf8308b83477d", size = 10167002, upload-time = "2025-06-26T20:33:47.81Z" }, + { url = "https://files.pythonhosted.org/packages/6d/05/f2d4c965009634830e97ffe733201ec59e4addc5b1c0efa035645baa9e5f/ruff-0.12.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bdecdef753bf1e95797593007569d8e1697a54fca843d78f6862f7dc279e23bd", size = 11751522, upload-time = "2025-06-26T20:33:49.857Z" }, + { url = "https://files.pythonhosted.org/packages/35/4e/4bfc519b5fcd462233f82fc20ef8b1e5ecce476c283b355af92c0935d5d9/ruff-0.12.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:70d52a058c0e7b88b602f575d23596e89bd7d8196437a4148381a3f73fcd5010", size = 12520264, upload-time = "2025-06-26T20:33:52.199Z" }, + { url = "https://files.pythonhosted.org/packages/85/b2/7756a6925da236b3a31f234b4167397c3e5f91edb861028a631546bad719/ruff-0.12.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:84d0a69d1e8d716dfeab22d8d5e7c786b73f2106429a933cee51d7b09f861d4e", size = 12133882, upload-time = "2025-06-26T20:33:54.231Z" }, + { url = "https://files.pythonhosted.org/packages/dd/00/40da9c66d4a4d51291e619be6757fa65c91b92456ff4f01101593f3a1170/ruff-0.12.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6cc32e863adcf9e71690248607ccdf25252eeeab5193768e6873b901fd441fed", size = 11608941, upload-time = "2025-06-26T20:33:56.202Z" }, + { url = 
"https://files.pythonhosted.org/packages/91/e7/f898391cc026a77fbe68dfea5940f8213622474cb848eb30215538a2dadf/ruff-0.12.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7fd49a4619f90d5afc65cf42e07b6ae98bb454fd5029d03b306bd9e2273d44cc", size = 11602887, upload-time = "2025-06-26T20:33:58.47Z" }, + { url = "https://files.pythonhosted.org/packages/f6/02/0891872fc6aab8678084f4cf8826f85c5d2d24aa9114092139a38123f94b/ruff-0.12.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:ed5af6aaaea20710e77698e2055b9ff9b3494891e1b24d26c07055459bb717e9", size = 10521742, upload-time = "2025-06-26T20:34:00.465Z" }, + { url = "https://files.pythonhosted.org/packages/2a/98/d6534322c74a7d47b0f33b036b2498ccac99d8d8c40edadb552c038cecf1/ruff-0.12.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:801d626de15e6bf988fbe7ce59b303a914ff9c616d5866f8c79eb5012720ae13", size = 10149909, upload-time = "2025-06-26T20:34:02.603Z" }, + { url = "https://files.pythonhosted.org/packages/34/5c/9b7ba8c19a31e2b6bd5e31aa1e65b533208a30512f118805371dbbbdf6a9/ruff-0.12.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:2be9d32a147f98a1972c1e4df9a6956d612ca5f5578536814372113d09a27a6c", size = 11136005, upload-time = "2025-06-26T20:34:04.723Z" }, + { url = "https://files.pythonhosted.org/packages/dc/34/9bbefa4d0ff2c000e4e533f591499f6b834346025e11da97f4ded21cb23e/ruff-0.12.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:49b7ce354eed2a322fbaea80168c902de9504e6e174fd501e9447cad0232f9e6", size = 11648579, upload-time = "2025-06-26T20:34:06.766Z" }, + { url = "https://files.pythonhosted.org/packages/6f/1c/20cdb593783f8f411839ce749ec9ae9e4298c2b2079b40295c3e6e2089e1/ruff-0.12.1-py3-none-win32.whl", hash = "sha256:d973fa626d4c8267848755bd0414211a456e99e125dcab147f24daa9e991a245", size = 10519495, upload-time = "2025-06-26T20:34:08.718Z" }, + { url = "https://files.pythonhosted.org/packages/cf/56/7158bd8d3cf16394928f47c637d39a7d532268cd45220bdb6cd622985760/ruff-0.12.1-py3-none-win_amd64.whl", hash = "sha256:9e1123b1c033f77bd2590e4c1fe7e8ea72ef990a85d2484351d408224d603013", size = 11547485, upload-time = "2025-06-26T20:34:11.008Z" }, + { url = "https://files.pythonhosted.org/packages/91/d0/6902c0d017259439d6fd2fd9393cea1cfe30169940118b007d5e0ea7e954/ruff-0.12.1-py3-none-win_arm64.whl", hash = "sha256:78ad09a022c64c13cc6077707f036bab0fac8cd7088772dcd1e5be21c5002efc", size = 10691209, upload-time = "2025-06-26T20:34:12.928Z" }, +] + +[[package]] +name = "selenium" +version = "4.33.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "trio" }, + { name = "trio-websocket" }, + { name = "typing-extensions" }, + { name = "urllib3", extra = ["socks"] }, + { name = "websocket-client" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5f/7e/4145666dd275760b56d0123a9439915af167932dd6caa19b5f8b281ae297/selenium-4.33.0.tar.gz", hash = "sha256:d90974db95d2cdeb34d2fb1b13f03dc904f53e6c5d228745b0635ada10cd625d", size = 882387, upload-time = "2025-05-23T17:45:22.046Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/c0/092fde36918574e144613de73ba43c36ab8d31e7d36bb44c35261909452d/selenium-4.33.0-py3-none-any.whl", hash = "sha256:af9ea757813918bddfe05cc677bf63c8a0cd277ebf8474b3dd79caa5727fca85", size = 9370835, upload-time = "2025-05-23T17:45:19.448Z" }, +] + +[[package]] +name = "sniffio" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = "2024-02-25T23:20:04.057Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, +] + +[[package]] +name = "sortedcontainers" +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", size = 30594, upload-time = "2021-05-16T22:03:42.897Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575, upload-time = "2021-05-16T22:03:41.177Z" }, +] + +[[package]] +name = "trio" +version = "0.30.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "cffi", marker = "implementation_name != 'pypy' and os_name == 'nt'" }, + { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "idna" }, + { name = "outcome" }, + { name = "sniffio" }, + { name = "sortedcontainers" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/01/c1/68d582b4d3a1c1f8118e18042464bb12a7c1b75d64d75111b297687041e3/trio-0.30.0.tar.gz", hash = "sha256:0781c857c0c81f8f51e0089929a26b5bb63d57f927728a5586f7e36171f064df", size = 593776, upload-time = "2025-04-21T00:48:19.507Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/8e/3f6dfda475ecd940e786defe6df6c500734e686c9cd0a0f8ef6821e9b2f2/trio-0.30.0-py3-none-any.whl", hash = "sha256:3bf4f06b8decf8d3cf00af85f40a89824669e2d033bb32469d34840edcfc22a5", size = 499194, upload-time = "2025-04-21T00:48:17.167Z" }, +] + +[[package]] +name = "trio-websocket" +version = "0.12.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, + { name = "outcome" }, + { name = "trio" }, + { name = "wsproto" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/3c/8b4358e81f2f2cfe71b66a267f023a91db20a817b9425dd964873796980a/trio_websocket-0.12.2.tar.gz", hash = "sha256:22c72c436f3d1e264d0910a3951934798dcc5b00ae56fc4ee079d46c7cf20fae", size = 33549, upload-time = "2025-02-25T05:16:58.947Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/19/eb640a397bba49ba49ef9dbe2e7e5c04202ba045b6ce2ec36e9cadc51e04/trio_websocket-0.12.2-py3-none-any.whl", hash = "sha256:df605665f1db533f4a386c94525870851096a223adcb97f72a07e8b4beba45b6", size = 21221, upload-time = "2025-02-25T05:16:57.545Z" }, +] + +[[package]] +name = "typing-extensions" +version = "4.13.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f6/37/23083fcd6e35492953e8d2aaaa68b860eb422b34627b13f2ce3eb6106061/typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef", size = 106967, 
upload-time = "2025-04-10T14:19:05.416Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8b/54/b1ae86c0973cc6f0210b53d508ca3641fb6d0c56823f288d108bc7ab3cc8/typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c", size = 45806, upload-time = "2025-04-10T14:19:03.967Z" }, +] + +[[package]] +name = "urllib3" +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8a/78/16493d9c386d8e60e442a35feac5e00f0913c0f4b7c217c11e8ec2ff53e0/urllib3-2.4.0.tar.gz", hash = "sha256:414bc6535b787febd7567804cc015fee39daab8ad86268f1310a9250697de466", size = 390672, upload-time = "2025-04-10T15:23:39.232Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6b/11/cc635220681e93a0183390e26485430ca2c7b5f9d33b15c74c2861cb8091/urllib3-2.4.0-py3-none-any.whl", hash = "sha256:4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813", size = 128680, upload-time = "2025-04-10T15:23:37.377Z" }, +] + +[package.optional-dependencies] +socks = [ + { name = "pysocks" }, +] + +[[package]] +name = "virtualenv" +version = "20.31.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "distlib" }, + { name = "filelock" }, + { name = "platformdirs" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/56/2c/444f465fb2c65f40c3a104fd0c495184c4f2336d65baf398e3c75d72ea94/virtualenv-20.31.2.tar.gz", hash = "sha256:e10c0a9d02835e592521be48b332b6caee6887f332c111aa79a09b9e79efc2af", size = 6076316, upload-time = "2025-05-08T17:58:23.811Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f3/40/b1c265d4b2b62b58576588510fc4d1fe60a86319c8de99fd8e9fec617d2c/virtualenv-20.31.2-py3-none-any.whl", hash = "sha256:36efd0d9650ee985f0cad72065001e66d49a6f24eb44d98980f630686243cf11", size = 6057982, upload-time = "2025-05-08T17:58:21.15Z" }, +] + +[[package]] +name = "websocket-client" +version = "1.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e6/30/fba0d96b4b5fbf5948ed3f4681f7da2f9f64512e1d303f94b4cc174c24a5/websocket_client-1.8.0.tar.gz", hash = "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da", size = 54648, upload-time = "2024-04-23T22:16:16.976Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/84/44687a29792a70e111c5c477230a72c4b957d88d16141199bf9acb7537a3/websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526", size = 58826, upload-time = "2024-04-23T22:16:14.422Z" }, +] + +[[package]] +name = "wsproto" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c9/4a/44d3c295350d776427904d73c189e10aeae66d7f555bb2feee16d1e4ba5a/wsproto-1.2.0.tar.gz", hash = "sha256:ad565f26ecb92588a3e43bc3d96164de84cd9902482b130d0ddbaa9664a85065", size = 53425, upload-time = "2022-08-23T19:58:21.447Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/58/e860788190eba3bcce367f74d29c4675466ce8dddfba85f7827588416f01/wsproto-1.2.0-py3-none-any.whl", hash = "sha256:b9acddd652b585d75b20477888c56642fdade28bdfd3579aa24a4d2c037dd736", size = 24226, upload-time = "2022-08-23T19:58:19.96Z" }, +] From b3b85e175b6f1812b9ce49333162db77692709cd Mon Sep 17 00:00:00 2001 From: Daniel Sticker Date: Sat, 28 Jun 2025 18:01:59 -0400 Subject: [PATCH 
02/14] fix(pre-commit): update ruff hook arguments to remove exit code option

---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 3dc49f5..c527309 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -13,5 +13,5 @@ repos:
     rev: v0.12.1
     hooks:
       - id: ruff
-        args: [--fix, --exit-non-zero-on-fix]
+        args: [--fix]
       - id: ruff-format

From 6c54c194d2c632bc2fd2b59afcba29edc73da738 Mon Sep 17 00:00:00 2001
From: Daniel Sticker
Date: Sat, 28 Jun 2025 20:40:09 -0400
Subject: [PATCH 03/14] fix(docs): add back README.rst

---
 README.rst | 352 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 352 insertions(+)
 create mode 100644 README.rst

diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..56cafb9
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,352 @@
+# Linkedin Scraper
+
+Scrapes Linkedin User Data
+
+[Linkedin Scraper](#linkedin-scraper)
+- [Linkedin Scraper](#linkedin-scraper)
+  - [Installation](#installation)
+  - [Development](#development)
+    - [Running Scripts](#running-scripts)
+  - [Contributing](#contributing)
+    - [Development Workflow](#development-workflow)
+  - [Setup](#setup)
+    - [Environment Variables](#environment-variables)
+    - [ChromeDriver](#chromedriver)
+  - [Sponsor](#sponsor)
+  - [Usage](#usage)
+    - [Sample Usage](#sample-usage)
+    - [User Scraping](#user-scraping)
+    - [Company Scraping](#company-scraping)
+    - [Job Scraping](#job-scraping)
+    - [Job Search Scraping](#job-search-scraping)
+    - [Scraping sites where login is required first](#scraping-sites-where-login-is-required-first)
+    - [Scraping sites and login automatically](#scraping-sites-and-login-automatically)
+  - [API](#api)
+    - [Person](#person)
+      - [`linkedin_url`](#linkedin_url)
+      - [`name`](#name)
+      - [`about`](#about)
+      - [`experiences`](#experiences)
+      - [`educations`](#educations)
+      - [`interests`](#interests)
+      - [`accomplishment`](#accomplishment)
+      - [`company`](#company)
+      - [`job_title`](#job_title)
+      - [`driver`](#driver)
+      - [`scrape`](#scrape)
+      - [`scrape(close_on_complete=True)`](#scrapeclose_on_completetrue)
+    - [Company](#company-1)
+      - [`linkedin_url`](#linkedin_url-1)
+      - [`name`](#name-1)
+      - [`about_us`](#about_us)
+      - [`website`](#website)
+      - [`phone`](#phone)
+      - [`headquarters`](#headquarters)
+      - [`founded`](#founded)
+      - [`company_type`](#company_type)
+      - [`company_size`](#company_size)
+      - [`specialties`](#specialties)
+      - [`showcase_pages`](#showcase_pages)
+      - [`affiliated_companies`](#affiliated_companies)
+      - [`driver`](#driver-1)
+      - [`get_employees`](#get_employees)
+      - [`scrape(close_on_complete=True)`](#scrapeclose_on_completetrue-1)
+  - [Contribution](#contribution)
+
+## Installation
+
+```bash
+# Clone the repository
+git clone https://github.com/joeyism/linkedin_scraper.git
+cd linkedin_scraper
+
+# Install dependencies
+uv sync
+```
+
+## Development
+
+### Running Scripts
+
+```bash
+# Run a Python script with the project dependencies
+uv run your_script.py
+
+# Or activate the virtual environment and run it directly
+source .venv/bin/activate
+python your_script.py
+```
+
+## Contributing
+
+If you want to contribute to this project:
+
+1. **Install development dependencies**:
+   ```bash
+   uv sync --dev
+   ```
+
+2. **Set up pre-commit hooks** (automatically runs linting/formatting on commit):
+   ```bash
+   uv run pre-commit install
+   ```
+
+3. **Environment setup for testing**:
+   ```bash
+   cp .env.example .env
+   # Edit .env with your LinkedIn credentials for login
+   ```
+
+### Development Workflow
+
+With pre-commit installed, your code will be automatically linted and formatted on every commit.
+
+## Setup
+
+### Environment Variables
+Create a `.env` file (copy from `.env.example`) with your LinkedIn credentials:
+```bash
+cp .env.example .env
+# Edit .env with your credentials
+```
+
+### ChromeDriver
+The project will automatically use chromedriver from your system PATH. If you need to specify a custom location:
+
+```bash
+export CHROMEDRIVER=~/chromedriver
+```
+
+## Sponsor
+Message me if you'd like to sponsor me
+
+## Usage
+To use it, just create the class.
+
+### Sample Usage
+```python
+import os
+from dotenv import load_dotenv
+from linkedin_scraper import Person, actions
+from selenium import webdriver
+
+# Load environment variables
+load_dotenv()
+
+driver = webdriver.Chrome()
+
+# Use credentials from .env file
+email = os.getenv("LINKEDIN_EMAIL")
+password = os.getenv("LINKEDIN_PASSWORD")
+actions.login(driver, email, password)
+person = Person("https://www.linkedin.com/in/joey-sham-aa2a50122", driver=driver)
+```
+
+
+**NOTE**: The account used to log-in should have it's language set English to make sure everything works as expected.
+
+### User Scraping
+```python
+from linkedin_scraper import Person
+person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5")
+```
+
+### Company Scraping
+```python
+from linkedin_scraper import Company
+company = Company("https://ca.linkedin.com/company/google")
+```
+
+### Job Scraping
+```python
+from linkedin_scraper import Job, actions
+from selenium import webdriver
+
+driver = webdriver.Chrome()
+email = "some-email@email.address"
+password = "password123"
+actions.login(driver, email, password) # if email and password aren't given, it'll prompt in terminal
+input("Press Enter")
+job = Job("https://www.linkedin.com/jobs/collections/recommended/?currentJobId=3456898261", driver=driver, close_on_complete=False)
+```
+
+### Job Search Scraping
+```python
+from linkedin_scraper import JobSearch, actions
+from selenium import webdriver
+
+driver = webdriver.Chrome()
+email = "some-email@email.address"
+password = "password123"
+actions.login(driver, email, password) # if email and password aren't given, it'll prompt in terminal
+input("Press Enter")
+job_search = JobSearch(driver=driver, close_on_complete=False, scrape=False)
+# job_search contains jobs from your logged in front page:
+# - job_search.recommended_jobs
+# - job_search.still_hiring
+# - job_search.more_jobs
+
+job_listings = job_search.search("Machine Learning Engineer") # returns the list of `Job` from the first page
+```
+
+### Scraping sites where login is required first
+1. Run `ipython` or `python`
+2. In `ipython`/`python`, run the following code (you can modify it if you need to specify your driver)
+3.
+```python
+from linkedin_scraper import Person
+from selenium import webdriver
+driver = webdriver.Chrome()
+person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5", driver = driver, scrape=False)
+```
+4. Login to Linkedin
+5. [OPTIONAL] Logout of Linkedin
+6. In the same `ipython`/`python` code, run
+```python
+person.scrape()
+```
+
+The reason is that LinkedIn has recently blocked people from viewing certain profiles without having previously signed in. So by setting `scrape=False`, it doesn't automatically scrape the profile, but Chrome will open the linkedin page anyway. You can log in and log out, and the cookie will stay in the browser and it won't affect your profile views. Then when you run `person.scrape()`, it'll scrape and close the browser. If you want to keep the browser on so you can scrape others, run it as
+
+**NOTE**: For version >= `2.1.0`, scraping can also occur while logged in. Beware that users will be able to see that you viewed their profile.
+
+```python
+person.scrape(close_on_complete=False)
+```
+so it doesn't close.
+
+### Scraping sites and login automatically
+From version **2.4.0** on, `actions` is a part of the library that allows signing into Linkedin first. The email and password can be provided as variables to the function. If not provided, both will be prompted for in the terminal.
+
+```python
+import os
+from dotenv import load_dotenv
+from linkedin_scraper import Person, actions
+from selenium import webdriver
+
+# Load environment variables
+load_dotenv()
+
+driver = webdriver.Chrome()
+email = os.getenv("LINKEDIN_EMAIL")
+password = os.getenv("LINKEDIN_PASSWORD")
+actions.login(driver, email, password)
+person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5", driver=driver)
+```
+
+
+## API
+
+### Person
+A Person object can be created with the following inputs:
+
+```python
+Person(linkedin_url=None, name=None, about=[], experiences=[], educations=[], interests=[], accomplishments=[], company=None, job_title=None, driver=None, scrape=True)
+```
+#### `linkedin_url`
+This is the linkedin url of their profile
+
+#### `name`
+This is the name of the person
+
+#### `about`
+This is the small paragraph about the person
+
+#### `experiences`
+This is the past experiences they have. A list of `linkedin_scraper.scraper.Experience`
+
+#### `educations`
+This is the past educations they have. A list of `linkedin_scraper.scraper.Education`
+
+#### `interests`
+This is the interests they have. A list of `linkedin_scraper.scraper.Interest`
+
+#### `accomplishment`
+This is the accomplishments they have. A list of `linkedin_scraper.scraper.Accomplishment`
+
+#### `company`
+This is the most recent company or institution they have worked at.
+
+#### `job_title`
+This is the most recent job title they have.
+
+#### `driver`
+This is the driver from which to scrape the Linkedin profile. A driver using Chrome is created by default. However, if a driver is passed in, that will be used instead.
+
+For example
+```python
+driver = webdriver.Chrome()
+person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5", driver = driver)
+```
+
+#### `scrape`
+When this is **True**, the scraping happens automatically. To scrape afterwards, call the `scrape()` function on the `Person` object.
+
+
+#### `scrape(close_on_complete=True)`
+This is the meat of the code, where execution of this function scrapes the profile. If *close_on_complete* is True (which it is by default), then the browser will close upon completion. If scraping of other profiles is desired, then you might want to set that to false so you can keep using the same driver.
+
+
+
+
+### Company
+
+```python
+Company(linkedin_url=None, name=None, about_us=None, website=None, phone=None, headquarters=None, founded=None, company_type=None, company_size=None, specialties=None, showcase_pages=[], affiliated_companies=[], driver=None, scrape=True, get_employees=True)
+```
+
+#### `linkedin_url`
+This is the linkedin url of their profile
+
+#### `name`
+This is the name of the company
+
+#### `about_us`
+The description of the company
+
+#### `website`
+The website of the company
+
+#### `phone`
+The phone of the company
+
+#### `headquarters`
+The headquarters location of the company
+
+#### `founded`
+When the company was founded
+
+#### `company_type`
+The type of the company
+
+#### `company_size`
+How many people are employed at the company
+
+#### `specialties`
+What the company specializes in
+
+#### `showcase_pages`
+Pages that the company owns to showcase their products
+
+#### `affiliated_companies`
+Other companies that are affiliated with this one
+
+#### `driver`
+This is the driver from which to scrape the Linkedin profile. A driver using Chrome is created by default. However, if a driver is passed in, that will be used instead.
+
+#### `get_employees`
+Whether to get all the employees of the company
+
+For example
+```python
+driver = webdriver.Chrome()
+company = Company("https://ca.linkedin.com/company/google", driver=driver)
+```
+
+
+#### `scrape(close_on_complete=True)`
+This is the meat of the code, where execution of this function scrapes the company. If *close_on_complete* is True (which it is by default), then the browser will close upon completion. If scraping of other companies is desired, then you might want to set that to false so you can keep using the same driver.
+
+## Contribution
+
+Buy Me A Coffee

From f07f7bcb5c738bd89a599c96b0662913f69c53ca Mon Sep 17 00:00:00 2001
From: Daniel Sticker
Date: Fri, 4 Jul 2025 14:46:46 -0400
Subject: [PATCH 04/14] feat(examples): tested and refactored person profile and contacts examples

---
 examples/person_contacts_example.py | 31 +++++++++++++++++++++++++++++
 examples/person_profile_example.py  | 23 +++++++++++++++++++
 linkedin_scraper/company.py         | 25 ++++++++++++++++++++---
 samples/company_profile_example.py  | 23 +++++++++++++++++++
 samples/scrape_person.py            | 14 -------------
 samples/scrape_person_contacts.py   | 25 -----------------------
 6 files changed, 99 insertions(+), 42 deletions(-)
 create mode 100644 examples/person_contacts_example.py
 create mode 100644 examples/person_profile_example.py
 create mode 100644 samples/company_profile_example.py
 delete mode 100644 samples/scrape_person.py
 delete mode 100644 samples/scrape_person_contacts.py

diff --git a/examples/person_contacts_example.py b/examples/person_contacts_example.py
new file mode 100644
index 0000000..43068a2
--- /dev/null
+++ b/examples/person_contacts_example.py
@@ -0,0 +1,31 @@
+import os
+
+from dotenv import load_dotenv
+from selenium import webdriver
+
+from linkedin_scraper import Person, actions
+
+
+def main():
+    load_dotenv()
+    driver = webdriver.Chrome()
+    actions.login(driver, os.getenv("LINKEDIN_EMAIL"), os.getenv("LINKEDIN_PASSWORD"))
+
+    # Example profile
+    person = Person(
+        "https://www.linkedin.com/in/stickerdaniel/", contacts=[], driver=driver
+    )
+
+    for contact in person.contacts:
+        print(
+            "Contact: "
+            + contact.name
+            + " - "
+            + contact.occupation
+            + " -> "
+            + contact.url
+        )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/person_profile_example.py b/examples/person_profile_example.py new
file mode 100644 index 0000000..095094d --- /dev/null +++ b/examples/person_profile_example.py @@ -0,0 +1,23 @@ +import os +from pprint import pprint + +from dotenv import load_dotenv +from selenium import webdriver + +from linkedin_scraper import Person, actions + + +def main(): + load_dotenv() + driver = webdriver.Chrome() + actions.login(driver, os.getenv("LINKEDIN_EMAIL"), os.getenv("LINKEDIN_PASSWORD")) + + # Example profile + person = Person("https://www.linkedin.com/in/stickerdaniel/", driver=driver) + + print("Person Profile:") + pprint(vars(person)) + + +if __name__ == "__main__": + main() diff --git a/linkedin_scraper/company.py b/linkedin_scraper/company.py index fd9e07d..ec1fb22 100644 --- a/linkedin_scraper/company.py +++ b/linkedin_scraper/company.py @@ -153,11 +153,13 @@ def __parse_employee__(self, employee_raw): return None def get_employees(self, wait_time=10): + print("DEBUG: Starting get_employees") total = [] list_css = "list-style-none" next_xpath = '//button[@aria-label="Next"]' driver = self.driver + print(f"DEBUG: Navigating to {os.path.join(self.linkedin_url, 'people')}") try: driver.find_element( By.XPATH, '//a[@data-control-name="topcard_see_all_employees"]' @@ -166,9 +168,24 @@ def get_employees(self, wait_time=10): pass driver.get(os.path.join(self.linkedin_url, "people")) - _ = WebDriverWait(driver, 3).until( - EC.presence_of_all_elements_located((By.XPATH, '//span[@dir="ltr"]')) - ) + print("DEBUG: Waiting for span elements to load") + try: + _ = WebDriverWait(driver, 3).until( + EC.presence_of_all_elements_located((By.XPATH, '//span[@dir="ltr"]')) + ) + print("DEBUG: Span elements loaded successfully") + except: + print( + "DEBUG: Failed to find span elements with dir='ltr', trying alternative wait" + ) + try: + _ = WebDriverWait(driver, 3).until( + EC.presence_of_element_located((By.TAG_NAME, "main")) + ) + print("DEBUG: Main element loaded successfully") + except: + print("DEBUG: Failed to find main element, continuing anyway") + time.sleep(2) driver.execute_script( "window.scrollTo(0, Math.ceil(document.body.scrollHeight/2));" @@ -373,7 +390,9 @@ def scrape_logged_in(self, get_employees=True, close_on_complete=True): pass if get_employees: + print("DEBUG: About to call get_employees") self.employees = self.get_employees() + print("DEBUG: get_employees completed") driver.get(self.linkedin_url) diff --git a/samples/company_profile_example.py b/samples/company_profile_example.py new file mode 100644 index 0000000..31ea5e6 --- /dev/null +++ b/samples/company_profile_example.py @@ -0,0 +1,23 @@ +import os +from pprint import pprint + +from dotenv import load_dotenv +from selenium import webdriver + +from linkedin_scraper import Company, actions + + +def main(): + load_dotenv() + driver = webdriver.Chrome() + actions.login(driver, os.getenv("LINKEDIN_EMAIL"), os.getenv("LINKEDIN_PASSWORD")) + + # Example company - replace with any LinkedIn company URL + company = Company("https://www.linkedin.com/company/google/", driver=driver) + + print("Company Profile:") + pprint(vars(company)) + + +if __name__ == "__main__": + main() diff --git a/samples/scrape_person.py b/samples/scrape_person.py deleted file mode 100644 index 2bae31a..0000000 --- a/samples/scrape_person.py +++ /dev/null @@ -1,14 +0,0 @@ -import os - -from selenium import webdriver - -from linkedin_scraper import Person, actions - -driver = webdriver.Chrome("./chromedriver") - -email = os.getenv("LINKEDIN_USER") -password = os.getenv("LINKEDIN_PASSWORD") -actions.login( - driver, email, password -) # if 
email and password isnt given, it'll prompt in terminal -person = Person("https://www.linkedin.com/in/andre-iguodala-65b48ab5", driver=driver) diff --git a/samples/scrape_person_contacts.py b/samples/scrape_person_contacts.py deleted file mode 100644 index a6e0feb..0000000 --- a/samples/scrape_person_contacts.py +++ /dev/null @@ -1,25 +0,0 @@ -import os - -from selenium import webdriver -from selenium.webdriver.chrome.options import Options - -from linkedin_scraper import Person, actions - -chrome_options = Options() -chrome_options.add_argument("--headless") -driver = webdriver.Chrome("./chromedriver", options=chrome_options) - -email = os.getenv("LINKEDIN_USER") -password = os.getenv("LINKEDIN_PASSWORD") -actions.login( - driver, email, password -) # if email and password isnt given, it'll prompt in terminal -person = Person("https://www.linkedin.com/in/adrian0350", contacts=[], driver=driver) - -print("Person: " + person.name) -print("Person contacts: ") - -for contact in person.contacts: - print( - "Contact: " + contact.name + " - " + contact.occupation + " -> " + contact.url - ) From 7e90e261806b59aa134129503611b990b5daa514 Mon Sep 17 00:00:00 2001 From: Daniel Sticker Date: Fri, 4 Jul 2025 14:48:47 -0400 Subject: [PATCH 05/14] feat(examples): add company profile example script and resolve company people scraping bug --- .../company_profile_example.py | 0 linkedin_scraper/company.py | 18 +++++++++--------- 2 files changed, 9 insertions(+), 9 deletions(-) rename {samples => examples}/company_profile_example.py (100%) diff --git a/samples/company_profile_example.py b/examples/company_profile_example.py similarity index 100% rename from samples/company_profile_example.py rename to examples/company_profile_example.py diff --git a/linkedin_scraper/company.py b/linkedin_scraper/company.py index ec1fb22..bf5dea4 100644 --- a/linkedin_scraper/company.py +++ b/linkedin_scraper/company.py @@ -46,9 +46,9 @@ class Company(Scraper): company_type = None company_size = None specialties = None - showcase_pages = [] - affiliated_companies = [] - employees = [] + showcase_pages: list = [] + affiliated_companies: list = [] + employees: list = [] headcount = None def __init__( @@ -153,13 +153,13 @@ def __parse_employee__(self, employee_raw): return None def get_employees(self, wait_time=10): - print("DEBUG: Starting get_employees") + # print("DEBUG: Starting get_employees") total = [] list_css = "list-style-none" next_xpath = '//button[@aria-label="Next"]' driver = self.driver - print(f"DEBUG: Navigating to {os.path.join(self.linkedin_url, 'people')}") + # print(f"DEBUG: Navigating to {os.path.join(self.linkedin_url, 'people')}") try: driver.find_element( By.XPATH, '//a[@data-control-name="topcard_see_all_employees"]' @@ -168,12 +168,12 @@ def get_employees(self, wait_time=10): pass driver.get(os.path.join(self.linkedin_url, "people")) - print("DEBUG: Waiting for span elements to load") + # print("DEBUG: Waiting for span elements to load") try: _ = WebDriverWait(driver, 3).until( EC.presence_of_all_elements_located((By.XPATH, '//span[@dir="ltr"]')) ) - print("DEBUG: Span elements loaded successfully") + # print("DEBUG: Span elements loaded successfully") except: print( "DEBUG: Failed to find span elements with dir='ltr', trying alternative wait" @@ -182,9 +182,9 @@ def get_employees(self, wait_time=10): _ = WebDriverWait(driver, 3).until( EC.presence_of_element_located((By.TAG_NAME, "main")) ) - print("DEBUG: Main element loaded successfully") + # print("DEBUG: Main element loaded successfully") except: 
- print("DEBUG: Failed to find main element, continuing anyway") + # print("DEBUG: Failed to find main element, continuing anyway") time.sleep(2) driver.execute_script( From d7d7d23a4f025f9694d8ff20e397c178f61600f1 Mon Sep 17 00:00:00 2001 From: Daniel Sticker Date: Fri, 4 Jul 2025 14:56:30 -0400 Subject: [PATCH 06/14] feat(examples): add job details example script and fix stale element reference issue --- examples/job_details_example.py | 30 ++++++++++++++++++++++++++++++ linkedin_scraper/jobs.py | 16 +++++++++++++--- 2 files changed, 43 insertions(+), 3 deletions(-) create mode 100644 examples/job_details_example.py diff --git a/examples/job_details_example.py b/examples/job_details_example.py new file mode 100644 index 0000000..1bd5664 --- /dev/null +++ b/examples/job_details_example.py @@ -0,0 +1,30 @@ +import os +import sys +from pprint import pprint + +from dotenv import load_dotenv +from selenium import webdriver + +from linkedin_scraper import Job, actions + + +def main(): + if len(sys.argv) < 2: + print("Usage: python job_details_example.py ") + print( + "Example: python job_details_example.py 'https://www.linkedin.com/jobs/view/3543210987'" + ) + sys.exit(1) + + load_dotenv() + driver = webdriver.Chrome() + actions.login(driver, os.getenv("LINKEDIN_EMAIL"), os.getenv("LINKEDIN_PASSWORD")) + + job = Job(sys.argv[1], driver=driver) + + print("Job Details:") + pprint(vars(job)) + + +if __name__ == "__main__": + main() diff --git a/linkedin_scraper/jobs.py b/linkedin_scraper/jobs.py index be8fea5..26031b7 100644 --- a/linkedin_scraper/jobs.py +++ b/linkedin_scraper/jobs.py @@ -75,10 +75,20 @@ def scrape_logged_in(self, close_on_complete=True): .find_element(By.TAG_NAME, "a") .get_attribute("href") ) - primary_descriptions = self.wait_for_element_to_load( + primary_descriptions_elem = self.wait_for_element_to_load( name="job-details-jobs-unified-top-card__primary-description-container" - ).find_elements(By.TAG_NAME, "span") - texts = [span.text for span in primary_descriptions if span.text.strip() != ""] + ) + primary_descriptions = primary_descriptions_elem.find_elements( + By.TAG_NAME, "span" + ) + texts = [] + for span in primary_descriptions: + try: + text = span.text.strip() + if text: + texts.append(text) + except: + continue self.location = texts[0] self.posted_date = texts[3] From c6a5c85a1894d1197e14b01a26bd9c8d81433130 Mon Sep 17 00:00:00 2001 From: Daniel Sticker Date: Fri, 4 Jul 2025 15:02:32 -0400 Subject: [PATCH 07/14] feat(examples): fix job scraping with new selectors and add search / recommended jobs examples --- .claude/settings.local.json | 3 +- examples/job_search_example.py | 33 +++++++ examples/recommended_jobs_example.py | 34 ++++++++ linkedin_scraper/job_search.py | 126 +++++++++++++++------------ 4 files changed, 138 insertions(+), 58 deletions(-) create mode 100644 examples/job_search_example.py create mode 100644 examples/recommended_jobs_example.py diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 44b1176..60ca00c 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -2,7 +2,8 @@ "permissions": { "allow": [ "WebFetch(domain:docs.astral.sh)", - "WebFetch(domain:pre-commit.com)" + "WebFetch(domain:pre-commit.com)", + "Bash(uv run ruff check:*)" ], "deny": [] } diff --git a/examples/job_search_example.py b/examples/job_search_example.py new file mode 100644 index 0000000..994720b --- /dev/null +++ b/examples/job_search_example.py @@ -0,0 +1,33 @@ +import os +import sys +from pprint import pprint + +from dotenv 
import load_dotenv +from selenium import webdriver + +from linkedin_scraper import JobSearch, actions + + +def main(): + if len(sys.argv) < 2: + print("Usage: python job_search_example.py ") + print("Example: python job_search_example.py 'python developer'") + sys.exit(1) + + load_dotenv() + driver = webdriver.Chrome() + actions.login(driver, os.getenv("LINKEDIN_EMAIL"), os.getenv("LINKEDIN_PASSWORD")) + + keywords = sys.argv[1] + + job_search = JobSearch(driver=driver, close_on_complete=False, scrape=False) + jobs = job_search.search(keywords) + + print(f"Job Search Results for '{keywords}':") + for i, job in enumerate(jobs): + print(f"\n--- Job {i + 1} ---") + pprint(vars(job)) + + +if __name__ == "__main__": + main() diff --git a/examples/recommended_jobs_example.py b/examples/recommended_jobs_example.py new file mode 100644 index 0000000..4e3b778 --- /dev/null +++ b/examples/recommended_jobs_example.py @@ -0,0 +1,34 @@ +import os +from pprint import pprint + +from dotenv import load_dotenv +from selenium import webdriver + +from linkedin_scraper import JobSearch, actions + + +def main(): + load_dotenv() + driver = webdriver.Chrome() + actions.login(driver, os.getenv("LINKEDIN_EMAIL"), os.getenv("LINKEDIN_PASSWORD")) + + # Create JobSearch instance and scrape recommended jobs + job_search = JobSearch( + driver=driver, + close_on_complete=False, + scrape=True, + scrape_recommended_jobs=True, + ) + recommended_jobs = getattr(job_search, "recommended_jobs", []) + + print("Recommended Jobs:") + if recommended_jobs: + for i, job in enumerate(recommended_jobs): + print(f"\n--- Recommendation {i + 1} ---") + pprint(vars(job)) + else: + print("No recommended jobs found.") + + +if __name__ == "__main__": + main() diff --git a/linkedin_scraper/job_search.py b/linkedin_scraper/job_search.py index ea7f1b8..cb80994 100644 --- a/linkedin_scraper/job_search.py +++ b/linkedin_scraper/job_search.py @@ -2,6 +2,8 @@ import urllib.parse from time import sleep +from selenium.webdriver.common.by import By + from .jobs import Job from .objects import Scraper @@ -34,50 +36,68 @@ def scrape(self, close_on_complete=True, scrape_recommended_jobs=True): raise NotImplementedError("This part is not implemented yet") def scrape_job_card(self, base_element) -> Job: - job_div = self.wait_for_element_to_load( - name="job-card-list__title", base=base_element - ) - job_title = job_div.text.strip() - linkedin_url = job_div.get_attribute("href") - company = base_element.find_element_by_class_name( - "artdeco-entity-lockup__subtitle" - ).text - location = base_element.find_element_by_class_name( - "job-card-container__metadata-wrapper" - ).text - job = Job( - linkedin_url=linkedin_url, - job_title=job_title, - company=company, - location=location, - scrape=False, - driver=self.driver, - ) - return job + try: + # Try to find job title and URL using updated selectors + job_link = base_element.find_element( + By.CLASS_NAME, "job-card-container__link" + ) + job_title = job_link.text.strip() + linkedin_url = job_link.get_attribute("href") + + # Find company name + company = base_element.find_element( + By.CLASS_NAME, "artdeco-entity-lockup__subtitle" + ).text.strip() + + # Find location (try multiple possible selectors) + location = "" + try: + location = base_element.find_element( + By.CLASS_NAME, "job-card-container__metadata-wrapper" + ).text.strip() + except: + try: + location = base_element.find_element( + By.CLASS_NAME, "job-card-container__metadata-item" + ).text.strip() + except: + location = "Location not found" + + job 
= Job( + linkedin_url=linkedin_url, + job_title=job_title, + company=company, + location=location, + scrape=False, + driver=self.driver, + ) + return job + except Exception as e: + print(f"Error scraping job card: {e}") + return None def scrape_logged_in(self, close_on_complete=True, scrape_recommended_jobs=True): driver = self.driver driver.get(self.base_url) if scrape_recommended_jobs: - self.focus() - sleep(self.WAIT_FOR_ELEMENT_TIMEOUT) - job_area = self.wait_for_element_to_load( - name="scaffold-finite-scroll__content" - ) - areas = self.wait_for_all_elements_to_load( - name="artdeco-card", base=job_area - ) - for i, area in enumerate(areas): - area_name = self.AREAS[i] - if not area_name: - continue - area_results = [] - for job_posting in area.find_elements_by_class_name( - "jobs-job-board-list__item" - ): - job = self.scrape_job_card(job_posting) - area_results.append(job) - setattr(self, area_name, area_results) + sleep(3) # Wait for page to load + + # Find recommended job cards directly + job_cards = driver.find_elements(By.CLASS_NAME, "job-card-container") + print(f"Found {len(job_cards)} recommended jobs") + + recommended_jobs = [] + for job_card in job_cards: + job = self.scrape_job_card(job_card) + if job: + recommended_jobs.append(job) + + # Set the recommended_jobs attribute + self.recommended_jobs = recommended_jobs + print(f"Successfully scraped {len(recommended_jobs)} recommended jobs") + + if close_on_complete: + driver.close() return def search(self, search_term: str) -> list[Job]: @@ -90,25 +110,17 @@ def search(self, search_term: str) -> list[Job]: self.focus() sleep(self.WAIT_FOR_ELEMENT_TIMEOUT) - job_listing_class_name = "jobs-search-results-list" - job_listing = self.wait_for_element_to_load(name=job_listing_class_name) - - self.scroll_class_name_element_to_page_percent(job_listing_class_name, 0.3) - self.focus() - sleep(self.WAIT_FOR_ELEMENT_TIMEOUT) - - self.scroll_class_name_element_to_page_percent(job_listing_class_name, 0.6) - self.focus() - sleep(self.WAIT_FOR_ELEMENT_TIMEOUT) - - self.scroll_class_name_element_to_page_percent(job_listing_class_name, 1) - self.focus() - sleep(self.WAIT_FOR_ELEMENT_TIMEOUT) + # Wait for page to load and scroll to load more jobs + sleep(2) + self.scroll_to_bottom() + sleep(2) job_results = [] - for job_card in self.wait_for_all_elements_to_load( - name="job-card-list", base=job_listing - ): + # Find job cards directly - LinkedIn now uses job-card-container + job_cards = self.driver.find_elements(By.CLASS_NAME, "job-card-container") + + for job_card in job_cards: job = self.scrape_job_card(job_card) - job_results.append(job) + if job: # Only add successfully scraped jobs + job_results.append(job) return job_results From 027c0bb2a9e4c42ecc6b74473c8216c115cf91fe Mon Sep 17 00:00:00 2001 From: Daniel Sticker Date: Fri, 4 Jul 2025 15:33:45 -0400 Subject: [PATCH 08/14] docs(README): reorganize setup instructions and add examples --- README.md | 90 +++++++++++++++++++++++++++---------------------------- 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index 56cafb9..60cbf18 100644 --- a/README.md +++ b/README.md @@ -5,13 +5,12 @@ Scrapes Linkedin User Data [Linkedin Scraper](#linkedin-scraper) - [Linkedin Scraper](#linkedin-scraper) - [Installation](#installation) - - [Development](#development) - - [Running Scripts](#running-scripts) - - [Contributing](#contributing) - - [Development Workflow](#development-workflow) - [Setup](#setup) - [Environment Variables](#environment-variables) - 
[ChromeDriver](#chromedriver) + - [Running Scripts](#running-scripts) + - [Contributing](#contributing) + - [Development Workflow](#development-workflow) - [Sponsor](#sponsor) - [Usage](#usage) - [Sample Usage](#sample-usage) @@ -64,17 +63,37 @@ cd linkedin_scraper uv sync ``` -## Development +## Setup -### Running Scripts +### Environment Variables +Create a `.env` file (copy from `.env.example`) with your LinkedIn credentials: +```bash +cp .env.example .env +# Edit .env with your credentials +``` + +### ChromeDriver +First, set your chromedriver location by running: ```bash -# Run a Python script with the project dependencies -uv run your_script.py +export CHROMEDRIVER=~/chromedriver +``` + +### Running Scripts -# Or activate the virtual environment -uv shell -python your_script.py +```bash +# Try the working examples +# Profile +uv run python examples/person_contacts_example.py +uv run python examples/person_profile_example.py +# Company +uv run python examples/company_profile_example.py +# Job Search +uv run python examples/job_search_example.py "python developer" +# Recommended Jobs +uv run python examples/recommended_jobs_example.py +# Job Details of a specific job +uv run python examples/job_details_example.py "https://www.linkedin.com/jobs/view/4097107717" ``` ## Contributing @@ -91,31 +110,10 @@ If you want to contribute to this project: uv run pre-commit install ``` -3. **Environment setup for testing**: - ```bash - cp .env.example .env - # Edit .env with your LinkedIn credentials for login - ``` - ### Development Workflow With pre-commit installed, your code will be automatically linted and formatted on every commit. -## Setup - -### Environment Variables -Create a `.env` file (copy from `.env.example`) with your LinkedIn credentials: -```bash -cp .env.example .env -# Edit .env with your credentials -``` - -### ChromeDriver -The project will automatically use chromedriver from your system PATH. If you need to specify a custom location: - -```bash -export CHROMEDRIVER=~/chromedriver -``` ## Sponsor Message me if you'd like to sponsor me @@ -124,24 +122,26 @@ Message me if you'd like to sponsor me To use it, just create the class. ### Sample Usage -```python -import os -from dotenv import load_dotenv -from linkedin_scraper import Person, actions -from selenium import webdriver +Try our working examples to get started quickly: -# Load environment variables -load_dotenv() +```bash +# Scrape a person's profile and contacts +uv run python examples/person_profile_example.py +uv run python examples/person_contacts_example.py -driver = webdriver.Chrome() +# Scrape company information and employees +uv run python examples/company_profile_example.py -# Use credentials from .env file -email = os.getenv("LINKEDIN_EMAIL") -password = os.getenv("LINKEDIN_PASSWORD") -actions.login(driver, email, password) -person = Person("https://www.linkedin.com/in/joey-sham-aa2a50122", driver=driver) +# Search for jobs and get recommendations +uv run python examples/job_search_example.py "python developer" +uv run python examples/recommended_jobs_example.py + +# Get detailed information about a specific job +uv run python examples/job_details_example.py "https://www.linkedin.com/jobs/view/1234567890" ``` +All examples use credentials from your `.env` file automatically. + **NOTE**: The account used to log-in should have it's language set English to make sure everything works as expected. 
From f9ac930f03ccada6cb3a97033eee5f0fd9641832 Mon Sep 17 00:00:00 2001 From: Daniel Sticker Date: Sat, 5 Jul 2025 10:56:22 -0400 Subject: [PATCH 09/14] docs(README): update .rst as well --- README.rst | 90 +++++++++++++++++++++++++++--------------------------- 1 file changed, 45 insertions(+), 45 deletions(-) diff --git a/README.rst b/README.rst index 56cafb9..60cbf18 100644 --- a/README.rst +++ b/README.rst @@ -5,13 +5,12 @@ Scrapes Linkedin User Data [Linkedin Scraper](#linkedin-scraper) - [Linkedin Scraper](#linkedin-scraper) - [Installation](#installation) - - [Development](#development) - - [Running Scripts](#running-scripts) - - [Contributing](#contributing) - - [Development Workflow](#development-workflow) - [Setup](#setup) - [Environment Variables](#environment-variables) - [ChromeDriver](#chromedriver) + - [Running Scripts](#running-scripts) + - [Contributing](#contributing) + - [Development Workflow](#development-workflow) - [Sponsor](#sponsor) - [Usage](#usage) - [Sample Usage](#sample-usage) @@ -64,17 +63,37 @@ cd linkedin_scraper uv sync ``` -## Development +## Setup -### Running Scripts +### Environment Variables +Create a `.env` file (copy from `.env.example`) with your LinkedIn credentials: +```bash +cp .env.example .env +# Edit .env with your credentials +``` + +### ChromeDriver +First, set your chromedriver location by running: ```bash -# Run a Python script with the project dependencies -uv run your_script.py +export CHROMEDRIVER=~/chromedriver +``` + +### Running Scripts -# Or activate the virtual environment -uv shell -python your_script.py +```bash +# Try the working examples +# Profile +uv run python examples/person_contacts_example.py +uv run python examples/person_profile_example.py +# Company +uv run python examples/company_profile_example.py +# Job Search +uv run python examples/job_search_example.py "python developer" +# Recommended Jobs +uv run python examples/recommended_jobs_example.py +# Job Details of a specific job +uv run python examples/job_details_example.py "https://www.linkedin.com/jobs/view/4097107717" ``` ## Contributing @@ -91,31 +110,10 @@ If you want to contribute to this project: uv run pre-commit install ``` -3. **Environment setup for testing**: - ```bash - cp .env.example .env - # Edit .env with your LinkedIn credentials for login - ``` - ### Development Workflow With pre-commit installed, your code will be automatically linted and formatted on every commit. -## Setup - -### Environment Variables -Create a `.env` file (copy from `.env.example`) with your LinkedIn credentials: -```bash -cp .env.example .env -# Edit .env with your credentials -``` - -### ChromeDriver -The project will automatically use chromedriver from your system PATH. If you need to specify a custom location: - -```bash -export CHROMEDRIVER=~/chromedriver -``` ## Sponsor Message me if you'd like to sponsor me @@ -124,24 +122,26 @@ Message me if you'd like to sponsor me To use it, just create the class. 
### Sample Usage -```python -import os -from dotenv import load_dotenv -from linkedin_scraper import Person, actions -from selenium import webdriver +Try our working examples to get started quickly: -# Load environment variables -load_dotenv() +```bash +# Scrape a person's profile and contacts +uv run python examples/person_profile_example.py +uv run python examples/person_contacts_example.py -driver = webdriver.Chrome() +# Scrape company information and employees +uv run python examples/company_profile_example.py -# Use credentials from .env file -email = os.getenv("LINKEDIN_EMAIL") -password = os.getenv("LINKEDIN_PASSWORD") -actions.login(driver, email, password) -person = Person("https://www.linkedin.com/in/joey-sham-aa2a50122", driver=driver) +# Search for jobs and get recommendations +uv run python examples/job_search_example.py "python developer" +uv run python examples/recommended_jobs_example.py + +# Get detailed information about a specific job +uv run python examples/job_details_example.py "https://www.linkedin.com/jobs/view/1234567890" ``` +All examples use credentials from your `.env` file automatically. + **NOTE**: The account used to log-in should have it's language set English to make sure everything works as expected. From b9cdb8fa14cb37110879004c815db9052a8a6bfc Mon Sep 17 00:00:00 2001 From: Daniel Sticker Date: Sat, 5 Jul 2025 11:34:10 -0400 Subject: [PATCH 10/14] feat(actions): add login exceotions and enhance login with interactive captcha handling (wait for manual captcha solving) --- examples/person_profile_example.py | 7 +- linkedin_scraper/actions.py | 233 ++++++++++++++++++++++++++--- linkedin_scraper/exceptions.py | 73 +++++++++ 3 files changed, 291 insertions(+), 22 deletions(-) create mode 100644 linkedin_scraper/exceptions.py diff --git a/examples/person_profile_example.py b/examples/person_profile_example.py index 095094d..2d8bc76 100644 --- a/examples/person_profile_example.py +++ b/examples/person_profile_example.py @@ -10,7 +10,12 @@ def main(): load_dotenv() driver = webdriver.Chrome() - actions.login(driver, os.getenv("LINKEDIN_EMAIL"), os.getenv("LINKEDIN_PASSWORD")) + actions.login( + driver, + os.getenv("LINKEDIN_EMAIL"), + os.getenv("LINKEDIN_PASSWORD"), + interactive=True, + ) # Example profile person = Person("https://www.linkedin.com/in/stickerdaniel/", driver=driver) diff --git a/linkedin_scraper/actions.py b/linkedin_scraper/actions.py index d533a35..cb072e1 100644 --- a/linkedin_scraper/actions.py +++ b/linkedin_scraper/actions.py @@ -1,10 +1,24 @@ import getpass +import time +from selenium.common.exceptions import ( + NoSuchElementException, + TimeoutException, + WebDriverException, +) from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.wait import WebDriverWait from . import constants as c +from .exceptions import ( + CaptchaRequiredError, + InvalidCredentialsError, + LoginTimeoutError, + RateLimitError, + SecurityChallengeError, + TwoFactorAuthError, +) def __prompt_email_password(): @@ -18,33 +32,210 @@ def page_has_loaded(driver): return page_state == "complete" -def login(driver, email=None, password=None, cookie=None, timeout=10): +def login( + driver, email=None, password=None, cookie=None, timeout=10, interactive=False +): + """Login to LinkedIn with comprehensive error handling. 
+ + Args: + driver: Selenium WebDriver instance + email: LinkedIn email address + password: LinkedIn password + cookie: LinkedIn authentication cookie (li_at) + timeout: Timeout in seconds for login verification + interactive: If True, pause for manual captcha/challenge solving + + Raises: + InvalidCredentialsError: Wrong email/password combination + CaptchaRequiredError: CAPTCHA verification required + SecurityChallengeError: Security challenge required + TwoFactorAuthError: Two-factor authentication required + RateLimitError: Too many login attempts + LoginTimeoutError: Login process timed out + WebDriverException: Driver-related errors + """ if cookie is not None: return _login_with_cookie(driver, cookie) if not email or not password: email, password = __prompt_email_password() - driver.get("https://www.linkedin.com/login") - WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.ID, "username"))) - - email_elem = driver.find_element(By.ID, "username") - email_elem.send_keys(email) - - password_elem = driver.find_element(By.ID, "password") - password_elem.send_keys(password) - password_elem.submit() - - if driver.current_url == "https://www.linkedin.com/checkpoint/lg/login-submit": - remember = driver.find_element(By.ID, c.REMEMBER_PROMPT) - if remember: - remember.submit() - - WebDriverWait(driver, timeout).until( - EC.presence_of_element_located((By.CLASS_NAME, c.VERIFY_LOGIN_ID)) - ) + try: + # Navigate to login page + driver.get("https://www.linkedin.com/login") + + # Wait for login form to load + WebDriverWait(driver, 10).until( + EC.presence_of_element_located((By.ID, "username")) + ) + + # Fill in credentials + email_elem = driver.find_element(By.ID, "username") + email_elem.clear() + email_elem.send_keys(email) + + password_elem = driver.find_element(By.ID, "password") + password_elem.clear() + password_elem.send_keys(password) + password_elem.submit() + + # Wait a moment for the page to process + time.sleep(2) + + # Check for various post-login scenarios + _handle_post_login_scenarios(driver, timeout, interactive) + + except TimeoutException as e: + raise LoginTimeoutError(f"Login timed out: {str(e)}") from e + except WebDriverException as e: + raise LoginTimeoutError(f"WebDriver error during login: {str(e)}") from e + except Exception as e: + raise LoginTimeoutError(f"Unexpected error during login: {str(e)}") from e + + +def _handle_post_login_scenarios(driver, timeout, interactive=False): + """Handle various post-login scenarios and errors.""" + current_url = driver.current_url + + # Check for specific error conditions + if "checkpoint/challenge" in current_url: + if "AgG1DOkeX" in current_url or "security check" in driver.page_source.lower(): + if interactive: + print(f"Security challenge detected: {current_url}") + print("Please solve the security challenge manually in the browser.") + try: + input("Press Enter after completing the challenge...") + except EOFError: + print( + "Non-interactive mode detected. Waiting 30 seconds for manual completion..." + ) + time.sleep(30) + # Wait for user to complete the challenge and continue + time.sleep(2) + else: + raise SecurityChallengeError( + challenge_url=current_url, message="Let's do a quick security check" + ) + else: + if interactive: + print(f"CAPTCHA detected: {current_url}") + print("Please solve the CAPTCHA manually in the browser.") + try: + input("Press Enter after completing the CAPTCHA...") + except EOFError: + print( + "Non-interactive mode detected. Waiting 30 seconds for manual completion..." 
+ ) + time.sleep(20) + # Wait for user to complete the CAPTCHA and continue + time.sleep(2) + else: + raise CaptchaRequiredError(current_url) + + # Check for invalid credentials - improved detection + page_source = driver.page_source.lower() + + # Debug: print current URL and check for error patterns + if interactive: + print(f"Current URL: {current_url}") + print("Checking for credential errors...") + + # Check for various invalid credential patterns + invalid_cred_patterns = [ + "falsche e-mail", + "wrong email", + "falsches passwort", + "wrong password", + "wrong email or password", # Exact match from screenshot + "try again or create", # Part of the error message + "sign in to linkedin", # Sometimes redirects back to login + "there was an unexpected error", + "please check your email address", + "please check your password", + "incorrect email", + "incorrect password", + "invalid email", + "invalid password", + ] + + login_failed_urls = ["login-challenge-submit", "/login", "/uas/login"] + + # Check if we're on a login error page or back at login + on_error_page = any(url_pattern in current_url for url_pattern in login_failed_urls) + found_patterns = [ + pattern for pattern in invalid_cred_patterns if pattern in page_source + ] + has_error_text = len(found_patterns) > 0 + + if interactive: + print(f"On error page: {on_error_page}") + print(f"Found error patterns: {found_patterns}") + + if on_error_page and has_error_text: + if interactive: + print("Invalid credentials detected!") + raise InvalidCredentialsError("Wrong email or password. Try again.") + + # Also check for credential errors even when not on typical error page + # This handles cases where wrong credentials are shown after solving challenges + if has_error_text and "sign in to linkedin" not in found_patterns: + if interactive: + print("Invalid credentials detected (after challenge)!") + raise InvalidCredentialsError("Wrong email or password. Try again.") + + # Check for two-factor authentication + if ( + "checkpoint/challenge" in current_url + and "two-factor" in driver.page_source.lower() + ): + raise TwoFactorAuthError("Two-factor authentication required") + + # Check for rate limiting + if ( + "too many" in driver.page_source.lower() + or "rate limit" in driver.page_source.lower() + ): + raise RateLimitError("Too many login attempts. 
Please try again later.") + + # Handle remember me prompt + if current_url == "https://www.linkedin.com/checkpoint/lg/login-submit": + try: + remember = driver.find_element(By.ID, c.REMEMBER_PROMPT) + if remember: + remember.submit() + except NoSuchElementException: + pass + + # Verify successful login + try: + WebDriverWait(driver, timeout).until( + EC.presence_of_element_located((By.CLASS_NAME, c.VERIFY_LOGIN_ID)) + ) + except TimeoutException: + # Final check for any error messages on the page + page_source = driver.page_source.lower() + if "error" in page_source or "invalid" in page_source: + raise InvalidCredentialsError( + "Login failed - please check your credentials" + ) from None + else: + raise LoginTimeoutError( + f"Login verification timed out after {timeout} seconds" + ) from None def _login_with_cookie(driver, cookie): - driver.get("https://www.linkedin.com/login") - driver.add_cookie({"name": "li_at", "value": cookie}) + """Login using LinkedIn authentication cookie.""" + try: + driver.get("https://www.linkedin.com/login") + driver.add_cookie({"name": "li_at", "value": cookie}) + driver.get("https://www.linkedin.com/feed/") + + # Verify cookie login worked + WebDriverWait(driver, 10).until( + EC.presence_of_element_located((By.CLASS_NAME, c.VERIFY_LOGIN_ID)) + ) + except TimeoutException as e: + raise InvalidCredentialsError( + "Cookie login failed - cookie may be expired or invalid" + ) from e diff --git a/linkedin_scraper/exceptions.py b/linkedin_scraper/exceptions.py new file mode 100644 index 0000000..55963d1 --- /dev/null +++ b/linkedin_scraper/exceptions.py @@ -0,0 +1,73 @@ +""" +Custom exceptions for LinkedIn scraper. + +This module defines specific exception types for different error scenarios +to provide better error handling and reporting. 
+""" + + +class LinkedInScraperError(Exception): + """Base exception for LinkedIn scraper.""" + + pass + + +class LoginError(LinkedInScraperError): + """Base login error.""" + + pass + + +class CredentialsNotFoundError(LoginError): + """No credentials available in non-interactive mode.""" + + pass + + +class InvalidCredentialsError(LoginError): + """Invalid email/password combination.""" + + pass + + +class CaptchaRequiredError(LoginError): + """LinkedIn requires captcha verification.""" + + def __init__(self, captcha_url: str | None = None) -> None: + self.captcha_url = captcha_url + super().__init__(f"Captcha required: {captcha_url}") + + +class TwoFactorAuthError(LoginError): + """Two-factor authentication required.""" + + pass + + +class RateLimitError(LoginError): + """Too many login attempts.""" + + pass + + +class SecurityChallengeError(LoginError): + """LinkedIn security challenge required.""" + + def __init__( + self, challenge_url: str | None = None, message: str | None = None + ) -> None: + self.challenge_url = challenge_url + self.message = message + super().__init__(f"Security challenge required: {message or challenge_url}") + + +class LoginTimeoutError(LoginError): + """Login process timed out.""" + + pass + + +class DriverInitializationError(LinkedInScraperError): + """Failed to initialize Chrome WebDriver.""" + + pass From 64265d48fec11d6b07b5bd2a608b43dd4a922b30 Mon Sep 17 00:00:00 2001 From: Daniel Sticker Date: Sat, 5 Jul 2025 12:35:39 -0400 Subject: [PATCH 11/14] style(actions): comment out security challenge print statements --- linkedin_scraper/actions.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/linkedin_scraper/actions.py b/linkedin_scraper/actions.py index cb072e1..41cdefd 100644 --- a/linkedin_scraper/actions.py +++ b/linkedin_scraper/actions.py @@ -101,7 +101,7 @@ def _handle_post_login_scenarios(driver, timeout, interactive=False): if "checkpoint/challenge" in current_url: if "AgG1DOkeX" in current_url or "security check" in driver.page_source.lower(): if interactive: - print(f"Security challenge detected: {current_url}") + print("Security challenge detected!") print("Please solve the security challenge manually in the browser.") try: input("Press Enter after completing the challenge...") @@ -136,9 +136,9 @@ def _handle_post_login_scenarios(driver, timeout, interactive=False): page_source = driver.page_source.lower() # Debug: print current URL and check for error patterns - if interactive: - print(f"Current URL: {current_url}") - print("Checking for credential errors...") + # if interactive: + # print(f"Current URL: {current_url}") + # print("Checking for credential errors...") # Check for various invalid credential patterns invalid_cred_patterns = [ @@ -167,9 +167,9 @@ def _handle_post_login_scenarios(driver, timeout, interactive=False): ] has_error_text = len(found_patterns) > 0 - if interactive: - print(f"On error page: {on_error_page}") - print(f"Found error patterns: {found_patterns}") + # if interactive: + # print(f"On error page: {on_error_page}") + # print(f"Found error patterns: {found_patterns}") if on_error_page and has_error_text: if interactive: From 0cf4e10291037964e7afa28c8da31c542f8e3429 Mon Sep 17 00:00:00 2001 From: Daniel Sticker Date: Sun, 6 Jul 2025 04:21:15 -0400 Subject: [PATCH 12/14] fix(actions): update timeout messages for manual completion --- linkedin_scraper/actions.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/linkedin_scraper/actions.py 
b/linkedin_scraper/actions.py index 41cdefd..56a3792 100644 --- a/linkedin_scraper/actions.py +++ b/linkedin_scraper/actions.py @@ -107,9 +107,9 @@ def _handle_post_login_scenarios(driver, timeout, interactive=False): input("Press Enter after completing the challenge...") except EOFError: print( - "Non-interactive mode detected. Waiting 30 seconds for manual completion..." + "Non-interactive mode detected. Waiting timeout seconds for manual completion..." ) - time.sleep(30) + time.sleep(timeout) # Wait for user to complete the challenge and continue time.sleep(2) else: @@ -124,9 +124,9 @@ def _handle_post_login_scenarios(driver, timeout, interactive=False): input("Press Enter after completing the CAPTCHA...") except EOFError: print( - "Non-interactive mode detected. Waiting 30 seconds for manual completion..." + "Non-interactive mode detected. Waiting timeout seconds for manual completion..." ) - time.sleep(20) + time.sleep(timeout) # Wait for user to complete the CAPTCHA and continue time.sleep(2) else: From 58a84a5ff49c97384b14c9c7469c88ff2e5be8b1 Mon Sep 17 00:00:00 2001 From: Daniel Sticker Date: Sun, 6 Jul 2025 04:21:38 -0400 Subject: [PATCH 13/14] feat(examples): add cookie authentication example --- .env.example | 3 +++ examples/cookie_login_example.py | 26 ++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 examples/cookie_login_example.py diff --git a/.env.example b/.env.example index b91e175..e48a38d 100644 --- a/.env.example +++ b/.env.example @@ -3,3 +3,6 @@ LINKEDIN_EMAIL=your-email@example.com LINKEDIN_PASSWORD=your-password-here + +# Alternative: Use cookie authentication (li_at cookie value) +LINKEDIN_COOKIE=your-li_at-cookie-value diff --git a/examples/cookie_login_example.py b/examples/cookie_login_example.py new file mode 100644 index 0000000..3d05c53 --- /dev/null +++ b/examples/cookie_login_example.py @@ -0,0 +1,26 @@ +import os +from pprint import pprint + +from dotenv import load_dotenv +from selenium import webdriver + +from linkedin_scraper import Person, actions + + +def main(): + load_dotenv() + driver = webdriver.Chrome() + + # Use cookie from environment + cookie = os.getenv("LINKEDIN_COOKIE") + actions.login(driver, cookie=cookie) + + # Example profile + person = Person("https://www.linkedin.com/in/stickerdaniel/", driver=driver) + + print("Person Profile:") + pprint(vars(person)) + + +if __name__ == "__main__": + main() From 4afb66ed91cabdcde4779baf20afcbe119a59b15 Mon Sep 17 00:00:00 2001 From: Daniel Sticker Date: Sun, 6 Jul 2025 04:43:45 -0400 Subject: [PATCH 14/14] fix(actions): use timeout parameter for login and cookie login --- linkedin_scraper/actions.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/linkedin_scraper/actions.py b/linkedin_scraper/actions.py index 56a3792..a605ee3 100644 --- a/linkedin_scraper/actions.py +++ b/linkedin_scraper/actions.py @@ -55,7 +55,7 @@ def login( WebDriverException: Driver-related errors """ if cookie is not None: - return _login_with_cookie(driver, cookie) + return _login_with_cookie(driver, cookie, timeout) if not email or not password: email, password = __prompt_email_password() @@ -99,7 +99,7 @@ def _handle_post_login_scenarios(driver, timeout, interactive=False): # Check for specific error conditions if "checkpoint/challenge" in current_url: - if "AgG1DOkeX" in current_url or "security check" in driver.page_source.lower(): + if "security check" in driver.page_source.lower(): if interactive: print("Security challenge detected!") print("Please solve the 
security challenge manually in the browser.") @@ -107,14 +107,15 @@ def _handle_post_login_scenarios(driver, timeout, interactive=False): input("Press Enter after completing the challenge...") except EOFError: print( - "Non-interactive mode detected. Waiting timeout seconds for manual completion..." + "Non-interactive environment detected. Waiting timeout seconds for manual completion..." ) time.sleep(timeout) # Wait for user to complete the challenge and continue time.sleep(2) else: raise SecurityChallengeError( - challenge_url=current_url, message="Let's do a quick security check" + challenge_url=current_url, + message="Captcha encountered. Please solve it manually in the browser.", ) else: if interactive: @@ -224,7 +225,7 @@ def _handle_post_login_scenarios(driver, timeout, interactive=False): ) from None -def _login_with_cookie(driver, cookie): +def _login_with_cookie(driver, cookie, timeout=10): """Login using LinkedIn authentication cookie.""" try: driver.get("https://www.linkedin.com/login") @@ -232,7 +233,7 @@ def _login_with_cookie(driver, cookie): driver.get("https://www.linkedin.com/feed/") # Verify cookie login worked - WebDriverWait(driver, 10).until( + WebDriverWait(driver, timeout).until( EC.presence_of_element_located((By.CLASS_NAME, c.VERIFY_LOGIN_ID)) ) except TimeoutException as e: