Skip to content

Commit 4bd405b

Browse files
Lucas Faudman
authored and committed
Add getchromedriver.py
1 parent 5b3b1f0 commit 4bd405b

File tree

7 files changed

+738
-0
lines changed

7 files changed

+738
-0
lines changed

pyproject.toml

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
[build-system]
2+
requires = ["setuptools", "setuptools_scm>=8"]
3+
build-backend = "setuptools.build_meta"
4+
5+
[project]
6+
authors = [{name = "Lucas Faudman", email = "lucasfaudman@gmail.com"}]
7+
dynamic = ["version"]
8+
name = "souperscraper"
9+
requires-python = ">=3.8"
10+
readme = "README.md"
11+
license = { file = "LICENSE.txt" }
12+
description = "A simple web scraper base combining Beautiful Soup and Selenium"
13+
keywords = ["web-scraping", "scraping", "beautifulsoup4", "beautifulsoup", "bs4", "selenium", "selenium-webdriver"]
14+
dependencies = [
15+
"selenium",
16+
"beautifulsoup4",
17+
"requests",
18+
]
19+
20+
[project.urls]
21+
Homepage = "https://github.com/LucasFaudman/souper-scraper.git"
22+
Repository = "https://github.com/LucasFaudman/souper-scraper.git"
23+
24+
[tool.setuptools]
25+
26+
[tool.setuptools_scm]
27+
version_file = "src/souperscraper/_version.py"

src/souperscraper/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
from .souperscraper import SouperScraper, Keys
2+
from .getchromedriver import get_chromedriver

src/souperscraper/_version.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# file generated by setuptools_scm
2+
# don't change, don't track in version control
3+
TYPE_CHECKING = False
4+
if TYPE_CHECKING:
5+
from typing import Tuple, Union
6+
VERSION_TUPLE = Tuple[Union[int, str], ...]
7+
else:
8+
VERSION_TUPLE = object
9+
10+
version: str
11+
__version__: str
12+
__version_tuple__: VERSION_TUPLE
13+
version_tuple: VERSION_TUPLE
14+
15+
__version__ = version = '0.1.dev1+g5b3b1f0'
16+
__version_tuple__ = version_tuple = (0, 1, 'dev1', 'g5b3b1f0')

src/souperscraper/getchromedriver.py

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
import requests
2+
import zipfile
3+
import argparse
4+
from time import sleep
5+
from pathlib import Path
6+
from typing import Optional
7+
8+
# Default directory offered by download_chromedriver() when no destination is given.
# NOTE(review): the leading "~" is stored literally in this Path; pathlib only
# turns it into the home directory via Path.expanduser() -- confirm every use expands it.
DEFAULT_PATH = Path('~/chromedriver/')
9+
10+
11+
def options_menu(options, prompt, param_name='option', default=-1):
    """
    Display a numbered menu of options and prompt the user to select one.

    The user may answer with the 1-based number of an option, with the exact
    text of an option, or with an empty line to accept the default. The menu
    is printed again after every invalid answer.

    Args:
        options: Non-empty sequence of option labels to choose from.
        prompt: Text printed above the numbered list.
        param_name: Noun used in the input prompt and in the error message.
        default: Index into ``options`` used when the user enters nothing
            (negative values count from the end).

    Returns:
        The selected element of ``options``.
    """
    # The default never changes between attempts, so look it up once.
    default_option = options[default]
    default_number = options.index(default_option) + 1

    # Compare against None rather than truthiness: index 0 (the first option)
    # is a valid selection and must terminate the loop.
    selected = None
    while selected is None:
        print(prompt)
        for number, option in enumerate(options, start=1):
            print(f"({number}) {option}")
        print()
        answer = input(
            f"Enter 1-{len(options)} to select a {param_name} (default: {default_number} {default_option}): ").strip()
        if not answer:
            selected = default
        elif answer in options:
            selected = options.index(answer)
        elif answer.isdecimal() and 1 <= int(answer) <= len(options):
            # isdecimal() guarantees int() can parse the answer.
            selected = int(answer) - 1
        else:
            print(
                f"\nInvalid {param_name} number: {answer}. Enter 1-{len(options)}")
            sleep(2)  # leave the error visible before the menu is redrawn

    return options[selected]
37+
38+
39+
def select_chromedriver(version_number: Optional[int]=None, headless: Optional[bool]=None, platform: Optional[str]=None) -> "tuple[str, str]":
    """
    Select the chromedriver version and platform to download. Return the filename and download URL.

    Any argument that is not supplied (or is falsy) is asked for interactively.

    Args:
        version_number: Chrome milestone number, e.g. 121. Prompted for when omitted.
        headless: Whether to download ``chrome-headless-shell`` instead of
            ``chromedriver``. Only considered for versions 120 and above;
            prompted for when falsy.
        platform: Platform identifier as listed in the downloads JSON.
            Prompted for when omitted.

    Returns:
        A ``(filename, download_url)`` tuple for the selected zip archive.

    Raises:
        ValueError: If the version is unknown, or no download exists for the
            selected executable/platform combination.
    """
    # The return annotation is quoted so the module still imports on Python 3.8,
    # where the builtin ``tuple`` cannot be subscripted at definition time.
    versions_url = 'https://googlechromelabs.github.io/chrome-for-testing/latest-versions-per-milestone-with-downloads.json'
    versions = requests.get(versions_url).json()
    milestones = versions['milestones']

    version_options = [
        f"Version {ms}" for ms in milestones if milestones[ms]['downloads'].get('chromedriver')]
    version_q = """
Which chrome version number do you have installed?
Open chrome and go to:
chrome://settings/help
you will see a version number like Version 121.0.6167.85 (Official Build) (x86_64)
which corresponds to chromedriver version 121

Available chromedriver versions:"""
    # Milestone keys come from JSON and are therefore strings, while a
    # caller-supplied version number is an int: normalise before any lookup.
    selected_version = str(
        version_number or options_menu(version_options, version_q, 'version number', -1).split(' ')[1])
    if selected_version not in milestones:
        raise ValueError(f"Chrome version {selected_version} is not available")

    # The headless build is only offered for version 120 and above.
    if int(selected_version) >= 120:
        headless = headless or input(
            "Do you want to use the headless version of chromedriver? (y/n): ").strip().lower() == 'y'
    else:
        headless = False
    executable = 'chrome-headless-shell' if headless else 'chromedriver'

    downloads = milestones[selected_version]['downloads'].get(executable)
    if not downloads:
        raise ValueError(
            f"No {executable} downloads available for Chrome version {selected_version}")

    platform_options = [download['platform'] for download in downloads]
    platform_q = """Which platform are you using?

Available platforms:"""
    platform = platform or options_menu(platform_options, platform_q, 'platform', 0)

    filename = f"{executable}{selected_version}-{platform}.zip"
    for download in downloads:
        if download['platform'] == platform:
            download_url = download['url']
            print(f"Found download for {filename} at {download_url}")
            return filename, download_url

    raise ValueError(f"Could not find download for {filename}")
80+
81+
82+
def download_chromedriver(filename: str, download_url: str, destdir: Optional[Path]=None):
    """
    Download the chromedriver zip file from the download URL and save it to the destination directory.

    The archive is written to ``destdir / filename`` and extracted in place.

    Args:
        filename: Name to give the downloaded zip archive.
        download_url: URL of the zip archive.
        destdir: Directory to download and extract into. When omitted the user
            is prompted, with DEFAULT_PATH used for an empty answer. A leading
            ``~`` is expanded to the user's home directory.

    Returns:
        Path of the extracted executable.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        FileNotFoundError: If the archive contains no chrome executable.
    """
    if not destdir:
        destdir_input = input(
            f"Where do you want to save {filename}? (default: {DEFAULT_PATH}): ").strip().rstrip('/')
        destdir = Path(destdir_input) if destdir_input else DEFAULT_PATH
    # resolve() alone leaves "~" untouched and would create a literal "~"
    # directory under the current working directory, so expand it first.
    destdir = Path(destdir).expanduser().resolve()

    if not destdir.exists():
        print(f"Creating {destdir}...")
        destdir.mkdir(parents=True, exist_ok=True)

    print(f"Downloading {filename} from {download_url}...")
    response = requests.get(download_url)
    # Fail here on an HTTP error instead of saving an error page as a ".zip".
    response.raise_for_status()
    destpath = destdir / filename
    destpath.write_bytes(response.content)
    print(f"Downloaded {filename} to {destpath}")

    print(f"Extracting {filename} to {destdir}...")
    with zipfile.ZipFile(destpath, 'r') as zip_ref:
        zip_ref.extractall(destdir)
        extracted = [destdir / member for member in zip_ref.namelist()]

    # Search only what this archive produced, so a stale executable from an
    # earlier download in the same directory is never returned. Windows
    # builds ship the executable with an ".exe" suffix.
    executable_path = next(
        (path for path in extracted
         if 'chrome' in path.name and path.suffix in ('', '.exe') and path.is_file()),
        None)
    if executable_path is None:
        raise FileNotFoundError(f"No chrome executable found in {destpath}")

    # zipfile does not restore Unix permission bits; make the driver runnable.
    executable_path.chmod(executable_path.stat().st_mode | 0o111)
    return executable_path
111+
112+
113+
def get_chromedriver() -> Optional[Path]:
    """
    Command-line entry point: pick, download and extract a chromedriver build.

    Reads the version, platform, destination directory and headless flag from
    the command line and passes them to select_chromedriver() and
    download_chromedriver().

    Returns:
        Path of the extracted executable, or None when selection fails with a
        ValueError or the download/extraction raises any exception (the error
        is printed in both cases).
    """
    # (flags, keyword arguments) for every supported command-line option.
    argument_specs = (
        (('-v', '--version'), {'type': int, 'help': 'Chromedriver version number'}),
        (('-p', '--platform'), {'type': str, 'help': 'Platform to download chromedriver for'}),
        (('-d', '--destdir'), {'type': Path, 'help': 'Directory to save chromedriver to'}),
        (('--headless',), {'action': 'store_true', 'help': 'Use headless version of chromedriver'}),
    )
    cli = argparse.ArgumentParser()
    for flags, settings in argument_specs:
        cli.add_argument(*flags, **settings)
    opts = cli.parse_args()

    try:
        zip_name, url = select_chromedriver(opts.version, opts.headless, opts.platform)
    except ValueError as err:
        print(err)
        return None

    try:
        driver_path = download_chromedriver(zip_name, url, opts.destdir)
    except Exception as err:
        print("Failed to download and extract chromedriver. Error: ", err)
        return None
    else:
        print('Success. Chromedriver executable downloaded and saved to', driver_path)
        return driver_path
135+
136+
# Run the interactive downloader when this module is executed as a script.
if __name__ == "__main__":
    get_chromedriver()

0 commit comments

Comments
 (0)