Skip to content

Commit

Permalink
add new feature
Browse files Browse the repository at this point in the history
  • Loading branch information
Germey committed Dec 28, 2021
1 parent 019dad4 commit 3050eb2
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 54 deletions.
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
# Gerapy Playwright Changelog

## 0.2.0 (2021-12-28)

- New Feature: Add support for:
  - Specifying `channel` for launching
  - Specifying `executablePath` for launching
  - Specifying `slowMo` for launching
  - Specifying `devtools` for launching
  - Specifying `--disable-extensions` in args for launching
  - Specifying `--hide-scrollbars` in args for launching
  - Specifying `--mute-audio` in args for launching
  - Specifying `--no-sandbox` in args for launching
  - Specifying `--disable-setuid-sandbox` in args for launching
  - Specifying `--disable-gpu` in args for launching
- Update: Change the default of `GERAPY_PLAYWRIGHT_SLEEP` from 1 to 0

## 0.1.2 (2021-12-28)

- Fix: Add retrying logic for PlaywrightError
Expand Down
2 changes: 1 addition & 1 deletion example/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
gerapy-playwright
gerapy-playwright==0.1.2
87 changes: 42 additions & 45 deletions gerapy_playwright/downloadermiddlewares.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,10 @@ def from_crawler(cls, crawler):
GERAPY_PLAYWRIGHT_DEFAULT_USER_AGENT)
cls.headless = settings.get(
'GERAPY_PLAYWRIGHT_HEADLESS', GERAPY_PLAYWRIGHT_HEADLESS)
# cls.dumpio = settings.get(
# 'GERAPY_PLAYWRIGHT_DUMPIO', GERAPY_PLAYWRIGHT_DUMPIO)
# cls.ignore_https_errors = settings.get('GERAPY_PLAYWRIGHT_IGNORE_HTTPS_ERRORS',
# GERAPY_PLAYWRIGHT_IGNORE_HTTPS_ERRORS)
# cls.slow_mo = settings.get(
# 'GERAPY_PLAYWRIGHT_SLOW_MO', GERAPY_PLAYWRIGHT_SLOW_MO)
cls.channel = settings.get(
'GERAPY_PLAYWRIGHT_CHANNEL', GERAPY_PLAYWRIGHT_CHANNEL)
cls.slow_mo = settings.get(
'GERAPY_PLAYWRIGHT_SLOW_MO', GERAPY_PLAYWRIGHT_SLOW_MO)
# cls.ignore_default_args = settings.get('GERAPY_PLAYWRIGHT_IGNORE_DEFAULT_ARGS',
# GERAPY_PLAYWRIGHT_IGNORE_DEFAULT_ARGS)
# cls.handle_sigint = settings.get(
Expand All @@ -121,24 +119,22 @@ def from_crawler(cls, crawler):
# 'GERAPY_PLAYWRIGHT_HANDLE_SIGTERM', GERAPY_PLAYWRIGHT_HANDLE_SIGTERM)
# cls.handle_sighup = settings.get(
# 'GERAPY_PLAYWRIGHT_HANDLE_SIGHUP', GERAPY_PLAYWRIGHT_HANDLE_SIGHUP)
# cls.auto_close = settings.get(
# 'GERAPY_PLAYWRIGHT_AUTO_CLOSE', GERAPY_PLAYWRIGHT_AUTO_CLOSE)
# cls.devtools = settings.get(
# 'GERAPY_PLAYWRIGHT_DEVTOOLS', GERAPY_PLAYWRIGHT_DEVTOOLS)
# cls.executable_path = settings.get(
# 'GERAPY_PLAYWRIGHT_EXECUTABLE_PATH', GERAPY_PLAYWRIGHT_EXECUTABLE_PATH)
# cls.disable_extensions = settings.get('GERAPY_PLAYWRIGHT_DISABLE_EXTENSIONS',
# GERAPY_PLAYWRIGHT_DISABLE_EXTENSIONS)
# cls.hide_scrollbars = settings.get(
# 'GERAPY_PLAYWRIGHT_HIDE_SCROLLBARS', GERAPY_PLAYWRIGHT_HIDE_SCROLLBARS)
# cls.mute_audio = settings.get(
# 'GERAPY_PLAYWRIGHT_MUTE_AUDIO', GERAPY_PLAYWRIGHT_MUTE_AUDIO)
# cls.no_sandbox = settings.get(
# 'GERAPY_PLAYWRIGHT_NO_SANDBOX', GERAPY_PLAYWRIGHT_NO_SANDBOX)
# cls.disable_setuid_sandbox = settings.get('GERAPY_PLAYWRIGHT_DISABLE_SETUID_SANDBOX',
# GERAPY_PLAYWRIGHT_DISABLE_SETUID_SANDBOX)
# cls.disable_gpu = settings.get(
# 'GERAPY_PLAYWRIGHT_DISABLE_GPU', GERAPY_PLAYWRIGHT_DISABLE_GPU)
cls.devtools = settings.get(
'GERAPY_PLAYWRIGHT_DEVTOOLS', GERAPY_PLAYWRIGHT_DEVTOOLS)
cls.executable_path = settings.get(
'GERAPY_PLAYWRIGHT_EXECUTABLE_PATH', GERAPY_PLAYWRIGHT_EXECUTABLE_PATH)
cls.disable_extensions = settings.get('GERAPY_PLAYWRIGHT_DISABLE_EXTENSIONS',
GERAPY_PLAYWRIGHT_DISABLE_EXTENSIONS)
cls.hide_scrollbars = settings.get(
'GERAPY_PLAYWRIGHT_HIDE_SCROLLBARS', GERAPY_PLAYWRIGHT_HIDE_SCROLLBARS)
cls.mute_audio = settings.get(
'GERAPY_PLAYWRIGHT_MUTE_AUDIO', GERAPY_PLAYWRIGHT_MUTE_AUDIO)
cls.no_sandbox = settings.get(
'GERAPY_PLAYWRIGHT_NO_SANDBOX', GERAPY_PLAYWRIGHT_NO_SANDBOX)
cls.disable_setuid_sandbox = settings.get('GERAPY_PLAYWRIGHT_DISABLE_SETUID_SANDBOX',
GERAPY_PLAYWRIGHT_DISABLE_SETUID_SANDBOX)
cls.disable_gpu = settings.get(
'GERAPY_PLAYWRIGHT_DISABLE_GPU', GERAPY_PLAYWRIGHT_DISABLE_GPU)
cls.download_timeout = settings.get('GERAPY_PLAYWRIGHT_DOWNLOAD_TIMEOUT',
settings.get('DOWNLOAD_TIMEOUT', GERAPY_PLAYWRIGHT_DOWNLOAD_TIMEOUT))
# cls.ignore_resource_types = settings.get('GERAPY_PLAYWRIGHT_IGNORE_RESOURCE_TYPES',
Expand Down Expand Up @@ -176,35 +172,36 @@ async def _process_request(self, request, spider):

options = {
'headless': self.headless,
'args': [],
}
# if self.executable_path:
# options['executablePath'] = self.executable_path
# if self.ignore_https_errors:
# options['ignoreHTTPSErrors'] = self.ignore_https_errors
# if self.slow_mo:
# options['slowMo'] = self.slow_mo
# if self.ignore_default_args:
if self.executable_path is not None:
options['executablePath'] = self.executable_path
if self.slow_mo is not None:
options['slowMo'] = self.slow_mo
if self.devtools is not None:
options['devtools'] = self.devtools
if self.channel is not None:
options['channel'] = self.channel
# if self.ignore_default_args is not None:
# options['ignoreDefaultArgs'] = self.ignore_default_args
# if self.handle_sigint:
# options['handleSIGINT'] = self.handle_sigint
# if self.handle_sigterm:
# options['handleSIGTERM'] = self.handle_sigterm
# if self.handle_sighup:
# options['handleSIGHUP'] = self.handle_sighup
# if self.auto_close:
# options['autoClose'] = self.auto_close
# if self.disable_extensions:
# options['args'].append('--disable-extensions')
# if self.hide_scrollbars:
# options['args'].append('--hide-scrollbars')
# if self.mute_audio:
# options['args'].append('--mute-audio')
# if self.no_sandbox:
# options['args'].append('--no-sandbox')
# if self.disable_setuid_sandbox:
# options['args'].append('--disable-setuid-sandbox')
# if self.disable_gpu:
# options['args'].append('--disable-gpu')
if self.disable_extensions is not None:
options['args'].append('--disable-extensions')
if self.hide_scrollbars is not None:
options['args'].append('--hide-scrollbars')
if self.mute_audio is not None:
options['args'].append('--mute-audio')
if self.no_sandbox is not None:
options['args'].append('--no-sandbox')
if self.disable_setuid_sandbox is not None:
options['args'].append('--disable-setuid-sandbox')
if self.disable_gpu is not None:
options['args'].append('--disable-gpu')

# pretend as normal browser
_pretend = self.pretend # get global pretend setting
Expand Down
14 changes: 6 additions & 8 deletions gerapy_playwright/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,14 @@

# playwright settings
GERAPY_PLAYWRIGHT_HEADLESS = True
GERAPY_PLAYWRIGHT_CHANNEL = None
GERAPY_PLAYWRIGHT_EXECUTABLE_PATH = None
GERAPY_PLAYWRIGHT_IGNORE_HTTPS_ERRORS = False
GERAPY_PLAYWRIGHT_SLOW_MO = None
GERAPY_PLAYWRIGHT_IGNORE_DEFAULT_ARGS = False
GERAPY_PLAYWRIGHT_HANDLE_SIGINT = True
GERAPY_PLAYWRIGHT_HANDLE_SIGTERM = True
GERAPY_PLAYWRIGHT_HANDLE_SIGHUP = True
GERAPY_PLAYWRIGHT_DUMPIO = False
# GERAPY_PLAYWRIGHT_IGNORE_DEFAULT_ARGS = False
# GERAPY_PLAYWRIGHT_HANDLE_SIGINT = True
# GERAPY_PLAYWRIGHT_HANDLE_SIGTERM = True
# GERAPY_PLAYWRIGHT_HANDLE_SIGHUP = True
GERAPY_PLAYWRIGHT_DEVTOOLS = False
GERAPY_PLAYWRIGHT_AUTO_CLOSE = True
GERAPY_PLAYWRIGHT_PRETEND = True

# playwright args
Expand All @@ -41,5 +39,5 @@
# ``manifest``, ``other``.
GERAPY_PLAYWRIGHT_IGNORE_RESOURCE_TYPES = []
GERAPY_PLAYWRIGHT_SCREENSHOT = None
GERAPY_PLAYWRIGHT_SLEEP = 1
GERAPY_PLAYWRIGHT_SLEEP = 0
GERAPY_ENABLE_REQUEST_INTERCEPTION = False

0 comments on commit 3050eb2

Please sign in to comment.