Skip to content

Commit

Permalink
Streamline tests for speed (#416)
Browse files Browse the repository at this point in the history
* Introduce pytest-vcr to start recording requests

* Switch accessibility command to using the same utilities as screenshot

* Fix it up a bit more

* Add another one

* Another

* Add pytest-xdist

* Switch to pipenv sync for speed
  • Loading branch information
palewire authored Oct 9, 2023
1 parent a7cbb7f commit c4d1278
Show file tree
Hide file tree
Showing 16 changed files with 6,415 additions and 265 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/continuous-deployment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ jobs:

- id: install-python-dependencies
name: Install Python dependencies
run: pipenv install --dev --python=`which python`
run: pipenv sync --dev
shell: bash

- id: cache-playwright
Expand All @@ -74,7 +74,7 @@ jobs:

- id: test
name: Run tests
run: pipenv run xvfb-run pytest tests -sv --cov
run: pipenv run xvfb-run pytest tests -sv --cov -n auto --vcr-record=none
shell: bash
env:
IA_ACCESS_KEY: ${{ secrets.IA_ACCESS_KEY }}
Expand Down
2 changes: 2 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ xlwt = "*"
pytest-env = "*"
pytest-cov = "*"
pytest-optional-tests = "*"
pytest-vcr = "*"
pytest-xdist = "*"

[requires]
python_version = "3.9"
553 changes: 341 additions & 212 deletions Pipfile.lock

Large diffs are not rendered by default.

55 changes: 28 additions & 27 deletions newshomepages/accessibility.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import subprocess
import typing
"""Save the accessibility tree of the provided site."""
from __future__ import annotations

import json
from pathlib import Path

import click
from playwright.sync_api import sync_playwright
from playwright.sync_api._generated import BrowserContext
from retry import retry
from rich import print

Expand All @@ -12,43 +16,40 @@
@click.command()
@click.argument("handle")
@click.option("-o", "--output-dir", "output_dir", default="./")
@click.option("--timeout", "timeout", default="180")
def cli(handle: str, output_dir: str, timeout: str = "180"):
@click.option("--verbose", "verbose", default=False, is_flag=True)
def cli(handle, output_dir, verbose=False):
    """Save the accessibility tree of the provided site."""
# Get metadata
site = utils.get_site(handle)

# Set the output path
output_path = Path(output_dir) / f"{handle.lower()}.accessibility.json"
output_path = Path(output_dir) / f"{site['handle'].lower()}.accessibility.json"
output_path.parent.mkdir(parents=True, exist_ok=True)

# Do the thing
_get_accessibility(site, output_path, int(timeout))
if verbose:
print(f":newspaper: Fetching a11y tree from {site['url']}")
with sync_playwright() as p:
context = utils._load_persistent_context(p)
_get_accessibility(context, site["url"], site["handle"], output_path)
context.close()


@retry(tries=3, delay=5, backoff=2)
def _get_accessibility(data: typing.Dict, output_path: Path, timeout: int = 180):
def _get_accessibility(
context: BrowserContext, url: str, handle: str, output_path: Path
):
"""Run a command that fetches the accessibility tree for the provided site."""
print(f":newspaper: Fetching a11y tree from {data['url']}")

# Shoot the shot
command_list = [
"shot-scraper",
"accessibility",
data["url"],
"-o",
str(output_path),
"--timeout",
str(timeout * 1000), # Convert from seconds into milliseconds
]

# If there's a custom JavaScript include, toss that in
javascript = utils.get_javascript(data["handle"])
if javascript:
command_list.extend(["--javascript", javascript])

# Run the command
subprocess.run(command_list)
with open(output_path, "w") as fp:
page = utils._load_new_page_disable_javascript(
context=context,
url=url,
handle=handle,
)
snapshot = page.accessibility.snapshot()
page.close()
fp.write(json.dumps(snapshot, indent=4))
fp.write("\n")


if __name__ == "__main__":
Expand Down
23 changes: 19 additions & 4 deletions newshomepages/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def write_json(
@retry(tries=3, delay=15, backoff=2)
def get_url(
url: str, timeout: int = 30, user_agent: str | None = None, verbose: bool = False
):
) -> requests.Response:
"""Get the provided URL.
Args:
Expand Down Expand Up @@ -148,9 +148,24 @@ def get_url(
return r


def get_json_url(url: str):
"""Get JSON data from the provided URL."""
r = get_url(url)
def get_json_url(
    url: str, timeout: int = 30, user_agent: str | None = None, verbose: bool = False
) -> typing.Any:
    """Get JSON data from the provided URL.

    Thin wrapper around ``get_url`` that parses the response body as JSON.

    Args:
        url (str): The URL to request
        timeout (int): How long to wait before timing out
        user_agent (str): The user agent to provide in the request headers. None by default.
        verbose (bool): Whether or not to print a verbose output

    Returns:
        The JSON response as a Python object.
    """
    # Get the response (get_url retries on failure before raising)
    r = get_url(url, timeout=timeout, user_agent=user_agent, verbose=verbose)

    # Parse the body as JSON and return it
    return r.json()


Expand Down
24 changes: 16 additions & 8 deletions newshomepages/wayback.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
@click.command()
@click.argument("handle")
@click.option("-o", "--output-dir", "output_dir", default="./")
def cli(handle: str, output_dir: str):
@click.option("--verbose", "verbose", default=False, is_flag=True)
def cli(handle: str, output_dir: str, verbose: bool = False):
"""Archive a URL in the Wayback Machine."""
# Pull the source’s metadata
site = utils.get_site(handle)
Expand All @@ -25,7 +26,8 @@ def cli(handle: str, output_dir: str):
assert IA_SECRET_KEY

# Ask for a capture
print(f"🏛 Requesting a Wayback Machine capture of {site['url']}")
if verbose:
print(f"🏛 Requesting a Wayback Machine capture of {site['url']}")
capture_data = _post(site["url"])

# If we've got a message, we need to just give up now. They're not going to do it.
Expand All @@ -36,7 +38,8 @@ def cli(handle: str, output_dir: str):
tries = 1
while True:
# Give it a second (as recommended by the archive.org admins)
print("Waiting 6 seconds to request our job's status")
if verbose:
print("Waiting 6 seconds to request our job's status")
time.sleep(6)

# Check in our capture
Expand All @@ -45,26 +48,31 @@ def cli(handle: str, output_dir: str):

# If it's a success, we're done
if status_data["status"] == "success":
print("Success!")
if verbose:
print("Success!")
capture_data.update(status_data)
break
elif status_data["status"] == "pending":
# If it's not done, up our counter and restart the loop
print("The capture is still pending.")
if verbose:
print("The capture is still pending.")
tries += 1
            # Unless we're over our limit, then we quit
if tries >= 11:
print("10 tries have failed. We’re done here.")
if verbose:
print("10 tries have failed. We’re done here.")
break
elif status_data["status"] == "error":
# If there's an error, end it now
print("There's an error. Time to call it quits.")
if verbose:
print("There's an error. Time to call it quits.")
capture_data.update(status_data)
break

# Write it out
slug = site["handle"].lower()
utils.write_json(capture_data, Path(output_dir) / f"{slug}.wayback.json")
output_path = Path(output_dir) / f"{slug}.wayback.json"
utils.write_json(capture_data, output_path, verbose=verbose)


@retry(tries=3, delay=30, backoff=2)
Expand Down
3 changes: 0 additions & 3 deletions pytest.ini

This file was deleted.

Loading

0 comments on commit c4d1278

Please sign in to comment.