Skip to content

Commit

Permalink
export/html2pdf: use the cache dir from project config
Browse files Browse the repository at this point in the history
  • Loading branch information
stanislaw committed Nov 2, 2024
1 parent 711b1f4 commit fcc382f
Show file tree
Hide file tree
Showing 16 changed files with 61 additions and 29 deletions.
47 changes: 33 additions & 14 deletions strictdoc/export/html2pdf/html2pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,6 @@
from webdriver_manager.core.http import HttpClient
from webdriver_manager.core.os_manager import OperationSystemManager

STRICTDOC_CACHE_DIR = os.getenv("STRICTDOC_CACHE_DIR")
if STRICTDOC_CACHE_DIR is not None:
PATH_TO_CACHE_DIR = STRICTDOC_CACHE_DIR
else:
PATH_TO_CACHE_DIR = os.path.join(
tempfile.gettempdir(), "strictdoc_cache", "chromedriver"
)
PATH_TO_CHROMEDRIVER_DIR = os.path.join(PATH_TO_CACHE_DIR, "chromedriver")

# HTML2PDF.js prints unicode symbols to console. The following makes it work on
# Windows which otherwise complains:
# UnicodeEncodeError: 'charmap' codec can't encode characters in position 129-130: character maps to <undefined>
Expand Down Expand Up @@ -66,15 +57,23 @@ def get(self, url, params=None, **kwargs) -> Response:


class HTML2PDF_CacheManager(DriverCacheManager):
def __init__(self, file_manager: FileManager, path_to_cache_dir: str) -> None:
    """
    Create a cache manager that remembers the given cache directory.

    file_manager is forwarded unchanged to the parent DriverCacheManager
    constructor. path_to_cache_dir is only stored on the instance here; it
    is read later by find_driver() to build the cached Chrome driver path.
    """
    super().__init__(file_manager=file_manager)
    # Root cache directory supplied by the caller (no validation is done here).
    self.path_to_cache_dir: str = path_to_cache_dir

def find_driver(self, driver: Driver):
path_to_cached_chrome_driver_dir = os.path.join(
self.path_to_cache_dir, "chromedriver"
)

os_type = self.get_os_type()
browser_type = driver.get_browser_type()
browser_version = self._os_system_manager.get_browser_version_from_os(
browser_type
)

path_to_cached_chrome_driver_dir = os.path.join(
PATH_TO_CHROMEDRIVER_DIR, browser_version, os_type
path_to_cached_chrome_driver_dir, browser_version, os_type
)
path_to_cached_chrome_driver = os.path.join(
path_to_cached_chrome_driver_dir, "chromedriver"
Expand All @@ -91,6 +90,10 @@ def find_driver(self, driver: Driver):
)
path_to_downloaded_chrome_driver = super().find_driver(driver)
if path_to_downloaded_chrome_driver is None:
print( # noqa: T201
f"HTML2PDF_CacheManager: could not get a downloaded Chrome driver: "
f"{path_to_cached_chrome_driver}"
)
return None

print( # noqa: T201
Expand Down Expand Up @@ -153,11 +156,14 @@ def get_pdf_from_html(driver, url) -> bytes:
return data


def create_webdriver(chromedriver: Optional[str]):
def create_webdriver(chromedriver: Optional[str], path_to_cache_dir: str):
print("HTML2PDF: creating Chrome Driver service.", flush=True) # noqa: T201
if chromedriver is None:
cache_manager = HTML2PDF_CacheManager(
file_manager=FileManager(os_system_manager=OperationSystemManager())
file_manager=FileManager(
os_system_manager=OperationSystemManager()
),
path_to_cache_dir=path_to_cache_dir,
)

http_client = HTML2PDF_HTTPClient()
Expand Down Expand Up @@ -207,14 +213,27 @@ def main():
type=str,
help="Optional chromedriver path. Downloaded if not given.",
)
parser.add_argument(
"--cache-dir",
type=str,
help="Optional path to a cache directory whereto the Chrome driver is downloaded.",
)
parser.add_argument("paths", help="Paths to input HTML file.")
args = parser.parse_args()

paths = args.paths

separate_path_pairs = paths.split(";")

driver = create_webdriver(args.chromedriver)
path_to_cache_dir: str = (
args.cache_dir
if args.cache_dir is not None
else (
os.path.join(
tempfile.gettempdir(), "strictdoc_cache", "chromedriver"
)
)
)
driver = create_webdriver(args.chromedriver, path_to_cache_dir)

@atexit.register
def exit_handler():
Expand Down
9 changes: 8 additions & 1 deletion strictdoc/export/html2pdf/pdf_print_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,16 @@ def get_pdf_from_html(
sys.executable,
environment.get_path_to_html2pdf(),
paths_to_print,
"--cache-dir",
project_config.get_path_to_cache_dir(),
]
if project_config.chromedriver is not None:
cmd.extend(["--chromedriver", project_config.chromedriver])
cmd.extend(
[
"--chromedriver",
project_config.chromedriver,
]
)
with measure_performance(
"PDFPrintDriver: printing HTML to PDF using HTML2PDF and Chrome Driver"
):
Expand Down
4 changes: 0 additions & 4 deletions tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import os
import re
import sys
import tempfile
from enum import Enum
from typing import Optional

Expand Down Expand Up @@ -345,12 +344,9 @@ def test_integration(
chromedriver_param = f"--param CHROMEDRIVER={os.path.join(chromedriver_path, 'chromedriver')}"
test_folder = f"{cwd}/tests/integration/features/html2pdf"

strictdoc_cache_dir = os.path.join(tempfile.gettempdir(), "strictdoc_cache")

itest_command = f"""
lit
--param STRICTDOC_EXEC="{strictdoc_exec}"
--param STRICTDOC_CACHE_DIR="{strictdoc_cache_dir}"
{html2pdf_param}
{chromedriver_param}
-v
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
[project]

cache_dir = "./Output/cache"

features = [
"HTML2PDF",
]
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ REQUIRES: TEST_HTML2PDF
# FIXME: Getting timeouts on Windows CI all the time. Needs to be checked or tested by users.
REQUIRES: PLATFORM_IS_NOT_WINDOWS

RUN: STRICTDOC_CACHE_DIR=%strictdoc_cache_dir %strictdoc export %S --formats=html2pdf --output-dir Output | filecheck %s --dump-input=fail
RUN: %strictdoc export %S --formats=html2pdf --output-dir Output | filecheck %s --dump-input=fail
CHECK: HTML2PDF: JS logs from the print session

RUN: %check_exists --file %S/Output/html2pdf/pdf/input.pdf
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
[project]

cache_dir = "./Output/cache"

features = [
"HTML2PDF",
]
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ REQUIRES: TEST_HTML2PDF
# FIXME: Getting timeouts on Windows CI all the time. Needs to be checked or tested by users.
REQUIRES: PLATFORM_IS_NOT_WINDOWS

RUN: STRICTDOC_CACHE_DIR=%strictdoc_cache_dir %strictdoc export %S --formats=html2pdf --output-dir Output | filecheck %s --dump-input=fail
RUN: %strictdoc export %S --formats=html2pdf --output-dir Output | filecheck %s --dump-input=fail
CHECK: HTML2PDF: JS logs from the print session

RUN: %check_exists --file %S/Output/html2pdf/pdf/input.pdf
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
[project]

cache_dir = "./Output/cache"

features = [
"HTML2PDF",
]
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ REQUIRES: TEST_HTML2PDF
# FIXME: Getting timeouts on Windows CI all the time. Needs to be checked or tested by users.
REQUIRES: PLATFORM_IS_NOT_WINDOWS

RUN: STRICTDOC_CACHE_DIR=%strictdoc_cache_dir %strictdoc export %S --formats=html2pdf --output-dir Output | filecheck %s --dump-input=fail
RUN: %strictdoc export %S --formats=html2pdf --output-dir Output | filecheck %s --dump-input=fail
CHECK: HTML2PDF: JS logs from the print session

RUN: %check_exists --file %S/Output/html2pdf/html/03_three_documents_with_assets/input.html
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
[project]

cache_dir = "./Output/cache"

features = [
"HTML2PDF",
]
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ REQUIRES: TEST_HTML2PDF
# FIXME: Getting timeouts on Windows CI all the time. Needs to be checked or tested by users.
REQUIRES: PLATFORM_IS_NOT_WINDOWS

RUN: STRICTDOC_CACHE_DIR=%strictdoc_cache_dir %strictdoc export %S --formats=html2pdf --output-dir Output | filecheck %s --dump-input=fail
RUN: %strictdoc export %S --formats=html2pdf --output-dir Output | filecheck %s --dump-input=fail
CHECK: HTML2PDF: JS logs from the print session

RUN: %check_exists --file %S/Output/html2pdf/html/04_composable_document_with_assets/input.html
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
[project]

cache_dir = "./Output/cache"

features = [
"HTML2PDF",
]
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ REQUIRES: TEST_HTML2PDF
# FIXME: Getting timeouts on Windows CI all the time. Needs to be checked or tested by users.
REQUIRES: PLATFORM_IS_NOT_WINDOWS

RUN: STRICTDOC_CACHE_DIR=%strictdoc_cache_dir %strictdoc export %S --formats=html2pdf --generate-bundle-document --output-dir Output | filecheck %s --dump-input=fail
RUN: %strictdoc export %S --formats=html2pdf --generate-bundle-document --output-dir Output | filecheck %s --dump-input=fail
CHECK: HTML2PDF: JS logs from the print session

RUN: %check_exists --file %S/Output/html2pdf/html/bundle.html
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
[project]

cache_dir = "./Output/cache"

features = [
"HTML2PDF",
]
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
REQUIRES: TEST_HTML2PDF
REQUIRES: SYSTEM_CHROMEDRIVER

# FIXME: Getting timeouts on Windows CI all the time. Needs to be checked or tested by users.
REQUIRES: PLATFORM_IS_NOT_WINDOWS

# GitHub runner images provide a chromedriver and export its installed location, see
# https://github.com/actions/runner-images/blob/main/images/ubuntu/Ubuntu2404-Readme.md#browsers-and-drivers
RUN: STRICTDOC_CACHE_DIR=%strictdoc_cache_dir %strictdoc export %S --formats=html2pdf --chromedriver=%chromedriver --output-dir Output | filecheck %s --dump-input=fail
RUN: %strictdoc export %S --formats=html2pdf --chromedriver=%chromedriver --output-dir Output | filecheck %s --dump-input=fail
CHECK: HTML2PDF: JS logs from the print session
CHECK-NOT: HTML2PDF: Chrome Driver available at path: {{.*}}strictdoc_cache{{.*}}

Expand Down
5 changes: 1 addition & 4 deletions tests/integration/lit.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,11 @@ current_dir = os.getcwd()
strictdoc_exec = lit_config.params['STRICTDOC_EXEC']
assert(strictdoc_exec)

strictdoc_cache_dir = lit_config.params['STRICTDOC_CACHE_DIR']
assert(strictdoc_cache_dir)

# NOTE: Substitutions are applied to RUN: statements only; they are not applied to CHECK: lines.
# That's how LLVM LIT works.
config.substitutions.append(('%THIS_TEST_FOLDER', '$(basename "%S")'))

config.substitutions.append(('%strictdoc_root', current_dir))
config.substitutions.append(('%strictdoc_cache_dir', strictdoc_cache_dir))
config.substitutions.append(('%strictdoc', strictdoc_exec))

config.substitutions.append(('%cat', 'python \"{}/tests/integration/cat.py\"'.format(current_dir)))
Expand Down Expand Up @@ -50,3 +46,4 @@ if "TEST_HTML2PDF" in lit_config.params:
if "CHROMEDRIVER" in lit_config.params:
chromedriver = lit_config.params['CHROMEDRIVER']
config.substitutions.append(('%chromedriver', chromedriver))
config.available_features.add('SYSTEM_CHROMEDRIVER')

0 comments on commit fcc382f

Please sign in to comment.