Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 58 additions & 37 deletions integration-tests/tests/fixtures/integration_test_logs.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Session-scoped test log fixtures shared across integration tests."""

import logging
import shutil
import subprocess

import pytest
Expand All @@ -10,7 +9,10 @@
IntegrationTestLogs,
IntegrationTestPathConfig,
)
from tests.utils.utils import unlink
from tests.utils.utils import (
get_binary_path,
remove_path,
)

logger = logging.getLogger(__name__)

Expand All @@ -21,7 +23,7 @@ def hive_24hr(
integration_test_path_config: IntegrationTestPathConfig,
) -> IntegrationTestLogs:
"""Provides shared `hive_24hr` test logs."""
return _download_and_extract_dataset(
return _download_and_extract_gzip_dataset(
request=request,
integration_test_path_config=integration_test_path_config,
name="hive-24hr",
Expand All @@ -35,20 +37,34 @@ def postgresql(
integration_test_path_config: IntegrationTestPathConfig,
) -> IntegrationTestLogs:
"""Provides shared `postgresql` test logs."""
return _download_and_extract_dataset(
return _download_and_extract_gzip_dataset(
request=request,
integration_test_path_config=integration_test_path_config,
name="postgresql",
tarball_url="https://zenodo.org/records/10516402/files/postgresql.tar.gz?download=1",
)


def _download_and_extract_dataset(
def _download_and_extract_gzip_dataset(
request: pytest.FixtureRequest,
integration_test_path_config: IntegrationTestPathConfig,
name: str,
tarball_url: str,
keep_leading_dir: bool = False,
) -> IntegrationTestLogs:
"""
Download and extract a gzip-compressed dataset tarball for setting up the `IntegrationTestLogs`
fixture. Adjust its file permissions for test use.

:param request: Provides access to the pytest cache.
:param integration_test_path_config: See `IntegrationTestPathConfig`.
:param name: Dataset name.
:param tarball_url: Dataset tarball URL.
:param keep_leading_dir: Whether to preserve the top-level directory during tarball extraction.
Defaults to False to avoid an unnecessary extra directory level.
:return: An IntegrationTestLogs instance providing metadata for the downloaded logs.
:raises subprocess.CalledProcessError: If `curl`, `tar`, or `chmod` fails.
"""
integration_test_logs = IntegrationTestLogs(
name=name,
tarball_url=tarball_url,
Expand All @@ -58,38 +74,43 @@ def _download_and_extract_dataset(
logger.info("Test logs `%s` are up-to-date. Skipping download.", name)
return integration_test_logs

curl_bin = shutil.which("curl")
if curl_bin is None:
err_msg = "curl executable not found"
raise RuntimeError(err_msg)

try:
# fmt: off
curl_cmds = [
curl_bin,
"--fail",
"--location",
"--output", str(integration_test_logs.tarball_path),
"--show-error",
tarball_url,
]
# fmt: on
subprocess.run(curl_cmds, check=True)

unlink(integration_test_logs.extraction_dir)
shutil.unpack_archive(
integration_test_logs.tarball_path, integration_test_logs.extraction_dir
)
except Exception as e:
err_msg = f"Failed to download and extract dataset `{name}`."
raise RuntimeError(err_msg) from e

# Allow the extracted content to be deletable or overwritable
chmod_bin = shutil.which("chmod")
if chmod_bin is None:
err_msg = "chmod executable not found"
raise RuntimeError(err_msg)
subprocess.run([chmod_bin, "-R", "gu+w", integration_test_logs.extraction_dir], check=True)
remove_path(integration_test_logs.tarball_path)
remove_path(integration_test_logs.extraction_dir)
integration_test_logs.extraction_dir.mkdir(parents=True, exist_ok=False)

tarball_path_str = str(integration_test_logs.tarball_path)
extract_path_str = str(integration_test_logs.extraction_dir)

# fmt: off
curl_cmd = [
get_binary_path("curl"),
"--fail",
"--location",
"--output", tarball_path_str,
"--show-error",
tarball_url,
]
# fmt: on
subprocess.run(curl_cmd, check=True)

# fmt: off
extract_cmd = [
get_binary_path("tar"),
"--extract",
"--gzip",
"--file", tarball_path_str,
"--directory", extract_path_str,
]
# fmt: on
if not keep_leading_dir:
extract_cmd.extend(["--strip-components", "1"])
subprocess.run(extract_cmd, check=True)

# Allow the downloaded and extracted contents to be deletable or overwritable by adding write
# permissions for both the user and the group.
chmod_bin = get_binary_path("chmod")
subprocess.run([chmod_bin, "gu+w", tarball_path_str], check=True)
subprocess.run([chmod_bin, "-R", "gu+w", extract_path_str], check=True)

logger.info("Downloaded and extracted uncompressed logs for dataset `%s`.", name)
request.config.cache.set(name, True)
Expand Down
6 changes: 3 additions & 3 deletions integration-tests/tests/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
)

from tests.utils.utils import (
unlink,
remove_path,
validate_dir_exists,
validate_file_exists,
)
Expand Down Expand Up @@ -306,5 +306,5 @@ def __post_init__(self, integration_test_path_config: IntegrationTestPathConfig)

def clear_test_outputs(self) -> None:
"""Remove any existing output directories created by this compression test."""
unlink(self.compression_dir)
unlink(self.decompression_dir)
remove_path(self.compression_dir)
remove_path(self.decompression_dir)
42 changes: 18 additions & 24 deletions integration-tests/tests/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,24 @@ def load_yaml_to_dict(path: Path) -> dict[str, Any]:
return target_dict


def remove_path(path_to_remove: Path) -> None:
"""
Remove a file, directory, or symlink at `path_to_remove` if it exists.

:param path_to_remove:
:raise: Propagates `pathlib.Path.unlink`'s exceptions.
:raise: Propagates `shutil.rmtree`'s exceptions.
"""
if path_to_remove.is_symlink():
path_to_remove.unlink()
elif not path_to_remove.exists():
return
elif path_to_remove.is_dir():
shutil.rmtree(path_to_remove)
else:
path_to_remove.unlink()


def resolve_path_env_var(var_name: str) -> Path:
"""
:param var_name: Name of the environment variable holding a path.
Expand All @@ -104,30 +122,6 @@ def resolve_path_env_var(var_name: str) -> Path:
return Path(get_env_var(var_name)).expanduser().resolve()


def unlink(rm_path: Path, force: bool = True) -> None:
"""
Remove a file or directory at `path`.

:param rm_path:
:param force: Whether to force remove with sudo priviledges in case the normal operation fails.
Defaults to True.
"""
try:
shutil.rmtree(rm_path)
except FileNotFoundError:
pass
except PermissionError:
if not force:
raise

sudo_rm_cmds = ["sudo", "rm", "-rf", str(rm_path)]
try:
subprocess.run(sudo_rm_cmds, check=True)
except subprocess.CalledProcessError as e:
err_msg = f"Failed to remove {rm_path} due to lack of superuser privileges (sudo)."
raise OSError(err_msg) from e


def validate_dir_exists(dir_path: Path) -> None:
"""
:param dir_path:
Expand Down
Loading