diff --git a/integration-tests/tests/fixtures/integration_test_logs.py b/integration-tests/tests/fixtures/integration_test_logs.py index 0a876f72e5..fe8688c027 100644 --- a/integration-tests/tests/fixtures/integration_test_logs.py +++ b/integration-tests/tests/fixtures/integration_test_logs.py @@ -1,7 +1,6 @@ """Session-scoped test log fixtures shared across integration tests.""" import logging -import shutil import subprocess import pytest @@ -10,7 +9,10 @@ IntegrationTestLogs, IntegrationTestPathConfig, ) -from tests.utils.utils import unlink +from tests.utils.utils import ( + get_binary_path, + remove_path, +) logger = logging.getLogger(__name__) @@ -21,7 +23,7 @@ def hive_24hr( integration_test_path_config: IntegrationTestPathConfig, ) -> IntegrationTestLogs: """Provides shared `hive_24hr` test logs.""" - return _download_and_extract_dataset( + return _download_and_extract_gzip_dataset( request=request, integration_test_path_config=integration_test_path_config, name="hive-24hr", @@ -35,7 +37,7 @@ def postgresql( integration_test_path_config: IntegrationTestPathConfig, ) -> IntegrationTestLogs: """Provides shared `postgresql` test logs.""" - return _download_and_extract_dataset( + return _download_and_extract_gzip_dataset( request=request, integration_test_path_config=integration_test_path_config, name="postgresql", @@ -43,12 +45,26 @@ def postgresql( ) -def _download_and_extract_dataset( +def _download_and_extract_gzip_dataset( request: pytest.FixtureRequest, integration_test_path_config: IntegrationTestPathConfig, name: str, tarball_url: str, + keep_leading_dir: bool = False, ) -> IntegrationTestLogs: + """ + Download and extract a gzip-compressed dataset tarball for setting up the `IntegrationTestLogs` + fixture. Adjust its file permissions for test use. + + :param request: Provides access to the pytest cache. + :param integration_test_path_config: See `IntegrationTestPathConfig`. + :param name: Dataset name. + :param tarball_url: Dataset tarball URL. + :param keep_leading_dir: Whether to preserve the top-level directory during tarball extraction. + Defaults to False to avoid an unnecessary extra directory level. + :return: An IntegrationTestLogs instance providing metadata for the downloaded logs. + :raises subprocess.CalledProcessError: If `curl`, `tar`, or `chmod` fails. + """ integration_test_logs = IntegrationTestLogs( name=name, tarball_url=tarball_url, @@ -58,38 +74,43 @@ def _download_and_extract_dataset( logger.info("Test logs `%s` are up-to-date. Skipping download.", name) return integration_test_logs - curl_bin = shutil.which("curl") - if curl_bin is None: - err_msg = "curl executable not found" - raise RuntimeError(err_msg) - - try: - # fmt: off - curl_cmds = [ - curl_bin, - "--fail", - "--location", - "--output", str(integration_test_logs.tarball_path), - "--show-error", - tarball_url, - ] - # fmt: on - subprocess.run(curl_cmds, check=True) - - unlink(integration_test_logs.extraction_dir) - shutil.unpack_archive( - integration_test_logs.tarball_path, integration_test_logs.extraction_dir - ) - except Exception as e: - err_msg = f"Failed to download and extract dataset `{name}`." - raise RuntimeError(err_msg) from e - - # Allow the extracted content to be deletable or overwritable - chmod_bin = shutil.which("chmod") - if chmod_bin is None: - err_msg = "chmod executable not found" - raise RuntimeError(err_msg) - subprocess.run([chmod_bin, "-R", "gu+w", integration_test_logs.extraction_dir], check=True) + remove_path(integration_test_logs.tarball_path) + remove_path(integration_test_logs.extraction_dir) + integration_test_logs.extraction_dir.mkdir(parents=True, exist_ok=False) + + tarball_path_str = str(integration_test_logs.tarball_path) + extract_path_str = str(integration_test_logs.extraction_dir) + + # fmt: off + curl_cmd = [ + get_binary_path("curl"), + "--fail", + "--location", + "--output", tarball_path_str, + "--show-error", + tarball_url, + ] + # fmt: on + subprocess.run(curl_cmd, check=True) + + # fmt: off + extract_cmd = [ + get_binary_path("tar"), + "--extract", + "--gzip", + "--file", tarball_path_str, + "--directory", extract_path_str, + ] + # fmt: on + if not keep_leading_dir: + extract_cmd.extend(["--strip-components", "1"]) + subprocess.run(extract_cmd, check=True) + + # Allow the downloaded and extracted contents to be deletable or overwritable by adding write + # permissions for both the user and the group. + chmod_bin = get_binary_path("chmod") + subprocess.run([chmod_bin, "gu+w", tarball_path_str], check=True) + subprocess.run([chmod_bin, "-R", "gu+w", extract_path_str], check=True) logger.info("Downloaded and extracted uncompressed logs for dataset `%s`.", name) request.config.cache.set(name, True) diff --git a/integration-tests/tests/utils/config.py b/integration-tests/tests/utils/config.py index 76a8e2d43a..b7bca4c3dd 100644 --- a/integration-tests/tests/utils/config.py +++ b/integration-tests/tests/utils/config.py @@ -14,7 +14,7 @@ ) from tests.utils.utils import ( - unlink, + remove_path, validate_dir_exists, validate_file_exists, ) @@ -306,5 +306,5 @@ def __post_init__(self, integration_test_path_config: IntegrationTestPathConfig) def clear_test_outputs(self) -> None: """Remove any existing output directories created by this compression test.""" - unlink(self.compression_dir) - unlink(self.decompression_dir) + remove_path(self.compression_dir) + remove_path(self.decompression_dir) diff --git a/integration-tests/tests/utils/utils.py b/integration-tests/tests/utils/utils.py index f58d3c70da..ad9d272cb3 100644 --- a/integration-tests/tests/utils/utils.py +++ b/integration-tests/tests/utils/utils.py @@ -93,6 +93,24 @@ def load_yaml_to_dict(path: Path) -> dict[str, Any]: return target_dict +def remove_path(path_to_remove: Path) -> None: + """ + Remove a file, directory, or symlink at `path_to_remove` if it exists. + + :param path_to_remove: + :raise: Propagates `pathlib.Path.unlink`'s exceptions. + :raise: Propagates `shutil.rmtree`'s exceptions. + """ + if path_to_remove.is_symlink(): + path_to_remove.unlink() + elif not path_to_remove.exists(): + return + elif path_to_remove.is_dir(): + shutil.rmtree(path_to_remove) + else: + path_to_remove.unlink() + + def resolve_path_env_var(var_name: str) -> Path: """ :param var_name: Name of the environment variable holding a path. @@ -104,30 +122,6 @@ def resolve_path_env_var(var_name: str) -> Path: return Path(get_env_var(var_name)).expanduser().resolve() -def unlink(rm_path: Path, force: bool = True) -> None: - """ - Remove a file or directory at `path`. - - :param rm_path: - :param force: Whether to force remove with sudo priviledges in case the normal operation fails. - Defaults to True. - """ - try: - shutil.rmtree(rm_path) - except FileNotFoundError: - pass - except PermissionError: - if not force: - raise - - sudo_rm_cmds = ["sudo", "rm", "-rf", str(rm_path)] - try: - subprocess.run(sudo_rm_cmds, check=True) - except subprocess.CalledProcessError as e: - err_msg = f"Failed to remove {rm_path} due to lack of superuser privileges (sudo)." - raise OSError(err_msg) from e - - def validate_dir_exists(dir_path: Path) -> None: """ :param dir_path: