diff --git a/app/client/build_runner.py b/app/client/build_runner.py index b232dcc..331b244 100644 --- a/app/client/build_runner.py +++ b/app/client/build_runner.py @@ -146,8 +146,8 @@ def _download_and_extract_results(self, timeout=None): """ timeout_time = time.time() + timeout if timeout else sys.maxsize - download_artifacts_url = self._master_api.url('build', self._build_id, 'result') - download_filepath = 'build_results/artifacts.tar.gz' + download_artifacts_url = self._master_api.url('build', self._build_id, 'artifacts.zip') + download_filepath = 'build_results/artifacts.zip' download_dir, _ = os.path.split(download_filepath) # remove any previous build artifacts @@ -164,7 +164,7 @@ def _download_and_extract_results(self, timeout=None): for chunk in response.iter_content(chunk_size): file.write(chunk) - app.util.fs.extract_tar(download_filepath, delete=True) + app.util.fs.unzip_directory(download_filepath, delete=True) return time.sleep(1) diff --git a/app/client/cluster_api_client.py b/app/client/cluster_api_client.py index a442470..7397ca5 100644 --- a/app/client/cluster_api_client.py +++ b/app/client/cluster_api_client.py @@ -61,7 +61,7 @@ def get_build_artifacts(self, build_id): :return: tuple of (the artifact tarball, status code) :rtype: tuple of (bytes, int) """ - artifacts_url = self._api.url('build', build_id, 'artifacts.tar.gz') + artifacts_url = self._api.url('build', build_id, 'artifacts.zip') response = self._network.get(artifacts_url) return response.content, response.status_code diff --git a/app/master/build.py b/app/master/build.py index b9b044e..3802473 100644 --- a/app/master/build.py +++ b/app/master/build.py @@ -3,8 +3,9 @@ import os from queue import Queue, Empty import shutil -import time +import tempfile from threading import Lock +import time import uuid from app.master.build_artifact import BuildArtifact @@ -44,7 +45,8 @@ def __init__(self, build_request): self._logger = get_logger(__name__) self._build_id = self._build_id_counter.increment() self._build_request = build_request - self._artifacts_archive_file = None + self._artifacts_tar_file = None # DEPRECATED - Use zip file instead + self._artifacts_zip_file = None self._build_artifact = None self._error_message = None @@ -81,7 +83,6 @@ def api_representation(self): return { 'id': self._build_id, 'status': build_state, - 'artifacts': self._artifacts_archive_file, # todo: this should probably be a url, not a file path 'details': self._detail_message, 'error_message': self._error_message, 'num_atoms': self._num_atoms, @@ -351,8 +352,19 @@ def project_type(self): return self._project_type @property - def artifacts_archive_file(self): - return self._artifacts_archive_file + def artifacts_zip_file(self): + """Return the local path to the artifacts zip archive.""" + return self._artifacts_zip_file + + @property + def artifacts_tar_file(self): + """ + DEPRECATED: We are transitioning to zip files from tar.gz files for artifacts. + Return the local path to the artifacts tar.gz archive. + """ + self._logger.warning('The tar format for build artifact files is deprecated. File: {}', + self._artifacts_tar_file) + return self._artifacts_tar_file # WIP(joey): Change some of these private @properties to methods. @property @@ -452,8 +464,13 @@ def _create_build_artifact(self): self._build_artifact = BuildArtifact(self._build_results_dir()) self._build_artifact.generate_failures_file() self._build_artifact.write_timing_data(self._timing_file_path, self._read_subjob_timings_from_results()) - self._artifacts_archive_file = app.util.fs.tar_directory(self._build_results_dir(), - BuildArtifact.ARTIFACT_TARFILE_NAME) + self._artifacts_zip_file = app.util.fs.zip_directory(self._build_results_dir(), + BuildArtifact.ARTIFACT_ZIPFILE_NAME) + # Temporarily move aside zip file so we can create a tar file, then move it back. + temp_zip_path = shutil.move(self._artifacts_zip_file, tempfile.mktemp()) + self._artifacts_tar_file = app.util.fs.tar_directory(self._build_results_dir(), + BuildArtifact.ARTIFACT_TARFILE_NAME) + shutil.move(temp_zip_path, self._artifacts_zip_file) def _delete_temporary_build_artifact_files(self): """ @@ -465,8 +482,8 @@ def _delete_temporary_build_artifact_files(self): build_result_dir = self._build_results_dir() start_time = time.time() for path in os.listdir(build_result_dir): - # The build result tar-ball is also stored in this same directory, so we must not delete it. - if path == BuildArtifact.ARTIFACT_TARFILE_NAME: + # The build result archive is also stored in this same directory, so we must not delete it. + if path in (BuildArtifact.ARTIFACT_TARFILE_NAME, BuildArtifact.ARTIFACT_ZIPFILE_NAME): continue full_path = os.path.join(build_result_dir, path) # Do NOT use app.util.fs.async_delete() here. That call will generate a temp directory for every diff --git a/app/master/build_artifact.py b/app/master/build_artifact.py index fa4210d..fd2494c 100644 --- a/app/master/build_artifact.py +++ b/app/master/build_artifact.py @@ -15,6 +15,7 @@ class BuildArtifact(object): OUTPUT_FILE = 'clusterrunner_console_output' TIMING_FILE = 'clusterrunner_time' ARTIFACT_TARFILE_NAME = 'results.tar.gz' + ARTIFACT_ZIPFILE_NAME = 'results.zip' def __init__(self, build_artifact_dir): """ @@ -76,7 +77,7 @@ def get_failed_subjob_and_atom_ids(self): def _get_failed_artifact_directories(self): """ :return: A list of build-artifact relative paths to the failed artifact directories (e.g. artifact_0_0). - :rtype list[str] + :rtype: list[str] """ if self._failed_artifact_directories is None: if not os.path.isdir(self.build_artifact_dir): diff --git a/app/master/cluster_master.py b/app/master/cluster_master.py index e940b58..01db571 100644 --- a/app/master/cluster_master.py +++ b/app/master/cluster_master.py @@ -296,20 +296,19 @@ def get_build(self, build_id): return build - def get_path_for_build_results_archive(self, build_id): + def get_path_for_build_results_archive(self, build_id: int, is_tar_request: bool=False) -> str: """ Given a build id, get the absolute file path for the archive file containing the build results. :param build_id: The build id for which to retrieve the artifacts archive file - :type build_id: int + :param is_tar_request: If true, download the tar.gz archive instead of a zip. :return: The path to the archived results file - :rtype: str """ - build = self._all_builds_by_id.get(build_id) + build = self._all_builds_by_id.get(build_id) # type: Build if build is None: raise ItemNotFoundError('Invalid build id.') - archive_file = build.artifacts_archive_file + archive_file = build.artifacts_tar_file if is_tar_request else build.artifacts_zip_file if archive_file is None: raise ItemNotReadyError('Build artifact file is not yet ready. Try again later.') diff --git a/app/util/fs.py b/app/util/fs.py index abbd58f..96db8bd 100644 --- a/app/util/fs.py +++ b/app/util/fs.py @@ -120,3 +120,34 @@ def tar_directories(target_dirs_to_archive_paths, tarfile_path): for dir_path, archive_name in target_dirs_to_archive_paths.items(): target_dir = os.path.normpath(dir_path) tar.add(target_dir, arcname=archive_name) + + +def zip_directory(target_dir: str, archive_filename: str) -> str: + """ + Zip up the specified directory and stick the resulting zip file in that directory. + :param target_dir: the directory to zip and the location of the resulting zip file + :param archive_filename: filename for the created zip file + :return: the full path to the created zip archive file + """ + # Create the archive in a temp location and then move it to the target dir. + # (Otherwise the resulting archive will include an extra zero-byte file.) + tmp_path = shutil.make_archive(tempfile.mktemp(), 'zip', target_dir) + target_path = os.path.join(target_dir, archive_filename) + shutil.move(tmp_path, target_path) + return target_path + + +def unzip_directory(archive_file: str, target_dir: str=None, delete: bool=False): + """ + Extract the specified zip file. + :param archive_file: the zip archive file to extract + :param target_dir: the directory in which to extract; defaults to same as archive file + :param delete: whether to delete the zip archive file after unpacking + """ + if not target_dir: + target_dir, _ = os.path.split(archive_file) # default to same directory as archive file + + shutil.unpack_archive(archive_file, target_dir, 'zip') + + if delete: + os.remove(archive_file) diff --git a/app/web_framework/cluster_master_application.py b/app/web_framework/cluster_master_application.py index 2395448..1568c08 100644 --- a/app/web_framework/cluster_master_application.py +++ b/app/web_framework/cluster_master_application.py @@ -42,7 +42,8 @@ def __init__(self, cluster_master): RouteNode(r'build', _BuildsHandler, 'builds').add_children([ RouteNode(r'(\d+)', _BuildHandler, 'build').add_children([ RouteNode(r'result', _BuildResultRedirectHandler), - RouteNode(r'artifacts.tar.gz', _BuildResultHandler), + RouteNode(r'artifacts.tar.gz', _BuildTarResultHandler), + RouteNode(r'artifacts.zip', _BuildZipResultHandler), RouteNode(r'subjob', _SubjobsHandler, 'subjobs').add_children([ RouteNode(r'(\d+)', _SubjobHandler, 'subjob').add_children([ RouteNode(r'atom', _AtomsHandler, 'atoms').add_children([ @@ -286,18 +287,35 @@ def initialize(self, route_node=None, cluster_master=None): :param route_node: This is not used, it is only a param so we can pass route_node to all handlers without error. In other routes, route_node is used to find child routes but filehandler routes will never show child routes. :type route_node: RouteNode | None - :type cluster_master: ClusterMaster | None + :type cluster_master: app.master.cluster_master.ClusterMaster | None """ self._cluster_master = cluster_master super().initialize(path=None) # we will not set the root path until the get() method is called @request_latency.time() - def get(self, build_id, path=None): - artifact_file_path = self._cluster_master.get_path_for_build_results_archive(int(build_id)) + def get(self, build_id): + artifact_file_path = self.get_result_file_download_path(int(build_id)) self.root, artifact_filename = os.path.split(artifact_file_path) self.set_header('Content-Type', 'application/octet-stream') # this should be downloaded as a binary file return super().get(path=artifact_filename) + def get_result_file_download_path(self, build_id: int): + raise NotImplementedError + + +class _BuildTarResultHandler(_BuildResultHandler): + """Handler for the tar archive file""" + def get_result_file_download_path(self, build_id: int): + """Get the file path to the artifacts.tar.gz for the specified build.""" + return self._cluster_master.get_path_for_build_results_archive(build_id, is_tar_request=True) + + +class _BuildZipResultHandler(_BuildResultHandler): + """Handler for the zip archive file""" + def get_result_file_download_path(self, build_id: int): + """Get the file path to the artifacts.zip for the specified build.""" + return self._cluster_master.get_path_for_build_results_archive(build_id) + class _SlavesHandler(_ClusterMasterBaseAPIHandler): @request_latency.time() diff --git a/requirements.txt b/requirements.txt index 1373b03..aebd739 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ # Install with "pip install -r requirements.txt" +astroid==1.4.0 # for pylint configobj==5.0.6 coverage==3.7.1 cx_Freeze==4.3.3 diff --git a/test/framework/base_unit_test_case.py b/test/framework/base_unit_test_case.py index 3a0469b..2964e91 100644 --- a/test/framework/base_unit_test_case.py +++ b/test/framework/base_unit_test_case.py @@ -206,10 +206,13 @@ def _blacklist_methods_not_allowed_in_unit_tests(self): 'os.chmod', 'os.chown', 'os.fchmod', 'os.fchown', 'os.fsync', 'os.ftruncate', 'os.lchown', 'os.link', 'os.lockf', 'os.mkdir', 'os.mkfifo', 'os.mknod', 'os.open', 'os.openpty', 'os.makedirs', 'os.remove', 'os.rename', 'os.replace', 'os.rmdir', 'os.symlink', 'os.unlink', - 'shutil.rmtree', + 'shutil.rmtree', 'shutil.move', + 'tempfile.mktemp', 'app.util.fs.extract_tar', 'app.util.fs.tar_directory', 'app.util.fs.tar_directories', + 'app.util.fs.zip_directory', + 'app.util.fs.unzip_directory', 'app.util.fs.create_dir', 'app.util.fs.write_file', ], diff --git a/test/framework/functional/base_functional_test_case.py b/test/framework/functional/base_functional_test_case.py index 9433823..fc75fb4 100644 --- a/test/framework/functional/base_functional_test_case.py +++ b/test/framework/functional/base_functional_test_case.py @@ -6,12 +6,12 @@ import tempfile from unittest import TestCase -from app.util import log -from app.util.fs import create_dir, extract_tar +from app.util import fs, log from app.util.process_utils import is_windows from app.util.network import Network from app.util.secret import Secret from app.master.build_artifact import BuildArtifact +from app.util.url_builder import UrlBuilder from test.framework.functional.fs_item import Directory from test.framework.functional.functional_test_cluster import FunctionalTestCluster, TestClusterTimeoutError @@ -31,28 +31,6 @@ def setUp(self): self.cluster = FunctionalTestCluster(verbose=self._get_test_verbosity()) self._network = Network() - def _create_test_config_file(self, conf_values_to_set=None): - """ - Create a temporary conf file just for this test. - - :return: The path to the conf file - :rtype: str - """ - # Copy default conf file to tmp location - repo_dir = path.dirname(path.dirname(path.dirname(path.dirname(path.realpath(__file__))))) - self._conf_template_path = path.join(repo_dir, 'conf', 'default_clusterrunner.conf') - test_conf_file_path = tempfile.NamedTemporaryFile().name - shutil.copy(self._conf_template_path, test_conf_file_path) - os.chmod(test_conf_file_path, ConfigFile.CONFIG_FILE_MODE) - conf_file = ConfigFile(test_conf_file_path) - - # Set custom conf file values for this test - conf_values_to_set = conf_values_to_set or {} - for conf_key, conf_value in conf_values_to_set.items(): - conf_file.write_value(conf_key, conf_value, BASE_CONFIG_FILE_SECTION) - - return test_conf_file_path - def tearDown(self): # Give the cluster a bit of extra time to finish working (before forcefully killing it and failing the test) with suppress(TestClusterTimeoutError): @@ -158,7 +136,12 @@ def assert_build_artifact_contents_match_expected(self, master_api, build_id, ex :type expected_build_artifact_contents: list[FSItem] """ with tempfile.TemporaryDirectory() as build_artifacts_dir_path: - self._download_and_extract_results(master_api, build_id, build_artifacts_dir_path) + self._download_and_extract_zip_results(master_api, build_id, build_artifacts_dir_path) + self.assert_directory_contents_match_expected(build_artifacts_dir_path, expected_build_artifact_contents) + + # Also check the tar archive even though it is deprecated. + with tempfile.TemporaryDirectory() as build_artifacts_dir_path: + self._download_and_extract_tar_results(master_api, build_id, build_artifacts_dir_path) self.assert_directory_contents_match_expected(build_artifacts_dir_path, expected_build_artifact_contents) def assert_directory_contents_match_expected(self, dir_path, expected_dir_contents): @@ -176,7 +159,7 @@ def assert_directory_contents_match_expected(self, dir_path, expected_dir_conten expected_build_artifacts = Directory(expected_dir_name, expected_dir_contents) expected_build_artifacts.assert_matches_path(dir_path, allow_extra_items=False) - def _download_and_extract_results(self, master_api, build_id, download_dir): + def _download_and_extract_tar_results(self, master_api, build_id, download_dir): """ :type master_api: app.util.url_builder.UrlBuilder :type build_id: int @@ -193,4 +176,19 @@ def _download_and_extract_results(self, master_api, build_id, download_dir): for chunk in response.iter_content(chunk_size): file.write(chunk) - extract_tar(download_filepath, delete=True) + fs.extract_tar(download_filepath, delete=True) + + def _download_and_extract_zip_results(self, master_api: UrlBuilder, build_id: int, download_dir: str): + """Download the artifacts.zip from the master and extract it.""" + download_artifacts_url = master_api.url('build', build_id, 'artifacts.zip') + download_filepath = os.path.join(download_dir, BuildArtifact.ARTIFACT_ZIPFILE_NAME) + response = self._network.get(download_artifacts_url) + + if response.status_code == http.client.OK: + # save file to disk, decompress, and delete + with open(download_filepath, 'wb') as file: + chunk_size = 500 * 1024 + for chunk in response.iter_content(chunk_size): + file.write(chunk) + + fs.unzip_directory(download_filepath, delete=True) diff --git a/test/functional/test_cluster_basic.py b/test/functional/test_cluster_basic.py index b6c074c..54463d5 100644 --- a/test/functional/test_cluster_basic.py +++ b/test/functional/test_cluster_basic.py @@ -60,6 +60,7 @@ def test_git_type_demo_project_config(self): for i in range(10) ] expected_artifact_contents.append(File(BuildArtifact.ARTIFACT_TARFILE_NAME)) + expected_artifact_contents.append(File(BuildArtifact.ARTIFACT_ZIPFILE_NAME)) self.assert_build_has_successful_status(build_id=build_id) self.assert_build_status_contains_expected_data( diff --git a/test/unit/master/test_build.py b/test/unit/master/test_build.py index 446eadd..898a634 100644 --- a/test/unit/master/test_build.py +++ b/test/unit/master/test_build.py @@ -39,6 +39,8 @@ def setUp(self): self.mock_util = self.patch('app.master.build.app.util') # stub out util - it often interacts with the fs self.mock_open = self.patch('app.master.build.open', autospec=False, create=True) self.mock_listdir = self.patch('os.listdir') + self.patch('tempfile.mktemp') + self.patch('shutil.move') self.scheduler_pool = BuildSchedulerPool() def test_allocate_slave_calls_slave_setup(self): @@ -430,7 +432,11 @@ def test_get_failed_atoms_returns_failed_atoms_only(self): def test_delete_temporary_build_artifact_files_skips_results_tarball(self): build = self._create_test_build(BuildStatus.BUILDING) - self.mock_listdir.return_value = ['some_dir1', BuildArtifact.ARTIFACT_TARFILE_NAME] + self.mock_listdir.return_value = [ + 'some_dir1', + BuildArtifact.ARTIFACT_TARFILE_NAME, + BuildArtifact.ARTIFACT_ZIPFILE_NAME, + ] expected_async_delete_call_path = join(build._build_results_dir(), 'some_dir1') self.patch('os.path.isdir').return_value = True mock_shutil = self.patch('app.master.build.shutil')