Skip to content

Commit

Permalink
Deprecate build result tar.gz in favor of zip (#335)
Browse files Browse the repository at this point in the history
This adds an artifacts.zip with exactly the same contents as the
existing artifacts.tar.gz. The tar.gz artifact format is now
deprecated and clients should update to use the zip file instead.

The benefit of using a zip is that it will be easier to fix a bug
around loading console output from completed builds. By having a zip
archive of the build results on the master, we'll be able to load
console output directly from the master without redirecting to the
slave that the build ran on. The fix for this issue is coming in a
follow-up change.

Fixes #329.
  • Loading branch information
josephharrington authored Apr 29, 2017
1 parent 145bf70 commit ea71b49
Show file tree
Hide file tree
Showing 12 changed files with 127 additions and 52 deletions.
6 changes: 3 additions & 3 deletions app/client/build_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,8 @@ def _download_and_extract_results(self, timeout=None):
"""
timeout_time = time.time() + timeout if timeout else sys.maxsize

download_artifacts_url = self._master_api.url('build', self._build_id, 'result')
download_filepath = 'build_results/artifacts.tar.gz'
download_artifacts_url = self._master_api.url('build', self._build_id, 'artifacts.zip')
download_filepath = 'build_results/artifacts.zip'
download_dir, _ = os.path.split(download_filepath)

# remove any previous build artifacts
Expand All @@ -164,7 +164,7 @@ def _download_and_extract_results(self, timeout=None):
for chunk in response.iter_content(chunk_size):
file.write(chunk)

app.util.fs.extract_tar(download_filepath, delete=True)
app.util.fs.unzip_directory(download_filepath, delete=True)
return

time.sleep(1)
Expand Down
2 changes: 1 addition & 1 deletion app/client/cluster_api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def get_build_artifacts(self, build_id):
:return: tuple of (the artifact tarball, status code)
:rtype: tuple of (bytes, int)
"""
artifacts_url = self._api.url('build', build_id, 'artifacts.tar.gz')
artifacts_url = self._api.url('build', build_id, 'artifacts.zip')
response = self._network.get(artifacts_url)
return response.content, response.status_code

Expand Down
35 changes: 26 additions & 9 deletions app/master/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
import os
from queue import Queue, Empty
import shutil
import time
import tempfile
from threading import Lock
import time
import uuid

from app.master.build_artifact import BuildArtifact
Expand Down Expand Up @@ -44,7 +45,8 @@ def __init__(self, build_request):
self._logger = get_logger(__name__)
self._build_id = self._build_id_counter.increment()
self._build_request = build_request
self._artifacts_archive_file = None
self._artifacts_tar_file = None # DEPRECATED - Use zip file instead
self._artifacts_zip_file = None
self._build_artifact = None

self._error_message = None
Expand Down Expand Up @@ -81,7 +83,6 @@ def api_representation(self):
return {
'id': self._build_id,
'status': build_state,
'artifacts': self._artifacts_archive_file, # todo: this should probably be a url, not a file path
'details': self._detail_message,
'error_message': self._error_message,
'num_atoms': self._num_atoms,
Expand Down Expand Up @@ -351,8 +352,19 @@ def project_type(self):
return self._project_type

@property
def artifacts_archive_file(self):
return self._artifacts_archive_file
def artifacts_zip_file(self):
"""Return the local path to the artifacts zip archive."""
return self._artifacts_zip_file

@property
def artifacts_tar_file(self):
"""
DEPRECATED: We are transitioning to zip files from tar.gz files for artifacts.
Return the local path to the artifacts tar.gz archive.
"""
self._logger.warning('The tar format for build artifact files is deprecated. File: {}',
self._artifacts_tar_file)
return self._artifacts_tar_file

# WIP(joey): Change some of these private @properties to methods.
@property
Expand Down Expand Up @@ -452,8 +464,13 @@ def _create_build_artifact(self):
self._build_artifact = BuildArtifact(self._build_results_dir())
self._build_artifact.generate_failures_file()
self._build_artifact.write_timing_data(self._timing_file_path, self._read_subjob_timings_from_results())
self._artifacts_archive_file = app.util.fs.tar_directory(self._build_results_dir(),
BuildArtifact.ARTIFACT_TARFILE_NAME)
self._artifacts_zip_file = app.util.fs.zip_directory(self._build_results_dir(),
BuildArtifact.ARTIFACT_ZIPFILE_NAME)
# Temporarily move aside zip file so we can create a tar file, then move it back.
temp_zip_path = shutil.move(self._artifacts_zip_file, tempfile.mktemp())
self._artifacts_tar_file = app.util.fs.tar_directory(self._build_results_dir(),
BuildArtifact.ARTIFACT_TARFILE_NAME)
shutil.move(temp_zip_path, self._artifacts_zip_file)

def _delete_temporary_build_artifact_files(self):
"""
Expand All @@ -465,8 +482,8 @@ def _delete_temporary_build_artifact_files(self):
build_result_dir = self._build_results_dir()
start_time = time.time()
for path in os.listdir(build_result_dir):
# The build result tar-ball is also stored in this same directory, so we must not delete it.
if path == BuildArtifact.ARTIFACT_TARFILE_NAME:
# The build result archive is also stored in this same directory, so we must not delete it.
if path in (BuildArtifact.ARTIFACT_TARFILE_NAME, BuildArtifact.ARTIFACT_ZIPFILE_NAME):
continue
full_path = os.path.join(build_result_dir, path)
# Do NOT use app.util.fs.async_delete() here. That call will generate a temp directory for every
Expand Down
3 changes: 2 additions & 1 deletion app/master/build_artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class BuildArtifact(object):
OUTPUT_FILE = 'clusterrunner_console_output'
TIMING_FILE = 'clusterrunner_time'
ARTIFACT_TARFILE_NAME = 'results.tar.gz'
ARTIFACT_ZIPFILE_NAME = 'results.zip'

def __init__(self, build_artifact_dir):
"""
Expand Down Expand Up @@ -76,7 +77,7 @@ def get_failed_subjob_and_atom_ids(self):
def _get_failed_artifact_directories(self):
"""
:return: A list of build-artifact relative paths to the failed artifact directories (e.g. artifact_0_0).
:rtype list[str]
:rtype: list[str]
"""
if self._failed_artifact_directories is None:
if not os.path.isdir(self.build_artifact_dir):
Expand Down
9 changes: 4 additions & 5 deletions app/master/cluster_master.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,20 +296,19 @@ def get_build(self, build_id):

return build

def get_path_for_build_results_archive(self, build_id):
def get_path_for_build_results_archive(self, build_id: int, is_tar_request: bool=False) -> str:
"""
Given a build id, get the absolute file path for the archive file containing the build results.
:param build_id: The build id for which to retrieve the artifacts archive file
:type build_id: int
:param is_tar_request: If true, download the tar.gz archive instead of a zip.
:return: The path to the archived results file
:rtype: str
"""
build = self._all_builds_by_id.get(build_id)
build = self._all_builds_by_id.get(build_id) # type: Build
if build is None:
raise ItemNotFoundError('Invalid build id.')

archive_file = build.artifacts_archive_file
archive_file = build.artifacts_tar_file if is_tar_request else build.artifacts_zip_file
if archive_file is None:
raise ItemNotReadyError('Build artifact file is not yet ready. Try again later.')

Expand Down
31 changes: 31 additions & 0 deletions app/util/fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,34 @@ def tar_directories(target_dirs_to_archive_paths, tarfile_path):
for dir_path, archive_name in target_dirs_to_archive_paths.items():
target_dir = os.path.normpath(dir_path)
tar.add(target_dir, arcname=archive_name)


def zip_directory(target_dir: str, archive_filename: str) -> str:
"""
Zip up the specified directory and stick the resulting zip file in that directory.
:param target_dir: the directory to zip and the location of the resulting zip file
:param archive_filename: filename for the created zip file
:return: the full path to the created zip archive file
"""
# Create the archive in a temp location and then move it to the target dir.
# (Otherwise the resulting archive will include an extra zero-byte file.)
tmp_path = shutil.make_archive(tempfile.mktemp(), 'zip', target_dir)
target_path = os.path.join(target_dir, archive_filename)
shutil.move(tmp_path, target_path)
return target_path


def unzip_directory(archive_file: str, target_dir: str=None, delete: bool=False):
"""
Extract the specified zip file.
:param archive_file: the zip archive file to extract
:param target_dir: the directory in which to extract; defaults to same as archive file
:param delete: whether to delete the zip archive file after unpacking
"""
if not target_dir:
target_dir, _ = os.path.split(archive_file) # default to same directory as archive file

shutil.unpack_archive(archive_file, target_dir, 'zip')

if delete:
os.remove(archive_file)
26 changes: 22 additions & 4 deletions app/web_framework/cluster_master_application.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ def __init__(self, cluster_master):
RouteNode(r'build', _BuildsHandler, 'builds').add_children([
RouteNode(r'(\d+)', _BuildHandler, 'build').add_children([
RouteNode(r'result', _BuildResultRedirectHandler),
RouteNode(r'artifacts.tar.gz', _BuildResultHandler),
RouteNode(r'artifacts.tar.gz', _BuildTarResultHandler),
RouteNode(r'artifacts.zip', _BuildZipResultHandler),
RouteNode(r'subjob', _SubjobsHandler, 'subjobs').add_children([
RouteNode(r'(\d+)', _SubjobHandler, 'subjob').add_children([
RouteNode(r'atom', _AtomsHandler, 'atoms').add_children([
Expand Down Expand Up @@ -286,18 +287,35 @@ def initialize(self, route_node=None, cluster_master=None):
:param route_node: This is not used, it is only a param so we can pass route_node to all handlers without error.
In other routes, route_node is used to find child routes but filehandler routes will never show child routes.
:type route_node: RouteNode | None
:type cluster_master: ClusterMaster | None
:type cluster_master: app.master.cluster_master.ClusterMaster | None
"""
self._cluster_master = cluster_master
super().initialize(path=None) # we will not set the root path until the get() method is called

@request_latency.time()
def get(self, build_id, path=None):
artifact_file_path = self._cluster_master.get_path_for_build_results_archive(int(build_id))
def get(self, build_id):
artifact_file_path = self.get_result_file_download_path(int(build_id))
self.root, artifact_filename = os.path.split(artifact_file_path)
self.set_header('Content-Type', 'application/octet-stream') # this should be downloaded as a binary file
return super().get(path=artifact_filename)

def get_result_file_download_path(self, build_id: int):
raise NotImplementedError


class _BuildTarResultHandler(_BuildResultHandler):
"""Handler for the tar archive file"""
def get_result_file_download_path(self, build_id: int):
"""Get the file path to the artifacts.tar.gz for the specified build."""
return self._cluster_master.get_path_for_build_results_archive(build_id, is_tar_request=True)


class _BuildZipResultHandler(_BuildResultHandler):
"""Handler for the zip archive file"""
def get_result_file_download_path(self, build_id: int):
"""Get the file path to the artifacts.zip for the specified build."""
return self._cluster_master.get_path_for_build_results_archive(build_id)


class _SlavesHandler(_ClusterMasterBaseAPIHandler):
@request_latency.time()
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Install with "pip install -r requirements.txt"

astroid==1.4.0 # for pylint
configobj==5.0.6
coverage==3.7.1
cx_Freeze==4.3.3
Expand Down
5 changes: 4 additions & 1 deletion test/framework/base_unit_test_case.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,10 +206,13 @@ def _blacklist_methods_not_allowed_in_unit_tests(self):
'os.chmod', 'os.chown', 'os.fchmod', 'os.fchown', 'os.fsync', 'os.ftruncate', 'os.lchown', 'os.link',
'os.lockf', 'os.mkdir', 'os.mkfifo', 'os.mknod', 'os.open', 'os.openpty', 'os.makedirs', 'os.remove',
'os.rename', 'os.replace', 'os.rmdir', 'os.symlink', 'os.unlink',
'shutil.rmtree',
'shutil.rmtree', 'shutil.move',
'tempfile.mktemp',
'app.util.fs.extract_tar',
'app.util.fs.tar_directory',
'app.util.fs.tar_directories',
'app.util.fs.zip_directory',
'app.util.fs.unzip_directory',
'app.util.fs.create_dir',
'app.util.fs.write_file',
],
Expand Down
52 changes: 25 additions & 27 deletions test/framework/functional/base_functional_test_case.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@
import tempfile
from unittest import TestCase

from app.util import log
from app.util.fs import create_dir, extract_tar
from app.util import fs, log
from app.util.process_utils import is_windows
from app.util.network import Network
from app.util.secret import Secret
from app.master.build_artifact import BuildArtifact
from app.util.url_builder import UrlBuilder
from test.framework.functional.fs_item import Directory
from test.framework.functional.functional_test_cluster import FunctionalTestCluster, TestClusterTimeoutError

Expand All @@ -31,28 +31,6 @@ def setUp(self):
self.cluster = FunctionalTestCluster(verbose=self._get_test_verbosity())
self._network = Network()

def _create_test_config_file(self, conf_values_to_set=None):
"""
Create a temporary conf file just for this test.
:return: The path to the conf file
:rtype: str
"""
# Copy default conf file to tmp location
repo_dir = path.dirname(path.dirname(path.dirname(path.dirname(path.realpath(__file__)))))
self._conf_template_path = path.join(repo_dir, 'conf', 'default_clusterrunner.conf')
test_conf_file_path = tempfile.NamedTemporaryFile().name
shutil.copy(self._conf_template_path, test_conf_file_path)
os.chmod(test_conf_file_path, ConfigFile.CONFIG_FILE_MODE)
conf_file = ConfigFile(test_conf_file_path)

# Set custom conf file values for this test
conf_values_to_set = conf_values_to_set or {}
for conf_key, conf_value in conf_values_to_set.items():
conf_file.write_value(conf_key, conf_value, BASE_CONFIG_FILE_SECTION)

return test_conf_file_path

def tearDown(self):
# Give the cluster a bit of extra time to finish working (before forcefully killing it and failing the test)
with suppress(TestClusterTimeoutError):
Expand Down Expand Up @@ -158,7 +136,12 @@ def assert_build_artifact_contents_match_expected(self, master_api, build_id, ex
:type expected_build_artifact_contents: list[FSItem]
"""
with tempfile.TemporaryDirectory() as build_artifacts_dir_path:
self._download_and_extract_results(master_api, build_id, build_artifacts_dir_path)
self._download_and_extract_zip_results(master_api, build_id, build_artifacts_dir_path)
self.assert_directory_contents_match_expected(build_artifacts_dir_path, expected_build_artifact_contents)

# Also check the tar archive even though it is deprecated.
with tempfile.TemporaryDirectory() as build_artifacts_dir_path:
self._download_and_extract_tar_results(master_api, build_id, build_artifacts_dir_path)
self.assert_directory_contents_match_expected(build_artifacts_dir_path, expected_build_artifact_contents)

def assert_directory_contents_match_expected(self, dir_path, expected_dir_contents):
Expand All @@ -176,7 +159,7 @@ def assert_directory_contents_match_expected(self, dir_path, expected_dir_conten
expected_build_artifacts = Directory(expected_dir_name, expected_dir_contents)
expected_build_artifacts.assert_matches_path(dir_path, allow_extra_items=False)

def _download_and_extract_results(self, master_api, build_id, download_dir):
def _download_and_extract_tar_results(self, master_api, build_id, download_dir):
"""
:type master_api: app.util.url_builder.UrlBuilder
:type build_id: int
Expand All @@ -193,4 +176,19 @@ def _download_and_extract_results(self, master_api, build_id, download_dir):
for chunk in response.iter_content(chunk_size):
file.write(chunk)

extract_tar(download_filepath, delete=True)
fs.extract_tar(download_filepath, delete=True)

def _download_and_extract_zip_results(self, master_api: UrlBuilder, build_id: int, download_dir: str):
"""Download the artifacts.zip from the master and extract it."""
download_artifacts_url = master_api.url('build', build_id, 'artifacts.zip')
download_filepath = os.path.join(download_dir, BuildArtifact.ARTIFACT_ZIPFILE_NAME)
response = self._network.get(download_artifacts_url)

if response.status_code == http.client.OK:
# save file to disk, decompress, and delete
with open(download_filepath, 'wb') as file:
chunk_size = 500 * 1024
for chunk in response.iter_content(chunk_size):
file.write(chunk)

fs.unzip_directory(download_filepath, delete=True)
1 change: 1 addition & 0 deletions test/functional/test_cluster_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def test_git_type_demo_project_config(self):
for i in range(10)
]
expected_artifact_contents.append(File(BuildArtifact.ARTIFACT_TARFILE_NAME))
expected_artifact_contents.append(File(BuildArtifact.ARTIFACT_ZIPFILE_NAME))

self.assert_build_has_successful_status(build_id=build_id)
self.assert_build_status_contains_expected_data(
Expand Down
8 changes: 7 additions & 1 deletion test/unit/master/test_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ def setUp(self):
self.mock_util = self.patch('app.master.build.app.util') # stub out util - it often interacts with the fs
self.mock_open = self.patch('app.master.build.open', autospec=False, create=True)
self.mock_listdir = self.patch('os.listdir')
self.patch('tempfile.mktemp')
self.patch('shutil.move')
self.scheduler_pool = BuildSchedulerPool()

def test_allocate_slave_calls_slave_setup(self):
Expand Down Expand Up @@ -430,7 +432,11 @@ def test_get_failed_atoms_returns_failed_atoms_only(self):

def test_delete_temporary_build_artifact_files_skips_results_tarball(self):
build = self._create_test_build(BuildStatus.BUILDING)
self.mock_listdir.return_value = ['some_dir1', BuildArtifact.ARTIFACT_TARFILE_NAME]
self.mock_listdir.return_value = [
'some_dir1',
BuildArtifact.ARTIFACT_TARFILE_NAME,
BuildArtifact.ARTIFACT_ZIPFILE_NAME,
]
expected_async_delete_call_path = join(build._build_results_dir(), 'some_dir1')
self.patch('os.path.isdir').return_value = True
mock_shutil = self.patch('app.master.build.shutil')
Expand Down

0 comments on commit ea71b49

Please sign in to comment.