Skip to content

Commit

Permalink
Do not crash master when postbuild tasks fail (#341)
Browse files Browse the repository at this point in the history
Previously if an exception occurred during postbuild tasks
(archiving artifacts, writing timing data, etc.) for any build then
the master service would shut down. Nonsense!

This changes the behavior to merely fail the build whose postbuild
tasks failed.
  • Loading branch information
josephharrington authored May 15, 2017
1 parent fe1ce19 commit cef5e93
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 11 deletions.
28 changes: 17 additions & 11 deletions app/master/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from queue import Queue, Empty
import shutil
import tempfile
from threading import Lock
from threading import Lock, Thread
import time
import uuid

Expand All @@ -19,7 +19,6 @@
import app.util.fs
from app.util.log import get_logger
from app.util.single_use_coin import SingleUseCoin
from app.util.safe_thread import SafeThread


class Build(object):
Expand Down Expand Up @@ -256,8 +255,7 @@ def _mark_subjob_complete(self, subjob_id):
# We use a local variable here which was set inside the _build_completion_lock to prevent a race condition
if should_trigger_postbuild_tasks:
self._logger.info("All results received for build {}!", self._build_id)
# todo: This should not be a SafeThread. https://github.com/box/ClusterRunner/issues/323
SafeThread(target=self._perform_async_postbuild_tasks, name='PostBuild{}'.format(self._build_id)).start()
self.finish()

def mark_started(self):
"""
Expand All @@ -269,8 +267,10 @@ def finish(self):
"""
Perform postbuild task and mark this build as finished.
"""
# This method also transitions the FSM to finished after the postbuild tasks are complete.
self._perform_async_postbuild_tasks()
Thread(
target=self._perform_async_postbuild_tasks,
name='PostBuild{}'.format(self._build_id),
).start()

def mark_failed(self, failure_reason):
"""
Expand Down Expand Up @@ -454,12 +454,18 @@ def _result(self):

def _perform_async_postbuild_tasks(self):
"""
Once a build is complete, certain tasks can be performed asynchronously.
Once a build is complete, execute certain tasks like archiving the artifacts and writing timing
data. This method also transitions the FSM to finished after the postbuild tasks are complete.
"""
self._create_build_artifact()
self._delete_temporary_build_artifact_files()
self._postbuild_tasks_are_finished = True
self._state_machine.trigger(BuildEvent.POSTBUILD_TASKS_COMPLETE)
try:
self._create_build_artifact()
self._delete_temporary_build_artifact_files()
self._postbuild_tasks_are_finished = True
self._state_machine.trigger(BuildEvent.POSTBUILD_TASKS_COMPLETE)

except Exception as ex: # pylint: disable=broad-except
self._logger.exception('Postbuild tasks failed for build {}.'.format(self._build_id))
self.mark_failed('Postbuild tasks failed due to an internal error: "{}"'.format(ex))

def _create_build_artifact(self):
self._build_artifact = BuildArtifact(self._build_results_dir())
Expand Down
7 changes: 7 additions & 0 deletions test/unit/master/test_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,13 @@ def test_delete_temporary_build_artifact_files_skips_results_tarball(self):

mock_shutil.rmtree.assert_called_once_with(expected_async_delete_call_path, ignore_errors=True)

def test_exception_during_postbuild_tasks_fails_build(self):
self.mock_util.fs.zip_directory.side_effect = FileNotFoundError('Where my files at?')

build = self._create_test_build(BuildStatus.FINISHED)

self.assertEqual(build._status(), BuildStatus.ERROR)

def _create_test_build(
self,
build_status=None,
Expand Down

0 comments on commit cef5e93

Please sign in to comment.