From f0de09a96483b7f24f12e480afd96a8ed330125a Mon Sep 17 00:00:00 2001 From: Marek Goldmann Date: Mon, 29 Aug 2016 16:06:27 +0200 Subject: [PATCH] Removed directories are added back when it should be replaced by a symlink The issue is that we process symlinks at the end. We do not add symlinks paths to to skipped list and in such case a file added in other layer can sneak in preventing the symlink to be added back. Fixes #120. --- docker_squash/image.py | 63 +++++++++++++++++++++++++++++--------- tests/test_integ_squash.py | 22 +++++++++++++ 2 files changed, 71 insertions(+), 14 deletions(-) diff --git a/docker_squash/image.py b/docker_squash/image.py index 797f674..df5dc27 100644 --- a/docker_squash/image.py +++ b/docker_squash/image.py @@ -569,6 +569,28 @@ def _add_hardlinks(self, squashed_tar, squashed_files, to_skip, skipped_hard_lin squashed_files.append(normalized_name) squashed_tar.addfile(member) + def _add_file(self, member, content, squashed_tar, squashed_files, to_skip): + normalized_name = self._normalize_path(member.name) + + if normalized_name in squashed_files: + self.log.debug("Skipping file '%s' because it is already squashed" % normalized_name) + return + + if self._file_should_be_skipped(normalized_name, to_skip): + self.log.debug( + "Skipping '%s' file because it's on the list to skip files" % normalized_name) + return + + if content: + squashed_tar.addfile(member, content) + else: + # Special case: other(?) files, we skip the file + # itself + squashed_tar.addfile(member) + + # We added a file to the squashed tar, so let's note it + squashed_files.append(normalized_name) + def _add_symlinks(self, squashed_tar, squashed_files, to_skip, skipped_sym_links): for layer, symlinks_in_layer in enumerate(skipped_sym_links): # We need to start from 1, that's why we bump it here @@ -602,6 +624,8 @@ def _add_symlinks(self, squashed_tar, squashed_files, to_skip, skipped_sym_links self.log.debug("Adding symbolic link '%s' pointing to '%s' back..." % ( normalized_name, normalized_linkname)) + to_skip.append([normalized_name]) + squashed_files.append(normalized_name) squashed_tar.addfile(member) @@ -621,6 +645,7 @@ def _squash_layers(self, layers_to_squash, layers_to_move): skipped_markers = {} skipped_hard_links = [] skipped_sym_links = [] + skipped_files = [] # List of filenames in the squashed archive squashed_files = [] @@ -638,6 +663,7 @@ def _squash_layers(self, layers_to_squash, layers_to_move): members = layer_tar.getmembers() markers = self._marker_files(layer_tar, members) + skipped_sym_link_files = {} skipped_hard_link_files = {} files_to_skip = [] @@ -649,26 +675,23 @@ def _squash_layers(self, layers_to_squash, layers_to_move): files_to_skip.append(self._normalize_path(actual_file)) skipped_markers[marker] = marker_file - to_skip.append(files_to_skip) - self.log.debug( "Searching for symbolic links in '%s' archive..." % layer_tar_file) - skipped_sym_link_files = {} - # Scan for all symlinks in the layer and save them # for later processing. for member in members: if member.issym(): normalized_name = self._normalize_path(member.name) skipped_sym_link_files[normalized_name] = member - continue + to_skip.append(files_to_skip) skipped_sym_links.append(skipped_sym_link_files) self.log.debug("Done, found %s files" % len(skipped_sym_link_files)) + skipped_files_in_layer = {} # Copy all the files to the new tar for member in members: @@ -683,6 +706,18 @@ def _squash_layers(self, layers_to_squash, layers_to_move): "Skipping '%s' marker file, at the end of squashing we'll see if it's necessary to add it back" % normalized_name) continue + if self._file_should_be_skipped(normalized_name, skipped_sym_links): + self.log.debug( + "Skipping '%s' file because it's on a symlink path, at the end of squashing we'll see if it's necessary to add it back" % normalized_name) + + if member.isfile(): + f = (member, layer_tar.extractfile(member)) + else: + f = (member, None) + + skipped_files_in_layer[normalized_name] = f + continue + # Skip files that are marked to be skipped if self._file_should_be_skipped(normalized_name, to_skip): self.log.debug( @@ -704,23 +739,23 @@ def _squash_layers(self, layers_to_squash, layers_to_move): skipped_hard_link_files[normalized_name] = member continue + content = None + if member.isfile(): - # Finally add the file to archive - squashed_tar.addfile( - member, layer_tar.extractfile(member)) - else: - # Special case: other(?) files, we skip the file - # itself - squashed_tar.addfile(member) + content = layer_tar.extractfile(member) - # We added a file to the squashed tar, so let's note it - squashed_files.append(normalized_name) + self._add_file(member, content, squashed_tar, squashed_files, to_skip) skipped_hard_links.append(skipped_hard_link_files) + skipped_files.append(skipped_files_in_layer) self._add_hardlinks(squashed_tar, squashed_files, to_skip, skipped_hard_links) self._add_symlinks(squashed_tar, squashed_files, to_skip, skipped_sym_links) + for layer in skipped_files: + for member, content in six.itervalues(layer): + self._add_file(member, content, squashed_tar, squashed_files, to_skip) + if files_in_layers_to_move: self._add_markers(skipped_markers, squashed_tar, files_in_layers_to_move) diff --git a/tests/test_integ_squash.py b/tests/test_integ_squash.py index 2becb58..9e65d78 100644 --- a/tests/test_integ_squash.py +++ b/tests/test_integ_squash.py @@ -860,6 +860,7 @@ def test_should_not_skip_sym_link(self): container.assertFileExists('dir/dir') container.assertFileExists('newdir/file') + # https://github.com/goldmann/docker-squash/issues/118 def test_should_not_skip_hard_link(self): dockerfile = ''' FROM %s @@ -883,6 +884,7 @@ def test_should_not_skip_hard_link(self): container.assertFileExists('dir/dir') container.assertFileExists('newdir/file') + # https://github.com/goldmann/docker-squash/issues/118 def test_should_not_add_hard_link_if_exists_in_other_squashed_layer(self): dockerfile = ''' FROM %s @@ -896,6 +898,26 @@ def test_should_not_add_hard_link_if_exists_in_other_squashed_layer(self): with self.Container(squashed_image) as container: pass + # https://github.com/goldmann/docker-squash/issues/120 + def test_should_handle_symlinks_to_directory(self): + dockerfile = ''' + FROM %s + RUN mkdir /tmp/dir + RUN touch /tmp/dir/file + RUN set -e ; cd / ; mkdir /data-template ; tar cf - ./tmp/dir/ | ( cd /data-template && tar xf - ) ; mkdir -p $( dirname /tmp/dir ) ; rm -rf /tmp/dir ; ln -sf /data/tmp/dir /tmp/dir + ''' % TestIntegSquash.BUSYBOX_IMAGE + + with self.Image(dockerfile) as image: + with self.SquashedImage(image, 3, numeric=True) as squashed_image: + + with self.Container(squashed_image) as container: + container.assertFileExists('data-template') + container.assertFileExists('data-template/tmp') + container.assertFileExists('data-template/tmp/dir') + container.assertFileExists('data-template/tmp/dir/file') + container.assertFileExists('tmp/dir') + container.assertFileDoesNotExist('tmp/dir/file') + class NumericValues(IntegSquash): @classmethod