Skip to content

Commit

Permalink
Merge pull request #121 from goldmann/gh-120-symlink-directory
Browse files Browse the repository at this point in the history
Removed directories are added back when it should be replaced by a symlink
  • Loading branch information
goldmann committed Aug 30, 2016
2 parents e13765a + f0de09a commit 075cd42
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 14 deletions.
63 changes: 49 additions & 14 deletions docker_squash/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -569,6 +569,28 @@ def _add_hardlinks(self, squashed_tar, squashed_files, to_skip, skipped_hard_lin
squashed_files.append(normalized_name)
squashed_tar.addfile(member)

def _add_file(self, member, content, squashed_tar, squashed_files, to_skip):
normalized_name = self._normalize_path(member.name)

if normalized_name in squashed_files:
self.log.debug("Skipping file '%s' because it is already squashed" % normalized_name)
return

if self._file_should_be_skipped(normalized_name, to_skip):
self.log.debug(
"Skipping '%s' file because it's on the list to skip files" % normalized_name)
return

if content:
squashed_tar.addfile(member, content)
else:
# Special case: other(?) files, we skip the file
# itself
squashed_tar.addfile(member)

# We added a file to the squashed tar, so let's note it
squashed_files.append(normalized_name)

def _add_symlinks(self, squashed_tar, squashed_files, to_skip, skipped_sym_links):
for layer, symlinks_in_layer in enumerate(skipped_sym_links):
# We need to start from 1, that's why we bump it here
Expand Down Expand Up @@ -602,6 +624,8 @@ def _add_symlinks(self, squashed_tar, squashed_files, to_skip, skipped_sym_links
self.log.debug("Adding symbolic link '%s' pointing to '%s' back..." % (
normalized_name, normalized_linkname))

to_skip.append([normalized_name])

squashed_files.append(normalized_name)
squashed_tar.addfile(member)

Expand All @@ -621,6 +645,7 @@ def _squash_layers(self, layers_to_squash, layers_to_move):
skipped_markers = {}
skipped_hard_links = []
skipped_sym_links = []
skipped_files = []
# List of filenames in the squashed archive
squashed_files = []

Expand All @@ -638,6 +663,7 @@ def _squash_layers(self, layers_to_squash, layers_to_move):
members = layer_tar.getmembers()
markers = self._marker_files(layer_tar, members)

skipped_sym_link_files = {}
skipped_hard_link_files = {}
files_to_skip = []

Expand All @@ -649,26 +675,23 @@ def _squash_layers(self, layers_to_squash, layers_to_move):
files_to_skip.append(self._normalize_path(actual_file))
skipped_markers[marker] = marker_file

to_skip.append(files_to_skip)

self.log.debug(
"Searching for symbolic links in '%s' archive..." % layer_tar_file)

skipped_sym_link_files = {}

# Scan for all symlinks in the layer and save them
# for later processing.
for member in members:
if member.issym():
normalized_name = self._normalize_path(member.name)
skipped_sym_link_files[normalized_name] = member

continue

to_skip.append(files_to_skip)
skipped_sym_links.append(skipped_sym_link_files)

self.log.debug("Done, found %s files" %
len(skipped_sym_link_files))
skipped_files_in_layer = {}

# Copy all the files to the new tar
for member in members:
Expand All @@ -683,6 +706,18 @@ def _squash_layers(self, layers_to_squash, layers_to_move):
"Skipping '%s' marker file, at the end of squashing we'll see if it's necessary to add it back" % normalized_name)
continue

if self._file_should_be_skipped(normalized_name, skipped_sym_links):
self.log.debug(
"Skipping '%s' file because it's on a symlink path, at the end of squashing we'll see if it's necessary to add it back" % normalized_name)

if member.isfile():
f = (member, layer_tar.extractfile(member))
else:
f = (member, None)

skipped_files_in_layer[normalized_name] = f
continue

# Skip files that are marked to be skipped
if self._file_should_be_skipped(normalized_name, to_skip):
self.log.debug(
Expand All @@ -704,23 +739,23 @@ def _squash_layers(self, layers_to_squash, layers_to_move):
skipped_hard_link_files[normalized_name] = member
continue

content = None

if member.isfile():
# Finally add the file to archive
squashed_tar.addfile(
member, layer_tar.extractfile(member))
else:
# Special case: other(?) files, we skip the file
# itself
squashed_tar.addfile(member)
content = layer_tar.extractfile(member)

# We added a file to the squashed tar, so let's note it
squashed_files.append(normalized_name)
self._add_file(member, content, squashed_tar, squashed_files, to_skip)

skipped_hard_links.append(skipped_hard_link_files)
skipped_files.append(skipped_files_in_layer)

self._add_hardlinks(squashed_tar, squashed_files, to_skip, skipped_hard_links)
self._add_symlinks(squashed_tar, squashed_files, to_skip, skipped_sym_links)

for layer in skipped_files:
for member, content in six.itervalues(layer):
self._add_file(member, content, squashed_tar, squashed_files, to_skip)

if files_in_layers_to_move:
self._add_markers(skipped_markers, squashed_tar,
files_in_layers_to_move)
Expand Down
22 changes: 22 additions & 0 deletions tests/test_integ_squash.py
Original file line number Diff line number Diff line change
Expand Up @@ -860,6 +860,7 @@ def test_should_not_skip_sym_link(self):
container.assertFileExists('dir/dir')
container.assertFileExists('newdir/file')

# https://github.com/goldmann/docker-squash/issues/118
def test_should_not_skip_hard_link(self):
dockerfile = '''
FROM %s
Expand All @@ -883,6 +884,7 @@ def test_should_not_skip_hard_link(self):
container.assertFileExists('dir/dir')
container.assertFileExists('newdir/file')

# https://github.com/goldmann/docker-squash/issues/118
def test_should_not_add_hard_link_if_exists_in_other_squashed_layer(self):
dockerfile = '''
FROM %s
Expand All @@ -896,6 +898,26 @@ def test_should_not_add_hard_link_if_exists_in_other_squashed_layer(self):
with self.Container(squashed_image) as container:
pass

# https://github.com/goldmann/docker-squash/issues/120
def test_should_handle_symlinks_to_directory(self):
dockerfile = '''
FROM %s
RUN mkdir /tmp/dir
RUN touch /tmp/dir/file
RUN set -e ; cd / ; mkdir /data-template ; tar cf - ./tmp/dir/ | ( cd /data-template && tar xf - ) ; mkdir -p $( dirname /tmp/dir ) ; rm -rf /tmp/dir ; ln -sf /data/tmp/dir /tmp/dir
''' % TestIntegSquash.BUSYBOX_IMAGE

with self.Image(dockerfile) as image:
with self.SquashedImage(image, 3, numeric=True) as squashed_image:

with self.Container(squashed_image) as container:
container.assertFileExists('data-template')
container.assertFileExists('data-template/tmp')
container.assertFileExists('data-template/tmp/dir')
container.assertFileExists('data-template/tmp/dir/file')
container.assertFileExists('tmp/dir')
container.assertFileDoesNotExist('tmp/dir/file')


class NumericValues(IntegSquash):
@classmethod
Expand Down

0 comments on commit 075cd42

Please sign in to comment.