Ensure that the metadata in the TOC matches the tar-split
Signed-off-by: Miloslav Trmač <mitr@redhat.com>
mtrmac committed Jul 17, 2024
1 parent cda2c18 commit 85d7be5
Showing 10 changed files with 457 additions and 0 deletions.
1 change: 1 addition & 0 deletions go.mod
@@ -30,6 +30,7 @@ require (
github.com/tchap/go-patricia/v2 v2.3.1
github.com/ulikunitz/xz v0.5.12
github.com/vbatts/tar-split v0.11.5
golang.org/x/exp v0.0.0-20231006140011-7918f672742d
golang.org/x/sys v0.22.0
gotest.tools v2.2.0+incompatible
)
2 changes: 2 additions & 0 deletions go.sum
@@ -117,6 +117,8 @@ go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI=
golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
134 changes: 134 additions & 0 deletions pkg/chunked/compression_linux.go
@@ -5,13 +5,16 @@ import (
"errors"
"fmt"
"io"
"maps"
"strconv"
"time"

"github.com/containers/storage/pkg/chunked/internal"
"github.com/klauspost/compress/zstd"
"github.com/klauspost/pgzip"
digest "github.com/opencontainers/go-digest"
"github.com/vbatts/tar-split/archive/tar"
expMaps "golang.org/x/exp/maps"
)

var typesToTar = map[string]byte{
@@ -221,6 +224,12 @@ func readZstdChunkedManifest(blobStream ImageSourceSeekable, tocDigest digest.Di
if err != nil {
return nil, nil, nil, 0, fmt.Errorf("validating and decompressing tar-split: %w", err)
}
// We use the TOC for creating on-disk files, but the tar-split for creating metadata
// when exporting the layer contents. Ensure the two match, otherwise local inspection of a container
// might be misleading about the exported contents.
if err := ensureTOCMatchesTarSplit(toc, decodedTarSplit); err != nil {
return nil, nil, nil, 0, fmt.Errorf("tar-split and TOC data is inconsistent: %w", err)
}
} else if tarSplitChunk.Offset > 0 {
// We must ignore the tar-split when the digest is not present in the TOC, because we can’t authenticate it.
//
@@ -234,6 +243,131 @@
return decodedBlob, toc, decodedTarSplit, int64(manifestChunk.Offset), err
}

// ensureTOCMatchesTarSplit validates that toc and tarSplit contain _exactly_ the same entries.
func ensureTOCMatchesTarSplit(toc *internal.TOC, tarSplit []byte) error {
pendingFiles := map[string]*internal.FileMetadata{} // Name -> an entry in toc.Entries
for i := range toc.Entries {
e := &toc.Entries[i]
if e.Type != internal.TypeChunk {
if _, ok := pendingFiles[e.Name]; ok {
return fmt.Errorf("TOC contains duplicate entries for path %q", e.Name)
}
pendingFiles[e.Name] = e
}
}

if err := iterateTarSplit(tarSplit, func(hdr *tar.Header) error {
e, ok := pendingFiles[hdr.Name]
if !ok {
return fmt.Errorf("tar-split contains an entry for %q missing in TOC", hdr.Name)
}
delete(pendingFiles, hdr.Name)
expected, err := internal.NewFileMetadata(hdr)
if err != nil {
return fmt.Errorf("determining expected metadata for %q: %w", hdr.Name, err)
}
if err := ensureFileMetadataAttributesMatch(e, &expected); err != nil {
return fmt.Errorf("TOC and tar-split metadata doesn’t match: %w", err)
}

return nil
}); err != nil {
return err
}
if len(pendingFiles) != 0 {
remaining := expMaps.Keys(pendingFiles)
if len(remaining) > 5 {
remaining = remaining[:5] // Just to limit the size of the output.
}
return fmt.Errorf("TOC contains entries not present in tar-split, incl. %q", remaining)
}
return nil
}

// ensureTimePointersMatch ensures that a and b are equal
func ensureTimePointersMatch(a, b *time.Time) error {
// We didn’t always use “timeIfNotZero” when creating the TOC, so treat time.IsZero the same as nil.
// The archive/tar code turns time.IsZero() timestamps into a Unix timestamp of 0 when writing, but turns a Unix timestamp of 0,
// when reading, into a (local-timezone) Jan 1 1970, which is not IsZero(). So, treat that the same as IsZero as well.
unixZero := time.Unix(0, 0)
if a != nil && (a.IsZero() || a.Equal(unixZero)) {
a = nil
}
if b != nil && (b.IsZero() || b.Equal(unixZero)) {
b = nil
}
switch {
case a == nil && b == nil:
return nil
case a == nil:
return fmt.Errorf("nil != %v", *b)
case b == nil:
return fmt.Errorf("%v != nil", *a)
default:
if a.Equal(*b) {
return nil
}
return fmt.Errorf("%v != %v", *a, *b)
}
}
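
(Editorial aside, not part of this commit: a minimal, self-contained sketch of the two "effectively unset" timestamp representations that the function above collapses. The normalizeTime helper is hypothetical and only mirrors that logic.)

package main

import (
    "fmt"
    "time"
)

// normalizeTime mirrors ensureTimePointersMatch's treatment of "unset" timestamps:
// both the Go zero time.Time and a decoded Unix timestamp of 0 are treated as absent.
func normalizeTime(t *time.Time) *time.Time {
    unixZero := time.Unix(0, 0)
    if t != nil && (t.IsZero() || t.Equal(unixZero)) {
        return nil
    }
    return t
}

func main() {
    var zeroValue time.Time  // the Go zero value; IsZero() == true
    epoch := time.Unix(0, 0) // Jan 1 1970 in the local timezone; IsZero() == false

    fmt.Println(epoch.IsZero())                                                 // false: the two values are distinct...
    fmt.Println(normalizeTime(&zeroValue) == nil, normalizeTime(&epoch) == nil) // true true: ...but both normalize to "absent"
}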

// ensureFileMetadataAttributesMatch ensures that a and b match in file attributes (it ignores entries relevant to locating data
// in the tar stream or matching contents)
func ensureFileMetadataAttributesMatch(a, b *internal.FileMetadata) error {
// Keep this in sync with internal.FileMetadata!

if a.Type != b.Type {
return fmt.Errorf("mismatch of Type: %q != %q", a.Type, b.Type)
}
if a.Name != b.Name {
return fmt.Errorf("mismatch of Name: %q != %q", a.Name, b.Name)
}
if a.Linkname != b.Linkname {
return fmt.Errorf("mismatch of Linkname: %q != %q", a.Linkname, b.Linkname)
}
if a.Mode != b.Mode {
return fmt.Errorf("mismatch of Mode: %q != %q", a.Mode, b.Mode)
}
if a.Size != b.Size {
return fmt.Errorf("mismatch of Size: %q != %q", a.Size, b.Size)
}
if a.UID != b.UID {
return fmt.Errorf("mismatch of UID: %q != %q", a.UID, b.UID)
}
if a.GID != b.GID {
return fmt.Errorf("mismatch of GID: %q != %q", a.GID, b.GID)
}

if err := ensureTimePointersMatch(a.ModTime, b.ModTime); err != nil {
return fmt.Errorf("mismatch of ModTime: %w", err)
}
if err := ensureTimePointersMatch(a.AccessTime, b.AccessTime); err != nil {
return fmt.Errorf("mismatch of AccessTime: %w", err)
}
if err := ensureTimePointersMatch(a.ChangeTime, b.ChangeTime); err != nil {
return fmt.Errorf("mismatch of ChangeTime: %w", err)
}
if a.Devmajor != b.Devmajor {
return fmt.Errorf("mismatch of Devmajor: %q != %q", a.Devmajor, b.Devmajor)
}
if a.Devminor != b.Devminor {
return fmt.Errorf("mismatch of Devminor: %q != %q", a.Devminor, b.Devminor)
}
if !maps.Equal(a.Xattrs, b.Xattrs) {
return fmt.Errorf("mismatch of Xattrs: %q != %q", a.Xattrs, b.Xattrs)
}

// Digest is not compared
// Offset is not compared
// EndOffset is not compared

// ChunkSize is not compared
// ChunkOffset is not compared
// ChunkDigest is not compared
// ChunkType is not compared
return nil
}

func decodeAndValidateBlob(blob []byte, lengthUncompressed uint64, expectedCompressedChecksum string) ([]byte, error) {
d, err := digest.Parse(expectedCompressedChecksum)
if err != nil {
2 changes: 2 additions & 0 deletions pkg/chunked/internal/compression.go
@@ -43,6 +43,8 @@ type TOC struct {
// is used instead of that in the tar stream. The contents of the tar stream
// are not used in this scenario.
type FileMetadata struct {
// If you add any fields, update ensureFileMetadataAttributesMatch as well!

// The metadata below largely duplicates that in the tar headers.
Type string `json:"type"`
Name string `json:"name"`
68 changes: 68 additions & 0 deletions pkg/chunked/tar_split_linux.go
@@ -0,0 +1,68 @@
package chunked

import (
"bytes"
"fmt"
"io"

"github.com/vbatts/tar-split/archive/tar"
"github.com/vbatts/tar-split/tar/storage"
)

// iterateTarSplit calls handler for each tar header in tarSplit
func iterateTarSplit(tarSplit []byte, handler func(hdr *tar.Header) error) error {
// This, strictly speaking, hard-codes undocumented assumptions about how github.com/vbatts/tar-split/tar/asm.NewInputTarStream
// forms the tar-split contents. Pragmatically, NewInputTarStream should always produce storage.FileType entries at least
// for every non-empty file, which basically constrains it to the output we expect.
//
// Specifically, we assume:
// - There is a separate SegmentType entry for every tar header, but only one SegmentType entry for the full header incl. any extensions
// - (There is a FileType entry for every tar header, we ignore it)
// - Trailing padding of a file, if any, is included in the next SegmentType entry
// - At the end, there may be SegmentType entries just for the terminating zero blocks.

unpacker := storage.NewJSONUnpacker(bytes.NewReader(tarSplit))
for {
tsEntry, err := unpacker.Next()
if err != nil {
if err == io.EOF {
return nil
}
return fmt.Errorf("reading tar-split entries: %w", err)
}
switch tsEntry.Type {
case storage.SegmentType:
payload := tsEntry.Payload
// This is horrible, but we don’t know how much padding to skip. (It can be computed from the previous hdr.Size for non-sparse
// files, but for sparse files that is set to the logical size.)
//
// First, assume that all padding is zero bytes.
// A tar header starts with a file name, which might in principle be empty, but
// at least https://github.com/opencontainers/image-spec/blob/main/layer.md#populate-initial-filesystem suggests that
// the tar name should never be empty (it should be ".", or maybe "./").
//
// This will cause us to skip all zero bytes in the trailing blocks, but that’s fine.
i := 0
for i < len(payload) && payload[i] == 0 {
i++
}
payload = payload[i:]
tr := tar.NewReader(bytes.NewReader(payload))
hdr, err := tr.Next()
if err != nil {
if err == io.EOF { // Probably the last entry, but let’s let the unpacker drive that.
break
}
return fmt.Errorf("decoding a tar header from a tar-split entry: %w", err)
}
if err := handler(hdr); err != nil {
return err
}

case storage.FileType:
// Nothing
default:
return fmt.Errorf("unexpected tar-split entry type %q", tsEntry.Type)
}
}
}
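
(Editorial aside, not part of this commit: a sketch of how the tar-split stream that iterateTarSplit walks might be produced and inspected, using only API calls that appear elsewhere in this diff. The file name and contents are made up for illustration.)

package main

import (
    "bytes"
    "fmt"
    "io"

    "github.com/vbatts/tar-split/archive/tar"
    "github.com/vbatts/tar-split/tar/asm"
    "github.com/vbatts/tar-split/tar/storage"
)

func main() {
    // Build a one-file tarball; error handling is elided for brevity.
    var tarball bytes.Buffer
    w := tar.NewWriter(&tarball)
    _ = w.WriteHeader(&tar.Header{Name: "hello.txt", Size: 5, Mode: 0644})
    _, _ = w.Write([]byte("hello"))
    _ = w.Close()

    // Produce the tar-split representation, discarding the file contents themselves.
    var tarSplit bytes.Buffer
    tsReader, err := asm.NewInputTarStream(&tarball, storage.NewJSONPacker(&tarSplit), storage.NewDiscardFilePutter())
    if err != nil {
        panic(err)
    }
    if _, err := io.Copy(io.Discard, tsReader); err != nil {
        panic(err)
    }

    // Walk the entries the same way iterateTarSplit does: SegmentType entries carry
    // raw bytes (tar headers, padding, trailing zero blocks), while FileType entries
    // stand in for each file's data.
    unpacker := storage.NewJSONUnpacker(bytes.NewReader(tarSplit.Bytes()))
    for {
        entry, err := unpacker.Next()
        if err != nil {
            break // io.EOF ends the stream
        }
        switch entry.Type {
        case storage.SegmentType:
            fmt.Printf("segment: %d raw bytes\n", len(entry.Payload))
        case storage.FileType:
            fmt.Println("file entry")
        }
    }
}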
104 changes: 104 additions & 0 deletions pkg/chunked/tar_split_linux_test.go
@@ -0,0 +1,104 @@
package chunked

import (
"bytes"
"fmt"
"io"
"testing"
"time"

"github.com/containers/storage/pkg/chunked/internal"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/vbatts/tar-split/archive/tar"
"github.com/vbatts/tar-split/tar/asm"
"github.com/vbatts/tar-split/tar/storage"
)

func createTestTarheader(index int, typeFlag byte, size int64) tar.Header {
n := (index + 1) * 100 // Use predictable, but distinct, values for all headers

res := tar.Header{
Typeflag: typeFlag,
Name: fmt.Sprintf("name%d", n),
Size: size,
Mode: int64(n + 1),
Uid: n + 2,
Gid: n + 3,
Uname: fmt.Sprintf("user%d", n),
Gname: fmt.Sprintf("group%d", n),
ModTime: time.Unix(int64(n+4), 0),
AccessTime: time.Unix(int64(n+5), 0),
ChangeTime: time.Unix(int64(n+6), 0),
PAXRecords: map[string]string{fmt.Sprintf("key%d", n): fmt.Sprintf("value%d", n)},
Format: tar.FormatPAX, // We must set a format; in the default one, AccessTime and ChangeTime are discarded.
}
switch res.Typeflag {
case tar.TypeLink, tar.TypeSymlink:
res.Linkname = fmt.Sprintf("link%d", n)
case tar.TypeChar, tar.TypeBlock:
res.Devmajor = int64(n + 7)
res.Devminor = int64(n + 8)
}
return res
}

func TestIterateTarSplit(t *testing.T) {
entries := []struct {
typeFlag byte
size int64
}{
{tar.TypeReg, 0},
{tar.TypeReg, 1},
{tar.TypeReg, 511},
{tar.TypeReg, 512},
{tar.TypeReg, 513},
{tar.TypeLink, 0},
{tar.TypeSymlink, 0},
{tar.TypeChar, 0},
{tar.TypeBlock, 0},
{tar.TypeDir, 0},
{tar.TypeFifo, 0},
}

var tarball bytes.Buffer
var expected []tar.Header
w := tar.NewWriter(&tarball)
for i, e := range entries {
hdr := createTestTarheader(i, e.typeFlag, e.size)
err := w.WriteHeader(&hdr)
require.NoError(t, err)
data := make([]byte, e.size)
_, err = w.Write(data)
require.NoError(t, err)
expected = append(expected, hdr)
}
err := w.Close()
require.NoError(t, err)

var tarSplit bytes.Buffer
tsReader, err := asm.NewInputTarStream(&tarball, storage.NewJSONPacker(&tarSplit), storage.NewDiscardFilePutter())
require.NoError(t, err)
_, err = io.Copy(io.Discard, tsReader)
require.NoError(t, err)

var actual []tar.Header
err = iterateTarSplit(tarSplit.Bytes(), func(hdr *tar.Header) error {
actual = append(actual, *hdr)
return nil
})
require.NoError(t, err)

assert.Equal(t, len(expected), len(actual))
for i := range expected {
// We would have to open-code an equality comparison of time.Time values; instead, convert to FileMetadata,
// because we already have that implemented for that type — and because it provides a tiny bit of code coverage
// testing for ensureFileMetadataAttributesMatch.
expected1, err := internal.NewFileMetadata(&expected[i])
require.NoError(t, err, i)
actual1, err := internal.NewFileMetadata(&actual[i])
require.NoError(t, err, i)
err = ensureFileMetadataAttributesMatch(&expected1, &actual1)
assert.NoError(t, err, i)
}
}
27 changes: 27 additions & 0 deletions vendor/golang.org/x/exp/LICENSE
