diff --git a/decompress_bzip2.go b/decompress_bzip2.go index a5373e4e1..6db0b3577 100644 --- a/decompress_bzip2.go +++ b/decompress_bzip2.go @@ -9,7 +9,12 @@ import ( // Bzip2Decompressor is an implementation of Decompressor that can // decompress bz2 files. -type Bzip2Decompressor struct{} +type Bzip2Decompressor struct { + // FileSizeLimit limits the size of a decompressed file. + // + // The zero value means no limit. + FileSizeLimit int64 +} func (d *Bzip2Decompressor) Decompress(dst, src string, dir bool, umask os.FileMode) error { // Directory isn't supported at all @@ -33,5 +38,5 @@ func (d *Bzip2Decompressor) Decompress(dst, src string, dir bool, umask os.FileM bzipR := bzip2.NewReader(f) // Copy it out - return copyReader(dst, bzipR, 0622, umask) + return copyReader(dst, bzipR, 0622, umask, d.FileSizeLimit) } diff --git a/decompress_gzip.go b/decompress_gzip.go index 669e5eafd..f94f2bcff 100644 --- a/decompress_gzip.go +++ b/decompress_gzip.go @@ -9,7 +9,12 @@ import ( // GzipDecompressor is an implementation of Decompressor that can // decompress gzip files. -type GzipDecompressor struct{} +type GzipDecompressor struct { + // FileSizeLimit limits the size of a decompressed file. + // + // The zero value means no limit. + FileSizeLimit int64 +} func (d *GzipDecompressor) Decompress(dst, src string, dir bool, umask os.FileMode) error { // Directory isn't supported at all @@ -37,5 +42,5 @@ func (d *GzipDecompressor) Decompress(dst, src string, dir bool, umask os.FileMo defer gzipR.Close() // Copy it out - return copyReader(dst, gzipR, 0622, umask) + return copyReader(dst, gzipR, 0622, umask, d.FileSizeLimit) } diff --git a/decompress_tar.go b/decompress_tar.go index ee1d29ca9..b5188c0e5 100644 --- a/decompress_tar.go +++ b/decompress_tar.go @@ -11,12 +11,25 @@ import ( // untar is a shared helper for untarring an archive. The reader should provide // an uncompressed view of the tar archive. 
-func untar(input io.Reader, dst, src string, dir bool, umask os.FileMode) error { +func untar(input io.Reader, dst, src string, dir bool, umask os.FileMode, fileSizeLimit int64, filesLimit int) error { tarR := tar.NewReader(input) done := false dirHdrs := []*tar.Header{} now := time.Now() + + var ( + fileSize int64 + filesCount int + ) + for { + if filesLimit > 0 { + filesCount++ + if filesCount > filesLimit { + return fmt.Errorf("tar archive contains too many files: %d > %d", filesCount, filesLimit) + } + } + hdr, err := tarR.Next() if err == io.EOF { if !done { @@ -45,7 +58,15 @@ func untar(input io.Reader, dst, src string, dir bool, umask os.FileMode) error path = filepath.Join(path, hdr.Name) } - if hdr.FileInfo().IsDir() { + fileInfo := hdr.FileInfo() + + fileSize += fileInfo.Size() + + if fileSizeLimit > 0 && fileSize > fileSizeLimit { + return fmt.Errorf("tar archive larger than limit: %d", fileSizeLimit) + } + + if fileInfo.IsDir() { if !dir { return fmt.Errorf("expected a single file: %s", src) } @@ -81,8 +102,8 @@ func untar(input io.Reader, dst, src string, dir bool, umask os.FileMode) error // Mark that we're done so future in single file mode errors done = true - // Open the file for writing - err = copyReader(path, tarR, hdr.FileInfo().Mode(), umask) + // Size limit is tracked using the returned file info. + err = copyReader(path, tarR, hdr.FileInfo().Mode(), umask, 0) if err != nil { return err } @@ -127,7 +148,19 @@ func untar(input io.Reader, dst, src string, dir bool, umask os.FileMode) error // TarDecompressor is an implementation of Decompressor that can // unpack tar files. -type TarDecompressor struct{} +type TarDecompressor struct { + // FileSizeLimit limits the total size of all + // decompressed files. + // + // The zero value means no limit. + FileSizeLimit int64 + + // FilesLimit limits the number of files that are + // allowed to be decompressed. + // + // The zero value means no limit. 
+ FilesLimit int +} func (d *TarDecompressor) Decompress(dst, src string, dir bool, umask os.FileMode) error { // If we're going into a directory we should make that first @@ -146,5 +179,5 @@ func (d *TarDecompressor) Decompress(dst, src string, dir bool, umask os.FileMod } defer f.Close() - return untar(f, dst, src, dir, umask) + return untar(f, dst, src, dir, umask, d.FileSizeLimit, d.FilesLimit) } diff --git a/decompress_tar_test.go b/decompress_tar_test.go index cfc842019..3df60b544 100644 --- a/decompress_tar_test.go +++ b/decompress_tar_test.go @@ -1,10 +1,13 @@ package getter import ( + "archive/tar" + "bytes" "io/ioutil" "os" "path/filepath" "runtime" + "strings" "testing" "time" ) @@ -45,6 +48,86 @@ func TestTar(t *testing.T) { TestDecompressor(t, new(TarDecompressor), cases) } +func TestTarLimits(t *testing.T) { + b := bytes.NewBuffer(nil) + + tw := tar.NewWriter(b) + + var files = []struct { + Name, Body string + }{ + {"readme.txt", "This archive contains some text files."}, + {"gopher.txt", "Gopher names:\nCharlie\nRonald\nGlenn"}, + {"todo.txt", "Get animal handling license."}, + } + + for _, file := range files { + hdr := &tar.Header{ + Name: file.Name, + Mode: 0600, + Size: int64(len(file.Body)), + } + if err := tw.WriteHeader(hdr); err != nil { + t.Fatal(err) + } + if _, err := tw.Write([]byte(file.Body)); err != nil { + t.Fatal(err) + } + } + + if err := tw.Close(); err != nil { + t.Fatal(err) + } + + td, err := ioutil.TempDir("", "getter") + if err != nil { + t.Fatalf("err: %s", err) + } + + tarFilePath := filepath.Join(td, "input.tar") + + err = os.WriteFile(tarFilePath, b.Bytes(), 0666) + if err != nil { + t.Fatalf("err: %s", err) + } + + t.Run("file size limit", func(t *testing.T) { + d := new(TarDecompressor) + + d.FileSizeLimit = 35 + + dst := filepath.Join(td, "subdir", "file-size-limit-result") + + err = d.Decompress(dst, tarFilePath, true, 0022) + + if err == nil { + t.Fatal("expected file size limit to error") + } + + if 
!strings.Contains(err.Error(), "tar archive larger than limit: 35") { + t.Fatalf("unexpected error: %q", err.Error()) + } + }) + + t.Run("files limit", func(t *testing.T) { + d := new(TarDecompressor) + + d.FilesLimit = 2 + + dst := filepath.Join(td, "subdir", "files-limit-result") + + err = d.Decompress(dst, tarFilePath, true, 0022) + + if err == nil { + t.Fatal("expected files limit to error") + } + + if !strings.Contains(err.Error(), "tar archive contains too many files: 3 > 2") { + t.Fatalf("unexpected error: %q", err.Error()) + } + }) +} + // testDecompressPermissions decompresses a directory and checks the permissions of the expanded files func testDecompressorPermissions(t *testing.T, d Decompressor, input string, expected map[string]int, umask os.FileMode) { td, err := ioutil.TempDir("", "getter") diff --git a/decompress_tbz2.go b/decompress_tbz2.go index e2e5458c9..78609c9ff 100644 --- a/decompress_tbz2.go +++ b/decompress_tbz2.go @@ -8,7 +8,19 @@ import ( // TarBzip2Decompressor is an implementation of Decompressor that can // decompress tar.bz2 files. -type TarBzip2Decompressor struct{} +type TarBzip2Decompressor struct { + // FileSizeLimit limits the total size of all + // decompressed files. + // + // The zero value means no limit. + FileSizeLimit int64 + + // FilesLimit limits the number of files that are + // allowed to be decompressed. + // + // The zero value means no limit. 
+ FilesLimit int +} func (d *TarBzip2Decompressor) Decompress(dst, src string, dir bool, umask os.FileMode) error { // If we're going into a directory we should make that first @@ -29,5 +41,5 @@ func (d *TarBzip2Decompressor) Decompress(dst, src string, dir bool, umask os.Fi // Bzip2 compression is second bzipR := bzip2.NewReader(f) - return untar(bzipR, dst, src, dir, umask) + return untar(bzipR, dst, src, dir, umask, d.FileSizeLimit, d.FilesLimit) } diff --git a/decompress_tgz.go b/decompress_tgz.go index 84c4aa33d..848f5e372 100644 --- a/decompress_tgz.go +++ b/decompress_tgz.go @@ -9,7 +9,19 @@ import ( // TarGzipDecompressor is an implementation of Decompressor that can // decompress tar.gzip files. -type TarGzipDecompressor struct{} +type TarGzipDecompressor struct { + // FileSizeLimit limits the total size of all + // decompressed files. + // + // The zero value means no limit. + FileSizeLimit int64 + + // FilesLimit limits the number of files that are + // allowed to be decompressed. + // + // The zero value means no limit. + FilesLimit int +} func (d *TarGzipDecompressor) Decompress(dst, src string, dir bool, umask os.FileMode) error { // If we're going into a directory we should make that first @@ -35,5 +47,5 @@ func (d *TarGzipDecompressor) Decompress(dst, src string, dir bool, umask os.Fil } defer gzipR.Close() - return untar(gzipR, dst, src, dir, umask) + return untar(gzipR, dst, src, dir, umask, d.FileSizeLimit, d.FilesLimit) } diff --git a/decompress_txz.go b/decompress_txz.go index 24686f454..42f6179a8 100644 --- a/decompress_txz.go +++ b/decompress_txz.go @@ -10,7 +10,19 @@ import ( // TarXzDecompressor is an implementation of Decompressor that can // decompress tar.xz files. -type TarXzDecompressor struct{} +type TarXzDecompressor struct { + // FileSizeLimit limits the total size of all + // decompressed files. + // + // The zero value means no limit. 
+ FileSizeLimit int64 + + // FilesLimit limits the number of files that are + // allowed to be decompressed. + // + // The zero value means no limit. + FilesLimit int +} func (d *TarXzDecompressor) Decompress(dst, src string, dir bool, umask os.FileMode) error { // If we're going into a directory we should make that first @@ -35,5 +47,5 @@ func (d *TarXzDecompressor) Decompress(dst, src string, dir bool, umask os.FileM return fmt.Errorf("Error opening an xz reader for %s: %s", src, err) } - return untar(txzR, dst, src, dir, umask) + return untar(txzR, dst, src, dir, umask, d.FileSizeLimit, d.FilesLimit) } diff --git a/decompress_tzst.go b/decompress_tzst.go index a9f3da51e..3b086ced2 100644 --- a/decompress_tzst.go +++ b/decompress_tzst.go @@ -2,14 +2,27 @@ package getter import ( "fmt" - "github.com/klauspost/compress/zstd" "os" "path/filepath" + + "github.com/klauspost/compress/zstd" ) // TarZstdDecompressor is an implementation of Decompressor that can // decompress tar.zstd files. -type TarZstdDecompressor struct{} +type TarZstdDecompressor struct { + // FileSizeLimit limits the total size of all + // decompressed files. + // + // The zero value means no limit. + FileSizeLimit int64 + + // FilesLimit limits the number of files that are + // allowed to be decompressed. + // + // The zero value means no limit. + FilesLimit int +} func (d *TarZstdDecompressor) Decompress(dst, src string, dir bool, umask os.FileMode) error { // If we're going into a directory we should make that first @@ -35,5 +48,5 @@ func (d *TarZstdDecompressor) Decompress(dst, src string, dir bool, umask os.Fil } defer zstdR.Close() - return untar(zstdR, dst, src, dir, umask) + return untar(zstdR, dst, src, dir, umask, d.FileSizeLimit, d.FilesLimit) } diff --git a/decompress_xz.go b/decompress_xz.go index de5d6ce2b..89fafd6bc 100644 --- a/decompress_xz.go +++ b/decompress_xz.go @@ -10,7 +10,12 @@ import ( // XzDecompressor is an implementation of Decompressor that can // decompress xz files. 
-type XzDecompressor struct{} +type XzDecompressor struct { + // FileSizeLimit limits the size of a decompressed file. + // + // The zero value means no limit. + FileSizeLimit int64 +} func (d *XzDecompressor) Decompress(dst, src string, dir bool, umask os.FileMode) error { // Directory isn't supported at all @@ -36,6 +41,6 @@ func (d *XzDecompressor) Decompress(dst, src string, dir bool, umask os.FileMode return err } - // Copy it out - return copyReader(dst, xzR, 0622, umask) + // Copy it out, potentially using a file size limit. + return copyReader(dst, xzR, 0622, umask, d.FileSizeLimit) } diff --git a/decompress_zip.go b/decompress_zip.go index 943610aee..3ae80f298 100644 --- a/decompress_zip.go +++ b/decompress_zip.go @@ -9,7 +9,19 @@ import ( // ZipDecompressor is an implementation of Decompressor that can // decompress zip files. -type ZipDecompressor struct{} +type ZipDecompressor struct { + // FileSizeLimit limits the total size of all + // decompressed files. + // + // The zero value means no limit. + FileSizeLimit int64 + + // FilesLimit limits the number of files that are + // allowed to be decompressed. + // + // The zero value means no limit. 
+ FilesLimit int +} func (d *ZipDecompressor) Decompress(dst, src string, dir bool, umask os.FileMode) error { // If we're going into a directory we should make that first @@ -37,6 +49,12 @@ func (d *ZipDecompressor) Decompress(dst, src string, dir bool, umask os.FileMod return fmt.Errorf("expected a single file: %s", src) } + if d.FilesLimit > 0 && len(zipR.File) > d.FilesLimit { + return fmt.Errorf("zip archive contains too many files: %d > %d", len(zipR.File), d.FilesLimit) + } + + var fileSizeTotal int64 + // Go through and unarchive for _, f := range zipR.File { path := dst @@ -49,7 +67,15 @@ func (d *ZipDecompressor) Decompress(dst, src string, dir bool, umask os.FileMod path = filepath.Join(path, f.Name) } - if f.FileInfo().IsDir() { + fileInfo := f.FileInfo() + + fileSizeTotal += fileInfo.Size() + + if d.FileSizeLimit > 0 && fileSizeTotal > d.FileSizeLimit { + return fmt.Errorf("zip archive larger than limit: %d", d.FileSizeLimit) + } + + if fileInfo.IsDir() { if !dir { return fmt.Errorf("expected a single file: %s", src) } @@ -80,7 +106,8 @@ func (d *ZipDecompressor) Decompress(dst, src string, dir bool, umask os.FileMod return err } - err = copyReader(path, srcF, f.Mode(), umask) + // Size limit is tracked using the returned file info. + err = copyReader(path, srcF, f.Mode(), umask, 0) srcF.Close() if err != nil { return err diff --git a/decompress_zip_test.go b/decompress_zip_test.go index d898da158..b2298231e 100644 --- a/decompress_zip_test.go +++ b/decompress_zip_test.go @@ -4,6 +4,7 @@ import ( "os" "path/filepath" "runtime" + "strings" "testing" ) @@ -129,3 +130,25 @@ func TestDecompressZipPermissions(t *testing.T) { expected["directory/setuid"] = masked testDecompressorPermissions(t, d, input, expected, os.FileMode(060000000)) } + +func TestDecompressZipBomb(t *testing.T) { + // If the zip decompression bomb protection fails, this can fill up disk space on the entire + // computer. 
+ if os.Getenv("GO_GETTER_TEST_ZIP_BOMB") != "true" { + t.Skip("skipping potentially dangerous test without GO_GETTER_TEST_ZIP_BOMB=true") + } + + // https://www.bamsoftware.com/hacks/zipbomb/zblg.zip + srcPath := filepath.Join("./testdata", "decompress-zip", "bomb.zip") + + d := new(ZipDecompressor) + d.FileSizeLimit = 512 + + err := d.Decompress(t.TempDir(), srcPath, true, 0644) + if err == nil { + t.FailNow() + } + if !strings.Contains(err.Error(), "zip archive larger than limit: 512") { + t.Fatalf("unexpected error: %q", err.Error()) + } +} diff --git a/decompress_zstd.go b/decompress_zstd.go index 6ff6c86a9..3922d27bd 100644 --- a/decompress_zstd.go +++ b/decompress_zstd.go @@ -2,14 +2,20 @@ package getter import ( "fmt" - "github.com/klauspost/compress/zstd" "os" "path/filepath" + + "github.com/klauspost/compress/zstd" ) // ZstdDecompressor is an implementation of Decompressor that // can decompress .zst files. -type ZstdDecompressor struct{} +type ZstdDecompressor struct { + // FileSizeLimit limits the size of a decompressed file. + // + // The zero value means no limit. + FileSizeLimit int64 +} func (d *ZstdDecompressor) Decompress(dst, src string, dir bool, umask os.FileMode) error { if dir { @@ -35,6 +41,6 @@ func (d *ZstdDecompressor) Decompress(dst, src string, dir bool, umask os.FileMo } defer zstdR.Close() - // Copy it out - return copyReader(dst, zstdR, 0622, umask) + // Copy it out, potentially using a file size limit. 
+ return copyReader(dst, zstdR, 0622, umask, d.FileSizeLimit) } diff --git a/get_file_copy.go b/get_file_copy.go index 6eeda23ca..d6145cbac 100644 --- a/get_file_copy.go +++ b/get_file_copy.go @@ -31,13 +31,17 @@ func Copy(ctx context.Context, dst io.Writer, src io.Reader) (int64, error) { } // copyReader copies from an io.Reader into a file, using umask to create the dst file -func copyReader(dst string, src io.Reader, fmode, umask os.FileMode) error { +func copyReader(dst string, src io.Reader, fmode, umask os.FileMode, fileSizeLimit int64) error { dstF, err := os.OpenFile(dst, os.O_RDWR|os.O_CREATE|os.O_TRUNC, fmode) if err != nil { return err } defer dstF.Close() + if fileSizeLimit > 0 { + src = io.LimitReader(src, fileSizeLimit) + } + _, err = io.Copy(dstF, src) if err != nil { return err } diff --git a/get_gcs.go b/get_gcs.go index abf2f1d4f..0c2f96995 100644 --- a/get_gcs.go +++ b/get_gcs.go @@ -25,6 +25,12 @@ type GCSGetter struct { // Timeout sets a deadline which all GCS operations should // complete within. Zero value means no timeout. Timeout time.Duration + + // FileSizeLimit limits the size of a single + // decompressed file. + // + // The zero value means no limit. + FileSizeLimit int64 } func (g *GCSGetter) ClientMode(u *url.URL) (ClientMode, error) { @@ -179,7 +185,8 @@ func (g *GCSGetter) getObject(ctx context.Context, client *storage.Client, dst, return err } - return copyReader(dst, rc, 0666, g.client.umask()) + // There is no limit set for the size of an object from GCS + return copyReader(dst, rc, 0666, g.client.umask(), 0) } func (g *GCSGetter) parseURL(u *url.URL) (bucket, path, fragment string, err error) { diff --git a/get_s3.go b/get_s3.go index 7e0d853ba..94291947c 100644 --- a/get_s3.go +++ b/get_s3.go @@ -23,7 +23,9 @@ type S3Getter struct { getter // Timeout sets a deadline which all S3 operations should - // complete within. Zero value means no timeout. + // complete within. + // + // The zero value means no timeout. 
Timeout time.Duration } @@ -207,7 +209,8 @@ func (g *S3Getter) getObject(ctx context.Context, client *s3.S3, dst, bucket, ke } defer body.Close() - return copyReader(dst, body, 0666, g.client.umask()) + // There is no limit set for the size of an object from S3 + return copyReader(dst, body, 0666, g.client.umask(), 0) } func (g *S3Getter) getAWSConfig(region string, url *url.URL, creds *credentials.Credentials) *aws.Config { diff --git a/testdata/decompress-zip/bomb.zip b/testdata/decompress-zip/bomb.zip new file mode 100644 index 000000000..3bad671b7 Binary files /dev/null and b/testdata/decompress-zip/bomb.zip differ