Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add archive extraction support for http(s) #24

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
193 changes: 193 additions & 0 deletions pullman/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,61 @@
package pullman

import (
"archive/tar"
"archive/zip"
"bufio"
"bytes"
"compress/gzip"
"fmt"
"hash/fnv"
"io"
"os"
"path/filepath"
"strings"
)

// FileFormat describes one file format that can be recognized by a fixed
// byte signature located at a fixed position in a file.
type FileFormat struct {
// MagicBytes is the signature that must be present in the file for it to be
// identified as this format.
MagicBytes []byte
// Offset is the zero-based byte position in the file at which MagicBytes is
// expected to start.
Offset int
// Extension is a short label for the format; the entries declared in this
// file use "tar", "gz" and "zip".
Extension string
}

// fileFormats is the table of signatures that GetFileFormat checks, in order;
// the first entry whose MagicBytes are found at its Offset wins.
//
// Magic byte values pulled from: https://en.wikipedia.org/wiki/List_of_file_signatures
var fileFormats = []FileFormat{
{
// ASCII "ustar", a NUL byte, then "00".
MagicBytes: []byte{0x75, 0x73, 0x74, 0x61, 0x72, 0x00, 0x30, 0x30},
Offset: 257,
Extension: "tar",
},
{
// ASCII "ustar", two spaces, then a NUL byte.
MagicBytes: []byte{0x75, 0x73, 0x74, 0x61, 0x72, 0x20, 0x20, 0x00},
Offset: 257,
Extension: "tar",
},
{
// gzip signature at the very start of the file.
MagicBytes: []byte{0x1F, 0x8B},
Offset: 0,
Extension: "gz",
},
{
// ASCII "PK" followed by 0x03 0x04.
MagicBytes: []byte{0x50, 0x4B, 0x03, 0x04},
Offset: 0,
Extension: "zip",
},

{
// ASCII "PK" followed by 0x05 0x06 (listed as the empty-archive variant in
// the reference above).
MagicBytes: []byte{0x50, 0x4B, 0x05, 0x06},
Offset: 0,
Extension: "zip",
},

{
// ASCII "PK" followed by 0x07 0x08 (listed as the spanned-archive variant in
// the reference above).
MagicBytes: []byte{0x50, 0x4B, 0x07, 0x08},
Offset: 0,
Extension: "zip",
},
}

// OpenFile will check the path and the filesystem for mismatch errors
func OpenFile(path string) (*os.File, error) {
// resource paths need to be compatible with a local filesystem download
Expand Down Expand Up @@ -57,3 +106,147 @@ func HashStrings(strings ...string) string {

return fmt.Sprintf("%#x", h.Sum64())
}

// ExtractZip extracts the zip archive at filePath into the dest directory.
//
// Every entry is resolved relative to dest. An entry whose resolved path would
// fall outside dest (the "zip slip" attack, e.g. a name containing "../")
// aborts the extraction with an error. Directory entries are created with mode
// 0755; every other entry is written to a file opened through OpenFile.
//
// Extraction stops at the first error; entries that were already written are
// left in place.
func ExtractZip(filePath string, dest string) error {
	zipReader, err := zip.OpenReader(filePath)
	if err != nil {
		return fmt.Errorf("unable to open '%s' for reading: %w", filePath, err)
	}
	defer zipReader.Close()

	root := filepath.Clean(dest)
	parentPrefix := ".." + string(os.PathSeparator)
	for _, zipFileEntry := range zipReader.File {
		destFilePath := filepath.Join(dest, zipFileEntry.Name)

		// Zip slip vulnerability check. The comparison is done on the path
		// relative to dest rather than with a string prefix, because
		// filepath.Join cleans its result: with dest "." the joined path is
		// "name", which never starts with "./" and would be wrongly rejected.
		rel, relErr := filepath.Rel(root, destFilePath)
		if relErr != nil || rel == ".." || strings.HasPrefix(rel, parentPrefix) {
			return fmt.Errorf("%s: illegal file path", destFilePath)
		}

		if zipFileEntry.FileInfo().IsDir() {
			if err := os.MkdirAll(destFilePath, 0755); err != nil {
				return fmt.Errorf("error creating new directory %s: %w", destFilePath, err)
			}
			continue
		}

		// Each entry is written in its own call so that its file handles are
		// released before the next entry is processed, instead of piling up
		// until the whole archive has been extracted.
		if err := writeZipEntry(zipFileEntry, destFilePath); err != nil {
			return err
		}
	}
	return nil
}

// writeZipEntry copies the contents of a single zip entry into the file at
// destFilePath and closes both handles before returning. A failure to close
// the destination file is reported when no earlier error occurred.
func writeZipEntry(entry *zip.File, destFilePath string) (err error) {
	file, fileErr := OpenFile(destFilePath)
	if fileErr != nil {
		return fmt.Errorf("unable to open local file '%s' for writing: %w", destFilePath, fileErr)
	}
	defer func() {
		if closeErr := file.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("error closing local file '%s': %w", destFilePath, closeErr)
		}
	}()

	zippedRc, openErr := entry.Open()
	if openErr != nil {
		return fmt.Errorf("error opening zip file entry: %w", openErr)
	}
	defer zippedRc.Close()

	if _, copyErr := io.Copy(file, zippedRc); copyErr != nil {
		return fmt.Errorf("error writing zip resource to local file '%s': %w", destFilePath, copyErr)
	}
	return nil
}

// ExtractTar extracts the tar archive at filePath into the dest directory.
//
// Every entry is resolved relative to dest. An entry whose resolved path would
// fall outside dest (e.g. a name containing "../") aborts the extraction with
// an error, mirroring the check ExtractZip performs. Directory entries are
// created with mode 0755; every other entry is written to a file opened
// through OpenFile.
//
// Extraction stops at the first error; entries that were already written are
// left in place.
func ExtractTar(filePath string, dest string) error {
	tarFile, err := os.Open(filePath)
	if err != nil {
		return fmt.Errorf("unable to open '%s' for reading: %w", filePath, err)
	}
	defer tarFile.Close()

	root := filepath.Clean(dest)
	parentPrefix := ".." + string(os.PathSeparator)
	tr := tar.NewReader(tarFile)
	for {
		header, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return fmt.Errorf("error reading tar archive entry: %w", err)
		}
		if header == nil {
			continue
		}

		destFilePath := filepath.Join(dest, header.Name)

		// Path traversal check: entry names come from the archive and must not
		// be allowed to resolve outside dest. An entry that resolves to dest
		// itself (such as a leading "./" entry) is permitted.
		rel, relErr := filepath.Rel(root, destFilePath)
		if relErr != nil || rel == ".." || strings.HasPrefix(rel, parentPrefix) {
			return fmt.Errorf("%s: illegal file path", destFilePath)
		}

		if header.Typeflag == tar.TypeDir {
			if err := os.MkdirAll(destFilePath, 0755); err != nil {
				return fmt.Errorf("error creating new directory %s: %w", destFilePath, err)
			}
			continue
		}

		// Each entry is written in its own call so that its file handle is
		// released before the next entry is processed, instead of piling up
		// until the whole archive has been extracted.
		if err := writeTarEntry(tr, destFilePath); err != nil {
			return err
		}
	}
	return nil
}

// writeTarEntry copies the entry the tar reader is currently positioned on
// into the file at destFilePath and closes that file before returning. A
// failure to close the file is reported when no earlier error occurred.
func writeTarEntry(tr *tar.Reader, destFilePath string) (err error) {
	file, fileErr := OpenFile(destFilePath)
	if fileErr != nil {
		return fmt.Errorf("unable to open local file '%s' for writing: %w", destFilePath, fileErr)
	}
	defer func() {
		if closeErr := file.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("error closing local file '%s': %w", destFilePath, closeErr)
		}
	}()

	if _, copyErr := io.Copy(file, tr); copyErr != nil {
		return fmt.Errorf("error writing tar resource to local file '%s': %w", destFilePath, copyErr)
	}
	return nil
}

// ExtractGzip decompresses the gzip file at filePath and writes the
// decompressed stream to the file at dest, which is opened through OpenFile.
//
// It returns an error if the source cannot be opened or is not valid gzip
// data, if dest cannot be opened, or if copying fails. A failure to close the
// destination file is also reported when no earlier error occurred, so that a
// close failure on the written file is not mistaken for success.
func ExtractGzip(filePath string, dest string) (err error) {
	gzipFile, openErr := os.Open(filePath)
	if openErr != nil {
		return fmt.Errorf("unable to open '%s' for reading: %w", filePath, openErr)
	}
	defer gzipFile.Close()

	gzr, gzErr := gzip.NewReader(gzipFile)
	if gzErr != nil {
		return fmt.Errorf("unable to create gzip reader: %w", gzErr)
	}
	defer gzr.Close()

	file, fileErr := OpenFile(dest)
	if fileErr != nil {
		return fmt.Errorf("unable to open local file '%s' for writing: %w", dest, fileErr)
	}
	defer func() {
		// Only surface the close error when nothing else went wrong; an
		// earlier error is the more useful one to report.
		if closeErr := file.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("error closing local file '%s': %w", dest, closeErr)
		}
	}()

	if _, copyErr := io.Copy(file, gzr); copyErr != nil {
		return fmt.Errorf("error writing gzip resource to local file '%s': %w", dest, copyErr)
	}

	return nil
}

// GetFileFormat identifies the format of the file at filePath by comparing its
// leading bytes against the signatures in fileFormats, in table order.
//
// It returns a pointer to a copy of the first matching entry. It returns nil
// if no signature matches, or if the file cannot be opened or read. A file
// that is shorter than a given signature's position is simply not a match for
// that signature; it can still match a signature that lies within its length.
func GetFileFormat(filePath string) *FileFormat {
	file, err := os.Open(filePath)
	if err != nil {
		return nil
	}
	defer file.Close()

	// The number of leading bytes needed is dictated by the signature that
	// reaches furthest into the file (offset plus signature length).
	numBytes := 0
	for _, format := range fileFormats {
		if end := format.Offset + len(format.MagicBytes); end > numBytes {
			numBytes = end
		}
	}

	r := bufio.NewReaderSize(file, numBytes)

	// Peek returns whatever bytes are available together with io.EOF when the
	// file is shorter than requested. That is not a failure here: a short file
	// can still carry a signature that sits at its start.
	fileBytes, err := r.Peek(numBytes)
	if err != nil && err != io.EOF {
		return nil
	}

	for _, format := range fileFormats {
		end := format.Offset + len(format.MagicBytes)
		if format.Offset < 0 || end > len(fileBytes) {
			// The file does not extend far enough to contain this signature.
			continue
		}
		if bytes.Equal(fileBytes[format.Offset:end], format.MagicBytes) {
			match := format
			return &match
		}
	}
	return nil
}
Loading