Skip to content

Commit

Permalink
don't load files >1 GiB into memory when creating CAR file (#17)
Browse files Browse the repository at this point in the history
  • Loading branch information
makew0rld committed Jun 10, 2024
1 parent 70ffe24 commit b369433
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 8 deletions.
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -221,3 +221,5 @@ require (
google.golang.org/protobuf v1.34.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

replace github.com/photon-storage/go-ipfs-car => github.com/starlinglab/go-ipfs-car v0.0.0-20240610181106-ba9c16cb219f
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -586,8 +586,6 @@ github.com/photon-storage/fastssz v0.0.0-20220401135229-47aa49fe839f h1:9665mStR
github.com/photon-storage/fastssz v0.0.0-20220401135229-47aa49fe839f/go.mod h1:Hmk4z6NsD9YtCrJE1P1BL6hQ3pWVuCzRE9oL7FMs/C0=
github.com/photon-storage/go-common v0.0.0-20230615034818-acdf765cb510 h1:UU6Nu7q8gDfibaoYa/yEb8Il3zJtQG2xwyRqmRWOKno=
github.com/photon-storage/go-common v0.0.0-20230615034818-acdf765cb510/go.mod h1:esL6le/G7T9W1HjHvMZ+N+WciU/J9qKoF+ZS+GqwCDM=
github.com/photon-storage/go-ipfs-car v0.0.0-20240530014616-17d95f03173f h1:tYNSr+4Kk6v39FSSGfPI7iTqwAM0LRFRFT/r3tBT7ng=
github.com/photon-storage/go-ipfs-car v0.0.0-20240530014616-17d95f03173f/go.mod h1:j6/Ty3JkeZhIekjoqyUnTlFRyn/MswRsK2P6aiAWYo4=
github.com/photon-storage/photon-proto v0.0.0-20220806134259-8b3f28ad0258 h1:jscyag7xBM2jGmcpz/fyfcXjzQZVtLamCpzS6Fj/+yM=
github.com/photon-storage/photon-proto v0.0.0-20220806134259-8b3f28ad0258/go.mod h1:2ugydRJbn8v57ixOXz3TvC4VVlxL913c5HqmN024B9M=
github.com/pion/datachannel v1.5.6 h1:1IxKJntfSlYkpUj8LlYRSWpYiTTC02nUrOE8T3DqGeg=
Expand Down Expand Up @@ -721,6 +719,8 @@ github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb6
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s=
github.com/src-d/envconfig v1.0.0/go.mod h1:Q9YQZ7BKITldTBnoxsE5gOeB5y66RyPXeue/R4aaNBc=
github.com/starlinglab/go-ipfs-car v0.0.0-20240610181106-ba9c16cb219f h1:zWcd5THZiSI3dJVd9YkBy49NnURlXj2QOfvaa45f34s=
github.com/starlinglab/go-ipfs-car v0.0.0-20240610181106-ba9c16cb219f/go.mod h1:j6/Ty3JkeZhIekjoqyUnTlFRyn/MswRsK2P6aiAWYo4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
Expand Down
11 changes: 9 additions & 2 deletions upload/web3.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func uploadWeb3(space string, cidPaths []string) error {

// Warn about memory usage during upload: w3 loads the whole file into memory
fmt.Fprintln(os.Stderr,
"warning: the whole file may be loaded into memory to create a CAR file for upload")
"warning: the whole file will be loaded into memory by w3 for upload")

for i, cidPath := range cidPaths {
// Use anon func to allow for safe idiomatic usage of `defer`
Expand All @@ -52,10 +52,17 @@ func uploadWeb3(space string, cidPaths []string) error {
}
defer cidF.Close()

car, err := util.GetCAR(cidF)
fi, err := cidF.Stat()
if err != nil {
return fmt.Errorf("error getting CID file info: %w", err)
}

// Hold file in memory for CAR creation, unless it's larger than 1 GiB
car, err := util.GetCAR(cidF, fi.Size() > 1<<30)
if err != nil {
return fmt.Errorf("error calculating CAR data: %w", err)
}
defer util.RemoveCarTmpDatastore()

Check failure on line 65 in upload/web3.go

View workflow job for this annotation

GitHub Actions / lint

Error return value of `util.RemoveCarTmpDatastore` is not checked (errcheck)

// Make sure CID hasn't changed
if car.Root().String() != filepath.Base(cidPath) {
Expand Down
25 changes: 21 additions & 4 deletions util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"io"
"net/http"
"os"
"path/filepath"

car "github.com/photon-storage/go-ipfs-car"
)
Expand Down Expand Up @@ -48,13 +49,29 @@ func MoveFile(sourcePath, destPath string) error {
// CIDv1 represented by the CAR file should exactly match the CIDv1 from CalculateFileCid
// or IPFS kubo every time.
//
// This function loads the whole file into memory. Watch out!
// https://github.com/starlinglab/integrity-v2/issues/17
func GetCAR(r io.Reader) (*car.CarV1, error) {
b := car.NewBuilder()
// Set useDisk to control whether this function holds all of the read bytes in
// memory or stores them on the disk temporarily.
//
// If useDisk is true, the caller should call RemoveCarTmpDatastore once they are done
// with the returned *car.CarV1 struct. This will clear the datastore from the disk.
//
// If building the CAR fails after the on-disk datastore has been created, GetCAR
// removes the datastore itself before returning the error, since the caller has
// no *car.CarV1 to finish with and may return before scheduling its own cleanup.
func GetCAR(r io.Reader, useDisk bool) (*car.CarV1, error) {
	if !useDisk {
		return car.NewBuilder().Buildv1(context.Background(), r, car.ImportOpts.CIDv1())
	}

	dsPath := filepath.Join(TempDir(), "car-datastore")
	b, err := car.NewBuilderDisk(dsPath)
	if err != nil {
		// The datastore was not successfully opened by this call, so leave the
		// path alone: it may not exist, or may belong to someone else.
		return nil, err
	}

	c, err := b.Buildv1(context.Background(), r, car.ImportOpts.CIDv1())
	if err != nil {
		// Best-effort cleanup so a failed build does not leave a potentially
		// very large datastore behind. The removal error is intentionally
		// dropped: the build error is the one the caller needs to see.
		_ = os.RemoveAll(dsPath)
		return nil, err
	}
	return c, nil
}

// RemoveCarTmpDatastore deletes the "car-datastore" directory inside TempDir(),
// which is the location GetCAR uses for its temporary on-disk datastore when
// called with useDisk set to true. It returns whatever error os.RemoveAll reports.
func RemoveCarTmpDatastore() error {
	datastoreDir := filepath.Join(TempDir(), "car-datastore")
	return os.RemoveAll(datastoreDir)
}

// GuessMediaType guesses the media type of a file based on its contents.
// The 'media_type' attribute should be preferred over this method.
func GuessMediaType(path string) (string, error) {
Expand Down

0 comments on commit b369433

Please sign in to comment.