From f8e9367632a4b174c6afbdc8355155432d8c4bd2 Mon Sep 17 00:00:00 2001
From: Stefan Sundin
Date: Sat, 13 Nov 2021 17:44:55 -0800
Subject: [PATCH] Add -part-size which lets you set the part size on your own.
 Only use this if you know what you are doing.

---
 README.md |  2 ++
 main.go   | 44 +++++++++++++++++++++++++++++---------------
 utils.go  | 21 +++++++++++++++++++++
 3 files changed, 52 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index f281a60..fd9424c 100644
--- a/README.md
+++ b/README.md
@@ -49,6 +49,8 @@ Parameters:
     	The account ID of the expected bucket owner.
   -metadata string
     	A map of metadata to store with the object in S3. (JSON syntax is not supported)
+  -part-size string
+    	Override automatic part size. (e.g. "128m")
   -profile string
     	Use a specific profile from your credential file.
   -storage-class string
diff --git a/main.go b/main.go
index 18e82eb..226b231 100644
--- a/main.go
+++ b/main.go
@@ -49,10 +49,11 @@ func main() {
 }
 
 func run() (int, error) {
-	var profile, bwlimit, cacheControl, contentDisposition, contentEncoding, contentLanguage, contentType, expectedBucketOwner, tagging, storageClass, metadata string
+	var profile, bwlimit, partSizeRaw, cacheControl, contentDisposition, contentEncoding, contentLanguage, contentType, expectedBucketOwner, tagging, storageClass, metadata string
 	var versionFlag bool
 	flag.StringVar(&profile, "profile", "", "Use a specific profile from your credential file.")
 	flag.StringVar(&bwlimit, "bwlimit", "", "Bandwidth limit. (e.g. \"2.5m\")")
+	flag.StringVar(&partSizeRaw, "part-size", "", "Override automatic part size. (e.g. \"128m\")")
 	flag.StringVar(&cacheControl, "cache-control", "", "Specifies caching behavior for the object.")
 	flag.StringVar(&contentDisposition, "content-disposition", "", "Specifies presentational information for the object.")
 	flag.StringVar(&contentEncoding, "content-encoding", "", "Specifies what content encodings have been applied to the object.")
@@ -168,25 +169,38 @@
 	}
 	fileSize := stat.Size()
 	fmt.Printf("File size: %s\n", formatFilesize(fileSize))
+	if fileSize > 5*TiB {
+		fmt.Println("Warning: File size is greater than 5 TiB. At the time of writing 5 TiB is the maximum object size.")
+		fmt.Println("This program is not stopping you from proceeding in case the limit has been increased, but be warned!")
+	}
 
-	// Detect best part size
-	// Double the part size until the file fits in 10,000 parts.
-	// The minimum part size is 5 MiB (except for the last part), although shrimp starts at 8 MiB (like the aws cli).
-	// The maximum part size is 5 GiB, which would in theory allow 50000 GiB (~48.8 TiB) in 10,000 parts.
-	// The aws cli follows a very similar algorithm: https://github.com/boto/s3transfer/blob/0.5.0/s3transfer/utils.py#L711-L763
 	var partSize int64 = 8 * MiB
-	for 10000*partSize < fileSize {
-		partSize *= 2
-	}
-	if partSize > 5*GiB {
-		partSize = 5 * GiB
+	if partSizeRaw != "" {
+		var err error
+		partSize, err = parseFilesize(partSizeRaw)
+		if err != nil {
+			return 1, err
+		}
+	} else {
+		// Detect best part size
+		// Double the part size until the file fits in 10,000 parts.
+		// The minimum part size is 5 MiB (except for the last part), although shrimp starts at 8 MiB (like the aws cli).
+		// The maximum part size is 5 GiB, which would in theory allow 50000 GiB (~48.8 TiB) in 10,000 parts.
+		// The aws cli follows a very similar algorithm: https://github.com/boto/s3transfer/blob/0.5.0/s3transfer/utils.py#L711-L763
+		// var partSize int64 = 8 * MiB
+		for 10000*partSize < fileSize {
+			partSize *= 2
+		}
+		if partSize > 5*GiB {
+			partSize = 5 * GiB
+		}
 	}
 	fmt.Printf("Part size: %s\n", formatFilesize(partSize))
-	fmt.Printf("The upload will consist of %d parts.\n", int64(math.Ceil(float64(fileSize)/float64(partSize))))
-	if fileSize > 5*TiB {
-		fmt.Println("Warning: File size is greater than 5 TiB. At the time of writing 5 TiB is the maximum object size.")
-		fmt.Println("This program is not stopping you from proceeding in case the limit has been increased, but be warned!")
+	if partSize < 5*MiB || partSize > 5*GiB {
+		fmt.Println("Warning: Part size is outside the allowed limits (must be between 5 MiB and 5 GiB).")
+		fmt.Println("This program is not stopping you from proceeding in case the limits have changed, but be warned!")
 	}
+	fmt.Printf("The upload will consist of %d parts.\n", int64(math.Ceil(float64(fileSize)/float64(partSize))))
 	if 10000*partSize < fileSize {
 		fmt.Println("Warning: File size is too large to be transferred in 10,000 parts!")
 	}
diff --git a/utils.go b/utils.go
index 9d56537..17463ee 100644
--- a/utils.go
+++ b/utils.go
@@ -61,6 +61,27 @@ func parseRate(s string) (int64, error) {
 	return int64(math.Round(f * float64(factor))), nil
 }
 
+func parseFilesize(s string) (int64, error) {
+	factor := 1
+	suffix := s[len(s)-1]
+	if suffix == 'k' || suffix == 'K' {
+		factor = kiB
+	} else if suffix == 'm' || suffix == 'M' {
+		factor = MiB
+	} else if suffix == 'g' || suffix == 'G' {
+		factor = GiB
+	}
+	if factor != 1 {
+		s = s[0 : len(s)-1]
+	}
+
+	f, err := strconv.ParseFloat(s, 64)
+	if err != nil {
+		return 0, err
+	}
+	return int64(math.Round(f * float64(factor))), nil
+}
+
 func parseMetadata(s string) (map[string]string, error) {
 	m := make(map[string]string)
 	for _, kv := range strings.Split(s, ",") {
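
Note on the automatic part size selection kept in the else branch above: it starts at 8 MiB and doubles the part size until the file fits in 10,000 parts, then caps it at 5 GiB. The following standalone sketch illustrates that logic only; the function name autoPartSize and the main driver are made up for this note and do not exist in shrimp.

package main

import "fmt"

const (
	MiB int64 = 1 << 20
	GiB int64 = 1 << 30
)

// autoPartSize mirrors the detection logic in the patch: start at 8 MiB and
// double until 10,000 parts cover the whole file, never exceeding 5 GiB per part.
func autoPartSize(fileSize int64) int64 {
	partSize := 8 * MiB
	for 10000*partSize < fileSize {
		partSize *= 2
	}
	if partSize > 5*GiB {
		partSize = 5 * GiB
	}
	return partSize
}

func main() {
	// A 200 GiB file does not fit in 10,000 parts of 8 or 16 MiB,
	// so the doubling stops at 32 MiB (8 -> 16 -> 32).
	fmt.Println(autoPartSize(200*GiB) / MiB) // prints 32
}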
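
The new parseFilesize helper accepts a plain byte count or a number with a k/m/g suffix (upper or lower case), scaled by the kiB/MiB/GiB constants. A table-driven test along the following lines could pin that behavior down; shrimp does not necessarily ship such a test, so treat it as a sketch that assumes it sits next to utils.go in package main and that the constants are the usual 1024-based units.

package main

import "testing"

// Sketch of a table-driven test for parseFilesize. Expected values assume
// binary (1024-based) units for the k/m/g suffixes.
func TestParseFilesize(t *testing.T) {
	cases := []struct {
		in   string
		want int64
	}{
		{"128m", 134217728},  // 128 * 1048576
		{"1.5g", 1610612736}, // 1.5 * 1073741824, rounded
		{"5120k", 5242880},   // suffixes may be upper or lower case
		{"8388608", 8388608}, // a bare number is taken as bytes
	}
	for _, c := range cases {
		got, err := parseFilesize(c.in)
		if err != nil {
			t.Fatalf("parseFilesize(%q) returned error: %v", c.in, err)
		}
		if got != c.want {
			t.Errorf("parseFilesize(%q) = %d, want %d", c.in, got, c.want)
		}
	}
}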