Skip to content

Commit 6203b23

Browse files
authored
Fix mpu part size round off error for large files (#289)
1 parent b19d924 commit 6203b23

File tree

5 files changed

+224
-38
lines changed

5 files changed

+224
-38
lines changed

include/aws/s3/private/s3_util.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,20 @@ int aws_s3_parse_content_length_response_header(
236236
AWS_S3_API
237237
uint32_t aws_s3_get_num_parts(size_t part_size, uint64_t object_range_start, uint64_t object_range_end);
238238

239+
/**
 * Calculates the optimal part size and num parts given the 'content_length' and 'client_part_size'.
 * This will increase the part size to stay within S3's maximum number of parts.
 * If the required part size exceeds the 'client_max_part_size' or
 * if the system cannot support the required part size, it will raise an 'AWS_ERROR_INVALID_ARGUMENT' error.
 */
AWS_S3_API
int aws_s3_calculate_optimal_mpu_part_size_and_num_parts(
    uint64_t content_length,
    size_t client_part_size,
    uint64_t client_max_part_size,
    size_t *out_part_size,
    uint32_t *out_num_parts);
252+
239253
/* Calculates the part range for a part given overall object range, size of each part, and the part's number. Note: part
240254
* numbers begin at one. This takes into account aligning part-ranges on part_size. Intended to be used in conjunction
241255
* with aws_s3_get_num_parts. part_number should be less than or equal to the result of aws_s3_get_num_parts. */

source/s3_client.c

Lines changed: 4 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1040,47 +1040,13 @@ static struct aws_s3_meta_request *s_s3_client_meta_request_factory_default(
10401040
}
10411041
}
10421042

1043-
uint64_t part_size_uint64 = content_length / (uint64_t)g_s3_max_num_upload_parts;
1044-
1045-
if (part_size_uint64 > SIZE_MAX) {
1046-
AWS_LOGF_ERROR(
1047-
AWS_LS_S3_META_REQUEST,
1048-
"Could not create auto-ranged-put meta request; required part size of %" PRIu64
1049-
" bytes is too large for platform.",
1050-
part_size_uint64);
1051-
1052-
aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
1053-
return NULL;
1054-
}
1055-
1056-
size_t part_size = (size_t)part_size_uint64;
1057-
1058-
if (part_size > client_max_part_size) {
1059-
AWS_LOGF_ERROR(
1060-
AWS_LS_S3_META_REQUEST,
1061-
"Could not create auto-ranged-put meta request; required part size for put request is %" PRIu64
1062-
", but current maximum part size is %" PRIu64,
1063-
(uint64_t)part_size,
1064-
(uint64_t)client_max_part_size);
1065-
aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
1043+
size_t part_size;
1044+
uint32_t num_parts;
1045+
if (aws_s3_calculate_optimal_mpu_part_size_and_num_parts(
1046+
content_length, client_part_size, client_max_part_size, &part_size, &num_parts)) {
10661047
return NULL;
10671048
}
10681049

1069-
if (part_size < client_part_size) {
1070-
part_size = client_part_size;
1071-
}
1072-
if (content_length < part_size) {
1073-
/* When the content length is smaller than part size and larger than the threshold, we set one part
1074-
* with the whole length */
1075-
part_size = (size_t)content_length;
1076-
}
1077-
1078-
uint32_t num_parts = (uint32_t)(content_length / part_size);
1079-
1080-
if ((content_length % part_size) > 0) {
1081-
++num_parts;
1082-
}
1083-
10841050
return aws_s3_meta_request_auto_ranged_put_new(
10851051
client->allocator, client, part_size, content_length, num_parts, options);
10861052
} else {

source/s3_util.c

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,65 @@ void aws_s3_get_part_range(
547547
}
548548
}
549549

550+
int aws_s3_calculate_optimal_mpu_part_size_and_num_parts(
551+
uint64_t content_length,
552+
size_t client_part_size,
553+
uint64_t client_max_part_size,
554+
size_t *out_part_size,
555+
uint32_t *out_num_parts) {
556+
557+
AWS_FATAL_ASSERT(out_part_size);
558+
AWS_FATAL_ASSERT(out_num_parts);
559+
560+
uint64_t part_size_uint64 = content_length / (uint64_t)g_s3_max_num_upload_parts;
561+
562+
if ((content_length % g_s3_max_num_upload_parts) > 0) {
563+
++part_size_uint64;
564+
}
565+
566+
if (part_size_uint64 > SIZE_MAX) {
567+
AWS_LOGF_ERROR(
568+
AWS_LS_S3_META_REQUEST,
569+
"Could not create auto-ranged-put meta request; required part size of %" PRIu64
570+
" bytes is too large for platform.",
571+
part_size_uint64);
572+
573+
return aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
574+
}
575+
576+
size_t part_size = (size_t)part_size_uint64;
577+
578+
if (part_size > client_max_part_size) {
579+
AWS_LOGF_ERROR(
580+
AWS_LS_S3_META_REQUEST,
581+
"Could not create auto-ranged-put meta request; required part size for put request is %" PRIu64
582+
", but current maximum part size is %" PRIu64,
583+
(uint64_t)part_size,
584+
(uint64_t)client_max_part_size);
585+
return aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
586+
}
587+
588+
if (part_size < client_part_size) {
589+
part_size = client_part_size;
590+
}
591+
592+
if (content_length < part_size) {
593+
/* When the content length is smaller than part size and larger than the threshold, we set one part
594+
* with the whole length */
595+
part_size = (size_t)content_length;
596+
}
597+
598+
uint32_t num_parts = (uint32_t)(content_length / part_size);
599+
if ((content_length % part_size) > 0) {
600+
++num_parts;
601+
}
602+
AWS_ASSERT(num_parts <= g_s3_max_num_upload_parts);
603+
604+
*out_part_size = part_size;
605+
*out_num_parts = num_parts;
606+
return AWS_OP_SUCCESS;
607+
}
608+
550609
int aws_s3_crt_error_code_from_server_error_code_string(const struct aws_string *error_code_string) {
551610
if (aws_string_eq_byte_cursor(error_code_string, &g_s3_slow_down_error_code)) {
552611
return AWS_ERROR_S3_SLOW_DOWN;

tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ add_test_case(test_s3_strip_quotes)
160160
add_test_case(test_s3_parse_content_range_response_header)
161161
add_test_case(test_s3_parse_content_length_response_header)
162162
add_test_case(test_s3_get_num_parts_and_get_part_range)
163+
add_test_case(test_s3_mpu_get_part_size_and_num_parts)
163164
add_test_case(test_s3_aws_xml_get_top_level_tag_with_root_name)
164165
add_test_case(test_add_user_agent_header)
165166

tests/s3_util_tests.c

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,152 @@ static int s_test_s3_get_num_parts_and_get_part_range(struct aws_allocator *allo
364364
return 0;
365365
}
366366

367+
/* One table entry for the part-size/num-parts calculation tests below.
 * Inputs mirror the parameters of aws_s3_calculate_optimal_mpu_part_size_and_num_parts;
 * the expected_* fields are only checked for valid cases. */
struct s3_request_part_config_example {
    /* Human-readable label printed when the case runs. */
    const char *name;
    /* Total upload size in bytes (input). */
    uint64_t content_length;
    /* Part size requested by the client (input). */
    size_t client_part_size;
    /* Largest part size the client permits (input). */
    uint64_t client_max_part_size;
    /* Expected computed part size (unused for invalid cases). */
    size_t expected_part_size;
    /* Expected computed part count (unused for invalid cases). */
    uint32_t expected_num_parts;
};
375+
376+
AWS_TEST_CASE(test_s3_mpu_get_part_size_and_num_parts, s_test_s3_mpu_get_part_size_and_num_parts)
377+
static int s_test_s3_mpu_get_part_size_and_num_parts(struct aws_allocator *allocator, void *ctx) {
378+
(void)allocator;
379+
(void)ctx;
380+
uint64_t default_max_part_size = 5368709120ULL;
381+
382+
const struct s3_request_part_config_example valid_request_part_config[] = {
383+
{
384+
.name = "simple case",
385+
.content_length = MB_TO_BYTES((uint64_t)10000),
386+
.client_part_size = MB_TO_BYTES(5),
387+
.client_max_part_size = default_max_part_size,
388+
.expected_part_size = 5242880,
389+
.expected_num_parts = 2000,
390+
},
391+
{
392+
.name = "large content length with small part size",
393+
.content_length = MB_TO_BYTES((uint64_t)990000),
394+
.client_part_size = MB_TO_BYTES(5),
395+
.client_max_part_size = default_max_part_size,
396+
.expected_part_size = 103809024,
397+
.expected_num_parts = 10000,
398+
},
399+
{
400+
401+
.name = "large content length with large part size",
402+
.content_length = MB_TO_BYTES((uint64_t)1000000),
403+
.client_part_size = MB_TO_BYTES(500),
404+
.client_max_part_size = default_max_part_size,
405+
.expected_part_size = MB_TO_BYTES(500),
406+
.expected_num_parts = 2000,
407+
},
408+
{
409+
.name = "large odd content length",
410+
.content_length = 1044013645824,
411+
.client_part_size = 5242880,
412+
.client_max_part_size = default_max_part_size,
413+
.expected_part_size = 104401365,
414+
.expected_num_parts = 10000,
415+
},
416+
{
417+
.name = "10k parts",
418+
.content_length = MB_TO_BYTES((uint64_t)50000),
419+
.client_part_size = MB_TO_BYTES(5),
420+
.client_max_part_size = default_max_part_size,
421+
.expected_part_size = MB_TO_BYTES(5),
422+
.expected_num_parts = 10000,
423+
},
424+
{
425+
.name = "10k - 1 parts",
426+
.content_length = 49995,
427+
.client_part_size = 5,
428+
.client_max_part_size = default_max_part_size,
429+
.expected_part_size = 5,
430+
.expected_num_parts = 9999,
431+
},
432+
{
433+
.name = "10k with small last part",
434+
.content_length = 49998,
435+
.client_part_size = 5,
436+
.client_max_part_size = default_max_part_size,
437+
.expected_part_size = 5,
438+
.expected_num_parts = 10000,
439+
},
440+
{
441+
.name = "10k + 1 parts",
442+
.content_length = 50001,
443+
.client_part_size = 5,
444+
.client_max_part_size = default_max_part_size,
445+
.expected_part_size = 6,
446+
.expected_num_parts = 8334,
447+
448+
},
449+
{
450+
.name = "bump content length",
451+
.content_length = 100000,
452+
.client_part_size = 5,
453+
.client_max_part_size = default_max_part_size,
454+
.expected_part_size = 10,
455+
.expected_num_parts = 10000,
456+
},
457+
{
458+
.name = "bump content length with non-zero mod",
459+
.content_length = 999999,
460+
.client_part_size = 5,
461+
.client_max_part_size = default_max_part_size,
462+
.expected_part_size = 100,
463+
.expected_num_parts = 10000,
464+
},
465+
{
466+
.name = "5 tb content length",
467+
.content_length = MB_TO_BYTES((uint64_t)5 * 1024 * 1024),
468+
.client_part_size = MB_TO_BYTES((uint64_t)5),
469+
.client_max_part_size = default_max_part_size,
470+
.expected_part_size = 549755814,
471+
.expected_num_parts = 10000,
472+
},
473+
};
474+
for (size_t i = 0; i < AWS_ARRAY_SIZE(valid_request_part_config); ++i) {
475+
AWS_LOGF_INFO(AWS_LS_S3_GENERAL, "valid example [%zu]: %s\n", i, valid_request_part_config[i].name);
476+
477+
uint64_t content_length = valid_request_part_config[i].content_length;
478+
size_t part_size;
479+
uint32_t num_parts;
480+
481+
ASSERT_SUCCESS(aws_s3_calculate_optimal_mpu_part_size_and_num_parts(
482+
content_length,
483+
valid_request_part_config[i].client_part_size,
484+
valid_request_part_config[i].client_max_part_size,
485+
&part_size,
486+
&num_parts));
487+
ASSERT_INT_EQUALS(valid_request_part_config[i].expected_part_size, part_size);
488+
ASSERT_INT_EQUALS(valid_request_part_config[i].expected_num_parts, num_parts);
489+
}
490+
491+
/* Invalid cases */
492+
const struct s3_request_part_config_example invalid_request_part_config[] = {{
493+
.name = "max part < required part size",
494+
.content_length = 900000,
495+
.client_part_size = 5,
496+
.client_max_part_size = 10,
497+
}};
498+
499+
for (size_t i = 0; i < AWS_ARRAY_SIZE(invalid_request_part_config); ++i) {
500+
printf("invalid example [%zu]: %s\n", i, invalid_request_part_config[i].name);
501+
size_t part_size;
502+
uint32_t num_parts;
503+
ASSERT_FAILS(aws_s3_calculate_optimal_mpu_part_size_and_num_parts(
504+
invalid_request_part_config[i].content_length,
505+
invalid_request_part_config[i].client_part_size,
506+
invalid_request_part_config[i].client_max_part_size,
507+
&part_size,
508+
&num_parts));
509+
}
510+
return AWS_OP_SUCCESS;
511+
}
512+
367513
AWS_TEST_CASE(test_s3_aws_xml_get_top_level_tag_with_root_name, s_test_s3_aws_xml_get_top_level_tag_with_root_name)
368514
static int s_test_s3_aws_xml_get_top_level_tag_with_root_name(struct aws_allocator *allocator, void *ctx) {
369515
(void)allocator;

0 commit comments

Comments
 (0)