From 6535a7a8960831f95282842f6dd3cd2b9f4ea58c Mon Sep 17 00:00:00 2001 From: sbiscigl Date: Thu, 14 Mar 2024 15:17:38 -0400 Subject: [PATCH] add configuration to preserve path seperator in URIs --- src/aws-cpp-sdk-core/include/aws/core/Aws.h | 8 +++ .../include/aws/core/http/URI.h | 14 ++++- .../include/aws/core/utils/StringUtils.h | 15 ++++- src/aws-cpp-sdk-core/source/Aws.cpp | 1 + src/aws-cpp-sdk-core/source/http/URI.cpp | 3 + .../source/utils/StringUtils.cpp | 20 +++++++ .../aws-cpp-sdk-s3-unit-tests/S3UnitTests.cpp | 57 +++++++++++++++++++ 7 files changed, 115 insertions(+), 3 deletions(-) diff --git a/src/aws-cpp-sdk-core/include/aws/core/Aws.h b/src/aws-cpp-sdk-core/include/aws/core/Aws.h index d221af2039b..c07887bfaba 100644 --- a/src/aws-cpp-sdk-core/include/aws/core/Aws.h +++ b/src/aws-cpp-sdk-core/include/aws/core/Aws.h @@ -105,6 +105,14 @@ namespace Aws * Disable legacy URL encoding that leaves `$&,:@=` unescaped for legacy purposes. */ bool compliantRfc3986Encoding; + /** + * When constructing Path segments in a URI preserve path separators instead of collapsing + * slashes. This is useful for aligning with other SDKs and tools on key path for S3 objects + * as currently the C++ SDK sanitizes the path. + * + * TODO: In the next major release, this will become the default to align better with other SDKs. 
+ */ + bool preservePathSeparators = false; }; /** diff --git a/src/aws-cpp-sdk-core/include/aws/core/http/URI.h b/src/aws-cpp-sdk-core/include/aws/core/http/URI.h index 972b1db155e..5b674d94c45 100644 --- a/src/aws-cpp-sdk-core/include/aws/core/http/URI.h +++ b/src/aws-cpp-sdk-core/include/aws/core/http/URI.h @@ -24,6 +24,9 @@ namespace Aws extern bool s_compliantRfc3986Encoding; AWS_CORE_API void SetCompliantRfc3986Encoding(bool compliant); + extern AWS_CORE_API bool s_preservePathSeparators; + AWS_CORE_API void SetPreservePathSeparators(bool preservePathSeparators); + //per https://tools.ietf.org/html/rfc3986#section-3.4 there is nothing preventing servers from allowing //multiple values for the same key. So use a multimap instead of a map. typedef Aws::MultiMap QueryStringParameterCollection; @@ -135,7 +138,16 @@ namespace Aws Aws::StringStream ss; ss << pathSegments; Aws::String segments = ss.str(); - for (const auto& segment : Aws::Utils::StringUtils::Split(segments, '/')) + const auto splitOption = s_preservePathSeparators + ? Utils::StringUtils::SplitOptions::INCLUDE_EMPTY_SEGMENTS + : Utils::StringUtils::SplitOptions::NOT_SET; + // Preserve legacy behavior -- we need to remove a leading "/" if `INCLUDE_EMPTY_SEGMENTS` is specified + // because string split will no longer ignore leading delimiters -- which is correct. 
+ auto split = Aws::Utils::StringUtils::Split(segments, '/', splitOption); + if (s_preservePathSeparators && m_pathSegments.empty() && !split.empty() && split.front().empty() && !m_pathHasTrailingSlash) { + split.erase(split.begin()); + } + for (const auto& segment: split) { m_pathSegments.push_back(segment); } diff --git a/src/aws-cpp-sdk-core/include/aws/core/utils/StringUtils.h b/src/aws-cpp-sdk-core/include/aws/core/utils/StringUtils.h index 04e99e25187..9edb2de1873 100644 --- a/src/aws-cpp-sdk-core/include/aws/core/utils/StringUtils.h +++ b/src/aws-cpp-sdk-core/include/aws/core/utils/StringUtils.h @@ -79,9 +79,13 @@ namespace Aws */ NOT_SET, /** - * Includes empty entries in the vector returned by Split() + * Deprecated: use INCLUDE_EMPTY_SEGMENTS instead. */ - INCLUDE_EMPTY_ENTRIES + INCLUDE_EMPTY_ENTRIES, + /** + * Include delimiters as empty segments in the split string + */ + INCLUDE_EMPTY_SEGMENTS, }; /** @@ -116,6 +120,13 @@ namespace Aws */ static Aws::Vector Split(const Aws::String& toSplit, char splitOn, size_t numOfTargetParts, SplitOptions option); + /** + * Splits a string on a delimiter, keeping the delimiter in the string as an empty segment. + * @param toSplit, the original string to split + * @param splitOn, the delimiter you want to use. + */ + static Aws::Vector SplitWithSpaces(const Aws::String& toSplit, char splitOn); + /** * Splits a string on new line characters. 
*/ diff --git a/src/aws-cpp-sdk-core/source/Aws.cpp b/src/aws-cpp-sdk-core/source/Aws.cpp index b21de29afaa..720f26bfb1d 100644 --- a/src/aws-cpp-sdk-core/source/Aws.cpp +++ b/src/aws-cpp-sdk-core/source/Aws.cpp @@ -155,6 +155,7 @@ namespace Aws Aws::Http::SetInitCleanupCurlFlag(options.httpOptions.initAndCleanupCurl); Aws::Http::SetInstallSigPipeHandlerFlag(options.httpOptions.installSigPipeHandler); Aws::Http::SetCompliantRfc3986Encoding(options.httpOptions.compliantRfc3986Encoding); + Aws::Http::SetPreservePathSeparators(options.httpOptions.preservePathSeparators); Aws::Http::InitHttp(); Aws::InitializeEnumOverflowContainer(); cJSON_AS4CPP_Hooks hooks; diff --git a/src/aws-cpp-sdk-core/source/http/URI.cpp b/src/aws-cpp-sdk-core/source/http/URI.cpp index 4cab8b10e70..36f3274a1b6 100644 --- a/src/aws-cpp-sdk-core/source/http/URI.cpp +++ b/src/aws-cpp-sdk-core/source/http/URI.cpp @@ -27,6 +27,9 @@ const char* SEPARATOR = "://"; bool s_compliantRfc3986Encoding = false; void SetCompliantRfc3986Encoding(bool compliant) { s_compliantRfc3986Encoding = compliant; } +bool s_preservePathSeparators = false; +void SetPreservePathSeparators(bool preservePathSeparators) { s_preservePathSeparators = preservePathSeparators; } + Aws::String urlEncodeSegment(const Aws::String& segment, bool rfcEncoded = false) { // consolidates legacy escaping logic into one local method diff --git a/src/aws-cpp-sdk-core/source/utils/StringUtils.cpp b/src/aws-cpp-sdk-core/source/utils/StringUtils.cpp index e1deb3f0462..89712bec1b8 100644 --- a/src/aws-cpp-sdk-core/source/utils/StringUtils.cpp +++ b/src/aws-cpp-sdk-core/source/utils/StringUtils.cpp @@ -90,6 +90,11 @@ Aws::Vector StringUtils::Split(const Aws::String& toSplit, char spl Aws::Vector StringUtils::Split(const Aws::String& toSplit, char splitOn, size_t numOfTargetParts, SplitOptions option) { + if (option == SplitOptions::INCLUDE_EMPTY_SEGMENTS) + { + return StringUtils::SplitWithSpaces(toSplit, splitOn); + } + Aws::Vector returnValues; 
Aws::StringStream input(toSplit); Aws::String item; @@ -128,6 +133,21 @@ Aws::Vector StringUtils::Split(const Aws::String& toSplit, char spl return returnValues; } +Aws::Vector StringUtils::SplitWithSpaces(const Aws::String& toSplit, char splitOn) +{ + size_t pos = 0; + String split{toSplit}; + Vector returnValues; + while ((pos = split.find(splitOn)) != std::string::npos) { + returnValues.emplace_back(split.substr(0, pos)); + split.erase(0, pos + 1); + } + if (!split.empty()) { + returnValues.emplace_back(split); + } + return returnValues; +} + Aws::Vector StringUtils::SplitOnLine(const Aws::String& toSplit) { Aws::StringStream input(toSplit); diff --git a/tests/aws-cpp-sdk-s3-unit-tests/S3UnitTests.cpp b/tests/aws-cpp-sdk-s3-unit-tests/S3UnitTests.cpp index a0612101716..b90b8b6f778 100644 --- a/tests/aws-cpp-sdk-s3-unit-tests/S3UnitTests.cpp +++ b/tests/aws-cpp-sdk-s3-unit-tests/S3UnitTests.cpp @@ -124,3 +124,60 @@ TEST_F(S3UnitTest, S3UriMiddleDots) { const auto seenRequest = _mockHttpClient->GetMostRecentHttpRequest(); EXPECT_EQ("https://bluerev.s3.us-east-1.amazonaws.com/belinda/../says", seenRequest.GetUri().GetURIString()); } + +TEST_F(S3UnitTest, S3UriPathPreservationOff) { + auto putObjectRequest = PutObjectRequest() + .WithBucket("velvetunderground") + .WithKey("////stephanie////says////////////that////////she//wants///////to/know.txt"); + + std::shared_ptr body = Aws::MakeShared(ALLOCATION_TAG, + "What country shall I say is calling From across the world?", + std::ios_base::in | std::ios_base::binary); + + putObjectRequest.SetBody(body); + + //We have to mock the request because it is used to create the return body; it isn't actually used. 
+ auto mockRequest = Aws::MakeShared(ALLOCATION_TAG, "mockuri", HttpMethod::HTTP_GET); + mockRequest->SetResponseStreamFactory([]() -> IOStream* { + return Aws::New(ALLOCATION_TAG, "response-string", std::ios_base::in | std::ios_base::binary); + }); + auto mockResponse = Aws::MakeShared(ALLOCATION_TAG, mockRequest); + mockResponse->SetResponseCode(HttpResponseCode::OK); + _mockHttpClient->AddResponseToReturn(mockResponse); + + const auto response = _s3Client->PutObject(putObjectRequest); + AWS_EXPECT_SUCCESS(response); + + const auto seenRequest = _mockHttpClient->GetMostRecentHttpRequest(); + EXPECT_EQ("https://velvetunderground.s3.us-east-1.amazonaws.com/stephanie/says/that/she/wants/to/know.txt", seenRequest.GetUri().GetURIString()); +} + +TEST_F(S3UnitTest, S3UriPathPreservationOn) { + //Turn on path preservation + Aws::Http::SetPreservePathSeparators(true); + + auto putObjectRequest = PutObjectRequest() + .WithBucket("velvetunderground") + .WithKey("////stephanie////says////////////that////////she//wants///////to/know.txt"); + + std::shared_ptr body = Aws::MakeShared(ALLOCATION_TAG, + "What country shall I say is calling From across the world?", + std::ios_base::in | std::ios_base::binary); + + putObjectRequest.SetBody(body); + + //We have to mock the request because it is used to create the return body; it isn't actually used. 
+ auto mockRequest = Aws::MakeShared(ALLOCATION_TAG, "mockuri", HttpMethod::HTTP_GET); + mockRequest->SetResponseStreamFactory([]() -> IOStream* { + return Aws::New(ALLOCATION_TAG, "response-string", std::ios_base::in | std::ios_base::binary); + }); + auto mockResponse = Aws::MakeShared(ALLOCATION_TAG, mockRequest); + mockResponse->SetResponseCode(HttpResponseCode::OK); + _mockHttpClient->AddResponseToReturn(mockResponse); + + const auto response = _s3Client->PutObject(putObjectRequest); + AWS_EXPECT_SUCCESS(response); + + const auto seenRequest = _mockHttpClient->GetMostRecentHttpRequest(); + EXPECT_EQ("https://velvetunderground.s3.us-east-1.amazonaws.com/////stephanie////says////////////that////////she//wants///////to/know.txt", seenRequest.GetUri().GetURIString()); +}