From 6535a7a8960831f95282842f6dd3cd2b9f4ea58c Mon Sep 17 00:00:00 2001
From: sbiscigl <sbiscigl@amazon.com>
Date: Thu, 14 Mar 2024 15:17:38 -0400
Subject: [PATCH] add configuration to preserve path separators in URIs

---
 src/aws-cpp-sdk-core/include/aws/core/Aws.h   |  8 +++
 .../include/aws/core/http/URI.h               | 14 ++++-
 .../include/aws/core/utils/StringUtils.h      | 15 ++++-
 src/aws-cpp-sdk-core/source/Aws.cpp           |  1 +
 src/aws-cpp-sdk-core/source/http/URI.cpp      |  3 +
 .../source/utils/StringUtils.cpp              | 20 +++++++
 .../aws-cpp-sdk-s3-unit-tests/S3UnitTests.cpp | 57 +++++++++++++++++++
 7 files changed, 115 insertions(+), 3 deletions(-)

diff --git a/src/aws-cpp-sdk-core/include/aws/core/Aws.h b/src/aws-cpp-sdk-core/include/aws/core/Aws.h
index d221af2039b..c07887bfaba 100644
--- a/src/aws-cpp-sdk-core/include/aws/core/Aws.h
+++ b/src/aws-cpp-sdk-core/include/aws/core/Aws.h
@@ -105,6 +105,14 @@ namespace Aws
          * Disable legacy URL encoding that leaves `$&,:@=` unescaped for legacy purposes.
          */
         bool compliantRfc3986Encoding;
+        /**
+         * When constructing Path segments in a URI preserve path separators instead of collapsing
+         * slashes. This is useful for aligning with other SDKs and tools on key path for S3 objects
+         * as currently the C++ SDK sanitizes the path.
+         *
+         * TODO: In the next major release, this will become the default to align better with other SDKs.
+         */
+        bool preservePathSeparators = false;
     };
 
     /**
diff --git a/src/aws-cpp-sdk-core/include/aws/core/http/URI.h b/src/aws-cpp-sdk-core/include/aws/core/http/URI.h
index 972b1db155e..5b674d94c45 100644
--- a/src/aws-cpp-sdk-core/include/aws/core/http/URI.h
+++ b/src/aws-cpp-sdk-core/include/aws/core/http/URI.h
@@ -24,6 +24,9 @@ namespace Aws
         extern bool s_compliantRfc3986Encoding;
         AWS_CORE_API void SetCompliantRfc3986Encoding(bool compliant);
 
+        extern AWS_CORE_API bool s_preservePathSeparators;
+        AWS_CORE_API void SetPreservePathSeparators(bool preservePathSeparators);
+
         //per https://tools.ietf.org/html/rfc3986#section-3.4 there is nothing preventing servers from allowing
         //multiple values for the same key. So use a multimap instead of a map.
         typedef Aws::MultiMap<Aws::String, Aws::String> QueryStringParameterCollection;
@@ -135,7 +138,16 @@ namespace Aws
                 Aws::StringStream ss;
                 ss << pathSegments;
                 Aws::String segments = ss.str();
-                for (const auto& segment : Aws::Utils::StringUtils::Split(segments, '/'))
+                const auto splitOption = s_preservePathSeparators
+                                           ? Utils::StringUtils::SplitOptions::INCLUDE_EMPTY_SEGMENTS
+                                           : Utils::StringUtils::SplitOptions::NOT_SET;
+                // Preserve legacy behavior -- when `INCLUDE_EMPTY_SEGMENTS` is used we need to drop the empty segment
+                // produced by a leading "/", because the split no longer ignores leading delimiters (which is correct).
+                auto split = Aws::Utils::StringUtils::Split(segments, '/', splitOption);
+                if (s_preservePathSeparators && m_pathSegments.empty() && !split.empty() && split.front().empty() && !m_pathHasTrailingSlash) {
+                  split.erase(split.begin());
+                }
+                for (const auto& segment: split)
                 {
                     m_pathSegments.push_back(segment);
                 }
diff --git a/src/aws-cpp-sdk-core/include/aws/core/utils/StringUtils.h b/src/aws-cpp-sdk-core/include/aws/core/utils/StringUtils.h
index 04e99e25187..9edb2de1873 100644
--- a/src/aws-cpp-sdk-core/include/aws/core/utils/StringUtils.h
+++ b/src/aws-cpp-sdk-core/include/aws/core/utils/StringUtils.h
@@ -79,9 +79,13 @@ namespace Aws
                  */
                 NOT_SET,
                 /**
-                 * Includes empty entries in the vector returned by Split()
+                 * Deprecated: use INCLUDE_EMPTY_SEGMENTS instead.
                  */
-                INCLUDE_EMPTY_ENTRIES
+                INCLUDE_EMPTY_ENTRIES,
+                /**
+                 * Include delimiters as empty segments in the split string
+                 */
+                INCLUDE_EMPTY_SEGMENTS,
             };
 
             /**
@@ -116,6 +120,13 @@ namespace Aws
              */
             static Aws::Vector<Aws::String> Split(const Aws::String& toSplit, char splitOn, size_t numOfTargetParts, SplitOptions option);
 
+            /**
+             * Splits a string on a delimiter, keeping the empty segments between delimiters as empty strings.
+             * @param toSplit, the original string to split
+             * @param splitOn, the delimiter you want to use.
+             */
+            static Aws::Vector<Aws::String> SplitWithSpaces(const Aws::String& toSplit, char splitOn);
+
             /**
             * Splits a string on new line characters.
             */
diff --git a/src/aws-cpp-sdk-core/source/Aws.cpp b/src/aws-cpp-sdk-core/source/Aws.cpp
index b21de29afaa..720f26bfb1d 100644
--- a/src/aws-cpp-sdk-core/source/Aws.cpp
+++ b/src/aws-cpp-sdk-core/source/Aws.cpp
@@ -155,6 +155,7 @@ namespace Aws
         Aws::Http::SetInitCleanupCurlFlag(options.httpOptions.initAndCleanupCurl);
         Aws::Http::SetInstallSigPipeHandlerFlag(options.httpOptions.installSigPipeHandler);
         Aws::Http::SetCompliantRfc3986Encoding(options.httpOptions.compliantRfc3986Encoding);
+        Aws::Http::SetPreservePathSeparators(options.httpOptions.preservePathSeparators);
         Aws::Http::InitHttp();
         Aws::InitializeEnumOverflowContainer();
         cJSON_AS4CPP_Hooks hooks;
diff --git a/src/aws-cpp-sdk-core/source/http/URI.cpp b/src/aws-cpp-sdk-core/source/http/URI.cpp
index 4cab8b10e70..36f3274a1b6 100644
--- a/src/aws-cpp-sdk-core/source/http/URI.cpp
+++ b/src/aws-cpp-sdk-core/source/http/URI.cpp
@@ -27,6 +27,9 @@ const char* SEPARATOR = "://";
 bool s_compliantRfc3986Encoding = false;
 void SetCompliantRfc3986Encoding(bool compliant) { s_compliantRfc3986Encoding = compliant; }
 
+bool s_preservePathSeparators = false;  // global flag consulted when URI path segments are split; off by default
+void SetPreservePathSeparators(bool preservePathSeparators) { s_preservePathSeparators = preservePathSeparators; }  // plain store into the global flag
+
 Aws::String urlEncodeSegment(const Aws::String& segment, bool rfcEncoded = false)
 {
     // consolidates legacy escaping logic into one local method
diff --git a/src/aws-cpp-sdk-core/source/utils/StringUtils.cpp b/src/aws-cpp-sdk-core/source/utils/StringUtils.cpp
index e1deb3f0462..89712bec1b8 100644
--- a/src/aws-cpp-sdk-core/source/utils/StringUtils.cpp
+++ b/src/aws-cpp-sdk-core/source/utils/StringUtils.cpp
@@ -90,6 +90,11 @@ Aws::Vector<Aws::String> StringUtils::Split(const Aws::String& toSplit, char spl
 
 Aws::Vector<Aws::String> StringUtils::Split(const Aws::String& toSplit, char splitOn, size_t numOfTargetParts, SplitOptions option)
 {
+    if (option == SplitOptions::INCLUDE_EMPTY_SEGMENTS)
+    {
+        return StringUtils::SplitWithSpaces(toSplit, splitOn);
+    }
+
     Aws::Vector<Aws::String> returnValues;
     Aws::StringStream input(toSplit);
     Aws::String item;
@@ -128,6 +133,21 @@ Aws::Vector<Aws::String> StringUtils::Split(const Aws::String& toSplit, char spl
     return returnValues;
 }
 
+Aws::Vector<Aws::String> StringUtils::SplitWithSpaces(const Aws::String& toSplit, char splitOn)
+{
+    // Scans toSplit in place: no working copy and no erase-from-front, so the split is linear.
+    // Adjacent/leading delimiters yield empty segments; a trailing delimiter adds no extra one.
+    Vector<String> returnValues;
+    size_t start = 0;
+    for (size_t pos = 0; (pos = toSplit.find(splitOn, start)) != std::string::npos; start = pos + 1) {
+        returnValues.emplace_back(toSplit.substr(start, pos - start));
+    }
+    if (start < toSplit.size()) {
+        returnValues.emplace_back(toSplit.substr(start));
+    }
+    return returnValues;
+}
+
 Aws::Vector<Aws::String> StringUtils::SplitOnLine(const Aws::String& toSplit)
 {
     Aws::StringStream input(toSplit);
diff --git a/tests/aws-cpp-sdk-s3-unit-tests/S3UnitTests.cpp b/tests/aws-cpp-sdk-s3-unit-tests/S3UnitTests.cpp
index a0612101716..b90b8b6f778 100644
--- a/tests/aws-cpp-sdk-s3-unit-tests/S3UnitTests.cpp
+++ b/tests/aws-cpp-sdk-s3-unit-tests/S3UnitTests.cpp
@@ -124,3 +124,60 @@ TEST_F(S3UnitTest, S3UriMiddleDots) {
   const auto seenRequest = _mockHttpClient->GetMostRecentHttpRequest();
   EXPECT_EQ("https://bluerev.s3.us-east-1.amazonaws.com/belinda/../says", seenRequest.GetUri().GetURIString());
 }
+
+TEST_F(S3UnitTest, S3UriPathPreservationOff) { // flag not set here: expects runs of '/' in the key to collapse in the URI
+  auto putObjectRequest = PutObjectRequest()
+      .WithBucket("velvetunderground")
+      .WithKey("////stephanie////says////////////that////////she//wants///////to/know.txt");
+
+  std::shared_ptr<IOStream> body = Aws::MakeShared<StringStream>(ALLOCATION_TAG,
+    "What country shall I say is calling From across the world?",
+    std::ios_base::in | std::ios_base::binary);
+
+  putObjectRequest.SetBody(body);
+
+  // The mock request exists only so the mock response can be constructed from it; it is not otherwise used here.
+  auto mockRequest = Aws::MakeShared<Standard::StandardHttpRequest>(ALLOCATION_TAG, "mockuri", HttpMethod::HTTP_GET);
+  mockRequest->SetResponseStreamFactory([]() -> IOStream* {
+    return Aws::New<StringStream>(ALLOCATION_TAG, "response-string", std::ios_base::in | std::ios_base::binary);
+  });
+  auto mockResponse = Aws::MakeShared<Standard::StandardHttpResponse>(ALLOCATION_TAG, mockRequest);
+  mockResponse->SetResponseCode(HttpResponseCode::OK);
+  _mockHttpClient->AddResponseToReturn(mockResponse);
+
+  const auto response = _s3Client->PutObject(putObjectRequest);
+  AWS_EXPECT_SUCCESS(response);
+
+  const auto seenRequest = _mockHttpClient->GetMostRecentHttpRequest();
+  EXPECT_EQ("https://velvetunderground.s3.us-east-1.amazonaws.com/stephanie/says/that/she/wants/to/know.txt", seenRequest.GetUri().GetURIString());
+}
+
+TEST_F(S3UnitTest, S3UriPathPreservationOn) {
+  // Turn on path preservation. The flag is a process-wide global, so it is put back to its default at the end.
+  Aws::Http::SetPreservePathSeparators(true);
+
+  auto putObjectRequest = PutObjectRequest()
+      .WithBucket("velvetunderground")
+      .WithKey("////stephanie////says////////////that////////she//wants///////to/know.txt");
+
+  std::shared_ptr<IOStream> body = Aws::MakeShared<StringStream>(ALLOCATION_TAG,
+    "What country shall I say is calling From across the world?",
+    std::ios_base::in | std::ios_base::binary);
+
+  putObjectRequest.SetBody(body);
+
+  // The mock request exists only so the mock response can be constructed from it; it is not otherwise used here.
+  auto mockRequest = Aws::MakeShared<Standard::StandardHttpRequest>(ALLOCATION_TAG, "mockuri", HttpMethod::HTTP_GET);
+  mockRequest->SetResponseStreamFactory([]() -> IOStream* {
+    return Aws::New<StringStream>(ALLOCATION_TAG, "response-string", std::ios_base::in | std::ios_base::binary);
+  });
+  auto mockResponse = Aws::MakeShared<Standard::StandardHttpResponse>(ALLOCATION_TAG, mockRequest);
+  mockResponse->SetResponseCode(HttpResponseCode::OK);
+  _mockHttpClient->AddResponseToReturn(mockResponse);
+
+  const auto response = _s3Client->PutObject(putObjectRequest);
+  AWS_EXPECT_SUCCESS(response);
+  const auto seenRequest = _mockHttpClient->GetMostRecentHttpRequest();
+  EXPECT_EQ("https://velvetunderground.s3.us-east-1.amazonaws.com/////stephanie////says////////////that////////she//wants///////to/know.txt", seenRequest.GetUri().GetURIString());
+  Aws::Http::SetPreservePathSeparators(false);  // restore the default so later tests are not affected
+}