From 3b01d1ad30a5fa41e921eb0f1b44a95efc0db3da Mon Sep 17 00:00:00 2001
From: Lucas Pimentel <lucas.pimentel@datadoghq.com>
Date: Tue, 12 Nov 2024 18:06:05 -0500
Subject: [PATCH] remove HashSets, split EncodeStringAndAppend

---
 .../Propagators/W3CBaggagePropagator.cs       | 60 +++++++++----------
 1 file changed, 27 insertions(+), 33 deletions(-)
diff --git a/tracer/src/Datadog.Trace/Propagators/W3CBaggagePropagator.cs b/tracer/src/Datadog.Trace/Propagators/W3CBaggagePropagator.cs
index c971bf1c4baa..d35506c825a5 100644
--- a/tracer/src/Datadog.Trace/Propagators/W3CBaggagePropagator.cs
+++ b/tracer/src/Datadog.Trace/Propagators/W3CBaggagePropagator.cs
@@ -5,7 +5,6 @@
 
 using System;
 using System.Collections.Generic;
-using System.Runtime.CompilerServices;
 using System.Text;
 using Datadog.Trace.Telemetry;
 using Datadog.Trace.Telemetry.Metrics;
@@ -35,24 +34,6 @@ internal class W3CBaggagePropagator : IContextInjector, IContextExtractor
 
     public static readonly W3CBaggagePropagator Instance = new();
 
-    private static readonly HashSet<char> KeyCharsToEncode;
-
-    private static readonly HashSet<char> ValueCharsToEncode;
-
-    static W3CBaggagePropagator()
-    {
-        // key may not contain whitespace or any of the following characters: " , ; \ ( ) / : < = > ? @ [ ] { }
-        KeyCharsToEncode = ['"', ',', ';', '\\', '(', ')', '/', ':', '<', '=', '>', '?', '@', '[', ']', '{', '}'];
-
-        // value may contain characters from the Basic Latin Unicode Block, except for the following:
-        //   U+0020 space ( )
-        //   U+0022 double quotation mark (")
-        //   U+002C comma (,)
-        //   U+003B semicolon (;)
-        //   U+005C backslash (\)
-        ValueCharsToEncode = [' ', '"', ',', ';', '\\'];
-    }
-
     private W3CBaggagePropagator()
     {
     }
@@ -104,20 +85,25 @@ public bool TryExtract<TCarrier, TCarrierGetter>(
         return baggage is { Count: > 0 };
     }
 
-    internal static void EncodeStringAndAppend(StringBuilder sb, string source, HashSet<char> charsToEncode)
+    internal static void EncodeStringAndAppend(StringBuilder sb, string source, bool isKey)
     {
         if (string.IsNullOrEmpty(source))
         {
             return;
         }
 
-        if (!AnyCharRequiresEncoding(source, charsToEncode))
+        if (!AnyCharRequiresEncoding(source, isKey))
         {
-            // no bytes require encoding, append the source string directly
+            // no chars require encoding, append the source string directly
             sb.Append(source);
             return;
         }
 
+        EncodeStringAndAppendSlow(sb, source, isKey);
+    }
+
+    private static void EncodeStringAndAppendSlow(StringBuilder sb, string source, bool isKey)
+    {
         // this is an upper bound and will almost always be more bytes than we need
         var maxByteCount = Encoding.UTF8.GetMaxByteCount(source.Length);
 
@@ -130,7 +116,7 @@ internal static void EncodeStringAndAppend(StringBuilder sb, string source, Hash
 
             // slice the buffer down to the actual bytes written
             var stackBytes = stackBuffer[..byteCount];
-            EncodeBytesAndAppend(sb, stackBytes, charsToEncode);
+            EncodeBytesAndAppend(sb, stackBytes, isKey);
             return;
         }
 #endif
@@ -144,7 +130,7 @@ internal static void EncodeStringAndAppend(StringBuilder sb, string source, Hash
 
             // slice the buffer down to the actual bytes written
             var bytes = buffer.AsSpan(0, byteCount);
-            EncodeBytesAndAppend(sb, bytes, charsToEncode);
+            EncodeBytesAndAppend(sb, bytes, isKey);
         }
         finally
         {
@@ -152,7 +138,7 @@ internal static void EncodeStringAndAppend(StringBuilder sb, string source, Hash
         }
     }
 
-    private static void EncodeBytesAndAppend(StringBuilder sb, ReadOnlySpan<byte> bytes, HashSet<char> charsToEncode)
+    private static void EncodeBytesAndAppend(StringBuilder sb, ReadOnlySpan<byte> bytes, bool isKey)
     {
         // allocate a buffer on the stack (or rent one) for hexadecimal strings
 #if NETCOREAPP3_1_OR_GREATER
@@ -166,7 +152,7 @@ private static void EncodeBytesAndAppend(StringBuilder sb, ReadOnlySpan<byte> by
         {
             var c = (char)bytes[index];
 
-            if (CharRequiresEncoding(c, charsToEncode))
+            if (CharRequiresEncoding(c, isKey))
             {
                 // encode byte as "%FF" (hexadecimal)
                 var byteToEncode = bytes.Slice(index, 1);
@@ -186,17 +172,25 @@ private static void EncodeBytesAndAppend(StringBuilder sb, ReadOnlySpan<byte> by
 #endif
     }
 
-    [MethodImpl(MethodImplOptions.AggressiveInlining)]
-    private static bool CharRequiresEncoding(char c, HashSet<char> charsToEncode)
+    private static bool CharRequiresEncoding(char c, bool isKey)
     {
-        return c < 0x20 || c > 0x7E || char.IsWhiteSpace(c) || charsToEncode.Contains(c);
+        // key or value may not contain whitespace, control characters, or characters outside the Basic Latin Unicode Block
+        // key   may not contain any of the following characters: " , ; \ ( ) / : < = > ? @ [ ] { }
+        // value may not contain any of the following characters: " , ; \
+        if (c < 0x20 || c > 0x7E || char.IsWhiteSpace(c) || c is '"' or ',' or ';' or '\\')
+        {
+            return true;
+        }
+
+        // key is more restrictive than value
+        return isKey && c is '(' or ')' or '/' or ':' or '<' or '=' or '>' or '?' or '@' or '[' or ']' or '{' or '}';
     }
 
-    private static bool AnyCharRequiresEncoding(string source, HashSet<char> charsToEncode)
+    private static bool AnyCharRequiresEncoding(string source, bool isKey)
     {
         foreach (var c in source)
         {
-            if (CharRequiresEncoding(c, charsToEncode))
+            if (CharRequiresEncoding(c, isKey))
             {
                 return true;
             }
@@ -333,9 +327,9 @@ public void OnNext(KeyValuePair<string, string?> item)
                 _sb.Append(PairSeparator);
             }
 
-            EncodeStringAndAppend(_sb, item.Key, KeyCharsToEncode);
+            EncodeStringAndAppend(_sb, item.Key, isKey: true);
             _sb.Append(KeyAndValueSeparator);
-            EncodeStringAndAppend(_sb, item.Value!, ValueCharsToEncode);
+            EncodeStringAndAppend(_sb, item.Value!, isKey: false);
 
             // it's all ASCII here after encoding, so we can use the string
             // length directly instead of using Encoding.UTF8.GetByteCount().