Skip to content

Commit

Permalink
Using SearchValues in ContentDispositionHeaderValue (dotnet#55039)
Browse files Browse the repository at this point in the history
* Using SearchValues in ContentDispositionHeaderValue

Updating current Encode5987 method to use SearchValues and IndexOfAny to encode the FileNameStar property.
Adding unit tests and a  benchmark to measure the before-after performance

* Removing if-else case for the encoding parts with the rune.

* Correcting the size of the temp char buffer allocation

* Review feedback

* Adding test for inputs that does not fit on a stacksize

* Using a for loop instead of a while loop for processing toHexEscape

* Using ref struct string interpolation with string builder

* Using a const for the stackallocted buffer sizes.

* Remove unneeded span

* Using Rune to encode/decode non ascii characters

* With custom utf8 and hex encoding combined

* Update src/Http/Headers/src/ContentDispositionHeaderValue.cs

Co-authored-by: Günther Foidl <gue@korporal.at>

---------

Co-authored-by: ladeak <ladeak87@windowslive.com>
Co-authored-by: Günther Foidl <gue@korporal.at>
  • Loading branch information
3 people authored Apr 19, 2024
1 parent 8660e3a commit 84af599
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 45 deletions.
106 changes: 61 additions & 45 deletions src/Http/Headers/src/ContentDispositionHeaderValue.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public class ContentDispositionHeaderValue

// attr-char definition from RFC5987
// Same as token except ( "*" / "'" / "%" )
private static readonly SearchValues<char> AttrChar =
private static readonly SearchValues<char> Rfc5987AttrChar =
SearchValues.Create("!#$&+-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz|~");

private static readonly HttpHeaderParser<ContentDispositionHeaderValue> Parser
Expand Down Expand Up @@ -618,54 +618,36 @@ private static bool TryDecodeMime(StringSegment input, [NotNullWhen(true)] out s
private static string Encode5987(StringSegment input)
{
var builder = new StringBuilder("UTF-8\'\'");

var maxInputBytes = Encoding.UTF8.GetMaxByteCount(input.Length);
byte[]? bufferFromPool = null;
Span<byte> inputBytes = maxInputBytes <= MaxStackAllocSizeBytes
? stackalloc byte[MaxStackAllocSizeBytes]
: bufferFromPool = ArrayPool<byte>.Shared.Rent(maxInputBytes);

var bytesWritten = Encoding.UTF8.GetBytes(input, inputBytes);
inputBytes = inputBytes[..bytesWritten];

int totalBytesConsumed = 0;
while (totalBytesConsumed < inputBytes.Length)
var remaining = input.AsSpan();
while (remaining.Length > 0)
{
if (Ascii.IsValid(inputBytes[totalBytesConsumed]))
var length = remaining.IndexOfAnyExcept(Rfc5987AttrChar);
if (length < 0)
{
// This is an ASCII char. Let's handle it ourselves.

char c = (char)inputBytes[totalBytesConsumed];
if (!AttrChar.Contains(c))
{
HexEscape(builder, c);
}
else
{
builder.Append(c);
}

totalBytesConsumed++;
length = remaining.Length;
}
else
{
// Non-ASCII, let's rely on Rune to decode it.
builder.Append(remaining[..length]);

Rune.DecodeFromUtf8(inputBytes.Slice(totalBytesConsumed), out Rune r, out int bytesConsumedForRune);
Contract.Assert(!r.IsAscii, "We shouldn't have gotten here if the Rune is ASCII.");
remaining = remaining.Slice(length);
if (remaining.Length == 0)
{
break;
}

for (int i = 0; i < bytesConsumedForRune; i++)
{
HexEscape(builder, (char)inputBytes[totalBytesConsumed + i]);
}
length = remaining.IndexOfAny(Rfc5987AttrChar);
if (length < 0)
{
length = remaining.Length;
}

totalBytesConsumed += bytesConsumedForRune;
for (var i = 0; i < length;)
{
Rune.DecodeFromUtf16(remaining.Slice(i), out Rune rune, out var runeLength);
EncodeToUtf8Hex(rune, builder);
i += runeLength;
}
}

if (bufferFromPool is not null)
{
ArrayPool<byte>.Shared.Return(bufferFromPool);
remaining = remaining.Slice(length);
}

return builder.ToString();
Expand All @@ -675,11 +657,45 @@ private static string Encode5987(StringSegment input)
'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };

private static void HexEscape(StringBuilder builder, char c)
private static void EncodeToUtf8Hex(Rune rune, StringBuilder builder)
{
builder.Append('%');
builder.Append(HexUpperChars[(c & 0xf0) >> 4]);
builder.Append(HexUpperChars[c & 0xf]);
// Inspired by https://source.dot.net/#System.Private.CoreLib/src/libraries/System.Private.CoreLib/src/System/Text/Rune.cs TryEncodeToUtf8
var value = (uint)rune.Value;
if (rune.IsAscii)
{
var byteValue = (byte)value;
builder.Append(CultureInfo.InvariantCulture, $"%{HexUpperChars[(byteValue & 0xf0) >> 4]}{HexUpperChars[byteValue & 0xf]}");
}
else if (rune.Value <= 0x7FFu)
{
// Scalar 00000yyy yyxxxxxx -> bytes [ 110yyyyy 10xxxxxx ]
var byteValue = (byte)((value + (0b110u << 11)) >> 6);
builder.Append(CultureInfo.InvariantCulture, $"%{HexUpperChars[(byteValue & 0xf0) >> 4]}{HexUpperChars[byteValue & 0xf]}");
byteValue = (byte)((value & 0x3Fu) + 0x80u);
builder.Append(CultureInfo.InvariantCulture, $"%{HexUpperChars[(byteValue & 0xf0) >> 4]}{HexUpperChars[byteValue & 0xf]}");
}
else if (rune.Value <= 0xFFFFu)
{
// Scalar zzzzyyyy yyxxxxxx -> bytes [ 1110zzzz 10yyyyyy 10xxxxxx ]
var byteValue = (byte)((value + (0b1110 << 16)) >> 12);
builder.Append(CultureInfo.InvariantCulture, $"%{HexUpperChars[(byteValue & 0xf0) >> 4]}{HexUpperChars[byteValue & 0xf]}");
byteValue = (byte)(((value & (0x3Fu << 6)) >> 6) + 0x80u);
builder.Append(CultureInfo.InvariantCulture, $"%{HexUpperChars[(byteValue & 0xf0) >> 4]}{HexUpperChars[byteValue & 0xf]}");
byteValue = (byte)((value & 0x3Fu) + 0x80u);
builder.Append(CultureInfo.InvariantCulture, $"%{HexUpperChars[(byteValue & 0xf0) >> 4]}{HexUpperChars[byteValue & 0xf]}");
}
else
{
// Scalar 000uuuuu zzzzyyyy yyxxxxxx -> bytes [ 11110uuu 10uuzzzz 10yyyyyy 10xxxxxx ]
var byteValue = (byte)((value + (0b11110 << 21)) >> 18);
builder.Append(CultureInfo.InvariantCulture, $"%{HexUpperChars[(byteValue & 0xf0) >> 4]}{HexUpperChars[byteValue & 0xf]}");
byteValue = (byte)(((value & (0x3Fu << 12)) >> 12) + 0x80u);
builder.Append(CultureInfo.InvariantCulture, $"%{HexUpperChars[(byteValue & 0xf0) >> 4]}{HexUpperChars[byteValue & 0xf]}");
byteValue = (byte)(((value & (0x3Fu << 6)) >> 6) + 0x80u);
builder.Append(CultureInfo.InvariantCulture, $"%{HexUpperChars[(byteValue & 0xf0) >> 4]}{HexUpperChars[byteValue & 0xf]}");
byteValue = (byte)((value & 0x3Fu) + 0x80u);
builder.Append(CultureInfo.InvariantCulture, $"%{HexUpperChars[(byteValue & 0xf0) >> 4]}{HexUpperChars[byteValue & 0xf]}");
}
}

// Attempt to decode using RFC 5987 encoding.
Expand Down
48 changes: 48 additions & 0 deletions src/Http/Headers/test/ContentDispositionHeaderValueTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System.Globalization;
using System.Text;

namespace Microsoft.Net.Http.Headers;

Expand Down Expand Up @@ -212,6 +213,53 @@ public void FileNameStar_NeedsEncoding_EncodedAndDecodedCorrectly()
Assert.Null(contentDisposition.FileNameStar.Value);
}

[Fact]
public void NonValidAscii_WhenNeedsEncoding_UsesHex()
{
var contentDisposition = new ContentDispositionHeaderValue("inline");
contentDisposition.FileNameStar = "a\u0080b";
Assert.Equal($"UTF-8\'\'a%C2%80b", contentDisposition.Parameters.First().Value); //%C2 added because the value in UTF-8 is encoded on 2 bytes.
}

[Fact]
public void LongValidAscii_FullyProcessedWithout()
{
var contentDisposition = new ContentDispositionHeaderValue("inline");
contentDisposition.FileNameStar = new string('a', 400); // 400 is larger to the max stackallow size
Assert.Equal($"UTF-8\'\'{new string('a', 400)}", contentDisposition.Parameters.First().Value);
}

[Fact]
public void FileNameStar_WhenNeedsEncoding_UsesHex()
{
var contentDisposition = new ContentDispositionHeaderValue("inline");
foreach (byte b in Enumerable.Range(0, 128))
{
contentDisposition.FileNameStar = $"a{(char)b}b";
if (b <= 0x20
|| b == '"'
|| b == '%'
|| (b >= 0x27 && b <= 0x2A)
|| b == ','
|| b == '/'
|| (b >= 0x3A && b <= 0x40)
|| (b >= 0x5B && b <= 0x5D)
|| (b >= 0x61 && b <= 0x5D)
|| b == '{'
|| b == '}'
|| b >= 0x7F)
{
var hexC = Convert.ToHexString([b]);
Assert.Equal($"UTF-8\'\'a%{hexC}b", contentDisposition.Parameters.First().Value);
}
else
{
Assert.Equal($"UTF-8\'\'a{(char)b}b", contentDisposition.Parameters.First().Value);
}
contentDisposition.Parameters.Remove(contentDisposition.Parameters.First());
}
}

[Fact]
public void FileNameStar_UnknownOrBadEncoding_PropertyFails()
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using BenchmarkDotNet.Attributes;
using Microsoft.Net.Http.Headers;

namespace Microsoft.AspNetCore.Http;

public class ContentDispositionHeaderValueBenchmarks
{
private readonly ContentDispositionHeaderValue _contentDisposition = new ContentDispositionHeaderValue("inline");

[Benchmark]
public void FileNameStarEncoding() => _contentDisposition.FileNameStar = "My TypicalFilename 2024 04 09 08:00:00.dat";

[Benchmark]
public void FileNameStarNoEncoding() => _contentDisposition.FileNameStar = "My_TypicalFilename_2024_04_09-08_00_00.dat";

}

0 comments on commit 84af599

Please sign in to comment.