Skip to content

Commit

Permalink
UnsafeWriter/UnsafeReader: simple methods for compact UTF8 string ser…
Browse files Browse the repository at this point in the history
…ialization
  • Loading branch information
controlflow committed Feb 19, 2024
1 parent 345190c commit 4884865
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 16 deletions.
25 changes: 25 additions & 0 deletions rd-net/Lifetimes/Serialization/UnsafeReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
using JetBrains.Annotations;
using JetBrains.Diagnostics;
using JetBrains.Util;
Expand Down Expand Up @@ -296,6 +297,30 @@ public Uri ReadUri()
return res;
}

/// <summary>
/// Reads a string stored in the compact UTF-8 layout: one marker byte, optionally
/// followed by an explicit int32 byte count, followed by the UTF-8 encoded bytes.
/// </summary>
/// <returns>
/// <c>null</c> when the marker is 0, an empty string when it is 1,
/// otherwise the text decoded from the UTF-8 payload.
/// </returns>
public string? ReadStringUTF8()
{
  var marker = ReadByte();
  if (marker == 0) return null;
  if (marker == 1) return "";

  // 0xFF announces an explicit int32 byte count; any other marker is the byte count plus one
  int payloadSize;
  if (marker == 0xFF)
    payloadSize = ReadInt32();
  else
    payloadSize = marker - 1;

  var payload = ReadRaw(payloadSize);

#if NET35
  // the NET35 build copies the payload into a managed array before decoding it
  var managedCopy = new byte[payloadSize];
  Marshal.Copy((IntPtr) payload, managedCopy, 0, length: payloadSize);
  return Encoding.UTF8.GetString(managedCopy);
#else
  return Encoding.UTF8.GetString(payload, payloadSize);
#endif
}

#endregion
#region Intern

Expand Down
59 changes: 49 additions & 10 deletions rd-net/Lifetimes/Serialization/UnsafeWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using System.Diagnostics.CodeAnalysis;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
using JetBrains.Annotations;
using JetBrains.Diagnostics;
using JetBrains.Util;
Expand Down Expand Up @@ -667,6 +668,44 @@ public void WriteString(string? value)
}
}

/// <summary>
/// Writes <paramref name="value"/> in a compact UTF-8 layout:
/// a single 0 byte for <c>null</c>, a single 1 byte for the empty string, otherwise
/// a length header followed by the UTF-8 encoded bytes.
/// </summary>
/// <param name="value">The string to write; may be <c>null</c>.</param>
public void WriteStringUTF8(string? value)
{
  if (value == null)
  {
    WriteByte(0); // marker for null
    return;
  }

  if (value.Length == 0)
  {
    WriteByte(1); // marker for the empty string
    return;
  }

  // Reserve the worst-case encoded size plus the header up front; the header is either
  // [1 byte: bytes_count + 1] or [0xFF marker] + [int32 bytes_count]
  var payloadCapacity = Encoding.UTF8.GetMaxByteCount(value.Length);
  var headerSize = payloadCapacity < 254 ? 1 : 5;
  var bookmark = Alloc(payloadCapacity + headerSize);

  fixed (char* chars = value)
  {
    // encode directly behind the space left for the header
    var payloadSize = Encoding.UTF8.GetBytes(chars, value.Length, bookmark.Data + headerSize, payloadCapacity);

    if (headerSize != 1)
    {
      // long form: [0xFF byte] + [int32 bytes_count] + [utf8 bytes]
      *bookmark.Data = 0xFF;
      *(int*)(bookmark.Data + 1) = payloadSize;
    }
    else
    {
      // short form: [byte bytes_count + 1] + [utf8 bytes]
      if (Mode.IsAssertion) Assertion.Assert(payloadSize < 254);
      *bookmark.Data = (byte)(payloadSize + 1);
    }

    // report how many of the reserved bytes were actually used (header included)
    bookmark.FinishRawWrite(payloadSize + headerSize);
  }
}

/// <summary>
/// Doesn't write length prefix, only string contents. If <paramref name="value"/> is <c>null</c>, does nothing.
/// </summary>
Expand Down Expand Up @@ -700,33 +739,33 @@ It is special method to avoid crash on mono before 5.0
of bugzilla: https://bugzilla.xamarin.com/show_bug.cgi?id=60625
It shouldn't be dropped while we support client Mono versions before 5.0
*/
private static void WriteStringContentInternal(UnsafeWriter wrt, string value, int offset, int count)
private static void WriteStringContentInternal(UnsafeWriter writer, string value, int offset, int count)
{
if (ourOldMonoFlag)
{
WriteStringContentInternalBeforeMono5(wrt, value, offset, count);
WriteStringContentInternalBeforeMono5(writer, value, offset, count);
}
else
{
WriteStringContentInternalAfterMono5(wrt, value, offset, count);
WriteStringContentInternalAfterMono5(writer, value, offset, count);
}
}

// Mono 5.4 try to inline this method and crash.
//[MethodImpl(MethodImplAdvancedOptions.AggressiveInlining)]
private static void WriteStringContentInternalAfterMono5(UnsafeWriter wrt, string value, int offset, int count)
// Mono 5.4 tries to inline this method and crashes.
// [MethodImpl(MethodImplAdvancedOptions.AggressiveInlining)]
private static void WriteStringContentInternalAfterMono5(UnsafeWriter writer, string value, int offset, int count)
{
fixed (char* c = value)
{
wrt.Write((byte*) (c + offset), count * sizeof(char));
writer.Write((byte*) (c + offset), count * sizeof(char));
}
}

private static void WriteStringContentInternalBeforeMono5(UnsafeWriter wrt, string value, int offset, int count)
private static void WriteStringContentInternalBeforeMono5(UnsafeWriter writer, string value, int offset, int count)
{
for (var i = offset; i < offset + count; i++)
for (var index = offset; index < offset + count; index++)
{
wrt.WriteChar(value[i]);
writer.WriteChar(value[index]);
}
}

Expand Down
1 change: 1 addition & 0 deletions rd-net/Rd.sln.DotSettings
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
<wpf:ResourceDictionary xml:space="preserve" xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml" xmlns:s="clr-namespace:System;assembly=mscorlib" xmlns:ss="urn:shemas-jetbrains-com:settings-storage-xaml" xmlns:wpf="http://schemas.microsoft.com/winfx/2006/xaml/presentation">
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/Abbreviations/=UTF/@EntryIndexedValue">UTF</s:String>
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/PredefinedNamingRules/=PrivateInstanceFields/@EntryIndexedValue">&lt;Policy Inspect="True" Prefix="my" Suffix="" Style="AaBb" /&gt;</s:String>
<s:String x:Key="/Default/CodeStyle/Naming/CSharpNaming/PredefinedNamingRules/=PrivateStaticFields/@EntryIndexedValue">&lt;Policy Inspect="True" Prefix="our" Suffix="" Style="AaBb" /&gt;</s:String></wpf:ResourceDictionary>
59 changes: 53 additions & 6 deletions rd-net/Test.Lifetimes/Serialization/UnsafeMarshallersTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,11 @@ public void Test1()
}

[Test]
[TestCase("")]
[TestCase("x")]
[TestCase("hello")]
[TestCase("привет")]
[TestCase("one два three")]
public void TestUtf8Encoding([NotNull] string value)
[TestCaseSource(nameof(GenerateSamplesForUtf8Tests))]
public void TestUtf8Encoding([CanBeNull] string value)
{
if (value == null) return;

var encoding = Encoding.UTF8;

byte[] bytes;
Expand Down Expand Up @@ -136,6 +134,55 @@ public void TestUtf8Encoding([NotNull] string value)
}
}

// Round-trips each sample through WriteStringUTF8 / ReadStringUTF8 and checks that
// the reader stops exactly at the end of the string by reading back a sentinel
// value written right after it.
[Test]
[TestCaseSource(nameof(GenerateSamplesForUtf8Tests))]
public void TestUtf8Encoding2([CanBeNull] string value)
{
  byte[] bytes;
  using (var cookie = UnsafeWriter.NewThreadLocalWriter())
  {
    cookie.Writer.WriteStringUTF8(value);
    cookie.Writer.WriteUInt32(0xDEADBEEF); // sentinel that follows the string
    bytes = cookie.CloneData();
  }

  fixed (byte* ptr = bytes)
  {
    var reader = UnsafeReader.CreateReader(ptr, bytes.Length);
    var value2 = reader.ReadStringUTF8();
    var marker = reader.ReadUInt32();

    // Assert.AreEqual takes (expected, actual); keep that order so failure messages read correctly
    Assert.AreEqual(0xDEADBEEF, marker);
    Assert.AreEqual(value, value2);
  }
}

// Supplies the inputs for the UTF-8 tests: null, the empty string, short ASCII,
// Cyrillic and mixed strings, plus runs of a single repeated character
// (ASCII and Cyrillic) of 100, 200 and 10000 chars.
[ItemCanBeNull]
private static string[] GenerateSamplesForUtf8Tests()
{
  string Repeat(char symbol, int times) => new string(symbol, times);

  string[] samples =
  {
    null,
    "",
    " ",
    "x",
    "xx",
    "abc",
    "abc_def",
    "привет",
    "one два three",
    "abra_кадабра",
    Repeat('a', 100),
    Repeat('щ', 100),
    Repeat('b', 200),
    Repeat('г', 200),
    Repeat('c', 10000),
    Repeat('ю', 10000),
  };

  return samples;
}

#if NET472
private UnsafeWriter.Cookie myCookie;
private UnsafeReader myReader;
Expand Down

0 comments on commit 4884865

Please sign in to comment.