Skip to content

Commit

Permalink
Improve Character method performance (#75)
Browse files Browse the repository at this point in the history
* add SkipLocalsInit attribute
* use pre-calculated answers for chars, generated by a test case that runs the old methods
* use HexConverter borrowed from the .NET sources, which has superior check performance for the common case
  • Loading branch information
lahma authored Nov 30, 2022
1 parent c57f75e commit 233cc69
Show file tree
Hide file tree
Showing 8 changed files with 1,892 additions and 50 deletions.
1,650 changes: 1,650 additions & 0 deletions src/Parlot/Character.Generated.cs

Large diffs are not rendered by default.

75 changes: 26 additions & 49 deletions src/Parlot/Character.cs
Original file line number Diff line number Diff line change
@@ -1,36 +1,40 @@
using System;
using System.Runtime.CompilerServices;

namespace Parlot
{
public static class Character
[Flags]
internal enum CharacterMask : byte
{
public static bool IsDecimalDigit(char ch)
=> ch >= '0' && ch <= '9';
None = 0,
IdentifierStart = 1,
IdentifierPart = 2,
WhiteSpace = 4,
WhiteSpaceOrNewLine = 8
}

public static partial class Character
{
/// <summary>
/// Tells whether <paramref name="ch"/> lies between '0' and '9' inclusive.
/// </summary>
/// <param name="ch">The character to classify.</param>
/// <returns><c>true</c> when the character is one of the ten ASCII digits.</returns>
public static bool IsDecimalDigit(char ch)
{
    return IsInRange(ch, '0', '9');
}

public static bool IsInRange(char ch, char a, char b)
=> ch >= a && ch <= b;
/// <summary>
/// Tells whether <paramref name="ch"/> lies in the inclusive range
/// [<paramref name="min"/>, <paramref name="max"/>].
/// </summary>
/// <param name="ch">The character to test.</param>
/// <param name="min">Inclusive lower bound.</param>
/// <param name="max">Inclusive upper bound.</param>
/// <returns><c>true</c> when <c>min &lt;= ch &lt;= max</c>.</returns>
/// <remarks>
/// Both subtractions are carried out in unsigned arithmetic, so a character below
/// <paramref name="min"/> wraps around to a very large offset and fails the single comparison.
/// Callers are expected to pass <c>min &lt;= max</c>; otherwise the width itself wraps around
/// and the result is not meaningful.
/// </remarks>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool IsInRange(char ch, char min, char max)
{
    uint offset = ch - (uint) min;
    uint width = max - (uint) min;

    return offset <= width;
}

public static bool IsHexDigit(char ch)
=> IsDecimalDigit(ch) ||
(ch >= 'A' && ch <= 'F') ||
(ch >= 'a' && ch <= 'f');
/// <summary>
/// Tells whether <paramref name="ch"/> is a hexadecimal digit, as decided by
/// <see cref="HexConverter.IsHexChar(int)"/>.
/// </summary>
/// <param name="ch">The character to classify.</param>
/// <returns>The result of <see cref="HexConverter.IsHexChar(int)"/> for <paramref name="ch"/>.</returns>
public static bool IsHexDigit(char ch)
{
    return HexConverter.IsHexChar(ch);
}

public static bool IsIdentifierStart(char ch)
=> (ch == '$') || (ch == '_') ||
(ch >= 'A' && ch <= 'Z') ||
(ch >= 'a' && ch <= 'z');
{
return (_characterData[ch] & (byte) CharacterMask.IdentifierStart) != 0;
}

public static bool IsIdentifierPart(char ch)
=> IsIdentifierStart(ch) || IsDecimalDigit(ch);
{
return (_characterData[ch] & (byte) CharacterMask.IdentifierPart) != 0;
}

public static bool IsWhiteSpace(char ch)
{
return (ch <= 32 &&
((ch == 32) || // space
(ch == '\t'))) // horizontal tab
|| (ch == 0xA0) // non-breaking space
|| (ch >= 0x1680 && IsWhiteSpaceNonAscii(ch))
;
return (_characterData[ch] & (byte) CharacterMask.WhiteSpace) != 0;
}

public static bool IsWhiteSpaceNonAscii(char ch)
Expand All @@ -47,19 +51,10 @@ public static bool IsWhiteSpaceNonAscii(char ch)

public static bool IsWhiteSpaceOrNewLine(char ch)
{
return (ch <= 32 &&
((ch == 32) || // space
(ch == '\n') ||
(ch == '\r') ||
(ch == '\t') || // horizontal tab
(ch == '\v')))
|| (ch == 0xA0) // non-breaking space
|| (ch >= 0x1680 && IsWhiteSpaceNonAscii(ch))
;
return (_characterData[ch] & (byte) CharacterMask.WhiteSpaceOrNewLine) != 0;
}

public static bool IsNewLine(char ch)
=> (ch == '\n') || (ch == '\r') || (ch == '\v');
/// <summary>
/// Tells whether <paramref name="ch"/> is treated as a new-line character:
/// line feed, carriage return or vertical tab.
/// </summary>
/// <param name="ch">The character to classify.</param>
/// <returns><c>true</c> for '\n', '\r' and '\v'; <c>false</c> for every other character.</returns>
public static bool IsNewLine(char ch)
{
    switch (ch)
    {
        case '\n':
        case '\r':
        case '\v':
            return true;

        default:
            return false;
    }
}

public static char ScanHexEscape(string text, int index, out int length)
{
Expand Down Expand Up @@ -155,25 +150,7 @@ public static TextSpan DecodeString(TextSpan span)
return new TextSpan(result);
}

private static int HexValue(char ch)
{
if (ch >= '0' && ch <= '9')
{
return ch - 48;
}
else if (ch >= 'a' && ch <= 'f')
{
return ch - 'a' + 10;
}
else if (ch >= 'A' && ch <= 'F')
{
return ch - 'A' + 10;
}
else
{
return 0;
}
}
// Numeric value of a hexadecimal digit, obtained from HexConverter.FromChar.
// NOTE(review): FromChar reports a non-hex character as 0xFF, whereas the hand-written
// version this replaces returned 0 in that case - confirm callers validate their input first.
private static int HexValue(char ch)
{
    return HexConverter.FromChar(ch);
}

#if NETSTANDARD2_0
private delegate void SpanAction<T, in TArg>(T[] span, TArg arg);
Expand Down
69 changes: 69 additions & 0 deletions src/Parlot/HexConverter.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

// https://github.com/dotnet/runtime/blob/38ca26b27b9e7a867e6ff69eec3cabbfb4e9e1cf/src/libraries/Common/src/System/HexConverter.cs

using System;
using System.Runtime.CompilerServices;

/// <summary>
/// Classifies and decodes hexadecimal characters. Adapted from the .NET runtime's HexConverter.
/// </summary>
internal static class HexConverter
{
    /// <summary>
    /// Tells whether <paramref name="c"/> is one of the characters <c>0-9</c>, <c>A-F</c> or <c>a-f</c>.
    /// </summary>
    /// <param name="c">The character code to test. Any <see cref="int"/> is accepted, including negative values.</param>
    /// <returns><c>true</c> when <paramref name="c"/> is a hexadecimal digit; otherwise <c>false</c>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static bool IsHexChar(int c)
    {
        if (IntPtr.Size == 8)
        {
            // This code path, when used, has no branches and doesn't depend on cache hits,
            // so it's faster and does not vary in speed depending on input data distribution.
            // We only use this logic on 64-bit systems, as using 64 bit values would otherwise
            // be much slower than just using the lookup table anyway (no hardware support).
            // The magic constant 18428868213665201664 is a 64 bit value containing 1s at the
            // indices corresponding to all the valid hex characters (ie. "0123456789ABCDEFabcdef")
            // minus 48 (ie. '0'), and backwards (so from the most significant bit and downwards).
            // The offset of 48 for each bit is necessary so that the entire range fits in 64 bits.
            // First, we subtract '0' from the input digit (after casting to uint to account for any
            // negative inputs). Note that even if this subtraction underflows, this happens before
            // the result is zero-extended to ulong, meaning that `i` will always have upper 32 bits
            // equal to 0. We then left shift the constant with this offset, and apply a bitmask that
            // has the highest bit set (the sign bit) if and only if `c` is in the ['0', '0' + 64) range.
            // Then we only need to check whether this final result is less than 0: this will only be
            // the case if both `i` was in fact the index of a set bit in the magic constant, and also
            // `c` was in the allowed range (this ensures that false positive bit shifts are ignored).
            ulong i = (uint) c - '0';
            ulong shift = 18428868213665201664UL << (int) i;
            ulong mask = i - 64;

            return (long) (shift & mask) < 0 ? true : false;
        }

        // Table-based fallback; FromChar answers 0xFF for anything that is not a hex digit.
        return FromChar(c) != 0xFF;
    }

    /// <summary>
    /// Converts a hexadecimal digit to its numeric value.
    /// </summary>
    /// <param name="c">The character code to convert. Any <see cref="int"/> is accepted, including negative values.</param>
    /// <returns>
    /// A value from 0 to 15 when <paramref name="c"/> is a hexadecimal digit; <c>0xFF</c> otherwise,
    /// including when <paramref name="c"/> is negative or lies beyond the end of the lookup table.
    /// </returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static int FromChar(int c)
    {
        // A single unsigned comparison rejects both negative values and values past the end of
        // the table, so the span is never indexed out of range.
        return (uint) c >= (uint) CharToHexLookup.Length ? 0xFF : CharToHexLookup[c];
    }

    /// <summary>Map from an ASCII char to its hex value, e.g. arr['b'] == 11. 0xFF means it's not a hex digit.</summary>
    public static ReadOnlySpan<byte> CharToHexLookup => new byte[]
    {
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 15
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 31
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 47
        0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 63
        0xFF, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 79
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 95
        0xFF, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 111
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 127
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 143
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 159
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 175
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 191
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 207
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 223
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, // 239
        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF // 255
    };

}
1 change: 1 addition & 0 deletions src/Parlot/Parlot.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
<SignAssembly>true</SignAssembly>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
<NoWarn>$(NoWarn);1591</NoWarn>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>

<ItemGroup>
Expand Down
1 change: 1 addition & 0 deletions src/Parlot/Properties/AssemblyInfo.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
// Applies the SkipLocalsInit attribute to the whole module.
// NOTE(review): presumably this makes the compiler stop emitting the `.locals init` flag, so
// locals are not zero-initialised on method entry - confirm against the attribute's documentation.
// The same commit turns on AllowUnsafeBlocks in Parlot.csproj, which this presumably requires.
[module: System.Runtime.CompilerServices.SkipLocalsInit]
14 changes: 14 additions & 0 deletions src/Parlot/SkipLocalsInitAttribute.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
namespace System.Runtime.CompilerServices;

/// <summary>
/// Member-less marker attribute that can be placed on modules, classes, structs, interfaces,
/// constructors, methods, properties and events. It is not inherited by derived types or overrides.
/// </summary>
/// <remarks>
/// NOTE(review): this appears to be a local stand-in for the framework attribute of the same
/// name, for targets where the framework does not provide it - confirm against the target frameworks.
/// </remarks>
[AttributeUsage(
    AttributeTargets.Event
    | AttributeTargets.Property
    | AttributeTargets.Method
    | AttributeTargets.Constructor
    | AttributeTargets.Interface
    | AttributeTargets.Struct
    | AttributeTargets.Class
    | AttributeTargets.Module,
    Inherited = false)]
internal sealed class SkipLocalsInitAttribute : Attribute { }
Loading

0 comments on commit 233cc69

Please sign in to comment.