Skip to content

Commit

Permalink
[.NET] Improved parsing time (#336)
Browse files Browse the repository at this point in the history
* [.NET] Add benchmarks

* [.NET] Gherkin.Parser: avoid delegate creation

* [.NET] GherkinLine.SplitCells: Avoid string allocations

* [.NET] Optimize AstNode.subItems handling

* [.NET] Remove old Mono workaround and use string.StartsWith with ordinal everywhere

* [.NET] GherkinLine.GetTableCells: avoid string concatenation

* [.NET] AstBuilder.GetCells: avoid unnecessary enumerator and array collections

* [.NET] GherkinDialectProvider.ParseJsonContent: Use SourceCodeGenerator for System.Text.Json

* [.NET] GherkinLine.GetTags: avoid calling RegEx
  • Loading branch information
obligaron authored Dec 18, 2024
1 parent acd20c2 commit be363bc
Show file tree
Hide file tree
Showing 14 changed files with 435 additions and 113 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ This document is formatted according to the principles of [Keep A CHANGELOG](htt

### Fixed
- [c] slight update to existing CMakeFiles.txt to propagate VERSION. Close #320 ([#328](https://github.com/cucumber/gherkin/pull/328))
- [.NET] Improved parsing time
- [.NET] Use string-ordinal comparison consistently and remove old Mono workaround

### Changed
- [cpp] add generic support for ABI versioning with VERSION ([#328](https://github.com/cucumber/gherkin/pull/328))
Expand Down
20 changes: 20 additions & 0 deletions dotnet/Gherkin.Benchmarks/Gherkin.Benchmarks.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFrameworks>net8.0;net481</TargetFrameworks>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="BenchmarkDotNet" Version="0.14.0" />
<PackageReference Include="System.Collections.Immutable" Version="8.0.0" />
<PackageReference Include="System.Reflection.Metadata" Version="8.0.1" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\Gherkin\Gherkin.csproj" />
</ItemGroup>

</Project>
43 changes: 43 additions & 0 deletions dotnet/Gherkin.Benchmarks/GherkingParser.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
using BenchmarkDotNet.Attributes;
using Gherkin.Ast;
using System.Text;

namespace Gherkin.Benchmarks;

/// <summary>
/// BenchmarkDotNet benchmarks that parse a feature file which has been copied into memory,
/// once with a freshly created <c>Parser</c> and once with a reused parser and token matcher.
/// </summary>
public class GherkingParser
{
    /// <summary>
    /// File name of the feature file to parse; resolved against the "good" test data folder
    /// in <see cref="GlobalSetup"/>.
    /// </summary>
    [Params("very_long.feature", "tags.feature")]
    public string? FeatureFile { get; set; }

    readonly MemoryStream _TestData = new();
    readonly Parser _ParserReused = new();
    readonly TokenMatcher _TokenMatcher = new();
    StreamReader? _Reader;

    /// <summary>
    /// Copies the selected feature file into <c>_TestData</c> and creates the reader that
    /// both benchmarks share, so that no file access happens inside the measured methods.
    /// </summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        var fullPathToTestFeatureFile = Path.Combine(TestFileProvider.GetTestFileFolder("good"), FeatureFile!);

        using var fileStream = new FileStream(fullPathToTestFeatureFile, FileMode.Open, FileAccess.Read);

        fileStream.CopyTo(_TestData);

        // leaveOpen: true - the reader must not close the shared in-memory stream.
        _Reader = new StreamReader(_TestData, Encoding.UTF8, false, 4096, true);
    }

    /// <summary>Parses the in-memory feature file with a newly created parser.</summary>
    /// <returns>The parsed document.</returns>
    [Benchmark]
    public GherkinDocument Parser()
    {
        var parser = new Parser();
        return parser.Parse(new TokenScanner(RewindReader()));
    }

    /// <summary>Parses the in-memory feature file with the reused parser and token matcher.</summary>
    /// <returns>The parsed document.</returns>
    [Benchmark]
    public GherkinDocument ParserReuse()
    {
        return _ParserReused.Parse(new TokenScanner(RewindReader()), _TokenMatcher);
    }

    /// <summary>
    /// Moves the shared stream back to its start and returns the shared reader, ready to read
    /// the file from the beginning again.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// <see cref="GlobalSetup"/> has not been executed yet.
    /// </exception>
    private StreamReader RewindReader()
    {
        var reader = _Reader
            ?? throw new InvalidOperationException($"{nameof(GlobalSetup)} must run before the benchmarks.");

        _TestData.Seek(0, SeekOrigin.Begin);

        // The position of the underlying stream was changed behind the reader's back, so its
        // internal buffer has to be dropped to keep reader and stream in sync.
        reader.DiscardBufferedData();

        return reader;
    }
}
24 changes: 24 additions & 0 deletions dotnet/Gherkin.Benchmarks/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Diagnosers;
using BenchmarkDotNet.Environments;
using BenchmarkDotNet.Jobs;
using BenchmarkDotNet.Running;

namespace Gherkin.Benchmarks;

/// <summary>
/// Entry point of the benchmark executable: builds the BenchmarkDotNet configuration
/// and runs the <see cref="GherkingParser"/> benchmarks with it.
/// </summary>
internal class Program
{
    static void Main(string[] args)
    {
        // Pick the base configuration for the current build flavour.
#if DEBUG
        IConfig baseConfig = new DebugInProcessConfig();
#else
        IConfig baseConfig = DefaultConfig.Instance
            .AddJob(Job.Default.WithRuntime(CoreRuntime.Core80))
            .AddJob(Job.Default.WithRuntime(ClrRuntime.Net481));
#endif

        // The memory diagnoser is attached in both build flavours.
        var config = baseConfig.AddDiagnoser(MemoryDiagnoser.Default);

        _ = BenchmarkRunner.Run<GherkingParser>(config);
    }
}
15 changes: 15 additions & 0 deletions dotnet/Gherkin.Benchmarks/TestFileProvider.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
namespace Gherkin.Benchmarks;

/// <summary>
/// Locates the shared <c>testdata</c> folders relative to the current working directory.
/// </summary>
public class TestFileProvider
{
    /// <summary>
    /// Builds the absolute path of a test data category folder.
    /// </summary>
    /// <param name="category">Name of the sub folder below <c>testdata</c>.</param>
    /// <returns>
    /// The full path obtained by walking up from <see cref="Environment.CurrentDirectory"/>
    /// and appending <c>testdata/&lt;category&gt;</c>.
    /// </returns>
    public static string GetTestFileFolder(string category)
    {
        var startFolder = Environment.CurrentDirectory;

        // In DEBUG builds artefacts are not created in subdirectories, so no extra levels are
        // needed there; non-DEBUG builds for .NET 6 or later start four levels deeper.
#if !DEBUG && NET6_0_OR_GREATER
        startFolder = Path.Combine(startFolder, "..", "..", "..", "..");
#endif

        var categoryFolder = Path.Combine(startFolder, "..", "..", "..", "..", "..", "testdata", category);
        return Path.GetFullPath(categoryFolder);
    }
}
6 changes: 6 additions & 0 deletions dotnet/Gherkin.sln
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
Makefile = Makefile
EndProjectSection
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Gherkin.Benchmarks", "Gherkin.Benchmarks\Gherkin.Benchmarks.csproj", "{4DC5C858-3F32-44E7-8FF6-7D85A16F7FF7}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand All @@ -32,6 +34,10 @@ Global
{A0DEA4BA-3A79-4C05-87F2-7C7C9DE8B245}.Debug|Any CPU.Build.0 = Debug|Any CPU
{A0DEA4BA-3A79-4C05-87F2-7C7C9DE8B245}.Release|Any CPU.ActiveCfg = Release|Any CPU
{A0DEA4BA-3A79-4C05-87F2-7C7C9DE8B245}.Release|Any CPU.Build.0 = Release|Any CPU
{4DC5C858-3F32-44E7-8FF6-7D85A16F7FF7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{4DC5C858-3F32-44E7-8FF6-7D85A16F7FF7}.Debug|Any CPU.Build.0 = Debug|Any CPU
{4DC5C858-3F32-44E7-8FF6-7D85A16F7FF7}.Release|Any CPU.ActiveCfg = Release|Any CPU
{4DC5C858-3F32-44E7-8FF6-7D85A16F7FF7}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down
13 changes: 9 additions & 4 deletions dotnet/Gherkin/AstBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -279,8 +279,9 @@ protected virtual void CheckCellCountConsistency(TableRow[] rows)
return;

int cellCount = rows[0].Cells.Count();
foreach (var row in rows)
for (int i = 1; i < rows.Length; i++)
{
var row = rows[i];
if (row.Cells.Count() != cellCount)
{
HandleAstError("inconsistent cell count within the table", row.Location);
Expand All @@ -295,9 +296,13 @@ protected virtual void HandleAstError(string message, Location location)

/// <summary>
/// Creates one table cell for every matched item of a table row token.
/// </summary>
/// <param name="tableRowToken">Token whose <c>MatchedItems</c> describe the cells of the row.</param>
/// <returns>The cells, in the same order as the matched items.</returns>
private TableCell[] GetCells(Token tableRowToken)
{
    // Fill a pre-sized array directly instead of going through Select(...).ToArray().
    var matchedItems = tableRowToken.MatchedItems;
    var cells = new TableCell[matchedItems.Length];
    for (int i = 0; i < cells.Length; i++)
    {
        var cellItem = matchedItems[i];
        cells[i] = CreateTableCell(GetLocation(tableRowToken, cellItem.Column), cellItem.Text);
    }
    return cells;
}

private static Step[] GetSteps(AstNode scenarioDefinitionNode)
Expand Down
61 changes: 50 additions & 11 deletions dotnet/Gherkin/AstNode.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ namespace Gherkin;

public class AstNode(RuleType ruleType)
{
private readonly Dictionary<RuleType, IList<object>> subItems = new Dictionary<RuleType, IList<object>>();
private readonly Dictionary<RuleType, object> subItems = new Dictionary<RuleType, object>();

public RuleType RuleType { get; } = ruleType;

Expand All @@ -18,17 +18,50 @@ public IEnumerable<Token> GetTokens(TokenType tokenType)

/// <summary>
/// Returns the one sub-item of type <typeparamref name="T"/> stored under a rule type.
/// </summary>
/// <param name="ruleType">Rule type the item was added under.</param>
/// <returns>
/// The stored item; <c>default</c> when nothing is stored under <paramref name="ruleType"/>
/// or when the only stored value is not a <typeparamref name="T"/>.
/// </returns>
/// <exception cref="InvalidOperationException">
/// Several values are stored under <paramref name="ruleType"/> and either none or more than
/// one of them is a <typeparamref name="T"/>.
/// </exception>
public T GetSingle<T>(RuleType ruleType)
{
    if (!subItems.TryGetValue(ruleType, out var stored))
        return default;

    // A List<object> entry means several values were added for this rule type.
    if (stored is List<object> candidates)
    {
        T match = default;
        bool found = false;
        foreach (var candidate in candidates)
        {
            if (candidate is T typed)
            {
                if (found)
                    throw new InvalidOperationException();
                match = typed;
                found = true;
            }
        }
        if (!found)
            throw new InvalidOperationException();
        return match;
    }

    // Otherwise the entry is the single value itself.
    if (stored is T single)
        return single;
    return default;
}

/// <summary>
/// Lazily enumerates the sub-items of type <typeparamref name="T"/> stored under a rule type.
/// </summary>
/// <param name="ruleType">Rule type the items were added under.</param>
/// <returns>
/// The matching items in insertion order; an empty sequence when nothing is stored under
/// <paramref name="ruleType"/>. Stored values that are not a <typeparamref name="T"/> are skipped.
/// </returns>
public IEnumerable<T> GetItems<T>(RuleType ruleType)
{
    if (!subItems.TryGetValue(ruleType, out var stored))
        yield break;

    // A List<object> entry means several values were added for this rule type.
    if (stored is List<object> candidates)
    {
        foreach (var candidate in candidates)
        {
            if (candidate is T typed)
                yield return typed;
        }
    }
    // Otherwise the entry is the single value itself.
    else if (stored is T single)
    {
        yield return single;
    }
}

public void SetSingle<T>(RuleType ruleType, T value)
Expand All @@ -46,12 +79,18 @@ public void AddRange<T>(RuleType ruleType, IEnumerable<T> values)

/// <summary>
/// Stores a sub-item under a rule type.
/// </summary>
/// <param name="ruleType">Rule type to store the item under.</param>
/// <param name="obj">Item to store.</param>
public void Add<T>(RuleType ruleType, T obj)
{
    if (!subItems.TryGetValue(ruleType, out var stored))
    {
        // First value for this rule type: keep it directly, without allocating a list.
        subItems.Add(ruleType, obj);
    }
    else if (stored is List<object> existing)
    {
        existing.Add(obj);
    }
    else
    {
        // Second value for this rule type: switch the entry to a list holding both values.
        List<object> promoted = [stored, obj];
        subItems[ruleType] = promoted;
    }
}
}
9 changes: 8 additions & 1 deletion dotnet/Gherkin/GherkinDialectProvider.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using Gherkin.Ast;
using System.Text.Json;
using System.Text.Json.Serialization;

namespace Gherkin;

Expand Down Expand Up @@ -52,7 +53,7 @@ protected virtual Dictionary<string, GherkinLanguageSetting> LoadLanguageSetting

/// <summary>
/// Deserializes the JSON content of the languages file.
/// </summary>
/// <param name="languagesFileContent">JSON text to deserialize.</param>
/// <returns>The deserialized dictionary of <see cref="GherkinLanguageSetting"/> values.</returns>
protected virtual Dictionary<string, GherkinLanguageSetting> ParseJsonContent(string languagesFileContent)
{
    // Web defaults, with type metadata taken from the source-generated context.
    var options = new JsonSerializerOptions(JsonSerializerDefaults.Web)
    {
        TypeInfoResolver = SourceGenerationContext.Default
    };
    return JsonSerializer.Deserialize<Dictionary<string, GherkinLanguageSetting>>(languagesFileContent, options);
}

protected virtual bool TryGetDialect(string language, Dictionary<string, GherkinLanguageSetting> gherkinLanguageSettings, Location location, out GherkinDialect dialect)
Expand Down Expand Up @@ -113,6 +114,12 @@ protected static GherkinDialect GetFactoryDefault()
}
}

/// <summary>
/// System.Text.Json serializer context declared for
/// <c>Dictionary&lt;string, GherkinLanguageSetting&gt;</c>. The class body is intentionally
/// empty; its members are expected to come from the other part of this partial class.
/// </summary>
[JsonSourceGenerationOptions]
[JsonSerializable(typeof(Dictionary<string, GherkinLanguageSetting>))]
internal partial class SourceGenerationContext : JsonSerializerContext
{
}

public class GherkinLanguageSetting
{
public string Name { get; set; }
Expand Down
1 change: 1 addition & 0 deletions dotnet/Gherkin/GherkinLanguageConstants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ public static class GherkinLanguageConstants
public const string COMMENT_PREFIX = "#";
public const string TITLE_KEYWORD_SEPARATOR = ":";
public const string TABLE_CELL_SEPARATOR = "|";
public const char TABLE_CELL_SEPARATOR_CHAR = '|';
public const char TABLE_CELL_ESCAPE_CHAR = '\\';
public const char TABLE_CELL_NEWLINE_ESCAPE = 'n';
public const string DOCSTRING_SEPARATOR = "\"\"\"";
Expand Down
Loading

0 comments on commit be363bc

Please sign in to comment.