
Commit b78465d
Sorts docs by id for, don't cast, less allocations for better perf. Updates benchmarks and adds TODOs
Shazwazza committed Sep 18, 2024
1 parent c5b761e commit b78465d
Showing 10 changed files with 307 additions and 39 deletions.
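For orientation before the per-file diff: the renamed Lucene benchmarks below all share the same shape, and the new LuceneSortedDocIds variant additionally visits hits in ascending doc id order before loading stored fields, which appears to be the "sorts docs by id" part of the commit message. A minimal sketch of that pattern (illustrative only; searcherManager, query and maxResults are placeholders standing in for the benchmark's fields):

    // Sketch of the doc-id-ordered retrieval pattern measured by LuceneSortedDocIds.
    using var context = searcherManager.GetContext();      // Lucene.Net.Index.Extensions
    var searcher = context.Reference;
    var collector = TopScoreDocCollector.Create(maxResults, null, true);
    searcher.Search(query, collector);
    var topDocs = collector.GetTopDocs(0, maxResults);
    // Visiting hits in ascending doc id order keeps stored-field reads roughly
    // sequential rather than random across the index.
    foreach (var scoreDoc in topDocs.ScoreDocs.OrderBy(x => x.Doc))   // System.Linq
    {
        var doc = searcher.Doc(scoreDoc.Doc);               // load stored fields once per hit
        // map doc, scoreDoc.Score and scoreDoc.ShardIndex into a typed result here
    }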
215 changes: 199 additions & 16 deletions src/Examine.Benchmarks/ConcurrentSearchBenchmarks.cs
@@ -3,22 +3,25 @@
using System.ComponentModel;
using System.IO;
using System.Linq;
using System.Runtime.Versioning;
using System.Threading;
using System.Threading.Tasks;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Jobs;
using Examine.Lucene.Search;
using Examine.Search;
using Examine.Test;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Codecs.Lucene46;
using Lucene.Net.Index;
using Lucene.Net.Index.Extensions;
using Lucene.Net.QueryParsers.Classic;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Lucene.Net.Util;
using Microsoft.Extensions.Logging;
using Microsoft.VSDiagnostics;
using Directory = Lucene.Net.Store.Directory;

[assembly: Config(typeof(MyDefaultConfig))]
@@ -151,11 +154,11 @@ After changing to use singleton indexers/managers
*/
[ShortRunJob]
[MediumRunJob(RuntimeMoniker.Net80)]
[ThreadingDiagnoser]
[MemoryDiagnoser]
[DotNetCountersDiagnoser]
[CPUUsageDiagnoser]
//[DotNetCountersDiagnoser]
//[CPUUsageDiagnoser]
public class ConcurrentSearchBenchmarks : ExamineBaseTest
{
private readonly StandardAnalyzer _analyzer = new StandardAnalyzer(LuceneInfo.CurrentVersion);
@@ -181,9 +184,15 @@ public override void Setup()
var tempIndexer = InitializeAndIndexItems(_tempBasePath, _analyzer, out var indexDir);
tempIndexer.Dispose();
_indexDir = FSDirectory.Open(indexDir);
_writer = new IndexWriter(_indexDir, new IndexWriterConfig(LuceneVersion.LUCENE_48, _analyzer));
var writerConfig = new IndexWriterConfig(LuceneVersion.LUCENE_48, _analyzer);
//writerConfig.SetMaxBufferedDocs(1000);
//writerConfig.SetReaderTermsIndexDivisor(4);
//writerConfig.SetOpenMode(OpenMode.APPEND);
//writerConfig.SetReaderPooling(true);
//writerConfig.SetCodec(new Lucene46Codec());
_writer = new IndexWriter(_indexDir, writerConfig);
var trackingWriter = new TrackingIndexWriter(_writer);
_searcherManager = new SearcherManager(trackingWriter.IndexWriter, applyAllDeletes: false, new SearcherFactory());
_searcherManager = new SearcherManager(trackingWriter.IndexWriter, applyAllDeletes: true, new SearcherFactory());
}

[GlobalCleanup]
@@ -199,13 +208,13 @@ public override void TearDown()
System.IO.Directory.Delete(_tempBasePath, true);
}

[Params(1, 15, 30)]
[Params(/*1, 15, */30)]
public int ThreadCount { get; set; }

[Params(10, 100, 1000)]
[Params(10/*, 100, 1000*/)]
public int MaxResults { get; set; }

[Benchmark]
[Benchmark(Baseline = true)]
public async Task ExamineStandard()
{
var tasks = new List<Task>();
@@ -235,7 +244,7 @@ public async Task ExamineStandard()
}

[Benchmark]
public async Task LuceneSimple()
public async Task LuceneAcquireAlways()
{
var tasks = new List<Task>();

@@ -256,22 +265,195 @@ public async Task LuceneSimple()
var topDocsCollector = TopScoreDocCollector.Create(MaxResults, null, true);
searcher.Search(query, topDocsCollector);
var topDocs = topDocsCollector.GetTopDocs(0, MaxResults);
var totalItemCount = topDocs.TotalHits;
var results = new List<ISearchResult>(topDocs.ScoreDocs.Length);
for (var i = 0; i < topDocs.ScoreDocs.Length; i++)
var results = new List<LuceneSearchResult>(topDocs.ScoreDocs.Length);
foreach (var scoreDoc in topDocs.ScoreDocs)
{
var scoreDoc = topDocs.ScoreDocs[i];
var docId = scoreDoc.Doc;
var docId = scoreDoc.Doc;
var score = scoreDoc.Score;
var shardIndex = scoreDoc.ShardIndex;
var doc = searcher.Doc(docId);
var result = LuceneSearchExecutor.CreateSearchResult(doc, score, shardIndex);
results.Add(result);
}
var maxScore = topDocs.MaxScore;
// enumerate (forces the result to execute)
var logOutput = "ThreadID: " + Thread.CurrentThread.ManagedThreadId + ", Results: " + string.Join(',', results.Select(x => $"{x.Id}-{x.Values.Count}-{x.Score}").ToArray());
_logger.LogDebug(logOutput);
}));
}

foreach (var task in tasks)
{
task.Start();
}

await Task.WhenAll(tasks);
}

[Benchmark]
public async Task LuceneAcquireAlwaysWithLock()
{
var tasks = new List<Task>();
var myLock = new object();

for (var i = 0; i < ThreadCount; i++)
{
tasks.Add(new Task(() =>
{
lock (myLock)
{
var parser = new QueryParser(LuceneVersion.LUCENE_48, ExamineFieldNames.ItemIdFieldName, new StandardAnalyzer(LuceneVersion.LUCENE_48));
var query = parser.Parse($"{ExamineFieldNames.CategoryFieldName}:content AND nodeName:location*");
// this is like doing Acquire, does it perform the same (it will allocate more)
using var context = _searcherManager.GetContext();
var searcher = context.Reference;
// Don't use this, increasing the max docs substantially decreases performance
//var maxDoc = searcher.IndexReader.MaxDoc;
var topDocsCollector = TopScoreDocCollector.Create(MaxResults, null, true);
searcher.Search(query, topDocsCollector);
var topDocs = topDocsCollector.GetTopDocs(0, MaxResults);
var totalItemCount = topDocs.TotalHits;
var results = new List<LuceneSearchResult>(topDocs.ScoreDocs.Length);
foreach (var scoreDoc in topDocs.ScoreDocs)
{
var docId = scoreDoc.Doc;
var score = scoreDoc.Score;
var shardIndex = scoreDoc.ShardIndex;
var doc = searcher.Doc(docId);
var result = LuceneSearchExecutor.CreateSearchResult(doc, score, shardIndex);
results.Add(result);
}
var maxScore = topDocs.MaxScore;
// enumerate (forces the result to execute)
var logOutput = "ThreadID: " + Thread.CurrentThread.ManagedThreadId + ", Results: " + string.Join(',', results.Select(x => $"{x.Id}-{x.Values.Count}-{x.Score}").ToArray());
_logger.LogDebug(logOutput);
}
}));
}

foreach (var task in tasks)
{
task.Start();
}

await Task.WhenAll(tasks);
}

[Benchmark]
public async Task LuceneAcquireOnce()
{
var tasks = new List<Task>();

var searcher = _searcherManager.Acquire();

try
{
for (var i = 0; i < ThreadCount; i++)
{
tasks.Add(new Task(() =>
{
var parser = new QueryParser(LuceneVersion.LUCENE_48, ExamineFieldNames.ItemIdFieldName, new StandardAnalyzer(LuceneVersion.LUCENE_48));
var query = parser.Parse($"{ExamineFieldNames.CategoryFieldName}:content AND nodeName:location*");
// Don't use this, increasing the max docs substantially decreases performance
//var maxDoc = searcher.IndexReader.MaxDoc;
var topDocsCollector = TopScoreDocCollector.Create(MaxResults, null, true);
searcher.Search(query, topDocsCollector);
var topDocs = topDocsCollector.GetTopDocs(0, MaxResults);
var totalItemCount = topDocs.TotalHits;
var results = new List<LuceneSearchResult>(topDocs.ScoreDocs.Length);
for (var i = 0; i < topDocs.ScoreDocs.Length; i++)
{
var scoreDoc = topDocs.ScoreDocs[i];
var docId = scoreDoc.Doc;
var doc = searcher.Doc(docId);
var score = scoreDoc.Score;
var shardIndex = scoreDoc.ShardIndex;
var result = LuceneSearchExecutor.CreateSearchResult(doc, score, shardIndex);
results.Add(result);
}
var maxScore = topDocs.MaxScore;
// enumerate (forces the result to execute)
var logOutput = "ThreadID: " + Thread.CurrentThread.ManagedThreadId + ", Results: " + string.Join(',', results.Select(x => $"{x.Id}-{x.Values.Count}-{x.Score}").ToArray());
_logger.LogDebug(logOutput);
}));
}

foreach (var task in tasks)
{
task.Start();
}

await Task.WhenAll(tasks);
}
finally
{
_searcherManager.Release(searcher);
}
}

[Benchmark]
public async Task LuceneSortedDocIds()
{
var tasks = new List<Task>();

for (var i = 0; i < ThreadCount; i++)
{
tasks.Add(new Task(() =>
{
var parser = new QueryParser(LuceneVersion.LUCENE_48, ExamineFieldNames.ItemIdFieldName, new StandardAnalyzer(LuceneVersion.LUCENE_48));
var query = parser.Parse($"{ExamineFieldNames.CategoryFieldName}:content AND nodeName:location*");
// this is like doing Acquire, does it perform the same (it will allocate more)
using var context = _searcherManager.GetContext();
var searcher = context.Reference;
// Don't use this, increasing the max docs substantially decreases performance
//var maxDoc = searcher.IndexReader.MaxDoc;
var topDocsCollector = TopScoreDocCollector.Create(MaxResults, null, true);
searcher.Search(query, topDocsCollector);
var topDocs = topDocsCollector.GetTopDocs(0, MaxResults);
var totalItemCount = topDocs.TotalHits;
var results = new List<LuceneSearchResult>(topDocs.ScoreDocs.Length);
foreach (var scoreDoc in topDocs.ScoreDocs.OrderBy(x => x.Doc))
{
var docId = scoreDoc.Doc;
var score = scoreDoc.Score;
var shardIndex = scoreDoc.ShardIndex;
var doc = searcher.Doc(docId);
var result = LuceneSearchExecutor.CreateSearchResult(doc, score, shardIndex);
results.Add(result);
}
var searchAfterOptions = LuceneSearchExecutor.GetSearchAfterOptions(topDocs);
var maxScore = topDocs.MaxScore;
// enumerate (forces the result to execute)
@@ -288,9 +470,10 @@ public async Task LuceneSimple()
await Task.WhenAll(tasks);
}

#if RELEASE
protected override ILoggerFactory CreateLoggerFactory()
=> Microsoft.Extensions.Logging.LoggerFactory.Create(builder => builder.AddConsole().SetMinimumLevel(LogLevel.Information));

#endif
private TestIndex InitializeAndIndexItems(
string tempBasePath,
Analyzer analyzer,
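A note on the searcher access patterns compared above: GetContext() (the Lucene.Net.Index.Extensions helper used in several of these benchmarks) is a disposable wrapper over SearcherManager's reference counting, and the in-code comment "this is like doing Acquire" refers to the manual equivalent. A minimal sketch of that manual pairing, assuming the same _searcherManager field:

    // Manual equivalent of `using var context = _searcherManager.GetContext();`
    // without allocating the disposable context wrapper.
    var searcher = _searcherManager.Acquire();
    try
    {
        // run the query against `searcher` here
    }
    finally
    {
        _searcherManager.Release(searcher);
    }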
4 changes: 1 addition & 3 deletions src/Examine.Benchmarks/Examine.Benchmarks.csproj
@@ -6,6 +6,7 @@
<Nullable>enable</Nullable>
<IsPackable>false</IsPackable>
<GenerateAssemblyInfo>false</GenerateAssemblyInfo>
<OutputType>Exe</OutputType>
</PropertyGroup>

<ItemGroup>
@@ -17,9 +18,6 @@

<ItemGroup>
<PackageReference Include="BenchmarkDotNet" Version="0.14.0" />
<PackageReference Include="BenchmarkDotNet.TestAdapter" Version="0.14.0" />
<PackageReference Include="Microsoft.VisualStudio.DiagnosticsHub.BenchmarkDotNetDiagnosers" Version="17.12.35209.2" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.10.0" />
</ItemGroup>

</Project>
57 changes: 57 additions & 0 deletions src/Examine.Benchmarks/Program.cs
@@ -0,0 +1,57 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using BenchmarkDotNet.Running;
using Microsoft.Diagnostics.Tracing.Parsers.Kernel;

namespace Examine.Benchmarks
{
public class Program
{
public static async Task Main(string[] args)
{
#if RELEASE
// Benchmark your function here.
BenchmarkSwitcher.FromAssembly(typeof(Program).Assembly).Run(args);
#else
var bench = new ConcurrentSearchBenchmarks();
try
{
bench.Setup();
await Threads100(bench);
await Threads1(bench);
}
finally
{
bench.TearDown();
}

#endif
// Call your function here.
}

private static async Task Threads100(ConcurrentSearchBenchmarks bench)
{
bench.ThreadCount = 50;
bench.MaxResults = 10;

for (var i = 0; i < 100; i++)
{
await bench.ExamineStandard();
}
}

private static async Task Threads1(ConcurrentSearchBenchmarks bench)
{
bench.ThreadCount = 1;
bench.MaxResults = 10;

for (var i = 0; i < 100; i++)
{
await bench.ExamineStandard();
}
}
}
}
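The new Program.cs hands control to BenchmarkSwitcher in Release builds and, in Debug builds, loops ExamineStandard by hand so a profiler can be attached. As a hedged alternative (not part of this commit), the concurrent benchmarks class could also be run directly without the assembly-wide switcher:

    using BenchmarkDotNet.Running;
    using Examine.Benchmarks;

    // Runs only ConcurrentSearchBenchmarks instead of scanning the whole assembly.
    BenchmarkRunner.Run<ConcurrentSearchBenchmarks>();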
1 change: 1 addition & 0 deletions src/Examine.Lucene/Indexing/FullTextType.cs
@@ -56,6 +56,7 @@ protected override void AddSingleValue(Document doc, object value)
if (_sortable)
{
//to be sortable it cannot be analyzed so we have to make a different field
// TODO: Investigate https://lucene.apache.org/core/4_3_0/core/org/apache/lucene/document/SortedDocValuesField.html
doc.Add(new StringField(
ExamineFieldNames.SortedFieldNamePrefix + FieldName,
str,
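The TODO added here (and to a second field type below) points at Lucene's SortedDocValuesField as a possible replacement for the extra non-analyzed StringField that currently backs sorting. A minimal sketch of what that could look like in Lucene.NET 4.8 (an illustration of the linked API, not code from this commit):

    using Lucene.Net.Documents;   // SortedDocValuesField
    using Lucene.Net.Util;        // BytesRef

    // Store the sort key as a per-document doc value instead of an indexed string field;
    // sorting then reads column-stride doc values instead of uninverting a field.
    doc.Add(new SortedDocValuesField(
        ExamineFieldNames.SortedFieldNamePrefix + FieldName,
        new BytesRef(str)));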
(Second indexing field type — file path not captured in this view.)
@@ -38,6 +38,7 @@ protected override void AddSingleValue(Document doc, object value)
if (_sortable)
{
//to be sortable it cannot be analyzed so we have to make a different field
// TODO: Investigate https://lucene.apache.org/core/4_3_0/core/org/apache/lucene/document/SortedDocValuesField.html
doc.Add(new StringField(
ExamineFieldNames.SortedFieldNamePrefix + FieldName,
str,
(The remaining changed files were not loaded in this capture.)
