Skip to content

Commit 19ce5e9

Browse files
committed
Lucene.Net.Tests.BenchmarkDotNet: Added benchmarks for IndexFiles and SearchFiles
1 parent e11b441 commit 19ce5e9

File tree

5 files changed

+540
-0
lines changed

5 files changed

+540
-0
lines changed
Lines changed: 231 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,231 @@
1+
using BenchmarkDotNet.Attributes;
2+
using BenchmarkDotNet.Configs;
3+
using BenchmarkDotNet.Jobs;
4+
using Lucene.Net.Analysis;
5+
using Lucene.Net.Analysis.Standard;
6+
using Lucene.Net.Documents;
7+
using Lucene.Net.Index;
8+
using Lucene.Net.Randomized.Generators;
9+
using Lucene.Net.Store;
10+
using Lucene.Net.Tests.BenchmarkDotNet.Util;
11+
using Lucene.Net.Util;
12+
using System;
13+
using System.IO;
14+
using System.Text;
15+
16+
namespace Lucene.Net.Tests.BenchmarkDotNet
17+
{
18+
/*
19+
* Licensed to the Apache Software Foundation (ASF) under one or more
20+
* contributor license agreements. See the NOTICE file distributed with
21+
* this work for additional information regarding copyright ownership.
22+
* The ASF licenses this file to You under the Apache License, Version 2.0
23+
* (the "License"); you may not use this file except in compliance with
24+
* the License. You may obtain a copy of the License at
25+
*
26+
* http://www.apache.org/licenses/LICENSE-2.0
27+
*
28+
* Unless required by applicable law or agreed to in writing, software
29+
* distributed under the License is distributed on an "AS IS" BASIS,
30+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
31+
* See the License for the specific language governing permissions and
32+
* limitations under the License.
33+
*/
34+
35+
[MemoryDiagnoser]
36+
[Config(typeof(Config))]
37+
public class IndexFilesBenchmarks
38+
{
39+
/// <summary>
/// Benchmark configuration that registers one job per Lucene.Net.Analysis.Common
/// NuGet package version, each derived from <see cref="Job.MediumRun"/>.
/// </summary>
private class Config : ManualConfig
{
    public Config()
    {
        // Registration order is newest to oldest; each version string is also used as the job id.
        string[] packageVersions =
        {
            "4.8.0-beta00010",
            "4.8.0-beta00009",
            "4.8.0-beta00008",
            "4.8.0-beta00007",
            "4.8.0-beta00006",
            "4.8.0-beta00005",
        };

        Job template = Job.MediumRun;
        foreach (string packageVersion in packageVersions)
        {
            Job versionedJob = template
                .WithNuGet("Lucene.Net.Analysis.Common", packageVersion)
                .WithId(packageVersion);
            AddJob(versionedJob);
        }
    }
}
53+
54+
private static DirectoryInfo sourceDirectory;
55+
private static DirectoryInfo indexDirectory;
56+
57+
/// <summary>
/// Global setup: creates the temporary "sourceFiles" directory and asks
/// <c>ContentGenerator</c> to populate it using a fixed random seed.
/// </summary>
[GlobalSetup]
public void GlobalSetUp()
{
    DirectoryInfo generatedSource = PathUtil.CreateTempDir("sourceFiles");
    sourceDirectory = generatedSource;

    // Fixed seed so the generator is driven by the same random sequence on every run.
    var random = new Random(2342);
    // NOTE(review): 250 is presumably the number of files to generate - confirm against ContentGenerator.
    ContentGenerator.GenerateFiles(random, generatedSource.FullName, 250);
}
64+
65+
/// <summary>
/// Global cleanup: recursively deletes the generated source directory if it still exists.
/// </summary>
[GlobalCleanup]
public void GlobalTearDown()
{
    try
    {
        string sourcePath = sourceDirectory.FullName;
        if (!System.IO.Directory.Exists(sourcePath))
        {
            return;
        }

        System.IO.Directory.Delete(sourcePath, recursive: true);
    }
    catch
    {
        // Best-effort cleanup: every failure is intentionally ignored.
    }
}
75+
76+
/// <summary>
/// Iteration setup: points <c>indexDirectory</c> at a newly requested "indexFiles" temp directory.
/// </summary>
[IterationSetup]
public void IterationSetUp() => indexDirectory = PathUtil.CreateTempDir("indexFiles");
81+
82+
/// <summary>
/// Iteration cleanup: recursively deletes the index directory used by the iteration, if present.
/// </summary>
[IterationCleanup]
public void IterationTearDown()
{
    try
    {
        string indexPath = indexDirectory.FullName;
        if (!System.IO.Directory.Exists(indexPath))
        {
            return;
        }

        System.IO.Directory.Delete(indexPath, recursive: true);
    }
    catch
    {
        // Best-effort cleanup: every failure is intentionally ignored.
    }
}
93+
94+
/// <summary>
/// Benchmark entry point: indexes the files under the static source directory
/// into the static index directory.
/// </summary>
[Benchmark]
public void IndexFiles()
{
    IndexFiles(sourceDirectory, indexDirectory);
}
97+
98+
/// <summary>
/// Indexes every file found under <paramref name="sourceDirectory"/> (recursively) into a
/// new index located at <paramref name="indexDirectory"/>.
/// </summary>
/// <param name="sourceDirectory">Root directory whose files are indexed.</param>
/// <param name="indexDirectory">Directory that receives the index; it is opened in <see cref="OpenMode.CREATE"/> mode.</param>
public static void IndexFiles(DirectoryInfo sourceDirectory, DirectoryInfo indexDirectory)
{
    string indexPath = indexDirectory.FullName;

    // The directory and the analyzer are both disposable, so they are released
    // deterministically once the writer has been closed.
    using (Store.Directory dir = FSDirectory.Open(indexPath))
    // :Post-Release-Update-Version.LUCENE_XY:
    using (Analyzer analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48))
    {
        IndexWriterConfig iwc = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);

        // Always create a new index in the directory, removing any previously
        // indexed documents. IndexDocs checks this mode to decide between
        // AddDocument and UpdateDocument.
        iwc.OpenMode = OpenMode.CREATE;

        // Optional: for better indexing performance, if you are indexing many
        // documents, increase the RAM buffer (iwc.RAMBufferSizeMB).

        using (IndexWriter writer = new IndexWriter(dir, iwc))
        {
            IndexDocs(writer, sourceDirectory);

            // NOTE: to maximize search performance you can optionally call
            // writer.ForceMerge(1) here. This can be a terribly costly operation,
            // so it is generally only worth it when the index is relatively
            // static (i.e. you are done adding documents to it).
        }
    }
}
141+
142+
/// <summary>
/// Walks <paramref name="directoryInfo"/> depth-first: every sub-directory is
/// processed recursively first, then each file directly inside the directory
/// is indexed.<para/>
///
/// NOTE: One document is indexed per input file, which is slow. For good
/// throughput, put multiple documents into your input file(s).
/// </summary>
/// <param name="writer">
/// The <see cref="IndexWriter"/> that receives the documents.
/// </param>
/// <param name="directoryInfo">
/// The directory to recurse into to find files to index.
/// </param>
/// <exception cref="IOException">
/// If there is a low-level I/O error.
/// </exception>
internal static void IndexDocs(IndexWriter writer, DirectoryInfo directoryInfo)
{
    DirectoryInfo[] subDirectories = directoryInfo.GetDirectories();
    for (int i = 0; i < subDirectories.Length; i++)
    {
        IndexDocs(writer, subDirectories[i]);
    }

    FileInfo[] files = directoryInfo.GetFiles();
    for (int i = 0; i < files.Length; i++)
    {
        IndexDocs(writer, files[i]);
    }
}
171+
172+
/// <summary>
/// Builds one document for <paramref name="file"/> with "path", "modified" and
/// "contents" fields and hands it to <paramref name="writer"/>.<para/>
/// </summary>
/// <param name="writer">
/// The <see cref="IndexWriter"/> that receives the document.
/// </param>
/// <param name="file">
/// The file to index.
/// </param>
/// <exception cref="IOException">
/// If there is a low-level I/O error.
/// </exception>
internal static void IndexDocs(IndexWriter writer, FileInfo file)
{
    using (FileStream input = new FileStream(file.FullName, FileMode.Open, FileAccess.Read))
    {
        var document = new Document();

        // "path": the full file name, stored with the document. A StringField is
        // indexed (i.e. searchable) as a single token, without term frequency or
        // positional information.
        document.Add(new StringField("path", file.FullName, Field.Store.YES));

        // "modified": the last write time (UTC) expressed in .NET ticks, indexed as a
        // 64-bit numeric field and not stored. Ticks are often finer than needed; a
        // coarser number built from year/month/day/hour could be used instead
        // (e.g. 2011021714 for February 17, 2011, 2-3 PM).
        document.Add(new Int64Field("modified", file.LastWriteTimeUtc.Ticks, Field.Store.NO));

        // "contents": supplied as a reader so the file text is tokenized and indexed
        // but not stored. The stream is decoded as UTF-8; if the file uses another
        // encoding, searching for special characters will fail.
        document.Add(new TextField("contents", new StreamReader(input, Encoding.UTF8)));

        bool isFreshIndex = writer.Config.OpenMode == OpenMode.CREATE;
        if (!isFreshIndex)
        {
            // Existing index: an old copy of this document may already be present, so
            // replace whatever matches the exact path.
            writer.UpdateDocument(new Term("path", file.FullName), document);
        }
        else
        {
            // New index: no old copy can exist, so simply add the document.
            writer.AddDocument(document);
        }
    }
}
230+
}
231+
}

src/Lucene.Net.Tests.BenchmarkDotNet/Lucene.Net.Tests.BenchmarkDotNet.csproj

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,13 @@
1212

1313
<ItemGroup>
1414
<PackageReference Include="Lucene.Net.Analysis.Common" Version="4.8.0-beta00005" />
15+
<PackageReference Include="Lucene.Net.QueryParser" Version="4.8.0-beta00005" />
16+
</ItemGroup>
17+
18+
<ItemGroup>
19+
<Compile Include="..\Lucene.Net.TestFramework\Support\Randomized\Generators\RandomPicks.cs" LinkBase="Util" />
20+
<Compile Include="..\Lucene.Net.TestFramework\Support\Randomized\Generators\RandomInts.cs" LinkBase="Util" />
21+
<Compile Include="..\Lucene.Net\Support\ExceptionToNetNumericConventionAttribute.cs" LinkBase="Util" />
1522
</ItemGroup>
1623

1724
</Project>
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
using BenchmarkDotNet.Attributes;
2+
using BenchmarkDotNet.Configs;
3+
using BenchmarkDotNet.Jobs;
4+
using Lucene.Net.Analysis;
5+
using Lucene.Net.Analysis.Standard;
6+
using Lucene.Net.Index;
7+
using Lucene.Net.QueryParsers.Classic;
8+
using Lucene.Net.Randomized.Generators;
9+
using Lucene.Net.Search;
10+
using Lucene.Net.Store;
11+
using Lucene.Net.Tests.BenchmarkDotNet.Util;
12+
using Lucene.Net.Util;
13+
using System;
14+
using System.IO;
15+
16+
namespace Lucene.Net.Tests.BenchmarkDotNet
17+
{
18+
/*
19+
* Licensed to the Apache Software Foundation (ASF) under one or more
20+
* contributor license agreements. See the NOTICE file distributed with
21+
* this work for additional information regarding copyright ownership.
22+
* The ASF licenses this file to You under the Apache License, Version 2.0
23+
* (the "License"); you may not use this file except in compliance with
24+
* the License. You may obtain a copy of the License at
25+
*
26+
* http://www.apache.org/licenses/LICENSE-2.0
27+
*
28+
* Unless required by applicable law or agreed to in writing, software
29+
* distributed under the License is distributed on an "AS IS" BASIS,
30+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
31+
* See the License for the specific language governing permissions and
32+
* limitations under the License.
33+
*/
34+
35+
[MemoryDiagnoser]
36+
[Config(typeof(Config))]
37+
public class SearchFilesBenchmarks
38+
{
39+
/// <summary>
/// Benchmark configuration that registers one job per Lucene.NET package version.
/// Each job is derived from <see cref="Job.MediumRun"/> and pins both
/// Lucene.Net.Analysis.Common and Lucene.Net.QueryParser to that version.
/// </summary>
private class Config : ManualConfig
{
    public Config()
    {
        // Registration order is newest to oldest; each version string is also used as the job id.
        string[] packageVersions =
        {
            "4.8.0-beta00010",
            "4.8.0-beta00009",
            "4.8.0-beta00008",
            "4.8.0-beta00007",
            "4.8.0-beta00006",
            "4.8.0-beta00005",
        };

        Job template = Job.MediumRun;
        foreach (string packageVersion in packageVersions)
        {
            Job versionedJob = template
                .WithNuGet("Lucene.Net.Analysis.Common", packageVersion)
                .WithNuGet("Lucene.Net.QueryParser", packageVersion)
                .WithId(packageVersion);
            AddJob(versionedJob);
        }
    }
}
53+
54+
private const string QueryString = "settings";
55+
private static DirectoryInfo indexDirectory;
56+
57+
/// <summary>
/// Global setup: generates source files, indexes them into a temporary index
/// directory, then removes the source files again.
/// </summary>
[GlobalSetup]
public void GlobalSetUp()
{
    var generatedSource = PathUtil.CreateTempDir("sourceFiles");

    // Generate the content to index with a fixed seed. QueryString is handed to the
    // generator so the content includes the term the benchmark searches for.
    var random = new Random(2342);
    ContentGenerator.GenerateFiles(random, generatedSource.FullName, 1000, QueryString);

    // Build the index that the benchmark queries.
    indexDirectory = PathUtil.CreateTempDir("indexFiles");
    IndexFilesBenchmarks.IndexFiles(generatedSource, indexDirectory);

    // The source files are no longer needed once indexed.
    try
    {
        string sourcePath = generatedSource.FullName;
        if (System.IO.Directory.Exists(sourcePath))
        {
            System.IO.Directory.Delete(sourcePath, recursive: true);
        }
    }
    catch
    {
        // Best-effort cleanup: every failure is intentionally ignored.
    }
}
79+
80+
/// <summary>
/// Global cleanup: recursively deletes the temporary index directory if it still exists.
/// </summary>
[GlobalCleanup]
public void GlobalTearDown()
{
    try
    {
        string indexPath = indexDirectory.FullName;
        if (!System.IO.Directory.Exists(indexPath))
        {
            return;
        }

        System.IO.Directory.Delete(indexPath, recursive: true);
    }
    catch
    {
        // Best-effort cleanup: every failure is intentionally ignored.
    }
}
90+
91+
/// <summary>
/// Benchmark: opens the prepared index, parses <c>QueryString</c> against the
/// "contents" field and runs the resulting query 1000 times, requesting the
/// top 100 hits each time.
/// </summary>
[Benchmark]
public void SearchFiles()
{
    const string field = "contents";
    const int repeat = 1000;
    const int maxHits = 100;

    // The directory, reader and analyzer are all disposable. The directory is opened
    // by this method, so it is disposed here explicitly rather than relying on the
    // reader to release it.
    using (Lucene.Net.Store.Directory directory = FSDirectory.Open(indexDirectory.FullName))
    using (IndexReader reader = DirectoryReader.Open(directory))
    // :Post-Release-Update-Version.LUCENE_XY:
    using (Analyzer analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48))
    {
        IndexSearcher searcher = new IndexSearcher(reader);

        // :Post-Release-Update-Version.LUCENE_XY:
        QueryParser parser = new QueryParser(LuceneVersion.LUCENE_48, field, analyzer);
        Query query = parser.Parse(QueryString.Trim());

        // Repeat the same search; the total time is what the benchmark measures.
        for (int i = 0; i < repeat; i++)
        {
            searcher.Search(query, null, maxHits);
        }
    }
}
127+
}
128+
}

0 commit comments

Comments
 (0)