|
| 1 | +using BenchmarkDotNet.Attributes; |
| 2 | +using BenchmarkDotNet.Configs; |
| 3 | +using BenchmarkDotNet.Jobs; |
| 4 | +using Lucene.Net.Analysis; |
| 5 | +using Lucene.Net.Analysis.Standard; |
| 6 | +using Lucene.Net.Documents; |
| 7 | +using Lucene.Net.Index; |
| 8 | +using Lucene.Net.Randomized.Generators; |
| 9 | +using Lucene.Net.Store; |
| 10 | +using Lucene.Net.Tests.BenchmarkDotNet.Util; |
| 11 | +using Lucene.Net.Util; |
| 12 | +using System; |
| 13 | +using System.IO; |
| 14 | +using System.Text; |
| 15 | + |
| 16 | +namespace Lucene.Net.Tests.BenchmarkDotNet |
| 17 | +{ |
| 18 | + /* |
| 19 | + * Licensed to the Apache Software Foundation (ASF) under one or more |
| 20 | + * contributor license agreements. See the NOTICE file distributed with |
| 21 | + * this work for additional information regarding copyright ownership. |
| 22 | + * The ASF licenses this file to You under the Apache License, Version 2.0 |
| 23 | + * (the "License"); you may not use this file except in compliance with |
| 24 | + * the License. You may obtain a copy of the License at |
| 25 | + * |
| 26 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 27 | + * |
| 28 | + * Unless required by applicable law or agreed to in writing, software |
| 29 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 30 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 31 | + * See the License for the specific language governing permissions and |
| 32 | + * limitations under the License. |
| 33 | + */ |
| 34 | + |
| 35 | + [MemoryDiagnoser] |
| 36 | + [Config(typeof(Config))] |
| 37 | + public class IndexFilesBenchmarks |
| 38 | + { |
/// <summary>
/// Benchmark configuration that registers one medium-run job per
/// Lucene.Net.Analysis.Common NuGet package version listed below.
/// </summary>
private class Config : ManualConfig
{
    public Config()
    {
        // Versions are registered in this order (newest first); each job's
        // id is the package version string itself.
        string[] packageVersions =
        {
            "4.8.0-beta00010",
            "4.8.0-beta00009",
            "4.8.0-beta00008",
            "4.8.0-beta00007",
            "4.8.0-beta00006",
            "4.8.0-beta00005",
        };

        var baseJob = Job.MediumRun;

        foreach (string packageVersion in packageVersions)
        {
            AddJob(baseJob.WithNuGet("Lucene.Net.Analysis.Common", packageVersion).WithId(packageVersion));
        }
    }
}
| 53 | + |
| 54 | + private static DirectoryInfo sourceDirectory; |
| 55 | + private static DirectoryInfo indexDirectory; |
| 56 | + |
/// <summary>
/// Creates the temporary source directory and fills it with generated content.
/// A fixed seed is used so the random generator produces the same sequence on every run.
/// </summary>
[GlobalSetup]
public void GlobalSetUp()
{
    const int seed = 2342;

    sourceDirectory = PathUtil.CreateTempDir("sourceFiles");

    var random = new Random(seed);
    // NOTE(review): 250 is presumably the number of files to generate - confirm against ContentGenerator.
    ContentGenerator.GenerateFiles(random, sourceDirectory.FullName, 250);
}
| 64 | + |
/// <summary>
/// Best-effort removal of the temporary source directory (and everything in it).
/// </summary>
[GlobalCleanup]
public void GlobalTearDown()
{
    // Nothing to remove if setup never assigned the directory.
    if (sourceDirectory == null)
    {
        return;
    }

    try
    {
        // Fully qualified because Lucene.Net.Store also declares a Directory type.
        if (System.IO.Directory.Exists(sourceDirectory.FullName))
        {
            System.IO.Directory.Delete(sourceDirectory.FullName, recursive: true);
        }
    }
    catch (Exception ex) when (ex is IOException || ex is UnauthorizedAccessException)
    {
        // Cleanup is best-effort: a locked or already-removed temp directory
        // must not fail the benchmark run. Other exception types are allowed
        // to surface because they indicate a programming error.
    }
}
| 75 | + |
/// <summary>
/// Allocates a new temporary directory to hold the index written by the next iteration.
/// </summary>
[IterationSetup]
public void IterationSetUp() => indexDirectory = PathUtil.CreateTempDir("indexFiles");
| 81 | + |
/// <summary>
/// Best-effort removal of the temporary index directory written by the iteration.
/// </summary>
[IterationCleanup]
public void IterationTearDown()
{
    // Nothing to remove if iteration setup never assigned the directory.
    if (indexDirectory == null)
    {
        return;
    }

    try
    {
        // Fully qualified because Lucene.Net.Store also declares a Directory type.
        if (System.IO.Directory.Exists(indexDirectory.FullName))
        {
            System.IO.Directory.Delete(indexDirectory.FullName, recursive: true);
        }
    }
    catch (Exception ex) when (ex is IOException || ex is UnauthorizedAccessException)
    {
        // Cleanup is best-effort: a locked or already-removed temp directory
        // must not fail the benchmark run. Other exception types are allowed
        // to surface because they indicate a programming error.
    }
}
| 93 | + |
/// <summary>
/// Benchmark entry point: indexes the generated source directory into the
/// per-iteration index directory.
/// </summary>
[Benchmark]
public void IndexFiles()
{
    IndexFiles(sourceDirectory, indexDirectory);
}
| 97 | + |
/// <summary>
/// Creates a new index in <paramref name="indexDirectory"/> containing one
/// document per file found under <paramref name="sourceDirectory"/>.
/// </summary>
/// <param name="sourceDirectory">Root directory whose files are indexed.</param>
/// <param name="indexDirectory">Directory in which the index is written.</param>
public static void IndexFiles(DirectoryInfo sourceDirectory, DirectoryInfo indexDirectory)
{
    string indexPath = indexDirectory.FullName;

    // The directory and the analyzer are disposable; release them together
    // with the writer instead of leaking them on every call.
    using (Store.Directory dir = FSDirectory.Open(indexPath))
    // :Post-Release-Update-Version.LUCENE_XY:
    using (Analyzer analyzer = new StandardAnalyzer(LuceneVersion.LUCENE_48))
    {
        IndexWriterConfig iwc = new IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer);

        // Create a new index in the directory, removing any
        // previously indexed documents:
        iwc.OpenMode = OpenMode.CREATE;

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.
        //
        // iwc.RAMBufferSizeMB = 256.0;

        using (IndexWriter writer = new IndexWriter(dir, iwc))
        {
            IndexDocs(writer, sourceDirectory);

            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here. This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.ForceMerge(1);
        }
    }
}
| 141 | + |
/// <summary>
/// Walks the given directory tree and indexes every file in it.
/// Subdirectories are processed first, then the files directly inside
/// <paramref name="directoryInfo"/>.<para/>
///
/// NOTE: One document is indexed per input file.
/// </summary>
/// <param name="writer">
/// <see cref="IndexWriter"/> that receives the documents.
/// </param>
/// <param name="directoryInfo">
/// The directory to recurse into to find files to index.
/// </param>
/// <exception cref="IOException">
/// If there is a low-level I/O error.
/// </exception>
internal static void IndexDocs(IndexWriter writer, DirectoryInfo directoryInfo)
{
    DirectoryInfo[] subdirectories = directoryInfo.GetDirectories();
    for (int i = 0; i < subdirectories.Length; i++)
    {
        IndexDocs(writer, subdirectories[i]);
    }

    FileInfo[] files = directoryInfo.GetFiles();
    for (int i = 0; i < files.Length; i++)
    {
        IndexDocs(writer, files[i]);
    }
}
| 171 | + |
/// <summary>
/// Builds a document for a single file and hands it to the writer.<para/>
/// The document has three fields: "path" (stored), "modified" (not stored)
/// and "contents" (read from the file as UTF-8).
/// </summary>
/// <param name="writer">
/// <see cref="IndexWriter"/> that receives the document.
/// </param>
/// <param name="file">
/// The file to index.
/// </param>
/// <exception cref="IOException">
/// If there is a low-level I/O error.
/// </exception>
internal static void IndexDocs(IndexWriter writer, FileInfo file)
{
    using (var input = new FileStream(file.FullName, FileMode.Open, FileAccess.Read))
    {
        var document = new Document();

        // "path": the file's full path as a single, stored string value.
        document.Add(new StringField("path", file.FullName, Field.Store.YES));

        // "modified": the file's last write time (UTC) as a raw DateTime tick
        // count, i.e. 100-nanosecond units. Not stored.
        document.Add(new Int64Field("modified", file.LastWriteTimeUtc.Ticks, Field.Store.NO));

        // "contents": supplied through a reader over the open stream, decoded as
        // UTF-8. The stream stays open until the enclosing using block ends.
        document.Add(new TextField("contents", new StreamReader(input, Encoding.UTF8)));

        if (writer.Config.OpenMode != OpenMode.CREATE)
        {
            // Existing index: an older copy of this file may already be present,
            // so replace whatever document matches the exact path.
            //Console.WriteLine("updating " + file);
            writer.UpdateDocument(new Term("path", file.FullName), document);
            return;
        }

        // New index: no older copy can exist, so simply add the document.
        //Console.WriteLine("adding " + file);
        writer.AddDocument(document);
    }
}
| 230 | + } |
| 231 | +} |
0 commit comments