@@ -45,6 +45,23 @@ fn error_in_index_worker_thread(context: &str) -> TantivyError {
45
45
) )
46
46
}
47
47
48
+ #[ derive( Clone , bon:: Builder ) ]
49
+ /// Options used to create a new [IndexWriter] for an index, assembled through the generated builder.
50
+ pub struct IndexWriterOptions {
51
+ #[ builder( default = MEMORY_BUDGET_NUM_BYTES_MIN ) ]
52
+ /// The memory budget per indexer thread, in bytes.
53
+ ///
54
+ /// When an indexer thread has buffered this much data in memory
55
+ /// it will flush the segment to disk (although this is not searchable until commit is called).
56
+ memory_budget_per_thread : usize ,
57
+ #[ builder( default = 1 ) ]
58
+ /// The number of indexer worker threads to use. Must be at least 1.
59
+ num_worker_threads : usize ,
60
+ #[ builder( default = 4 ) ]
61
+ /// The number of merger threads to use.
62
+ num_merge_threads : usize ,
63
+ }
64
+
48
65
/// `IndexWriter` is the user entry-point to add documents to an index.
49
66
///
50
67
/// It manages a small number of indexing threads, as well as a shared
@@ -58,8 +75,7 @@ pub struct IndexWriter<D: Document = TantivyDocument> {
58
75
59
76
index : Index ,
60
77
61
- // The memory budget per thread, after which a commit is triggered.
62
- memory_budget_in_bytes_per_thread : usize ,
78
+ options : IndexWriterOptions ,
63
79
64
80
workers_join_handle : Vec < JoinHandle < crate :: Result < ( ) > > > ,
65
81
@@ -70,9 +86,6 @@ pub struct IndexWriter<D: Document = TantivyDocument> {
70
86
71
87
worker_id : usize ,
72
88
73
- num_threads : usize ,
74
- num_merge_threads : usize ,
75
-
76
89
delete_queue : DeleteQueue ,
77
90
78
91
stamper : Stamper ,
@@ -266,24 +279,27 @@ impl<D: Document> IndexWriter<D> {
266
279
/// `TantivyError::InvalidArgument`
267
280
pub ( crate ) fn new (
268
281
index : & Index ,
269
- num_threads : usize ,
270
- memory_budget_in_bytes_per_thread : usize ,
282
+ options : IndexWriterOptions ,
271
283
directory_lock : DirectoryLock ,
272
- num_merge_threads : usize ,
273
284
) -> crate :: Result < Self > {
274
- if memory_budget_in_bytes_per_thread < MEMORY_BUDGET_NUM_BYTES_MIN {
285
+ if options . memory_budget_per_thread < MEMORY_BUDGET_NUM_BYTES_MIN {
275
286
let err_msg = format ! (
276
287
"The memory arena in bytes per thread needs to be at least \
277
288
{MEMORY_BUDGET_NUM_BYTES_MIN}."
278
289
) ;
279
290
return Err ( TantivyError :: InvalidArgument ( err_msg) ) ;
280
291
}
281
- if memory_budget_in_bytes_per_thread >= MEMORY_BUDGET_NUM_BYTES_MAX {
292
+ if options . memory_budget_per_thread >= MEMORY_BUDGET_NUM_BYTES_MAX {
282
293
let err_msg = format ! (
283
294
"The memory arena in bytes per thread cannot exceed {MEMORY_BUDGET_NUM_BYTES_MAX}"
284
295
) ;
285
296
return Err ( TantivyError :: InvalidArgument ( err_msg) ) ;
286
297
}
298
+ if options. num_worker_threads == 0 {
299
+ let err_msg = "At least one worker thread is required, got 0" . to_string ( ) ;
300
+ return Err ( TantivyError :: InvalidArgument ( err_msg) ) ;
301
+ }
302
+
287
303
let ( document_sender, document_receiver) =
288
304
crossbeam_channel:: bounded ( PIPELINE_MAX_SIZE_IN_DOCS ) ;
289
305
@@ -297,23 +313,20 @@ impl<D: Document> IndexWriter<D> {
297
313
index. clone ( ) ,
298
314
stamper. clone ( ) ,
299
315
& delete_queue. cursor ( ) ,
300
- num_merge_threads,
316
+ options . num_merge_threads ,
301
317
) ?;
302
318
303
319
let mut index_writer = Self {
304
320
_directory_lock : Some ( directory_lock) ,
305
321
306
- memory_budget_in_bytes_per_thread ,
322
+ options : options . clone ( ) ,
307
323
index : index. clone ( ) ,
308
324
index_writer_status : IndexWriterStatus :: from ( document_receiver) ,
309
325
operation_sender : document_sender,
310
326
311
327
segment_updater,
312
328
313
329
workers_join_handle : vec ! [ ] ,
314
- num_threads,
315
-
316
- num_merge_threads,
317
330
318
331
delete_queue,
319
332
@@ -406,7 +419,7 @@ impl<D: Document> IndexWriter<D> {
406
419
407
420
let mut delete_cursor = self . delete_queue . cursor ( ) ;
408
421
409
- let mem_budget = self . memory_budget_in_bytes_per_thread ;
422
+ let mem_budget = self . options . memory_budget_per_thread ;
410
423
let index = self . index . clone ( ) ;
411
424
let join_handle: JoinHandle < crate :: Result < ( ) > > = thread:: Builder :: new ( )
412
425
. name ( format ! ( "thrd-tantivy-index{}" , self . worker_id) )
@@ -459,7 +472,7 @@ impl<D: Document> IndexWriter<D> {
459
472
}
460
473
461
474
fn start_workers ( & mut self ) -> crate :: Result < ( ) > {
462
- for _ in 0 ..self . num_threads {
475
+ for _ in 0 ..self . options . num_worker_threads {
463
476
self . add_indexing_worker ( ) ?;
464
477
}
465
478
Ok ( ( ) )
@@ -561,13 +574,7 @@ impl<D: Document> IndexWriter<D> {
561
574
. take ( )
562
575
. expect ( "The IndexWriter does not have any lock. This is a bug, please report." ) ;
563
576
564
- let new_index_writer = IndexWriter :: new (
565
- & self . index ,
566
- self . num_threads ,
567
- self . memory_budget_in_bytes_per_thread ,
568
- directory_lock,
569
- self . num_merge_threads ,
570
- ) ?;
577
+ let new_index_writer = IndexWriter :: new ( & self . index , self . options . clone ( ) , directory_lock) ?;
571
578
572
579
// the current `self` is dropped right away because of this call.
573
580
//
@@ -821,7 +828,7 @@ mod tests {
821
828
use crate :: directory:: error:: LockError ;
822
829
use crate :: error:: * ;
823
830
use crate :: indexer:: index_writer:: MEMORY_BUDGET_NUM_BYTES_MIN ;
824
- use crate :: indexer:: NoMergePolicy ;
831
+ use crate :: indexer:: { IndexWriterOptions , NoMergePolicy } ;
825
832
use crate :: query:: { QueryParser , TermQuery } ;
826
833
use crate :: schema:: {
827
834
self , Facet , FacetOptions , IndexRecordOption , IpAddrOptions , JsonObjectOptions ,
@@ -2542,4 +2549,36 @@ mod tests {
2542
2549
index_writer. commit ( ) . unwrap ( ) ;
2543
2550
Ok ( ( ) )
2544
2551
}
2552
+
2553
+ #[ test]
2554
+ fn test_writer_options_validation ( ) {
2555
+ let mut schema_builder = Schema :: builder ( ) ;
2556
+ schema_builder. add_bool_field ( "example" , STORED ) ;
2557
+ let index = Index :: create_in_ram ( schema_builder. build ( ) ) ;
2558
+ // Each invalid option set must be rejected with `InvalidArgument`: first, zero worker threads.
2559
+ let opt_wo_threads = IndexWriterOptions :: builder ( ) . num_worker_threads ( 0 ) . build ( ) ;
2560
+ let result = index. writer_with_options :: < TantivyDocument > ( opt_wo_threads) ;
2561
+ assert ! ( result. is_err( ) , "Writer should reject 0 thread count" ) ;
2562
+ assert ! ( matches!( result, Err ( TantivyError :: InvalidArgument ( _) ) ) ) ;
2563
+ // A per-thread memory budget that is too small must be rejected.
2564
+ let opt_with_low_memory = IndexWriterOptions :: builder ( )
2565
+ . memory_budget_per_thread ( 10 << 10 )
2566
+ . build ( ) ;
2567
+ let result = index. writer_with_options :: < TantivyDocument > ( opt_with_low_memory) ;
2568
+ assert ! (
2569
+ result. is_err( ) ,
2570
+ "Writer should reject options with too low memory size"
2571
+ ) ;
2572
+ assert ! ( matches!( result, Err ( TantivyError :: InvalidArgument ( _) ) ) ) ;
2573
+ // A per-thread memory budget that is too large must be rejected as well.
2574
+ let opt_with_high_memory = IndexWriterOptions :: builder ( )
2575
+ . memory_budget_per_thread ( 5 << 30 )
2576
+ . build ( ) ;
2577
+ let result = index. writer_with_options :: < TantivyDocument > ( opt_with_high_memory) ;
2578
+ assert ! (
2579
+ result. is_err( ) ,
2580
+ "Writer should reject options with too high memory size"
2581
+ ) ;
2582
+ assert ! ( matches!( result, Err ( TantivyError :: InvalidArgument ( _) ) ) ) ;
2583
+ }
2545
2584
}
0 commit comments