Skip to content

Commit

Permalink
refactor(storage): improve inverted index read fst file first to reduce load index
Browse files Browse the repository at this point in the history
  • Loading branch information
b41sh committed Sep 16, 2024
1 parent 1a904c0 commit 3dceccc
Show file tree
Hide file tree
Showing 16 changed files with 1,257 additions and 307 deletions.
33 changes: 13 additions & 20 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ serde_with = { version = "3.8.1" }
serfig = "0.1.0"
sled = { version = "0.34", default-features = false }
stream-more = "0.1.3"
tantivy = "0.22.0"
tantivy = { git = "https://github.com/b41sh/tantivy", rev = "37aeac0" }
thiserror = { version = "1" }
tikv-jemalloc-ctl = { version = "0.5.0", features = ["use_std"] }
tokio = { version = "1.35.0", features = ["full"] }
Expand Down
35 changes: 25 additions & 10 deletions src/query/ee/tests/it/inverted_index/index_refresh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// limitations under the License.

use std::collections::BTreeMap;
use std::collections::HashSet;

use databend_common_base::base::tokio;
use databend_common_catalog::plan::InvertedIndexInfo;
Expand All @@ -38,6 +39,7 @@ use databend_query::interpreters::RefreshTableIndexInterpreter;
use databend_query::test_kits::append_string_sample_data;
use databend_query::test_kits::*;
use databend_storages_common_cache::LoadParams;
use tantivy::schema::IndexRecordOption;

#[tokio::test(flavor = "multi_thread")]
async fn test_fuse_do_refresh_inverted_index() -> Result<()> {
Expand Down Expand Up @@ -144,9 +146,12 @@ async fn test_fuse_do_refresh_inverted_index() -> Result<()> {
let field_nums = query_fields.len();
let has_score = true;
let need_position = false;
let mut field_ids = HashSet::new();
field_ids.insert(0);
field_ids.insert(1);
let index_record = IndexRecordOption::WithFreqsAndPositions;

let index_reader =
InvertedIndexReader::try_create(dal.clone(), field_nums, need_position, &index_loc).await?;
let index_reader = InvertedIndexReader::create(dal.clone());

let queries = vec![
("rust".to_string(), vec![0, 1]),
Expand All @@ -166,14 +171,24 @@ async fn test_fuse_do_refresh_inverted_index() -> Result<()> {
inverted_index_option: None,
};

let (query, tokenizer_manager) = create_inverted_index_query(&inverted_index_info)?;

let matched_rows = index_reader.clone().do_filter(
has_score,
&query,
tokenizer_manager,
block_meta.row_count,
)?;
let (query, fuzziness, tokenizer_manager) =
create_inverted_index_query(&inverted_index_info)?;

let matched_rows = index_reader
.clone()
.do_filter(
field_nums,
need_position,
has_score,
query.box_clone(),
&field_ids,
&index_record,
&fuzziness,
tokenizer_manager,
block_meta.row_count as u32,
&index_loc,
)
.await?;
assert!(matched_rows.is_some());
let matched_rows = matched_rows.unwrap();
assert_eq!(matched_rows.len(), ids.len());
Expand Down
4 changes: 3 additions & 1 deletion src/query/storages/common/index/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,15 @@ databend-common-functions = { workspace = true }
databend-storages-common-table-meta = { workspace = true }
fastrace = { workspace = true }
jsonb = { workspace = true }
levenshtein_automata = "0.2.1"
log = { workspace = true }
match-template = { workspace = true }
parquet = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
tantivy = { workspace = true }
tantivy-common = "0.7.0"
tantivy-common = { git = "https://github.com/b41sh/tantivy", rev = "37aeac0", package = "tantivy-common" }
tantivy-fst = "0.5"
thiserror = { workspace = true }
xorfilter-rs = { workspace = true, features = ["cbordata"] }

Expand Down
Loading

0 comments on commit 3dceccc

Please sign in to comment.