diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock
index f5b1bada6fe..72145496b03 100644
--- a/quickwit/Cargo.lock
+++ b/quickwit/Cargo.lock
@@ -93,6 +93,15 @@ version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "250f629c0161ad8107cf89319e990051fae62832fd343083bea452d93e2205fd"
+[[package]]
+name = "alloca"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e5a7d05ea6aea7e9e64d25b9156ba2fee3fdd659e34e41063cd2fc7cd020d7f4"
+dependencies = [
+ "cc",
+]
+
[[package]]
name = "allocator-api2"
version = "0.2.18"
@@ -1016,6 +1025,23 @@ dependencies = [
"serde",
]
+[[package]]
+name = "binggan"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "012a2b0aae162c298472025aa0662323c157e63897e32146a6a1bdd286108d6f"
+dependencies = [
+ "alloca",
+ "bpu_trasher",
+ "miniserde",
+ "peakmem-alloc",
+ "perf-event",
+ "rustc-hash 2.0.0",
+ "rustop",
+ "unicode-width",
+ "yansi",
+]
+
[[package]]
name = "bit-set"
version = "0.5.3"
@@ -1109,6 +1135,15 @@ dependencies = [
"syn_derive",
]
+[[package]]
+name = "bpu_trasher"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b12b1a01c908052583934ce0bdcd738a394a11df7c41d8fd042a588a4231725a"
+dependencies = [
+ "rand 0.8.5",
+]
+
[[package]]
name = "bs58"
version = "0.5.1"
@@ -4080,12 +4115,34 @@ dependencies = [
"unicase",
]
+[[package]]
+name = "mini-internal"
+version = "0.1.40"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f3cd9f9bbedc1b92683a9847b8db12f3203cf32af6a11db085fa007708dc9555"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.79",
+]
+
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
+[[package]]
+name = "miniserde"
+version = "0.1.40"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e9b650e926368ad21aaabe6055341d1874df696178f47d70b6d9a691f616274e"
+dependencies = [
+ "itoa",
+ "mini-internal",
+ "ryu",
+]
+
[[package]]
name = "miniz_oxide"
version = "0.8.0"
@@ -4904,6 +4961,12 @@ dependencies = [
"hmac",
]
+[[package]]
+name = "peakmem-alloc"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5eb7428a977a472465aced57d8d2335d6167c0ce9c05c283fd6faed3d8d948f6"
+
[[package]]
name = "peeking_take_while"
version = "1.0.0"
@@ -4944,6 +5007,25 @@ version = "2.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
+[[package]]
+name = "perf-event"
+version = "0.4.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4d6393d9238342159080d79b78cb59c67399a8e7ecfa5d410bd614169e4e823"
+dependencies = [
+ "libc",
+ "perf-event-open-sys",
+]
+
+[[package]]
+name = "perf-event-open-sys"
+version = "4.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c44fb1c7651a45a3652c4afc6e754e40b3d6e6556f1487e2b230bfc4f33c2a8"
+dependencies = [
+ "libc",
+]
+
[[package]]
name = "pest"
version = "2.7.13"
@@ -5984,7 +6066,7 @@ version = "0.8.0"
dependencies = [
"anyhow",
"base64 0.22.1",
- "criterion",
+ "binggan",
"fnv",
"hex",
"indexmap 2.1.0",
@@ -7117,6 +7199,12 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
+[[package]]
+name = "rustc-hash"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152"
+
[[package]]
name = "rustc_version"
version = "0.4.1"
@@ -7192,6 +7280,12 @@ dependencies = [
"untrusted 0.9.0",
]
+[[package]]
+name = "rustop"
+version = "1.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b5a6a926633a8ce739286680df905e1d1d01db609fc0e09d28e9b901ac7b22f"
+
[[package]]
name = "rustversion"
version = "1.0.17"
@@ -8266,7 +8360,7 @@ dependencies = [
"rayon",
"regex",
"rust-stemmers",
- "rustc-hash",
+ "rustc-hash 1.1.0",
"serde",
"serde_json",
"sketches-ddsketch",
@@ -9852,6 +9946,9 @@ name = "yansi"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049"
+dependencies = [
+ "is-terminal",
+]
[[package]]
name = "zerocopy"
diff --git a/quickwit/Cargo.toml b/quickwit/Cargo.toml
index ba0963bf938..67562edbf77 100644
--- a/quickwit/Cargo.toml
+++ b/quickwit/Cargo.toml
@@ -86,6 +86,7 @@ async-compression = { version = "0.4", features = ["tokio", "gzip"] }
async-speed-limit = "0.4"
async-trait = "0.1"
base64 = "0.22"
+binggan = { version = "0.13" }
bytes = { version = "1", features = ["serde"] }
bytesize = { version = "1.3.0", features = ["serde"] }
bytestring = "1.3.0"
diff --git a/quickwit/quickwit-doc-mapper/Cargo.toml b/quickwit/quickwit-doc-mapper/Cargo.toml
index be75ec7c02e..44b846157bd 100644
--- a/quickwit/quickwit-doc-mapper/Cargo.toml
+++ b/quickwit/quickwit-doc-mapper/Cargo.toml
@@ -36,7 +36,7 @@ quickwit-proto = { workspace = true }
quickwit-query = { workspace = true }
[dev-dependencies]
-criterion = { workspace = true }
+binggan = { workspace = true }
matches = { workspace = true }
serde_yaml = { workspace = true }
time = { workspace = true }
diff --git a/quickwit/quickwit-doc-mapper/benches/doc_to_json_bench.rs b/quickwit/quickwit-doc-mapper/benches/doc_to_json_bench.rs
index 58bcfa413dc..cd138ef26df 100644
--- a/quickwit/quickwit-doc-mapper/benches/doc_to_json_bench.rs
+++ b/quickwit/quickwit-doc-mapper/benches/doc_to_json_bench.rs
@@ -17,13 +17,15 @@
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
-use criterion::{criterion_group, criterion_main, Criterion, Throughput};
+use binggan::plugins::*;
+use binggan::{black_box, BenchRunner, PeakMemAlloc, INSTRUMENTED_SYSTEM};
use quickwit_doc_mapper::DocMapper;
use tantivy::TantivyDocument;
-const JSON_TEST_DATA: &str = include_str!("data/simple-parse-bench.json");
+const SIMPLE_JSON_TEST_DATA: &str = include_str!("data/simple-parse-bench.json");
+const ROUTING_TEST_DATA: &str = include_str!("data/simple-routing-expression-bench.json");
-const DOC_MAPPER_CONF: &str = r#"{
+const DOC_MAPPER_CONF_SIMPLE_JSON: &str = r#"{
"type": "default",
"default_search_fields": [],
"tag_fields": [],
@@ -35,28 +37,95 @@ const DOC_MAPPER_CONF: &str = r#"{
]
}"#;
-pub fn simple_json_to_doc_benchmark(c: &mut Criterion) {
- let doc_mapper: Box = serde_json::from_str(DOC_MAPPER_CONF).unwrap();
- let lines: Vec<&str> = JSON_TEST_DATA.lines().map(|line| line.trim()).collect();
+/// The `date` field ({"name": "date", "type": "datetime", "input_formats": ["%Y-%m-%d"],
+/// "output_format": "%Y-%m-%d"}) is omitted here: tantivy parsing only supports RFC 3339.
+const ROUTING_DOC_MAPPER_CONF: &str = r#"{
+ "type": "default",
+ "default_search_fields": [],
+ "tag_fields": [],
+ "field_mappings": [
+ {"name": "timestamp", "type": "datetime", "input_formats": ["unix_timestamp"], "output_format": "%Y-%m-%d %H:%M:%S", "output_format": "%Y-%m-%d %H:%M:%S", "fast": true },
+ {"name": "source", "type": "text" },
+ {"name": "vin", "type": "text" },
+ {"name": "vid", "type": "text" },
+ {"name": "domain", "type": "text" },
+ {"name": "seller", "type": "object", "field_mappings": [
+ {"name": "id", "type": "text" },
+ {"name": "name", "type": "text" },
+ {"name": "address", "type": "text" },
+ {"name": "zip", "type": "text" }
+ ]}
+ ],
+ "partition_key": "seller.id"
+}"#;
+
+#[global_allocator]
+pub static GLOBAL: &PeakMemAlloc = &INSTRUMENTED_SYSTEM;
+
+fn get_test_data(
+ name: &'static str,
+ raw: &'static str,
+ doc_mapper: &'static str,
+) -> (&'static str, usize, Vec<&'static str>, Box) {
+ let lines: Vec<&str> = raw.lines().map(|line| line.trim()).collect();
+ (
+ name,
+ raw.len(),
+ lines,
+ serde_json::from_str(doc_mapper).unwrap(),
+ )
+}
- let mut group = c.benchmark_group("simple-json-to-doc");
- group.throughput(Throughput::Bytes(JSON_TEST_DATA.len() as u64));
- group.bench_function("simple-json-to-doc", |b| {
- b.iter(|| {
- for line in &lines {
- doc_mapper.doc_from_json_str(line).unwrap();
+fn run_bench() {
+ let inputs: Vec<(&str, usize, Vec<&str>, Box)> = vec![
+ (get_test_data(
+ "flat_json",
+ SIMPLE_JSON_TEST_DATA,
+ DOC_MAPPER_CONF_SIMPLE_JSON,
+ )),
+ (get_test_data("routing_json", ROUTING_TEST_DATA, ROUTING_DOC_MAPPER_CONF)),
+ ];
+
+ let mut runner: BenchRunner = BenchRunner::new();
+
+ runner.config().set_num_iter_for_bench(1);
+ runner.config().set_num_iter_for_group(100);
+ runner
+ .add_plugin(CacheTrasher::default())
+ .add_plugin(BPUTrasher::default())
+ .add_plugin(PeakMemAllocPlugin::new(GLOBAL));
+
+ for (input_name, size, data, doc_mapper) in inputs.iter() {
+ let dynamic_doc_mapper: DocMapper =
+ serde_json::from_str(r#"{ "mode": "dynamic" }"#).unwrap();
+ let mut group = runner.new_group();
+ group.set_name(input_name);
+ group.set_input_size(*size);
+ group.register_with_input("doc_mapper", data, |lines| {
+ for line in lines {
+ black_box(doc_mapper.doc_from_json_str(line).unwrap());
}
- })
- });
- group.bench_function("simple-json-to-doc-tantivy", |b| {
- b.iter(|| {
+ Some(())
+ });
+
+ group.register_with_input("doc_mapper_dynamic", data, |lines| {
+ for line in lines {
+ black_box(dynamic_doc_mapper.doc_from_json_str(line).unwrap());
+ }
+ Some(())
+ });
+
+ group.register_with_input("tantivy parse json", data, |lines| {
let schema = doc_mapper.schema();
- for line in &lines {
- let _doc = TantivyDocument::parse_json(&schema, line).unwrap();
+ for line in lines {
+ let _doc = black_box(TantivyDocument::parse_json(&schema, line).unwrap());
}
- })
- });
+ Some(())
+ });
+ group.run();
+ }
}
-criterion_group!(benches, simple_json_to_doc_benchmark);
-criterion_main!(benches);
+fn main() {
+ run_bench();
+}
diff --git a/quickwit/quickwit-doc-mapper/benches/routing_expression_bench.rs b/quickwit/quickwit-doc-mapper/benches/routing_expression_bench.rs
index 8f4daa46d0f..6bac1f6e6e0 100644
--- a/quickwit/quickwit-doc-mapper/benches/routing_expression_bench.rs
+++ b/quickwit/quickwit-doc-mapper/benches/routing_expression_bench.rs
@@ -1,4 +1,4 @@
-// Copyright (C) 2023 Quickwit, Inc.
+// Copyright (C) 2024 Quickwit, Inc.
//
// Quickwit is offered under the AGPL v3.0 and as commercial software.
// For commercial licensing, contact us at hello@quickwit.io.
@@ -17,60 +17,45 @@
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
-use criterion::{criterion_group, criterion_main, Criterion, Throughput};
-use quickwit_doc_mapper::{DocMapper, RoutingExpr};
+use binggan::plugins::*;
+use binggan::{BenchRunner, PeakMemAlloc, INSTRUMENTED_SYSTEM};
+use quickwit_doc_mapper::RoutingExpr;
use serde_json::Value as JsonValue;
-const JSON_TEST_DATA: &str = include_str!("data/simple-routing-expression-bench.json");
-
-const DOC_MAPPER_CONF: &str = r#"{
- "type": "default",
- "default_search_fields": [],
- "tag_fields": [],
- "field_mappings": [
- {"name": "timestamp", "type": "datetime", "input_formats": ["unix_timestamp"], "output_format": "%Y-%m-%d %H:%M:%S", "output_format": "%Y-%m-%d %H:%M:%S", "fast": true },
- {"name": "source", "type": "text" },
- {"name": "vin", "type": "text" },
- {"name": "vid", "type": "text" },
- {"name": "date", "type": "datetime", "input_formats": ["%Y-%m-%d"], "output_format": "%Y-%m-%d"},
- {"name": "domain", "type": "text" },
- {"name": "seller", "type": "object", "field_mappings": [
- {"name": "id", "type": "text" },
- {"name": "name", "type": "text" },
- {"name": "address", "type": "text" },
- {"name": "zip", "type": "text" }
- ]}
- ],
- "partition_key": "seller.id"
-}"#;
+#[global_allocator]
+pub static GLOBAL: &PeakMemAlloc = &INSTRUMENTED_SYSTEM;
-pub fn simple_routing_expression_benchmark(c: &mut Criterion) {
- let doc_mapper: Box = serde_json::from_str(DOC_MAPPER_CONF).unwrap();
- let lines: Vec<&str> = JSON_TEST_DATA.lines().map(|line| line.trim()).collect();
+const JSON_TEST_DATA: &str = include_str!("data/simple-routing-expression-bench.json");
- let json_lines: Vec> = lines
- .iter()
+fn run_bench() {
+ let json_lines: Vec> = JSON_TEST_DATA
+ .lines()
.map(|line| serde_json::from_str(line).unwrap())
.collect();
- let mut group = c.benchmark_group("simple-routing-expression");
- group.throughput(Throughput::Bytes(JSON_TEST_DATA.len() as u64));
- group.bench_function("simple-json-to-doc", |b| {
- b.iter(|| {
- for line in &lines {
- doc_mapper.doc_from_json_str(line).unwrap();
- }
- })
- });
- group.bench_function("simple-eval-hash", |b| {
- b.iter(|| {
+ let mut runner: BenchRunner = BenchRunner::new();
+
+ runner
+ .add_plugin(CacheTrasher::default())
+ .add_plugin(PeakMemAllocPlugin::new(GLOBAL));
+
+ {
+ let (input_name, size, data) = &("routing_expr", JSON_TEST_DATA.len(), &json_lines);
+ let mut group = runner.new_group();
+ group.set_name(input_name);
+ group.set_input_size(*size);
+ group.register_with_input("simple-eval-hash", data, |lines| {
let routing_expr = RoutingExpr::new("seller.id").unwrap();
- for json in &json_lines {
+ for json in lines.iter() {
routing_expr.eval_hash(json);
}
- })
- });
+ Some(())
+ });
+
+ group.run();
+ }
}
-criterion_group!(benches, simple_routing_expression_benchmark);
-criterion_main!(benches);
+fn main() {
+ run_bench();
+}