From a6e0932885e03b6c1c8007d76b619f86f1f09c7e Mon Sep 17 00:00:00 2001 From: Benjamin Wagner Date: Mon, 6 Nov 2023 15:51:22 +0100 Subject: [PATCH] Add Big Join Query Based on Peter's advice to see if my microbenchmarks generalize to a very simple query - turns out they do! We add `q_bigjoin`: `SELECT count(*) FROM lineitem, orders WHERE l_orderkey = o_orderkey`. For this query at SF 10 we can actually nicely see that Interpretation & ROF start winning against JIT compilation because they can issue more independent hash table loads. --- src/common/TPCH.cpp | 53 ++++++++++++++++++++++++++++++++++++++ src/common/TPCH.h | 1 + test/tpch/test_queries.cpp | 4 ++- tools/inkfuse_bench.cpp | 1 + tools/inkfuse_runner.cpp | 3 +++ 5 files changed, 61 insertions(+), 1 deletion(-) diff --git a/src/common/TPCH.cpp b/src/common/TPCH.cpp index 07da0f8..c645bf2 100644 --- a/src/common/TPCH.cpp +++ b/src/common/TPCH.cpp @@ -1784,6 +1784,59 @@ std::unique_ptr q19(const Schema& schema) { } +std::unique_ptr q_bigjoin(const inkfuse::Schema& schema) { + // Join orders onto lineitem. + auto& rel_o = schema.at("orders"); + std::vector cols_o{ + "o_orderkey"}; + auto scan_o = TableScan::build(*rel_o, cols_o, "scan_o"); + auto& scan_ref_o = *scan_o; + + auto& rel_l = schema.at("lineitem"); + std::vector cols_l{ + "l_orderkey"}; + auto scan_l = TableScan::build(*rel_l, cols_l, "scan_l"); + auto& scan_ref_l = *scan_l; + + std::vector o_l_join_children; + o_l_join_children.push_back(std::move(scan_o)); + o_l_join_children.push_back(std::move(scan_l)); + auto o_l_join = Join::build( + std::move(o_l_join_children), + "o_l_join", + // Keys left (o_orderkey) + {scan_ref_o.getOutput()[0]}, + // Payload left () + {}, + // Keys right (l_orderkey) + {scan_ref_l.getOutput()[0]}, + {}, + JoinType::Inner, + true); + auto& o_l_join_ref = *o_l_join; + + // 2. Aggregate count(*). + std::vector agg_children; + agg_children.push_back(std::move(o_l_join)); + // Don't group by anything on this query. + std::vector group_by{}; + std::vector aggregates{ + {*o_l_join_ref.getOutput()[0], AggregateFunctions::Opcode::Count}}; + auto agg = Aggregation::build( + std::move(agg_children), + "agg", + std::move(group_by), + std::move(aggregates)); + + // 6. Print + std::vector out_ius{agg->getOutput()[0]}; + std::vector colnames = {"num_rows"}; + std::vector print_children; + print_children.push_back(std::move(agg)); + return Print::build(std::move(print_children), + std::move(out_ius), std::move(colnames)); +} + std::unique_ptr l_count(const inkfuse::Schema& schema) { // 1. Scan from lineitem. auto& rel = schema.at("lineitem"); diff --git a/src/common/TPCH.h b/src/common/TPCH.h index 0df550a..dd7da71 100644 --- a/src/common/TPCH.h +++ b/src/common/TPCH.h @@ -34,6 +34,7 @@ std::unique_ptr q18(const Schema& schema); std::unique_ptr q19(const Schema& schema); /// Some interesting custom queries. See /tpch for query text. +std::unique_ptr q_bigjoin(const Schema& schema); std::unique_ptr l_count(const Schema& schema); std::unique_ptr l_point(const Schema& schema); diff --git a/test/tpch/test_queries.cpp b/test/tpch/test_queries.cpp index 3abd1e2..1a3f112 100644 --- a/test/tpch/test_queries.cpp +++ b/test/tpch/test_queries.cpp @@ -38,6 +38,7 @@ const std::unordered_map generator_map{ {"q14", tpch::q14}, {"q18", tpch::q18}, {"q19", tpch::q19}, + {"q_bigjoin", tpch::q_bigjoin}, {"l_count", tpch::l_count}, {"l_point", tpch::l_point}, }; @@ -53,6 +54,7 @@ std::unordered_map expected_rows{ {"q14", 1}, {"q18", 0}, {"q19", 0}, + {"q_bigjoin", 1}, {"l_count", 1}, {"l_point", 6}, }; @@ -76,7 +78,7 @@ INSTANTIATE_TEST_CASE_P( tpch_queries, TPCHQueriesTestT, ::testing::Combine( - ::testing::Values("q1", "q3", "q4", "q5", "q6", "q14", "q18", "q19", "l_count", "l_point"), + ::testing::Values("q1", "q3", "q4", "q5", "q6", "q14", "q18", "q19", "l_count", "q_bigjoin", "l_point"), ::testing::Values( PipelineExecutor::ExecutionMode::Fused, PipelineExecutor::ExecutionMode::Interpreted, diff --git a/tools/inkfuse_bench.cpp b/tools/inkfuse_bench.cpp index e4c58d4..06d625d 100644 --- a/tools/inkfuse_bench.cpp +++ b/tools/inkfuse_bench.cpp @@ -43,6 +43,7 @@ const std::vector> queries = { {"q14", tpch::q14}, {"q18", tpch::q18}, {"q19", tpch::q19}, + {"q_bigjoin", tpch::q_bigjoin}, {"l_count", tpch::l_count}, {"l_point", tpch::l_point}, }; diff --git a/tools/inkfuse_runner.cpp b/tools/inkfuse_runner.cpp index f89af5a..cbdbd86 100644 --- a/tools/inkfuse_runner.cpp +++ b/tools/inkfuse_runner.cpp @@ -198,6 +198,9 @@ int main(int argc, char* argv[]) { } else if (split[1] == "q19") { auto q = tpch::q19(*loaded); runQuery("q19", std::move(q), mode, thread_count); + } else if (split[1] == "q_bigjoin") { + auto q = tpch::q_bigjoin(*loaded); + runQuery("q_bigjoin", std::move(q), mode, thread_count); } else if (split[1] == "l_count") { auto q = tpch::l_count(*loaded); runQuery("l_count", std::move(q), mode, thread_count);