diff --git a/src/engine/CMakeLists.txt b/src/engine/CMakeLists.txt index 41a9a33a68..06e21c1c1f 100644 --- a/src/engine/CMakeLists.txt +++ b/src/engine/CMakeLists.txt @@ -1,6 +1,8 @@ add_subdirectory(sparqlExpressions) +add_subdirectory(joinOrdering) add_library(SortPerformanceEstimator SortPerformanceEstimator.cpp) qlever_target_link_libraries(SortPerformanceEstimator) + add_library(engine Engine.cpp QueryExecutionTree.cpp Operation.cpp Result.cpp LocalVocab.cpp IndexScan.cpp Join.cpp Sort.cpp diff --git a/src/engine/joinOrdering/CMakeLists.txt b/src/engine/joinOrdering/CMakeLists.txt new file mode 100644 index 0000000000..584ca90101 --- /dev/null +++ b/src/engine/joinOrdering/CMakeLists.txt @@ -0,0 +1,10 @@ +add_library(joinOrdering + QueryGraph.cpp + JoinTree.cpp JoinNode.cpp + IKKBZ.cpp + LinearizedDP.cpp + RelationBasic.cpp + EdgeInfo.cpp + CostIKKBZ.cpp + CostCout.cpp) +qlever_target_link_libraries(joinOrdering) diff --git a/src/engine/joinOrdering/CostCout.cpp b/src/engine/joinOrdering/CostCout.cpp new file mode 100644 index 0000000000..b425bf2beb --- /dev/null +++ b/src/engine/joinOrdering/CostCout.cpp @@ -0,0 +1,140 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: +// Mahmoud Khalaf (2024-, khalaf@cs.uni-freiburg.de) + +#include "CostCout.h" + +namespace JoinOrdering::Cost { + +template +requires RelationAble float selectivity( + std::shared_ptr> x, std::shared_ptr> y, + const std::map>& selectivities) { + if (!x) return 1.0; + if (!y) return 1.0; + + if (x->isLeaf() && y->isLeaf()) + // return selectivities.at(x.get()->relation.getLabel()) + // .at(y.get()->relation.getLabel()); + { + auto z = selectivities.at(x.get()->relation.getLabel()); + auto kk = y.get()->relation.getLabel(); + if (!z.contains(kk)) return 1.0; + auto zz = z.at(kk); // TODO: get or default + return zz; + } + + if (x->isLeaf() && !y->isLeaf()) + return selectivity(x, y->left, selectivities) * + selectivity(x, y->right, selectivities); + + if (!x->isLeaf() && y->isLeaf()) + return selectivity(y, x->left, selectivities) * + selectivity(y, x->right, selectivities); + + return selectivity(x->left, y->left, selectivities) * + selectivity(x->left, y->right, selectivities) * + selectivity(x->right, y->left, selectivities) * + selectivity(x->right, y->right, selectivities); +} + +// assumes independence of the predicates +// ref: 77/637 +template +requires RelationAble unsigned long long cardinality( + std::shared_ptr> n, + const std::map& cardinalities, + const std::map>& selectivities) { + if (n == nullptr) return 1; + + // TODO: log missing relation cardinality + if (n->isLeaf()) return cardinalities.at(n->relation.getLabel()); + + auto l = n->left; + auto r = n->right; + + if (l && r) + return cardinality(l, cardinalities, selectivities) * + cardinality(r, cardinalities, selectivities) * + selectivity(l, r, selectivities); + + if (l) return cardinality(n->left, cardinalities, selectivities); + // if (r) return cardinality(n->right, cardinalities, selectivities); + return cardinality(n->right, cardinalities, selectivities); + + // AD_CONTRACT_CHECK("How Did We Get Here?"); + // return 0; +} + +template +requires RelationAble +double Cout(const 
JoinTree& t, const QueryGraph& q) { + // q.selectivity + std::map> qselecm; + std::map qcards; + + // FIXME: rebuilds the label-keyed maps from q.edges_ on every call + // std::map> + for (auto const& [k, xm] : q.edges_) { + auto l = k; + qcards[l.getLabel()] = l.getCardinality(); + for (auto const& [x, xe] : xm) { + auto r = x; + if (!xe.hidden) { + auto s = xe.weight; + qselecm[l.getLabel()][r.getLabel()] = s; + qselecm[r.getLabel()][l.getLabel()] = s; + } + } + } + + return Cout(t.root, qcards, qselecm); +} + +template +requires RelationAble double Cout( + const JoinTree& t, + const std::map& cardinalities, + const std::map>& selectivities) { + return Cout(t.root, cardinalities, selectivities); +} + +// ref: 79/637 +template +requires RelationAble double Cout( + std::shared_ptr> n, + const std::map& cardinalities, + const std::map>& selectivities) { + if (n == nullptr) return 0; // empty join tree, DP table + if (n->isLeaf()) return 0; + + auto l = n->left; + auto r = n->right; + + if (l && r) + return cardinality(n, cardinalities, selectivities) + + Cout(l, cardinalities, selectivities) + + Cout(r, cardinalities, selectivities); + + if (l) return Cout(l, cardinalities, selectivities); + // if (r) return Cout(r, cardinalities, selectivities); + return Cout(r, cardinalities, selectivities); + // AD_CONTRACT_CHECK("How Did We Get Here?"); + // return 0; +} + +template double Cout(const JoinTree& t, + const QueryGraph& q); + +template double Cout( + const JoinTree& t, + const std::map& cardinalities, + const std::map>& selectivities); + +template double Cout( + std::shared_ptr> n, + const std::map& cardinalities, + const std::map>& selectivities); + +} // namespace JoinOrdering::Cost diff --git a/src/engine/joinOrdering/CostCout.h b/src/engine/joinOrdering/CostCout.h new file mode 100644 index 0000000000..592e404fb3 --- /dev/null +++ b/src/engine/joinOrdering/CostCout.h @@ -0,0 +1,74 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: +// Mahmoud Khalaf (2024-, khalaf@cs.uni-freiburg.de) + +#pragma once + +#include +#include + +#include "JoinNode.h" +#include "JoinTree.h" +#include "QueryGraph.h" + +namespace JoinOrdering::Cost { + +template +requires RelationAble +double Cout(const JoinTree& t, const QueryGraph& q); + +/** + * + * Basic Cost Function that returns an estimate of how expensive it is to + * evaluate a given JoinTree. A low cost implies a cheap execution plan. + * + * ref: 79/637 + * + * // TODO: can be inferred by RelationAble::getCardinality + * // TODO: better use some sort of map of unordered pairs since selectivity is + * direction-less + * // TODO: default to 1.0 when the selectivity between 2 relations is not + * defined. + * + * @tparam N type that satisfies RelationAble concept + * @param t Linear JoinTree (left-deep, right-deep, zigzag, ...) + * @param cardinalities map of cardinality of each relation in the tree + * @param selectivities map of selectivity for each pair of relation in the tree + * @return Cost Evaluation for given JoinTree + */ +template +requires RelationAble double Cout( + const JoinTree& t, + const std::map& cardinalities, + const std::map>& selectivities); + +/** + * Cost of the subtree rooted at the given JoinNode. + * + * @tparam N type that satisfies RelationAble concept + * @param r JoinNode that can be inner (join operators) or leaf node (relations) + * @param cardinalities map of cardinality of each relation in the tree + * @param selectivities map of selectivity for each pair of relation in the tree + * @return Cost Evaluation for given JoinNode + */ +template +requires RelationAble double Cout( + std::shared_ptr> r, + const std::map& cardinalities, + const std::map>& selectivities); + +// template +// requires RelationAble unsigned long long cardinality( +// std::shared_ptr> r, +// const std::map& cardinalities, +// const std::map>& +// selectivities); +// +// template +// requires RelationAble float selectivity( +// std::shared_ptr> x, std::shared_ptr> y, +// const std::map>& +// 
selectivities); + +} // namespace JoinOrdering::Cost diff --git a/src/engine/joinOrdering/CostIKKBZ.cpp b/src/engine/joinOrdering/CostIKKBZ.cpp new file mode 100644 index 0000000000..0d8eb73e50 --- /dev/null +++ b/src/engine/joinOrdering/CostIKKBZ.cpp @@ -0,0 +1,67 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: +// Mahmoud Khalaf (2024-, khalaf@cs.uni-freiburg.de) + +#include "CostIKKBZ.h" + +#include "RelationBasic.h" + +namespace JoinOrdering { + +template +requires RelationAble +float CostIKKBZ::C(const QueryGraph& g, std::span seq) { + if (seq.empty()) return 0.0f; + auto s1 = seq.front(); + // auto s2 = seq | std::views::drop(1); + auto s2 = seq.subspan(1); + return C(g, s1) + T(g, s1) * C(g, s2); // TODO: might overflow +} + +template +requires RelationAble +float CostIKKBZ::C(const QueryGraph& g, const N& n) { + // return 0 if Ri is root 113/637 + if (g.root == n) return 0; + + // i.e: regular relation + if (!g.is_compound_relation(n)) return T(g, n); + + auto const& [s1, s2] = g.hist.at(n).value(); + return C(g, s1) + T(g, s1) * C(g, s2); // TODO: might overflow +} + +template +requires RelationAble +float CostIKKBZ::T(const QueryGraph& g, const N& n) { + // return 1 if Ri is root 113/637 + if (g.root == n) return 1; + return g.selectivity.at(n) * static_cast(n.getCardinality()); +} + +template +requires RelationAble +float CostIKKBZ::rank(const QueryGraph& g, const N& n) { + // memoize cost and rank + // avoid recomputing for long sequences + if (rank_m.contains(n)) return rank_m[n]; // important + auto c = C_m.contains(n) ? C_m[n] : C(g, n); // important + auto t = T_m.contains(n) ? 
T_m[n] : T(g, n); // maybe not important + + if (c == 0) return 0; + auto r = (t - 1) / c; + AD_CONTRACT_CHECK(r >= 0 && r <= 1); + + rank_m[n] = r; + C_m[n] = c; + T_m[n] = t; + return r; +} + +template float CostIKKBZ::C(const QueryGraph& g, + std::span seq); + +template float CostIKKBZ::rank( + const QueryGraph& g, const RelationBasic& n); +} // namespace JoinOrdering diff --git a/src/engine/joinOrdering/CostIKKBZ.h b/src/engine/joinOrdering/CostIKKBZ.h new file mode 100644 index 0000000000..dc19936006 --- /dev/null +++ b/src/engine/joinOrdering/CostIKKBZ.h @@ -0,0 +1,72 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: +// Mahmoud Khalaf (2024-, khalaf@cs.uni-freiburg.de) + +#pragma once + +#include + +#include "ICostASI.h" +// #include "util/HashMap.h" +#include + +namespace JoinOrdering { + +template +requires RelationAble class CostIKKBZ : public ICostASI { + public: + // ad_utility::HashMap rank_m; + // ad_utility::HashMap C_m; + // ad_utility::HashMap T_m; + + std::map rank_m; + std::map C_m; + std::map T_m; + + float rank(const QueryGraph& g, const N& n); + + /** + * + * calculate T for a non-compound relation s_i * n_i + * (selectivity * cardinality) + * + * + * @param g precedence tree + * @param n Relation + * @return T(n) + */ + float T(const QueryGraph& g, const N& n); + /** + * + * a join is called increasing if cost > 1 + * a join is called decreasing if cost < 1 + * + * ref: 113/637 + * + * @param g precedence tree + * @param n Relation + * @return C(n) + */ + float C(const QueryGraph& g, const N& n); + + /** + * + * calculate cost for a sequence of relations + * + * + * C(eps) = 0 + * C(R) = 0 (if R is root) + * C(R) = h_i * (n_i) + * C(S_1 S_2) = C(S1) + T(S1) * C(S2) + * + * ref: 113/637 + * + * @param g precedence tree + * @param seq sequence of relations (may include compound relations) + * @return C(S_1 S_2) + */ + float C(const QueryGraph& g, std::span seq); +}; + +} // namespace 
JoinOrdering diff --git a/src/engine/joinOrdering/EdgeInfo.cpp b/src/engine/joinOrdering/EdgeInfo.cpp new file mode 100644 index 0000000000..471325736b --- /dev/null +++ b/src/engine/joinOrdering/EdgeInfo.cpp @@ -0,0 +1,14 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: +// Mahmoud Khalaf (2024-, khalaf@cs.uni-freiburg.de) + +#include "EdgeInfo.h" + +namespace JoinOrdering { + +EdgeInfo::EdgeInfo() = default; +// EdgeInfo::EdgeInfo(Direction dir) : direction(dir) {} +EdgeInfo::EdgeInfo(Direction dir, float weight) + : direction(dir), weight(weight) {} +} // namespace JoinOrdering diff --git a/src/engine/joinOrdering/EdgeInfo.h b/src/engine/joinOrdering/EdgeInfo.h new file mode 100644 index 0000000000..071bcd4d88 --- /dev/null +++ b/src/engine/joinOrdering/EdgeInfo.h @@ -0,0 +1,30 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: +// Mahmoud Khalaf (2024-, khalaf@cs.uni-freiburg.de) + +#pragma once + +namespace JoinOrdering { + +enum class Direction { + UNDIRECTED, + PARENT, + CHILD, + +}; + +class EdgeInfo { + public: + // read from left to right + // Ra is a `direction` (e.g. PARENT or CHILD) of Rb + Direction direction{Direction::UNDIRECTED}; + bool hidden{false}; // instead of erasing + float weight{-1}; + + EdgeInfo(); + // explicit EdgeInfo(Direction dir); + EdgeInfo(Direction dir, float weight); +}; + +} // namespace JoinOrdering diff --git a/src/engine/joinOrdering/ICostASI.h b/src/engine/joinOrdering/ICostASI.h new file mode 100644 index 0000000000..56f7e4a478 --- /dev/null +++ b/src/engine/joinOrdering/ICostASI.h @@ -0,0 +1,41 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: +// Mahmoud Khalaf (2024-, khalaf@cs.uni-freiburg.de) + +#pragma once + +#include "QueryGraph.h" + +namespace JoinOrdering { + +/** + * adjacent sequence interchange + * + * let A, B be two sequences and V, U two non-empty sequences + * a cost function C is ASI if the following holds: + * + * C(AUVB) <= C(AVUB) <=> rank(U) <= rank(V) + * + * ref: 114/637 + */ + +template +requires RelationAble class ICostASI { + public: + /** + * calculate rank ("benefit") for a relation + * + * if rank(R2) < rank(R3) then joining + * (R1 x R2) x R3 is cheaper than + * (R1 x R3) x R2 + * + * + * @param g precedence tree + * @param n Relation (may be compound relation) + * @return r(n) + */ + virtual auto rank(const QueryGraph& g, const N& n) -> float = 0; +}; + +} // namespace JoinOrdering diff --git a/src/engine/joinOrdering/IKKBZ.cpp b/src/engine/joinOrdering/IKKBZ.cpp new file mode 100644 index 0000000000..4042b764cd --- /dev/null +++ b/src/engine/joinOrdering/IKKBZ.cpp @@ -0,0 +1,278 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: +// Mahmoud Khalaf (2024-, khalaf@cs.uni-freiburg.de) + +#include "IKKBZ.h" + +#include "CostIKKBZ.h" +#include "RelationBasic.h" + +namespace JoinOrdering { + +template +requires RelationAble std::vector IKKBZ(QueryGraph g) { + // execute the IKKBZ routine for EVERY relation on the graph + // then return the permutation with the minimum cost. 
+ auto rxs(g.relations_); + AD_CONTRACT_CHECK(!rxs.empty()); + + typedef std::pair, float> vf; + auto [ldtree_opt, cost] = std::transform_reduce( + // TODO: macos doesn't like it + // std::execution::par_unseq, // (3) in parallel if hw allows it + rxs.begin(), rxs.end(), // (1) for every relation in query + vf({}, std::numeric_limits::max()), + [&](const vf& l, const vf& r) { // (4) return the tree with min cost + return std::ranges::min( + l, r, [](const vf& a, const vf& b) { return a.second < b.second; }); + }, + [&](const N& n) { // (2) run IKKBZ routine + auto Ch = CostIKKBZ(); + auto ldtree = IKKBZ(g, Ch, n); + auto seq = ldtree.iter(); + auto seqv = std::span(seq); + return vf(seq, Ch.C(ldtree, seqv)); + }); + + return ldtree_opt; +} + +template +requires RelationAble QueryGraph IKKBZ(QueryGraph g, const N& n) { + auto new_g = toPrecedenceGraph(g, n); + auto Ch = CostIKKBZ(); + IKKBZ_Sub(new_g, Ch); + return new_g; +} + +template +requires RelationAble +QueryGraph IKKBZ(QueryGraph g, ICostASI& Ch, const N& n) { + auto new_g = toPrecedenceGraph(g, n); + IKKBZ_Sub(new_g, Ch); + return new_g; +} + +template +requires RelationAble +[[nodiscard]] QueryGraph toPrecedenceGraph(QueryGraph& g, const N& root) { + // bfs-ing over g and assign direction to visited relation + auto pg = QueryGraph(); + auto v = std::set(); + auto q = std::queue(); + pg.root = root; + v.insert(pg.root); + q.push(pg.root); + + while (!q.empty()) { + auto a = q.front(); + q.pop(); + for (auto const& [b, _] : g.edges_[a]) { // std::views::keys(g.edges_[a]); + if (v.contains(b)) continue; + if (!pg.has_relation(a)) pg.add_relation(a); + if (!pg.has_relation(b)) pg.add_relation(b); + + // we assign selectivity here + pg.add_rjoin(a, b, g.selectivity[b], Direction::PARENT); + q.push(b); + v.insert(b); + } + } + + return pg; +} + +template +requires RelationAble void IKKBZ_Sub(QueryGraph& g, ICostASI& Ch) { + while (!g.is_chain(g.root)) { + auto subtree_root = g.get_chained_subtree(g.root); + auto 
normalized_subtree = IKKBZ_Normalized(g, Ch, subtree_root); + IKKBZ_merge(g, Ch, normalized_subtree); + } + IKKBZ_Denormalize(g); +} + +template +requires RelationAble +std::vector IKKBZ_Normalized(QueryGraph& g, ICostASI& Ch, + const N& subtree_root) { + for (bool normalized = true;; normalized = true) { + auto subtree = g.iter(subtree_root); + + for (auto const& d : subtree) { + // iter includes subtree_root back + // skip subtree root + if (d == subtree_root) continue; + auto pv = g.get_parent(d); + + // absence of a parent means g.root + // skip query graph root + if (pv.empty()) continue; + auto p = pv.front(); + + // subtree_root is excluded from the ranking comparison + if (p == subtree_root) continue; + + for (auto const& c : g.get_children(p)) + // "precedence graph demands A -> B but rank(A) > rank(B), + // we speak of contradictory sequences." + // 118/637 + if (Ch.rank(g, p) > Ch.rank(g, c)) { + // a new node representing compound relation + IKKBZ_combine(g, p, c); + // mark as dirty + // subtree_root might (or might not) need more normalization + normalized = false; + } + } + if (normalized) return subtree; + } +} + +template +requires RelationAble +void IKKBZ_merge(QueryGraph& g, ICostASI& Ch, std::vector& dv) { + // we get here after we are already sure that descendants + // are going to be in a SINGLE chain + + // subchain root not considered during sorting + // subchain root is always at the beginning regardless of its rank + // subchain is always at the beginning of dv + std::ranges::partial_sort( + dv.begin() + 1, dv.end(), dv.end(), + [&](const N& a, const N& b) { return Ch.rank(g, a) < Ch.rank(g, b); }); + + // given a sequence post sort dv (a, b, c, d, ...) + // include subchain root at the beginning (n, a, b, c, d, ...) 
+ // we remove all connections they have (except n) and conform to the order + // we get post the sorting process (n -> a -> b -> c -> d) + + for (size_t i = 1; i < dv.size(); i++) { + g.unlink(dv[i]); + g.add_rjoin(dv[i - 1], dv[i], g.selectivity[dv[i]], Direction::PARENT); + } +} + +template +requires RelationAble +[[maybe_unused]] N IKKBZ_combine(QueryGraph& g, const N& a, const N& b) { + // 104/637 + // "if the ordering violates the query constraints, it constructs compounds" + + AD_CONTRACT_CHECK(g.has_rjoin(a, b)); + + // 118/637 + // "its cardinality is computed by multiplying the cardinalities of + // all relations in A and B" + // auto w = cardinality[a] * cardinality[b]; + auto w = a.getCardinality() * b.getCardinality(); + + // "its selectivity is the product of all selectivities (s_i) of relations + // R_i contained in A and B" + auto s = g.selectivity[a] * g.selectivity[b]; + + // add the newly computed cardinality to the + // cardinality map of the query graph. + auto n = N(a.getLabel() + "," + b.getLabel(), w); + g.add_relation(n); + + // to be able to apply the inverse operation (IKKBZ_uncombine) + // we keep track of the combined relation in the `hist` map + + g.hist[n] = {a, b}; + + // TODO: use common neighbor? + std::set parents; + for (auto const& x : g.get_parent(a)) parents.insert(x); + for (auto const& x : g.get_parent(b)) parents.insert(x); + + // IN CASE merging bc + // a -> b -> c + // we don't want b to be the parent of bc + parents.erase(a); + parents.erase(b); + + // for (auto const& x : parents) add_rjoin(x, n, s, Direction::PARENT); + AD_CONTRACT_CHECK(parents.size() == 1); + g.add_rjoin(*parents.begin(), n, s, Direction::PARENT); + + // filters out duplicate relations if the 2 relations have common descendants. + // yes. it should never happen in an acyclic graph. 
+ // rationale behind using a std::set here + std::set children{}; + + // collect all children of relation a + // collect all children of relation b + // connect relation n to each child of a and b + + auto ca = g.get_children(a); + auto cb = g.get_children(b); + children.insert(ca.begin(), ca.end()); + children.insert(cb.begin(), cb.end()); + + // equiv. to add_rjoin(n, c, s, Direction::PARENT); + for (auto const& c : children) g.add_rjoin(c, n, s, Direction::CHILD); + + // make these relations unreachable + g.rm_relation(a); + g.rm_relation(b); + return n; +} + +template +requires RelationAble void IKKBZ_uncombine(QueryGraph& g, const N& n) { + // ref: 121/637 + // don't attempt to uncombine regular relation + if (!g.is_compound_relation(n)) return; + + auto pn = g.get_parent(n); + auto cn = g.get_children(n); + + std::vector rxs{}; + + // breaks down a given compound relation (n) + // to its basic components (r1, r2, ....) + g.unpack(n, rxs); + + // put the parent of relation first (1) + std::vector v{pn.begin(), pn.end()}; + // assert single parent to the compound relation + AD_CONTRACT_CHECK(v.size() == 1); + + // then the basic relation (r1, r2, ...) (2) + v.insert(v.end(), rxs.begin(), rxs.end()); + // then the children of (n) (3) + v.insert(v.end(), cn.begin(), cn.end()); + + // also removes all incoming and outgoing connections + g.rm_relation(n); + + // given {p, r1, r2, ..., c1, c2, ...}, connect them such that + // p -> r1 -> r2 -> ... -> c1 -> c2 -> ... 
+ for (size_t i = 1; i < v.size(); i++) + g.add_rjoin(v[i - 1], v[i], g.selectivity[v[i]], Direction::PARENT); +} + +template +requires RelationAble void IKKBZ_Denormalize(QueryGraph& g) { + auto is_compound = [&](const N& n) { return g.is_compound_relation(n); }; + auto uncombine = [&](const N& n) { IKKBZ_uncombine(g, n); }; + + auto rxs = g.iter(); + // R1 -> R4R6R7 -> R5 -> R3 -> R2 + auto fv = std::views::filter(rxs, is_compound); // R4R6R7 + + // R1 -> R4 -> R6 -> R7 -> R5 -> R3 -> R2 + std::for_each(fv.begin(), fv.end(), uncombine); +} + +// explicit template instantiation + +template std::vector IKKBZ(QueryGraph); +template QueryGraph IKKBZ(QueryGraph, + const RelationBasic&); +template void IKKBZ_merge(QueryGraph&, ICostASI&, + std::vector&); + +} // namespace JoinOrdering diff --git a/src/engine/joinOrdering/IKKBZ.h b/src/engine/joinOrdering/IKKBZ.h new file mode 100644 index 0000000000..75b8118f68 --- /dev/null +++ b/src/engine/joinOrdering/IKKBZ.h @@ -0,0 +1,200 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: +// Mahmoud Khalaf (2024-, khalaf@cs.uni-freiburg.de) + +#pragma once + +#include +#include + +#include "ICostASI.h" +#include "QueryGraph.h" + +namespace JoinOrdering { + +/** + * + * Polynomial algorithm for join ordering + * + * produces optimal left-deep trees without cross products + * requires acyclic join graphs + * cost function must have ASI property + * + * Can be used as heuristic if the requirements are violated + * + * ref: 103,120/637 + * + * @param g acyclic query graph + * @tparam N type that satisfies IKKBZ::RelationAble concept + * @return optimal left-deep tree + */ +template +requires RelationAble std::vector IKKBZ(QueryGraph g); + +/** + * + * Generate a precedence graph out of an undirected graph and trigger + * the main subroutine. 
+ * + * @param g acyclic query graph + * @param n relation used as root for the JoinOrdering::toPrecedenceGraph + * @return left-deep tree rooted at n + */ +template +requires RelationAble QueryGraph IKKBZ(QueryGraph g, const N& n); + +/** + * + * Generate a precedence graph out of an undirected graph and trigger + * the main subroutine. + * + * @param g acyclic query graph + * @param Ch cost function that has ASI property + * @param n relation used as root for the JoinOrdering::toPrecedenceGraph + * @return left-deep tree rooted at n + */ +template +requires RelationAble +QueryGraph IKKBZ(QueryGraph g, ICostASI& Ch, const N& n); + +/** + * The precedence graph describes the (partial) ordering of joins + * implied by the query graph. + * + * e.g.: + * + + R1 -+ +- R5 + | | + + R3 --- R4 + + | | + R2 -+ +- R6 + + query graph + + + + R1 + + | + | + v + + R3 --> R2 + + | + | + v + + R4 --> R6 + + | + | + v + + R5 + + + precedence graph rooted in R1 + * + * ref: 106/637 + * + * @param g acyclic query graph + * @param root starting relation + * @return new query graph (precedence tree) + */ +template +requires RelationAble +[[nodiscard]] QueryGraph toPrecedenceGraph(QueryGraph& g, const N& root); + +template +requires RelationAble void IKKBZ_Sub(QueryGraph& g); + +/** + * continued process of building compound relations until + * no contradictory sequences exist. + * + * "merges relations that would have been reordered if only considering the rank + * guarantees that rank is ascending in each subchain" + * + * + * ref: 119,122/637 + * @param g precedence tree + * @param subtree_root subtree of g + * @param Ch cost function that has ASI property + * @return normalized relations under given subtree + * @see IKKBZ_combine + * @see IKKBZ_merge + */ +template +requires RelationAble +std::vector IKKBZ_Normalized(QueryGraph& g, ICostASI& Ch, + const N& subtree_root); + +/** + * Merge the chains under relation n according to the rank function. 
+ * + * post IKKBZ_Normalized, + * if rank(b) < rank(cd) and a -> b, a -> cd + * then we merge them into a single chain where a is + * the subtree_root + * + * ref: 121,126/637 + * @param g precedence tree with subchains ready to merge + * @param Ch cost function that has ASI property + * @param normalized_subtree vector of the relations in all the chains + * @see IKKBZ_Normalized + */ +template +requires RelationAble void IKKBZ_merge(QueryGraph& g, ICostASI& Ch, + std::vector& normalized_subtree); + +/** + * Given 2 Relations (already exist on the QueryGraph), + * combine these 2 relations into a new compound relation. + * + * All descendants of Relation a and Relation b become descendants of the newly + * created relation ab. Relation a and Relation b are expected to be neighbours. + * + * Does NOT work with undirected graph, in such case use GOO_combine instead. + * + * @param g precedence tree + * @param a Relation a + * @param b Relation b + * @return Relation ab + * @see IKKBZ_uncombine + */ +template +requires RelationAble +[[maybe_unused]] N IKKBZ_combine(QueryGraph& g, const N& a, const N& b); + +/** + * Inverse operation of IKKBZ_combine. + * + * Spread a compound relation back into its direct components. + * @param n Compound Relation + * @see QueryGraph::unpack + * @see IKKBZ_Denormalize + */ +template +requires RelationAble void IKKBZ_uncombine(QueryGraph& g, const N& n); + +/** + * the opposite step of JoinOrdering::IKKBZ_Normalized. 
+ * + * transform precedence tree into a single chain + * + * replacing every compound relation by the sequence of relations + * it was derived from + * + * ref: 119,121/637 + * @param g precedence tree + * @see IKKBZ_uncombine + */ +template +requires RelationAble void IKKBZ_Denormalize(QueryGraph& g); + +} // namespace JoinOrdering diff --git a/src/engine/joinOrdering/JoinNode.cpp b/src/engine/joinOrdering/JoinNode.cpp new file mode 100644 index 0000000000..bbd125d568 --- /dev/null +++ b/src/engine/joinOrdering/JoinNode.cpp @@ -0,0 +1,53 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: +// Mahmoud Khalaf (2024-, khalaf@cs.uni-freiburg.de) + +#include "JoinNode.h" + +#include "RelationBasic.h" + +namespace JoinOrdering { + +template +requires RelationAble JoinNode::JoinNode() { + // this->relation = nullptr; + this->left = nullptr; + this->right = nullptr; + this->joinType = BOWTIE; +} + +template +requires RelationAble JoinNode::JoinNode(N relation) { + this->relation = relation; + this->left = nullptr; + this->right = nullptr; + this->joinType = BOWTIE; +} + +template +requires RelationAble +JoinNode::JoinNode(std::shared_ptr> l_, + std::shared_ptr> r_, JoinType joinType) { + // this->relation = NULL; + this->left = l_; + this->right = r_; + this->joinType = joinType; +} + +template +requires RelationAble bool JoinNode::isLeaf() { + // TODO: check for relation (this->relation != nullptr) + return this->left == nullptr and this->right == nullptr; +} + +// template +// requires RelationAble JoinNode::JoinNode(N r) { +// this->relation = r; +// this->left = nullptr; +// this->right = nullptr; +// } + +// explicit template instantiation +template class JoinNode; +} // namespace JoinOrdering diff --git a/src/engine/joinOrdering/JoinNode.h b/src/engine/joinOrdering/JoinNode.h new file mode 100644 index 0000000000..8493f59d72 --- /dev/null +++ b/src/engine/joinOrdering/JoinNode.h @@ -0,0 +1,44 @@ +// Copyright 
2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: +// Mahmoud Khalaf (2024-, khalaf@cs.uni-freiburg.de) + +#pragma once + +#include "QueryGraph.h" +#include "memory" + +namespace JoinOrdering { + +/** + * join operators at inner nodes + * NATURAL_JOIN, CARTESIAN_JOIN + */ +enum JoinType { BOWTIE, CROSS }; // predicate? + +/** + * + * JoinTree payload + * + * @tparam N type that satisfies RelationAble concept + * @see JoinTree + */ +template +requires RelationAble class JoinNode { + public: + N relation; // TODO: std::optional? inner nodes have no relations + std::shared_ptr> left, right; // TODO: std::optional? + JoinType joinType = BOWTIE; + + explicit JoinNode(); + explicit JoinNode(N relation); + JoinNode(std::shared_ptr> left, + std::shared_ptr> right, JoinType joinType_ = BOWTIE); + /** + * Leaf nodes hold relations + * @return True if node has no children and contains JoinNode::relation info + */ + bool isLeaf(); +}; + +} // namespace JoinOrdering diff --git a/src/engine/joinOrdering/JoinTree.cpp b/src/engine/joinOrdering/JoinTree.cpp new file mode 100644 index 0000000000..792ee48232 --- /dev/null +++ b/src/engine/joinOrdering/JoinTree.cpp @@ -0,0 +1,146 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: +// Mahmoud Khalaf (2024-, khalaf@cs.uni-freiburg.de) + +#include "JoinTree.h" + +namespace JoinOrdering { + +template +requires RelationAble JoinTree::JoinTree() = default; + +template +requires RelationAble +JoinTree::JoinTree(std::shared_ptr> r_) : root(r_) {} + +template +requires RelationAble +JoinTree::JoinTree(std::shared_ptr> left_, + std::shared_ptr> right_, JoinType joinType) { + this->root = + std::make_shared>(JoinNode(left_, right_, joinType)); +} + +template +requires RelationAble +JoinTree::JoinTree(const N& a, const N& b, JoinType jt) + : JoinTree(std::make_shared>( + JoinOrdering::JoinNode(a)), + std::make_shared>( + JoinOrdering::JoinNode(b)), + jt) {} + +template +requires RelationAble JoinTree::JoinTree(const N& a) + : JoinTree(std::make_shared>( + JoinOrdering::JoinNode(a)), + nullptr) {} + +template +requires RelationAble +JoinTree::JoinTree(const JoinTree& t1, const JoinTree& t2, JoinType jt) + : JoinTree( + std::make_shared>(JoinNode(t1.root, t2.root, jt))) {} + +template +requires RelationAble std::vector JoinTree::relations_iter() const { + std::vector erg; + relations_iter(this->root, erg); + return erg; +} + +template +requires RelationAble +void JoinTree::relations_iter(std::shared_ptr> r, + std::vector& s) const { + if (r == nullptr) return; + if (r->isLeaf()) s.emplace_back(r.get()->relation); + + relations_iter(r->left, s); + relations_iter(r->right, s); +} + +template +requires RelationAble +std::set JoinTree::relations_iter_str() const { + std::set erg; + relations_iter_str(this->root, erg); + return erg; +} + +template +requires RelationAble +void JoinTree::relations_iter_str(std::shared_ptr> r, + std::set& s) const { + if (r == nullptr) return; + if (r->isLeaf()) s.insert(r.get()->relation.getLabel()); + + relations_iter_str(r->left, s); + relations_iter_str(r->right, s); +} + +template +requires RelationAble std::string JoinTree::expr() { + return expr(root); +} + +template +requires RelationAble +std::string 
JoinTree::expr(std::shared_ptr> r) { + if (r == nullptr) return ""; + if (r->isLeaf()) return r.get()->relation.getLabel(); + + if (!r->right) return "(" + expr(r->left) + ")"; + if (!r->left) return "(" + expr(r->right) + ")"; + + std::string jsymbol = "⋈"; + switch (r->joinType) { + case BOWTIE: + jsymbol = "⋈"; + break; + case CROSS: + jsymbol = "x"; + break; + default: + jsymbol = "?"; + break; + } + return "(" + expr(r->left) + jsymbol + expr(r->right) + ")"; +} + +template +requires RelationAble bool JoinTree::isRightDeep() { + return isRightDeep(this->root); +} + +template +requires RelationAble +bool JoinTree::isRightDeep(std::shared_ptr> r) { + if (r == nullptr) return true; + if (r->isLeaf()) return true; + if (r->left && r->left->isLeaf()) return true; // TODO: fix, returns without checking r->right + if (r->left) return false; + // if (!r->left && r->right) + return isRightDeep(r->right); +} + +template +requires RelationAble bool JoinTree::isLeftDeep() { + return isLeftDeep(this->root); +} + +template +requires RelationAble +bool JoinTree::isLeftDeep(std::shared_ptr> r) { + if (r == nullptr) return true; + if (r->isLeaf()) return true; + if (r->right && r->right->isLeaf()) return true; // TODO: fix, returns without checking r->left + if (r->right) return false; + // if (!r->right && r->left) + return isLeftDeep(r->left); +} + +// explicit template instantiation +template class JoinTree; +} // namespace JoinOrdering diff --git a/src/engine/joinOrdering/JoinTree.h b/src/engine/joinOrdering/JoinTree.h new file mode 100644 index 0000000000..91cfcd592d --- /dev/null +++ b/src/engine/joinOrdering/JoinTree.h @@ -0,0 +1,81 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: +// Mahmoud Khalaf (2024-, khalaf@cs.uni-freiburg.de) + +#pragma once + +#include +#include + +#include "JoinNode.h" +#include "QueryGraph.h" +#include "RelationBasic.h" + +namespace JoinOrdering { + +/** + * + * JoinTree as a direction-less binary tree with join operators (BOWTIE, CROSS) + * as inner nodes and relations as leaf nodes. + * + * + * z.B: let t be join tree (((R1xR2)⋈(R4⋈R5))x(R3)) + * + * x + * / \ + * ⋈ R3 + * / \ + * / \ + * / \ + * x ⋈ + * / \ / \ + * R1 R2 R4 R5 + * + * + * can be represented as: + * + * JoinTree( + * JoinTree( + * JoinTree(R1, R2, JoinType::CROSS), + * JoinTree(R4, R5, JoinType::BOWTIE)), + * JoinTree(R3), JoinType::CROSS + * ) + * + * + * + * ref: 74/637 + * @tparam N N type that satisfies RelationAble concept + * @see JoinNode + */ +template +requires RelationAble class JoinTree { + public: + std::shared_ptr> root; + JoinTree(); + explicit JoinTree(const N& a); + explicit JoinTree(std::shared_ptr> root); + JoinTree(std::shared_ptr> left, + std::shared_ptr> right, JoinType joinType = BOWTIE); + JoinTree(const N& a, const N& b, JoinType = BOWTIE); + + // TODO: use createJoinTree as described in 149/637 + JoinTree(const JoinTree& t1, const JoinTree& t2, JoinType = BOWTIE); + + [[nodiscard]] std::vector relations_iter() const; + void relations_iter(std::shared_ptr>, std::vector&) const; + + [[nodiscard]] std::set relations_iter_str() const; + void relations_iter_str(std::shared_ptr>, + std::set&) const; + + std::string expr(); + std::string expr(std::shared_ptr> r); + + bool isRightDeep(); + bool isRightDeep(std::shared_ptr> r); + bool isLeftDeep(); + bool isLeftDeep(std::shared_ptr> r); +}; + +} // namespace JoinOrdering diff --git a/src/engine/joinOrdering/LinearizedDP.cpp b/src/engine/joinOrdering/LinearizedDP.cpp new file mode 100644 index 0000000000..46324111ac --- /dev/null +++ b/src/engine/joinOrdering/LinearizedDP.cpp @@ -0,0 +1,108 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data 
Structures. +// Author: +// Mahmoud Khalaf (2024-, khalaf@cs.uni-freiburg.de) + +#include "LinearizedDP.h" + +#include "CostCout.h" + +namespace JoinOrdering { + +template +requires RelationAble JoinTree linearizedDP(const QueryGraph& g) { + // find a linearization using IKKBZ + std::vector O = IKKBZ(g); + size_t sz_v = O.size(); + + // empty DP table of size |V|*|V| + std::vector>> T(O.size(), + std::vector>(O.size())); + + for (size_t i = 0; i < sz_v; i++) + for (size_t j = 0; j < sz_v; j++) T[i][j] = JoinTree(O[i]); + + // find the optimal plan for the linearization + for (size_t s = 2; s <= sz_v; s++) { + for (size_t i = 0; i <= sz_v - s; i++) { + for (size_t j = 1; j <= s - 1; j++) { + auto L = T[i][i + j - 1]; + // FIXME: is this a typo?? + auto R = T[i + j][i + s - 1]; // auto R = T[i + s][i + s - 1]; + if (canJoin(g, L, R)) { + JoinTree P = JoinTree(L, R); + + // TODO: how to argmin when the cost of single relation is zero? + if (Cost::Cout(P, g) > Cost::Cout(T[i][i + s - 1], g)) + T[i][i + s - 1] = P; + } + } + } + } + + // for (size_t i = 0; i < sz_v; i++) { + // for (size_t j = 0; j < sz_v; j++) std::cout << T[i][j].expr() << " "; + // std::cout << std::endl; + // } + + return T[0][sz_v - 1]; +} + +template +requires RelationAble +bool canJoin(const QueryGraph& g, const JoinTree& t1, + const JoinTree& t2) { + // FIXME: this doesn't sound right... + // TODO: all wrong! + auto r1 = t1.relations_iter(); + auto r2 = t2.relations_iter(); + + auto s1 = t1.relations_iter_str(); + auto s2 = t2.relations_iter_str(); + + // empty join tree can be joined with anything + // useful when initing the DP table + // TODO: useless + if (s1.empty() || s2.empty()) return true; + + // TODO: again not like this... 
+ for (auto const& x : t1.relations_iter()) + for (auto const& y : t2.relations_iter()) + if (g.has_rjoin(x, y)) return true; + + return false; +} + +template +requires RelationAble +JoinTree createJoinTree(const JoinTree& t1, const JoinTree& t2) { + // ref: 149/637 + // TODO: consider cartisan, hash joins, etc... + // applicable join implementations (from previously defined jointype enum?) + // TODO: assert 2 tree don't share relations? + + return JoinTree(t1, t2); + + std::vector> B; + + // checking whether a given tree is leftdeep is a O(n) + bool t1_isLeftDeep = t1.isLeftDeep(); + bool t1_isRightDeep = t1.isRightDeep(); + bool t2_isLeftDeep = t1.isLeftDeep(); + bool t2_isRightDeep = t1.isRightDeep(); + + t1.relations_iter(); + if (t1_isLeftDeep && t2_isLeftDeep) return JoinTree(t1, t2); + if (t1_isRightDeep && t2_isRightDeep) return JoinTree(t2, t1); + + return B[0]; // TODO: argmin cost +} + +// explicit template instantiation +template JoinTree linearizedDP( + const QueryGraph&); +template bool canJoin(const QueryGraph&, + const JoinTree&, + const JoinTree&); + +} // namespace JoinOrdering diff --git a/src/engine/joinOrdering/LinearizedDP.h b/src/engine/joinOrdering/LinearizedDP.h new file mode 100644 index 0000000000..fa17927ed9 --- /dev/null +++ b/src/engine/joinOrdering/LinearizedDP.h @@ -0,0 +1,61 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: +// Mahmoud Khalaf (2024-, khalaf@cs.uni-freiburg.de) + +#pragma once + +#include "CostIKKBZ.h" +#include "IKKBZ.h" +#include "JoinTree.h" +#include "QueryGraph.h" +#include "RelationBasic.h" + +namespace JoinOrdering { + +/** + * + * State Space Linearization in Combination with DP + * + * Given a medium-sized query(~100 relations) start with a reasonably good + * (optimal left-deep) relative order for the relations using IKKBZ before + * applying a Selinger's DP approach to construct optimal bushy join tree + * (for the given relative order). 
+ * + * ref: 5/16 + * + * @tparam N type that satisfies RelationAble concept + * @param g acyclic query graph + * @return optimal bushy join tree for the query Q + */ +template +requires RelationAble JoinTree linearizedDP(const QueryGraph& g); + +/** + * + * @tparam N type that satisfies RelationAble concept + * @param g acyclic query graph + * @param t1 non-empty JoinTree + * @param t2 non-empty JoinTree + * @return True if both t1 and t2 comply with at least 1 join predicate + */ +template +requires RelationAble +bool canJoin(const QueryGraph& g, const JoinTree& t1, + const JoinTree& t2); + +/** + * + * for linear trees, assume t2 is a single relation + * + * + * ref: 149/637 + * @tparam N type that satisfies RelationAble concept + * @param t1 "optimal" join tree + * @param t2 "optimal" join tree + * @return optimal join tree (t1 ⋈ t2) + */ +template +requires RelationAble +JoinTree createJoinTree(const JoinTree& t1, const JoinTree& t2); +} // namespace JoinOrdering diff --git a/src/engine/joinOrdering/QueryGraph.cpp b/src/engine/joinOrdering/QueryGraph.cpp new file mode 100644 index 0000000000..d30a668350 --- /dev/null +++ b/src/engine/joinOrdering/QueryGraph.cpp @@ -0,0 +1,293 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: +// Mahmoud Khalaf (2024-, khalaf@cs.uni-freiburg.de) + +#include "QueryGraph.h" + +#include "RelationBasic.h" + +namespace JoinOrdering { + +template +requires RelationAble void QueryGraph::add_relation(const N& n) { + // extract the cardinality and add to the cardinality map to make + // the lookup process easy when using cost function + cardinality[n] = n.getCardinality(); + if (!has_relation(n)) relations_.push_back(n); +} + +template +requires RelationAble bool QueryGraph::has_relation(const N& n) const { + // TODO: it's faster to lookup the edges_ map... 
+ // return edges_.contains(n); + return std::find(relations_.begin(), relations_.end(), n) != relations_.end(); +} + +template +requires RelationAble void QueryGraph::rm_relation(const N& n) { + // Removing a relation by turning-off all it's neighbours + // hiding the connections instead of erasing them, effectively deleting them. + // the hidden property is used to filter out these relation in + // JoinOrdering::get_parent and JoinOrdering::get_children. + for (auto& [x, e] : edges_[n]) { + edges_[x][n].hidden = true; + edges_[n][x].hidden = true; + } +} + +template +requires RelationAble +bool QueryGraph::is_compound_relation(const N& n) const { + return hist.contains(n) && hist.at(n).has_value(); +} + +template +requires RelationAble +bool QueryGraph::is_common_neighbour(const N& a, const N& b, + const N& n) const { + return has_rjoin(a, n) && has_rjoin(b, n); +} + +template +requires RelationAble +void QueryGraph::add_rjoin(const N& a, const N& b, float join_selectivity, + Direction dir) { + // add connection between a -> b + edges_[a][b] = EdgeInfo(dir, join_selectivity); + + // add connection between b -> a + edges_[b][a] = EdgeInfo(inv(dir), join_selectivity); + + // TODO: avoid overwriting selectivity + // selectivity is a relation property + switch (dir) { + case Direction::UNDIRECTED: + if (!selectivity.contains(a)) selectivity[a] = join_selectivity; + if (!selectivity.contains(b)) selectivity[b] = join_selectivity; + break; + case Direction::PARENT: + if (!selectivity.contains(b)) selectivity[b] = join_selectivity; + break; + case Direction::CHILD: + if (!selectivity.contains(a)) selectivity[a] = join_selectivity; + break; + } +} + +template +requires RelationAble +bool QueryGraph::has_rjoin(const N& a, const N& b) const { + // does relation a exists + // does relation b exists + // is there a connection between a and b + // is there a connection between b and a + // is the connection between a and is NOT hidden + return (edges_.contains(a) && 
edges_.contains(b) && + edges_.at(a).contains(b) && edges_.at(b).contains(a) && + !edges_.at(a).at(b).hidden); // !edges_[a][b].hidden; +} + +template +requires RelationAble void QueryGraph::rm_rjoin(const N& a, const N& b) { + // r[a].erase(b); + // r[b].erase(a); + + // hide the connection between a and b [dir] + edges_[a][b].hidden = true; + + // hide the connection between b and a [inv(dir)] + edges_[b][a].hidden = true; +} + +template +requires RelationAble +void QueryGraph::unpack(const N& n, std::vector& acc) { // NOLINT + + // cannot be broken down into small parts anymore + // i.e. regular relation + if (!is_compound_relation(n)) { + acc.push_back(n); + return; + } + + // otherwise it consists of 2 relations s1 and s2 + // they may or may not be compound too, so we call unpack again + auto const& [s1, s2] = hist[n].value(); + unpack(s1, acc); + unpack(s2, acc); +} + +template +requires RelationAble void QueryGraph::unlink(const N& n) { + // auto cv = get_children(n); + // auto pv = get_parent(n); + // std::set children(cv.begin(), cv.end()); + // std::set parent(pv.begin(), pv.end()); + // + // // cut all connections from n to it's children + // for (auto const& c : children) rm_rjoin(c, n); + // // cut all connections from n to it's parent(s)? + // for (auto const& p : parent) rm_rjoin(p, n); + + // TODO: redundant, remove from IKKBZ_merge + rm_relation(n); +} + +template +requires RelationAble +bool QueryGraph::is_chain(const N& n) const { // NOLINT + auto cv = get_children(n); + auto len = std::ranges::distance(cv); + + if (len == 0) return true; // leaf + if (len > 1) return false; // another subtree + + // len == 1 + return is_chain(cv.front()); +} + +template +requires RelationAble bool QueryGraph::is_subtree(const N& n) const { + // TODO: mem subtrees? 
+  return !is_chain(n) and std::ranges::all_of(get_children(n), [&](const N& x) {
+    return is_chain(x);
+  });
+}
+//
+// template
+// requires RelationAble auto QueryGraph::get_parent(const N& n) const {
+// return std::views::filter(edges_.at(n), // edges_[n],
+// [](std::pair t) {
+// auto const& [x, e] = t;
+// return e.direction == Direction::CHILD &&
+// !e.hidden;
+// }) |
+// std::views::transform(
+// [](std::pair t) { return t.first; });
+//}
+
+// template
+// requires RelationAble auto QueryGraph::get_children(const N& n) const {
+// return std::views::filter(edges_.at(n), // edges_[n]
+// [](std::pair t) {
+// // TODO: structural binding in args
+// auto const& [x, e] = t;
+// return e.direction == Direction::PARENT &&
+// !e.hidden;
+// }) |
+// std::views::transform(
+// [](std::pair t) { return t.first; });
+// }
+
+// Returns the first relation in breadth-first order from `n` (including `n`
+// itself) for which is_subtree() holds. Fails the contract check if there is
+// no such relation.
+template <typename N>
+requires RelationAble<N>
+auto QueryGraph<N>::get_chained_subtree(const N& n) -> N {
+  auto dxs = iter(n);
+  // lookup the first subtree
+  auto it =
+      std::ranges::find_if(dxs, [&](const N& x) { return is_subtree(x); });
+
+  // since this is called from IKKBZ_Normalize, the existence of a subtree
+  // should already have been checked; fail loudly instead of dereferencing
+  // end() if a caller did not
+  AD_CONTRACT_CHECK(it != dxs.end());
+  return *it;
+}
+
+// Breadth-first listing of all relations reachable from `root`.
+template <typename N>
+requires RelationAble<N> auto QueryGraph<N>::iter() -> std::vector<N> {
+  // QueryGraph(Relation)?
+ // AD_CONTRACT_CHECK(&root != nullptr); // always true + return iter(root); +} + +template +requires RelationAble +auto QueryGraph::iter(const N& n) -> std::vector { + // bfs-ing over all relations starting from n + + auto erg = std::vector(); + auto q = std::queue(); + auto v = std::set(); + + v.insert(n); + q.push(n); + erg.push_back(n); + + while (!q.empty()) { + auto f = q.front(); + q.pop(); + + for (auto const& x : get_children(f)) { + if (v.contains(x)) continue; + q.push(x); + v.insert(x); + erg.push_back(x); + } + } + + return erg; +} + +// template +// requires RelationAble +// auto QueryGraph::iter_pairs() -> std::vector> { +// auto v = std::set>(); +// +// for (auto const& [a, be] : edges_) +// for (auto const& [b, e] : be) { +// auto p = std::pair(b, a); +// // skip implicitly removed relation (with hidden edges) +// // skip already visited pairs +// // skip duplicates. (R1, R2) is the same as (R2, R1) +// if (e.hidden || v.contains(p) || v.contains(std::pair(a, b))) continue; +// v.insert(p); +// } +// +// return std::vector(v.begin(), v.end()); +// } +// +// template +// requires RelationAble +// auto QueryGraph::iter_pairs(const N& n) -> std::vector> { +// auto v = std::set>(); +// +// for (auto const& [b, e] : edges_[n]) { +// auto p = std::pair(b, n); +// // skip implicitly removed relation (with hidden edges) +// // skip already visited pairs +// // skip duplicates. 
(R1, R2) is the same as (R2, R1) +// if (e.hidden || v.contains(p) || v.contains(std::pair(n, b))) continue; +// v.insert(p); +// } +// +// return std::vector(v.begin(), v.end()); +// } + +template +requires RelationAble constexpr Direction QueryGraph::inv(Direction dir) { + // const ad_utility::HashMap m{ + // {Direction::UNDIRECTED, Direction::UNDIRECTED}, + // {Direction::PARENT, Direction::CHILD}, + // {Direction::CHILD, Direction::PARENT}, + // }; + + switch (dir) { + // case Direction::UNDIRECTED: + // return Direction::UNDIRECTED; + case Direction::PARENT: + return Direction::CHILD; + case Direction::CHILD: + return Direction::PARENT; + default: + // suppress compiler warning + return Direction::UNDIRECTED; + } +} + +template class QueryGraph; + +} // namespace JoinOrdering diff --git a/src/engine/joinOrdering/QueryGraph.h b/src/engine/joinOrdering/QueryGraph.h new file mode 100644 index 0000000000..ec1e2246f7 --- /dev/null +++ b/src/engine/joinOrdering/QueryGraph.h @@ -0,0 +1,288 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: +// Mahmoud Khalaf (2024-, khalaf@cs.uni-freiburg.de) + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "EdgeInfo.h" +#include "util/Exception.h" +#include "util/HashMap.h" + +namespace JoinOrdering { + +template +concept RelationAble = requires(N n) { + // using the relation as a key for some maps + // using std::sets all over the place + // FIXME: constrain hashable + // { std::hash{}(n) } -> std::convertible_to; + // TODO: static assert with a meaningful diagnostics message + { n.getCardinality() } -> std::integral; + { n.getLabel() } -> std::same_as; // std::assignable_from? + + // TODO: check for constructor? 
+ // { std::constructible_from }; +}; + +template +requires RelationAble class QueryGraph { + public: + QueryGraph() = default; + + std::vector relations_; + // ad_utility::HashMap> edges_; + // ad_utility::HashMap> hist; + // ad_utility::HashMap cardinality; + // ad_utility::HashMap selectivity; + + std::map> edges_; + std::map>> hist; + std::map cardinality; + std::map selectivity; // FIXME: directed unordered pair + N root; + + /** + * Add a relation to the query graph and and append it's cardinality + * to the graph's cardinality lookup table + * + * ref: 77/637 + * TODO: 91/637 do not add single relations, but subchains + * @param n Relation with a cardinality property (getCardinality) + */ + void add_relation(const N& n); + + /** + * Check whether a give relation has been added to the query graph or not. + * + * @param n Relation to check + * @return True if it has been added before with QueryGraph::add_relation + */ + bool has_relation(const N& n) const; + + /** + * + * Disable any edge between a relation and all of it's neighbours + * (parent and children) effectively removing it. + * + * @param n Relation to set as unreachable. + */ + void rm_relation(const N& n); + + /** + * + * Checks whether a give relation is regular or compound. + * + * regular relations are ones added during construction of the QueryGraph + * compound relation are the result of the QueryGraph::combine + * + * @param n Relation + * @return True if Relation n is a compound relation + */ + bool is_compound_relation(const N& n) const; + + /** + * + * Checks whether Relation n is a common neighbour between + * Relation a and Relation b. + * + * Relation n is a common neighbour of Relation a and Relation b if + * there exists a connection between Relation n and Relation a + * AND + * there exists a connection between Relation n and Relation b + * + * @param a Relation a + * @param b Relation b + * @param n Relation n + * @return True if Relation n is a common neighbour between a and b. 
+ */ + bool is_common_neighbour(const N& a, const N& b, const N& n) const; + + /** + * + * Connect 2 relations and assign the selectivity for the path. + * + * JoinOrdering::toPrecedenceTree will mutated the dir + * and create parent, children relationships. + * + * ref: 76/637 + * @param a Relation A + * @param b Relation B + * @param join_selectivity selectivity of the join with Relation B + * @param dir Relation A is a (dir) to Relation B + */ + void add_rjoin(const N& a, const N& b, float join_selectivity, + Direction dir = Direction::UNDIRECTED); + + /** + * Check whether there is a connection between given 2 relations + * + * @param a Relation + * @param b Relation + * @return True if a connection has been created with QueryGraph::add_rjoin + */ + [[nodiscard]] bool has_rjoin(const N& a, const N& b) const; + + /** + * Remove connection between 2 given relations by setting `hidden` attribute + * to true, effectively removing the connection from the query graph + * @param a Relation + * @param b Relation + */ + void rm_rjoin(const N& a, const N& b); + + /** + * Gets all **direct** neighbours (1-level) of a given relation where + * relation n is set as a Direction::PARENT to the neighbour relation. + * + * Ignores any connections where hidden is set to true. + * @see QueryGraph::iter + * @param n Relation + * @return A view to the children of Relation n + */ + + auto get_children(const N& n) const { + return std::views::filter(edges_.at(n), // edges_[n] + [](std::pair t) { + // TODO: structural binding in args + auto const& [x, e] = t; + return e.direction == Direction::PARENT && + !e.hidden; + }) | + std::views::transform( + [](std::pair t) { return t.first; }); + } + + /** + * Gets the direct parent of a given relation where relation n is set as a + * Direction::CHILD to the neighbour relation. + * + * Ignores any connections where hidden is set to true. 
+ * + * Similar to QueryGraph::get_children + * @param n Relation + * @return A view to the parent of Relation n + */ + auto get_parent(const N& n) const { + return std::views::filter(edges_.at(n), // edges_[n], + [](std::pair t) { + auto const& [x, e] = t; + return e.direction == Direction::CHILD && + !e.hidden; + }) | + std::views::transform( + [](std::pair t) { return t.first; }); + } + + /** + * Recursively breaks down a compound relation till into basic relations.\ + * + * @param n Compound Relation + * @param erg Vector of n's basic relations + * @see JoinOrdering::IKKBZ_combine + * @see JoinOrdering::IKKBZ_uncombine + */ + void unpack(const N& n, std::vector& erg); + /** + * Remove all connections between a relation and it's neighbours + * + * @param n non-root Relation + */ + void unlink(const N& n); + + /** + * Give Relation n is said to be part of a chain if all it's descendants + * have no more than one child each. + * + * @param n Relation + * @return True if Relation n is part of a subchain + */ + bool is_chain(const N& n) const; + + /** + * + * "The generalization to bushy trees is not as obvious + * each subtree must contain a subchain to avoid cross products + * thus do not add single relations but subchains + * whole chain must be R1 − . . . − Rn, cut anywhere." + * + * ref: 91/637 + * + * @param n Relation + * @return True if n is NOT a chain a chain and all children ARE chains. + */ + bool is_subtree(const N& n) const; + + /** + * + * Looks for the first subtree that exists as a descendant to Relation n. 
+ * + * @param n Relation + * @return the root of the subtree whose subtrees are chains + * @see IKKBZ_Normalized + */ + auto get_chained_subtree(const N& n) -> N; + + // TODO: std::iterator or std::iterator_traits + + /** + * Get all relations in a query graph starting from it's root + * + * @return vector of all relation in the QueryGraph + */ + auto iter() -> std::vector; + + /** + * Gets ALL relations where given relation n is an ancestor + * (parent, grandparent, ...). + * + * Relation n itself is ALSO include in the resultant set (for convenience). + * + * @see QueryGraph::get_children + * @param n Relation + * @return vector of lineage relations to give Relation N including n itself + */ + auto iter(const N& n) -> std::vector; + + /** + * Gets ALL relations pairs on a the QueryGraph. + * + * @return set of pairs of connected relations + */ + // auto iter_pairs() -> std::vector>; + + /** + * Gets relation pairs that involve a particular relation. + * i.e the direct connected neighbours of give relation n. + * + * @param n Relation n + * @return set of pairs of connected relations that involve n + * @see iter_pairs() + */ + // auto iter_pairs(const N& n) -> std::vector>; + + /** + * + * used to assign bidirectional connections when populating the QueryGraph + * + * + * inverse of a DIRECTION::PARENT is DIRECTION::CHILD + * inverse of a DIRECTION::CHILD is DIRECTION::PARENT + * inverse of a DIRECTION::UNDIRECTED is DIRECTION::UNDIRECTED + * + * @see QueryGraph::add_rjoin + */ + constexpr static Direction inv(Direction); +}; + +} // namespace JoinOrdering diff --git a/src/engine/joinOrdering/RelationBasic.cpp b/src/engine/joinOrdering/RelationBasic.cpp new file mode 100644 index 0000000000..6d2a784197 --- /dev/null +++ b/src/engine/joinOrdering/RelationBasic.cpp @@ -0,0 +1,37 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: +// Mahmoud Khalaf (2024-, khalaf@cs.uni-freiburg.de) + +#include "RelationBasic.h" + +#include + +namespace JoinOrdering { + +RelationBasic::RelationBasic() = default; +// RelationBasic::RelationBasic(const RelationBasic& r) { +// this->label = r.label; +// this->cardinality = r.cardinality; +// } + +RelationBasic::RelationBasic(std::string label, int cardinality) + : cardinality(cardinality), label(std::move(label)) {} +std::strong_ordering RelationBasic::operator<=>( + const RelationBasic& other) const = default; + +// bool RelationBasic::operator<(const RelationBasic& other) const { +// return this->cardinality < other.cardinality; +// } +bool RelationBasic::operator==(const RelationBasic& other) const { + return this->cardinality == other.cardinality && this->label == other.label; +} +int RelationBasic::getCardinality() const { return cardinality; } +std::string RelationBasic::getLabel() const { return label; } + +// ref: https://abseil.io/docs/cpp/guides/hash +template +H AbslHashValue(H h, const RelationBasic& r) { + return H::combine(std::move(h), r.label, r.cardinality); +} +} // namespace JoinOrdering diff --git a/src/engine/joinOrdering/RelationBasic.h b/src/engine/joinOrdering/RelationBasic.h new file mode 100644 index 0000000000..e3e0e9eac3 --- /dev/null +++ b/src/engine/joinOrdering/RelationBasic.h @@ -0,0 +1,37 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: +// Mahmoud Khalaf (2024-, khalaf@cs.uni-freiburg.de) + +#pragma once + +#include +#include + +namespace JoinOrdering { + +/** + * bare-minimum required for a relation to be added to the + * QueryGraph::add_relation + */ +class RelationBasic { + public: + RelationBasic(); + // RelationBasic(const RelationBasic& r); + RelationBasic(std::string label, int cardinality); + std::strong_ordering operator<=>(const RelationBasic& other) const; + + // bool operator<(const RelationBasic& other) const; + bool operator==(const RelationBasic& other) const; + [[nodiscard]] int getCardinality() const; + [[nodiscard]] std::string getLabel() const; + + template + friend H AbslHashValue(H h, const RelationBasic& r); + + private: + int cardinality{-1}; + std::string label{"R?"}; +}; + +} // namespace JoinOrdering diff --git a/test/engine/CMakeLists.txt b/test/engine/CMakeLists.txt index 76ba563b79..b5fe62c6fd 100644 --- a/test/engine/CMakeLists.txt +++ b/test/engine/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(idTable) +add_subdirectory(joinOrdering) addLinkAndDiscoverTest(IndexScanTest engine) addLinkAndDiscoverTest(CartesianProductJoinTest engine) addLinkAndDiscoverTest(TextIndexScanForWordTest engine) diff --git a/test/engine/joinOrdering/CMakeLists.txt b/test/engine/joinOrdering/CMakeLists.txt new file mode 100644 index 0000000000..3aafbd5e7d --- /dev/null +++ b/test/engine/joinOrdering/CMakeLists.txt @@ -0,0 +1,4 @@ +addLinkAndDiscoverTest(IKKBZTest joinOrdering) +addLinkAndDiscoverTest(TreeCostTest joinOrdering) +addLinkAndDiscoverTest(LinearizedDPTest joinOrdering) +#addLinkAndDiscoverTest(CostASITest joinOrdering) diff --git a/test/engine/joinOrdering/IKKBZTest.cpp b/test/engine/joinOrdering/IKKBZTest.cpp new file mode 100644 index 0000000000..504704cad2 --- /dev/null +++ b/test/engine/joinOrdering/IKKBZTest.cpp @@ -0,0 +1,618 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. 
+// Author: +// Mahmoud Khalaf (2024-, khalaf@cs.uni-freiburg.de) + +#include + +#include "engine/joinOrdering/CostIKKBZ.h" +#include "engine/joinOrdering/IKKBZ.h" +#include "engine/joinOrdering/QueryGraph.h" +#include "engine/joinOrdering/RelationBasic.h" + +#define eps 0.001 + +using JoinOrdering::RelationBasic; + +TEST(IKKBZ_SANITY, EX1_R1toR7) { + /* + R2 1/2 1/3 R5 + (10) ---------+ +----------- (18) + | | + + R1 1/5 R4 + (10) ------ (100) + + | | + R3 1/4 | | 1/2 R6 1/10 R7 + (100) ---------+ +----------- (10) ------- (20) + + + 124/647 + */ + + auto R1 = RelationBasic("R1", 10); + auto R2 = RelationBasic("R2", 100); + auto R3 = RelationBasic("R3", 100); + auto R4 = RelationBasic("R4", 100); + auto R5 = RelationBasic("R5", 18); + auto R6 = RelationBasic("R6", 10); + auto R7 = RelationBasic("R7", 20); + + auto g = JoinOrdering::QueryGraph(); + g.add_relation(R1); + g.add_relation(R2); + g.add_relation(R3); + g.add_relation(R4); + g.add_relation(R5); + g.add_relation(R6); + g.add_relation(R7); + + g.add_rjoin(R1, R2, 1.0 / 2); + g.add_rjoin(R1, R3, 1.0 / 4); + g.add_rjoin(R1, R4, 1.0 / 5); + g.add_rjoin(R4, R5, 1.0 / 3); + g.add_rjoin(R4, R6, 1.0 / 2); + g.add_rjoin(R6, R7, 1.0 / 10); + + auto g_R1 = IKKBZ(g, R1); + auto g_R2 = IKKBZ(g, R2); + auto g_R3 = IKKBZ(g, R3); + auto g_R4 = IKKBZ(g, R4); + auto g_R5 = IKKBZ(g, R5); + auto g_R6 = IKKBZ(g, R6); + auto g_R7 = IKKBZ(g, R7); + + ASSERT_EQ(g_R1.iter(), (std::vector{R1, R4, R6, R7, R5, R3, R2})); + ASSERT_EQ(g_R2.iter(), (std::vector{R2, R1, R4, R6, R7, R5, R3})); + ASSERT_EQ(g_R3.iter(), (std::vector{R3, R1, R4, R6, R7, R5, R2})); + ASSERT_EQ(g_R4.iter(), (std::vector{R4, R6, R7, R1, R5, R3, R2})); + ASSERT_EQ(g_R5.iter(), (std::vector{R5, R4, R6, R7, R1, R3, R2})); + ASSERT_EQ(g_R6.iter(), (std::vector{R6, R7, R4, R1, R5, R3, R2})); + ASSERT_EQ(g_R7.iter(), (std::vector{R7, R6, R4, R1, R5, R3, R2})); +} + +TEST(IKKBZ_SANITY, EX2_R1) { + /* + + R1 1/6 +(30) ----------+ + | + | + + R3 1/20 R4 3/4 R5 1/2 R6 
1/14 R7 + (30) ------- (20) ------ (10) ------ (20) ------- (70) + + | | + R2 1/10 | | +(100) ----------+ | 1/5 + | + + R8 + (100) + + | + | 1/25 + | + + R9 + (100) + + + 25/39 + */ + + auto R1 = RelationBasic("R1", 30); + auto R2 = RelationBasic("R2", 100); + auto R3 = RelationBasic("R3", 30); + auto R4 = RelationBasic("R4", 20); + auto R5 = RelationBasic("R5", 10); + auto R6 = RelationBasic("R6", 20); + auto R7 = RelationBasic("R7", 70); + auto R8 = RelationBasic("R8", 100); + auto R9 = RelationBasic("R9", 100); + + auto g = JoinOrdering::QueryGraph(); + g.add_relation(R1); + g.add_relation(R2); + g.add_relation(R3); + g.add_relation(R4); + g.add_relation(R5); + g.add_relation(R6); + g.add_relation(R7); + g.add_relation(R8); + g.add_relation(R9); + + g.add_rjoin(R1, R3, 1.0 / 6); + g.add_rjoin(R2, R3, 1.0 / 10); + g.add_rjoin(R3, R4, 1.0 / 20); + g.add_rjoin(R4, R5, 3.0 / 4); + g.add_rjoin(R5, R6, 1.0 / 2); + g.add_rjoin(R6, R7, 1.0 / 14); + g.add_rjoin(R5, R8, 1.0 / 5); + g.add_rjoin(R8, R9, 1.0 / 25); + + auto g2_R1 = JoinOrdering::IKKBZ(g, R1); + + ASSERT_EQ(g2_R1.iter(), (std::vector({R1, R3, R4, R5, R8, R9, R6, R7, R2}))); +} + +TEST(IKKBZ_SANITY, PrecedenceGraph1) { + /** + + R1 -+ +- R5 + | | + + R3 --- R4 + + | | + R2 -+ +- R6 + + query graph + + + + R1 + + | + | + v + + R3 --> R2 + + | + | + v + + R4 --> R6 + + | + | + v + + R5 + + + precedence graph rooted in R1 + + ref: 107/637 + */ + + auto R1 = RelationBasic("R1", 1); + auto R2 = RelationBasic("R2", 1); + auto R3 = RelationBasic("R3", 1); + auto R4 = RelationBasic("R4", 1); + auto R5 = RelationBasic("R5", 1); + auto R6 = RelationBasic("R6", 1); + + auto g = JoinOrdering::QueryGraph(); + g.add_relation(R1); + g.add_relation(R2); + g.add_relation(R3); + g.add_relation(R4); + g.add_relation(R5); + g.add_relation(R6); + + g.add_rjoin(R1, R3, 1); + g.add_rjoin(R2, R3, 1); + g.add_rjoin(R3, R4, 1); + g.add_rjoin(R4, R5, 1); + g.add_rjoin(R4, R6, 1); + + auto pg = JoinOrdering::toPrecedenceGraph(g, R1); + + 
ASSERT_TRUE(pg.has_rjoin(R1, R3)); + ASSERT_EQ(pg.edges_[R1][R3].direction, JoinOrdering::Direction::PARENT); + + ASSERT_TRUE(pg.has_rjoin(R2, R3)); + ASSERT_EQ(pg.edges_[R3][R2].direction, JoinOrdering::Direction::PARENT); + + ASSERT_TRUE(pg.has_rjoin(R3, R4)); + ASSERT_EQ(pg.edges_[R3][R4].direction, JoinOrdering::Direction::PARENT); + + ASSERT_TRUE(pg.has_rjoin(R4, R5)); + ASSERT_EQ(pg.edges_[R4][R5].direction, JoinOrdering::Direction::PARENT); + + ASSERT_TRUE(pg.has_rjoin(R4, R6)); + ASSERT_EQ(pg.edges_[R4][R6].direction, JoinOrdering::Direction::PARENT); +} + +TEST(IKKBZ_SANITY, IKKBZ_ARGMIN_EX1) { + auto R1 = RelationBasic("R1", 10); + auto R2 = RelationBasic("R2", 100); + auto R3 = RelationBasic("R3", 100); + auto R4 = RelationBasic("R4", 100); + auto R5 = RelationBasic("R5", 18); + auto R6 = RelationBasic("R6", 10); + auto R7 = RelationBasic("R7", 20); + + auto g = JoinOrdering::QueryGraph(); + g.add_relation(R1); + g.add_relation(R2); + g.add_relation(R3); + g.add_relation(R4); + g.add_relation(R5); + g.add_relation(R6); + g.add_relation(R7); + + g.add_rjoin(R1, R2, 1.0 / 2); + g.add_rjoin(R1, R3, 1.0 / 4); + g.add_rjoin(R1, R4, 1.0 / 5); + g.add_rjoin(R4, R5, 1.0 / 3); + g.add_rjoin(R4, R6, 1.0 / 2); + g.add_rjoin(R6, R7, 1.0 / 10); + + ASSERT_EQ(IKKBZ(g), (std::vector{R2, R1, R4, R6, R7, R5, R3})); +} + +TEST(IKKBZ_SANITY, IKKBZ_ARGMIN_EX2) { + auto R1 = RelationBasic("R1", 30); + auto R2 = RelationBasic("R2", 100); + auto R3 = RelationBasic("R3", 30); + auto R4 = RelationBasic("R4", 20); + auto R5 = RelationBasic("R5", 10); + auto R6 = RelationBasic("R6", 20); + auto R7 = RelationBasic("R7", 70); + auto R8 = RelationBasic("R8", 100); + auto R9 = RelationBasic("R9", 100); + + auto g = JoinOrdering::QueryGraph(); + + g.add_relation(R1); + g.add_relation(R2); + g.add_relation(R3); + g.add_relation(R4); + g.add_relation(R5); + g.add_relation(R6); + g.add_relation(R7); + g.add_relation(R8); + g.add_relation(R9); + + g.add_rjoin(R1, R3, 1.0 / 6); + 
g.add_rjoin(R2, R3, 1.0 / 10); + g.add_rjoin(R3, R4, 1.0 / 20); + g.add_rjoin(R4, R5, 3.0 / 4); + g.add_rjoin(R5, R6, 1.0 / 2); + g.add_rjoin(R6, R7, 1.0 / 14); + g.add_rjoin(R5, R8, 1.0 / 5); + g.add_rjoin(R8, R9, 1.0 / 25); + + // ASSERT_EQ(IKKBZ(g), (std::vector({R8, R5, R4, R9, R1, R3, R6, R7, R2}))); + ASSERT_EQ(IKKBZ(g), (std::vector({R8, R5, R4, R9, R3, R1, R6, R7, R2}))); +} + +TEST(IKKBZ_SANITY, KRISHNAMURTHY1986_133) { + /** + + R1 + (100) + + 1/10 | | 1 + +-------------------+ +------------------+ + | | + + R2 R3 + (1000000) (1000) + + 1/30 | | 1 + +------------------+ +----------+ + | | + + R4 R5 + (150000) (50) + + + 133 + + */ + auto R1 = RelationBasic("R1", 100); + auto R2 = RelationBasic("R2", 1000000); + auto R3 = RelationBasic("R3", 1000); + auto R4 = RelationBasic("R4", 150000); + auto R5 = RelationBasic("R5", 50); + + auto g = JoinOrdering::QueryGraph(); + g.add_relation(R1); + g.add_relation(R2); + g.add_relation(R3); + g.add_relation(R4); + g.add_relation(R5); + + g.add_rjoin(R1, R2, 1.0 / 100); + g.add_rjoin(R1, R3, 1.0 / 1); + g.add_rjoin(R3, R4, 1.0 / 30); + g.add_rjoin(R3, R5, 1.0 / 1); + + ASSERT_EQ(IKKBZ(g, R1).iter(), (std::vector({R1, R3, R5, R4, R2}))); +} + +TEST(COSTASI_SANITY, SESSION04_EX1) { + /** + R1 + + 1/5 | | 1/3 + +-------------+ +--------------+ + | | + + R2 R3 + (20) (30) + + 1/10 | | 1 + +--------------+ +----------+ + | | + + R4 R5 + (50) (2) + + + 20/39 + + + + +------+----+------+----+----+-------+ + | R | n | s | C | T | rank | + +------+----+------+----+----+-------+ + | R2 | 20 | 1/5 | 4 | 4 | 3/4 | + | R3 | 30 | 1/15 | 10 | 10 | 9/10 | + | R4 | 50 | 1/10 | 5 | 5 | 4/5 | + | R5 | 2 | 1 | 2 | 2 | 1/2 | + | R3R5 | 60 | 1/3 | 30 | 20 | 19/30 | + +------+----+------+----+----+-------+ + + */ + + auto R1 = RelationBasic("R1", 1); + auto R2 = RelationBasic("R2", 20); + auto R3 = RelationBasic("R3", 30); + auto R4 = RelationBasic("R4", 50); + auto R5 = RelationBasic("R5", 2); + + auto g = JoinOrdering::QueryGraph(); + 
g.add_rjoin(R1, R2, 1.0 / 5); + g.add_rjoin(R1, R3, 1.0 / 3); + g.add_rjoin(R3, R4, 1.0 / 10); + g.add_rjoin(R3, R5, 1.0); + + auto pg = JoinOrdering::toPrecedenceGraph(g, R1); + auto Ch = JoinOrdering::CostIKKBZ(); + + EXPECT_NEAR(Ch.rank(pg, R2), 3.0 / 4, eps); + EXPECT_NEAR(Ch.rank(pg, R3), 9.0 / 10, eps); + EXPECT_NEAR(Ch.rank(pg, R4), 4.0 / 5, eps); + EXPECT_NEAR(Ch.rank(pg, R5), 1.0 / 2, eps); + + auto subtree_R3 = pg.iter(R3); + JoinOrdering::IKKBZ_merge(pg, Ch, subtree_R3); + auto R3R5 = JoinOrdering::IKKBZ_combine(pg, R3, R5); + ASSERT_EQ(R3R5.getCardinality(), 60); + EXPECT_NEAR(Ch.rank(pg, R3R5), 19.0 / 30, 0.001); +} + +TEST(COSTASI_SANITY, SESSION04_EX2) { + /* + + R1 1/6 +(30) ----------+ + | + | + + R3 1/20 R4 3/4 R5 1/2 R6 1/14 R7 + (30) ------- (20) ------ (10) ------ (20) ------- (70) + + | | + R2 1/10 | | +(100) ----------+ | 1/5 + | + + R8 + (100) + + | + | 1/25 + | + + R9 + (100) + + + 25/39 + + + +--------+--------+-------+--------+------+-----------+ + | R | n | s | C | T | rank | + +--------+--------+-------+--------+------+-----------+ + | R1 | 30 | 1/6 | 5 | 5 | 4/5 | + | R2 | 100 | 1/10 | 10 | 10 | 9/10 | + | R4 | 20 | 1/20 | 1 | 1 | 0 | + | R5 | 10 | 3/4 | 15/2 | 15/2 | 13/15 | + | R6 | 20 | 1/2 | 10 | 10 | 9/10 | + | R7 | 70 | 1/14 | 5 | 5 | 4/5 | + | R8 | 100 | 1/5 | 20 | 20 | 19/20 | + | R9 | 100 | 1/25 | 4 | 4 | 3/4 | + | R8R9 | 10000 | 1/125 | 100 | 80 | 237/300 | + | R6R7 | 1400 | 1/28 | 60 | 50 | 245/300 | + | R5R8R9 | 100000 | 3/500 | 1515/2 | 600 | 1198/1515 | + +--------+--------+-------+--------+------+-----------+ + + */ + + auto R1 = RelationBasic("R1", 30); + auto R2 = RelationBasic("R2", 100); + auto R3 = RelationBasic("R3", 30); + auto R4 = RelationBasic("R4", 20); + auto R5 = RelationBasic("R5", 10); + auto R6 = RelationBasic("R6", 20); + auto R7 = RelationBasic("R7", 70); + auto R8 = RelationBasic("R8", 100); + auto R9 = RelationBasic("R9", 100); + + auto g = JoinOrdering::QueryGraph(); + auto Ch = 
JoinOrdering::CostIKKBZ(); + + g.add_relation(R1); + g.add_relation(R2); + g.add_relation(R3); + g.add_relation(R4); + g.add_relation(R5); + g.add_relation(R6); + g.add_relation(R7); + g.add_relation(R8); + g.add_relation(R9); + + g.add_rjoin(R1, R3, 1.0 / 6); + g.add_rjoin(R2, R3, 1.0 / 10); + g.add_rjoin(R3, R4, 1.0 / 20); + g.add_rjoin(R4, R5, 3.0 / 4); + g.add_rjoin(R5, R6, 1.0 / 2); + g.add_rjoin(R6, R7, 1.0 / 14); + g.add_rjoin(R5, R8, 1.0 / 5); + g.add_rjoin(R8, R9, 1.0 / 25); + + auto pg = JoinOrdering::toPrecedenceGraph(g, R1); + + EXPECT_NEAR(Ch.rank(pg, R2), 9.0 / 10, eps); + EXPECT_NEAR(Ch.rank(pg, R3), 4.0 / 5, eps); + EXPECT_NEAR(Ch.rank(pg, R4), 0, eps); + EXPECT_NEAR(Ch.rank(pg, R5), 13.0 / 15, eps); + EXPECT_NEAR(Ch.rank(pg, R6), 9.0 / 10, eps); + EXPECT_NEAR(Ch.rank(pg, R7), 4.0 / 5, eps); + EXPECT_NEAR(Ch.rank(pg, R8), 19.0 / 20, eps); + EXPECT_NEAR(Ch.rank(pg, R9), 3.0 / 4, eps); + + auto R6R7 = JoinOrdering::IKKBZ_combine(pg, R6, R7); + auto R8R9 = JoinOrdering::IKKBZ_combine(pg, R8, R9); + + EXPECT_NEAR(Ch.rank(pg, R6R7), 49.0 / 60, eps); + EXPECT_NEAR(Ch.rank(pg, R8R9), 79.0 / 100, eps); + + auto subtree_R5 = pg.iter(R5); + JoinOrdering::IKKBZ_merge(pg, Ch, subtree_R5); + + auto R5R8R9 = JoinOrdering::IKKBZ_combine(pg, R5, R8R9); + EXPECT_NEAR(Ch.rank(pg, R5R8R9), 1198.0 / 1515, eps); + + // TODO: separate test + auto unpacked = std::vector{}; + pg.unpack(R5R8R9, unpacked); + ASSERT_EQ(unpacked, std::vector({R5, R8, R9})); +} + +TEST(COSTASI_SANITY, KRISHNAMURTHY1986_133) { + auto R1 = RelationBasic("R1", 100); + auto R2 = RelationBasic("R2", 1000000); + auto R3 = RelationBasic("R3", 1000); + auto R4 = RelationBasic("R4", 150000); + auto R5 = RelationBasic("R5", 50); + + auto g = JoinOrdering::QueryGraph(); + + g.add_relation(R1); + g.add_relation(R2); + g.add_relation(R3); + g.add_relation(R4); + g.add_relation(R5); + + g.add_rjoin(R1, R2, 1.0 / 100); + g.add_rjoin(R1, R3, 1.0 / 1); + g.add_rjoin(R3, R4, 1.0 / 30); + g.add_rjoin(R3, R5, 1.0 
/ 1); + + auto pg = JoinOrdering::toPrecedenceGraph(g, R1); + auto Ch = JoinOrdering::CostIKKBZ(); + EXPECT_NEAR(Ch.rank(pg, R5), 0.98, eps); +} + +TEST(IKKBZ_SANITY, UNPACK_COMPOUND_1) { + auto R1 = RelationBasic("R1", 100); + auto R2 = RelationBasic("R2", 1000000); + auto R3 = RelationBasic("R3", 1000); + auto R4 = RelationBasic("R4", 150000); + auto R5 = RelationBasic("R5", 50); + + auto g = JoinOrdering::QueryGraph(); + + g.add_relation(R1); + g.add_relation(R2); + g.add_relation(R3); + g.add_relation(R4); + g.add_relation(R5); + + g.add_rjoin(R1, R2, 1.0 / 100); + g.add_rjoin(R1, R3, 1.0 / 1); + g.add_rjoin(R3, R4, 1.0 / 30); + g.add_rjoin(R3, R5, 1.0 / 1); + + auto pg = JoinOrdering::toPrecedenceGraph(g, R1); + auto R3R5 = JoinOrdering::IKKBZ_combine(pg, R3, R5); + + auto unpacked = std::vector{}; + pg.unpack(R3R5, unpacked); + + ASSERT_EQ(unpacked, std::vector({R3, R5})); + EXPECT_ANY_THROW(JoinOrdering::IKKBZ_combine(g, R1, R4)); +} + +TEST(GOO_SANITY, SESSION04_EX) { + /** + + R1 0.8 R2 0.5 R3 0.3 R4 + (10) ------ (10) ------ (10) ------ (10) + + | | | + | 0.6 | | + | | | + | | + R9 0.3 R6 0.6 | | + (10) ------ (10) ---------+ | + | + | | 0.2 | + | 0.6 +-------------+ | 0.9 + | | | + | + R8 0.3 R7 | R5 + (10) ------ (10) +- (10) + + + 8/39 + + */ + + auto R1 = RelationBasic("R1", 10); + auto R2 = RelationBasic("R2", 10); + auto R3 = RelationBasic("R3", 10); + auto R4 = RelationBasic("R4", 10); + auto R5 = RelationBasic("R5", 10); + auto R6 = RelationBasic("R6", 10); + auto R7 = RelationBasic("R7", 10); + auto R8 = RelationBasic("R8", 10); + auto R9 = RelationBasic("R9", 10); + + auto g = JoinOrdering::QueryGraph(); + + g.add_relation(R1); + g.add_relation(R2); + g.add_relation(R3); + g.add_relation(R4); + g.add_relation(R5); + g.add_relation(R6); + g.add_relation(R7); + g.add_relation(R8); + g.add_relation(R9); + + g.add_rjoin(R1, R2, 0.8); + g.add_rjoin(R1, R9, 0.6); + g.add_rjoin(R2, R3, 0.5); + g.add_rjoin(R2, R6, 0.7); + g.add_rjoin(R3, R6, 0.6); + 
g.add_rjoin(R3, R4, 0.3); + g.add_rjoin(R3, R5, 0.9); + g.add_rjoin(R5, R6, 0.2); + g.add_rjoin(R6, R9, 0.3); + g.add_rjoin(R9, R8, 0.6); + g.add_rjoin(R8, R7, 0.3); + + // TODO: undeterministic + // EXPECT_NO_THROW(JoinOrdering::GOO(g)); + // auto erg = JoinOrdering::GOO(g); + // for (auto const& x : g.hist[erg]) std::cout << x.getLabel() << "\n"; +} diff --git a/test/engine/joinOrdering/LinearizedDPTest.cpp b/test/engine/joinOrdering/LinearizedDPTest.cpp new file mode 100644 index 0000000000..0dc1107645 --- /dev/null +++ b/test/engine/joinOrdering/LinearizedDPTest.cpp @@ -0,0 +1,134 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: +// Mahmoud Khalaf (2024-, khalaf@cs.uni-freiburg.de) + +#include + +#include "engine/joinOrdering/CostCout.h" +#include "engine/joinOrdering/LinearizedDP.h" +#include "engine/joinOrdering/QueryGraph.h" + +using JoinOrdering::RelationBasic, JoinOrdering::JoinTree; + +class LinDPJoin1 : public testing::Test { + protected: + /* + R2 1/2 1/3 R5 + (10) ---------+ +----------- (18) + | | + + R1 1/5 R4 + (10) ------ (100) + + | | + R3 1/4 | | 1/2 R6 1/10 R7 + (100) ---------+ +----------- (10) ------- (20) + + + 124/647 + */ + + RelationBasic R1, R2, R3, R4, R5, R6, R7; + JoinOrdering::QueryGraph g; + + LinDPJoin1() { + R1 = RelationBasic("R1", 10); + R2 = RelationBasic("R2", 100); + R3 = RelationBasic("R3", 100); + R4 = RelationBasic("R4", 100); + R5 = RelationBasic("R5", 18); + R6 = RelationBasic("R6", 10); + R7 = RelationBasic("R7", 20); + + g = JoinOrdering::QueryGraph(); + g.add_relation(R1); + g.add_relation(R2); + g.add_relation(R3); + g.add_relation(R4); + g.add_relation(R5); + g.add_relation(R6); + g.add_relation(R7); + + g.add_rjoin(R1, R2, 1.0 / 2); + g.add_rjoin(R1, R3, 1.0 / 4); + g.add_rjoin(R1, R4, 1.0 / 5); + g.add_rjoin(R4, R5, 1.0 / 3); + g.add_rjoin(R4, R6, 1.0 / 2); + g.add_rjoin(R6, R7, 1.0 / 10); + } +}; + +class LinDPJoin2 : public testing::Test { + protected: + 
RelationBasic A, B, C, D, E, F; + JoinOrdering::QueryGraph g; + + LinDPJoin2() { + A = RelationBasic("A", 100); + B = RelationBasic("B", 100); + C = RelationBasic("C", 50); + D = RelationBasic("D", 50); + E = RelationBasic("E", 100); + F = RelationBasic("F", 100); + + g = JoinOrdering::QueryGraph(); + g.add_relation(A); + g.add_relation(B); + g.add_relation(C); + g.add_relation(D); + g.add_relation(E); + g.add_relation(F); + + g.add_rjoin(A, B, 0.4); + g.add_rjoin(B, C, 0.02); + g.add_rjoin(B, D, 0.04); + // g.add_rjoin({C, D}}, E, 0.01); // TODO: hyperedge? + g.add_rjoin(E, F, 0.5); + } +}; + +TEST_F(LinDPJoin1, CAN_JOIN_SAMPLE_1) { + auto t1 = JoinTree(R1); + auto t2 = JoinTree(R2); + auto t3 = JoinTree(R3); + auto t4 = JoinTree(R4); + auto t5 = JoinTree(R5); + auto t6 = JoinTree(R6); + auto t7 = JoinTree(R7); + + ASSERT_TRUE(JoinOrdering::canJoin(g, t1, t2)); + ASSERT_TRUE(JoinOrdering::canJoin(g, t1, t3)); + ASSERT_TRUE(JoinOrdering::canJoin(g, t1, t4)); + ASSERT_FALSE(JoinOrdering::canJoin(g, t1, t5)); + ASSERT_FALSE(JoinOrdering::canJoin(g, t1, t6)); + ASSERT_FALSE(JoinOrdering::canJoin(g, t1, t7)); + + ASSERT_TRUE(JoinOrdering::canJoin(g, t2, t1)); + ASSERT_TRUE(JoinOrdering::canJoin(g, t3, t1)); + ASSERT_TRUE(JoinOrdering::canJoin(g, t4, t1)); + ASSERT_FALSE(JoinOrdering::canJoin(g, t5, t1)); + ASSERT_FALSE(JoinOrdering::canJoin(g, t6, t1)); + ASSERT_FALSE(JoinOrdering::canJoin(g, t7, t1)); + + ASSERT_FALSE(JoinOrdering::canJoin(g, t2, t3)); + ASSERT_FALSE(JoinOrdering::canJoin(g, t2, t4)); + ASSERT_FALSE(JoinOrdering::canJoin(g, t3, t2)); + ASSERT_FALSE(JoinOrdering::canJoin(g, t4, t2)); + + auto t1t2 = JoinTree(t1, t2); + + ASSERT_TRUE(JoinOrdering::canJoin(g, t1t2, t3)); + ASSERT_TRUE(JoinOrdering::canJoin(g, t1t2, t4)); + ASSERT_FALSE(JoinOrdering::canJoin(g, t1t2, t5)); + + auto t4t6 = JoinTree(t4, t6); + ASSERT_TRUE(JoinOrdering::canJoin(g, t1t2, t4t6)); +} + +TEST_F(LinDPJoin1, ADAPTIVE_5_16) { + auto erg = JoinOrdering::linearizedDP(g); + // 
std::cout << erg.expr() << "\n"; + // FIXME: just suppress codecov + ASSERT_EQ(erg.expr(), "(((((((R2)⋈(R1))⋈(R4))⋈(R6))⋈(R7))⋈(R5))⋈(R3))"); +} diff --git a/test/engine/joinOrdering/TreeCostTest.cpp b/test/engine/joinOrdering/TreeCostTest.cpp new file mode 100644 index 0000000000..8fd6a29490 --- /dev/null +++ b/test/engine/joinOrdering/TreeCostTest.cpp @@ -0,0 +1,309 @@ +// Copyright 2024, University of Freiburg, +// Chair of Algorithms and Data Structures. +// Author: +// Mahmoud Khalaf (2024-, khalaf@cs.uni-freiburg.de) + +#include +#include + +#include "engine/joinOrdering/CostCout.h" +#include "engine/joinOrdering/JoinTree.h" +#include "engine/joinOrdering/RelationBasic.h" + +using JoinOrdering::JoinTree, JoinOrdering::RelationBasic, + JoinOrdering::JoinType; + +class LinearTreeSanity : public testing::Test { + protected: + RelationBasic R1, R2, R3, R4, R5, R6, R7; + LinearTreeSanity() { + R1 = RelationBasic("R1", 10); + R2 = RelationBasic("R2", 100); + R3 = RelationBasic("R3", 100); + R4 = RelationBasic("R4", 100); + R5 = RelationBasic("R5", 18); + R6 = RelationBasic("R6", 10); + R7 = RelationBasic("R7", 20); + } +}; + +class LinearTreeCost1 : public testing::Test { + protected: + RelationBasic R1, R2, R3; + std::map cardinalities; + std::map> selectivities; + LinearTreeCost1() { + R1 = RelationBasic("R1", 10); + R2 = RelationBasic("R2", 100); + R3 = RelationBasic("R3", 1000); + + cardinalities["R1"] = 10; + cardinalities["R2"] = 100; + cardinalities["R3"] = 1000; + + selectivities["R1"]["R2"] = 0.1; + selectivities["R2"]["R1"] = 0.1; + + selectivities["R2"]["R3"] = 0.2; + selectivities["R3"]["R2"] = 0.2; + + selectivities["R1"]["R3"] = 1; + selectivities["R3"]["R1"] = 1; + } +}; + +class LinearTreeCost2 : public testing::Test { + protected: + RelationBasic R1, R2, R3; + std::map cardinalities; + std::map> selectivities; + LinearTreeCost2() { + R1 = RelationBasic("R1", 1000); + R2 = RelationBasic("R2", 2); + R3 = RelationBasic("R3", 2); + + 
cardinalities["R1"] = 1000; + cardinalities["R2"] = 2; + cardinalities["R3"] = 2; + + selectivities["R1"]["R2"] = 0.1; + selectivities["R2"]["R1"] = 0.1; + + selectivities["R2"]["R3"] = 1.0; + selectivities["R3"]["R2"] = 1.0; + + selectivities["R1"]["R3"] = 0.1; + selectivities["R3"]["R1"] = 0.1; + } +}; + +class LinearTreeCost3 : public testing::Test { + protected: + RelationBasic R1, R2, R3, R4; + std::map cardinalities; + std::map> selectivities; + LinearTreeCost3() { + R1 = RelationBasic("R1", 10); + R2 = RelationBasic("R2", 20); + R3 = RelationBasic("R3", 20); + R4 = RelationBasic("R4", 10); + + cardinalities["R1"] = 10; + cardinalities["R2"] = 20; + cardinalities["R3"] = 20; + cardinalities["R4"] = 10; + + selectivities["R1"]["R2"] = 0.01; + selectivities["R2"]["R1"] = 0.01; + + selectivities["R1"]["R3"] = 1.0; + selectivities["R3"]["R1"] = 1.0; + + selectivities["R1"]["R4"] = 1.0; + selectivities["R4"]["R1"] = 1.0; + + selectivities["R2"]["R3"] = 0.5; + selectivities["R3"]["R2"] = 0.5; + + selectivities["R2"]["R4"] = 1.0; + selectivities["R4"]["R2"] = 1.0; + + selectivities["R3"]["R4"] = 0.01; + selectivities["R4"]["R3"] = 0.01; + } +}; + +TEST_F(LinearTreeSanity, JOIN_RELATION_LABELS) { + auto tn = JoinOrdering::JoinNode(); + auto t0 = JoinTree(); + auto t1 = JoinTree(R1, R2); + auto t2 = JoinTree(R3, R4); + auto tt = JoinTree(t1, t2); + + ASSERT_TRUE(tn.isLeaf()); + ASSERT_EQ(t0.expr(), ""); + ASSERT_EQ(tt.root->left->left->relation.getLabel(), "R1"); + ASSERT_EQ(tt.root->left->right->relation.getLabel(), "R2"); + ASSERT_EQ(tt.root->right->left->relation.getLabel(), "R3"); + ASSERT_EQ(tt.root->right->right->relation.getLabel(), "R4"); + + ASSERT_FALSE(tt.isLeftDeep()); + ASSERT_FALSE(tt.isRightDeep()); + + EXPECT_THAT(t1.relations_iter_str(), testing::ElementsAre("R1", "R2")); + EXPECT_THAT(t2.relations_iter_str(), testing::ElementsAre("R3", "R4")); + EXPECT_THAT(tt.relations_iter_str(), + testing::ElementsAre("R1", "R2", "R3", "R4")); +} + +/** + ⋈ + / \ 
+ / \ + / \ + ⋈ ⋈ + / \ / \ + R1 R2 R3 R4 + + */ +TEST_F(LinearTreeSanity, CONSTRUCT_2_JOIN_TREES) { + auto tt = JoinTree(JoinTree(R1, R2), JoinTree(R3, R4), JoinType::BOWTIE); + ASSERT_EQ(tt.expr(), "((R1⋈R2)⋈(R3⋈R4))"); +} + +/** + ⋈ + / \ + ⋈ ⋈ + / \ \ + R1 R2 R5 + */ +TEST_F(LinearTreeSanity, CONSTRUCT_2_1_JOIN_TREES) { + auto tt = JoinTree(JoinTree(R1, R2, JoinType::BOWTIE), JoinTree(R5), + JoinType::BOWTIE); + ASSERT_EQ(tt.expr(), "((R1⋈R2)⋈(R5))"); + ASSERT_FALSE(tt.isLeftDeep()); + ASSERT_FALSE(tt.isRightDeep()); +} + +/** + ⋈ + / \ + ⋈ R3 + / \ + / \ + / \ + ⋈ ⋈ + / \ / \ + R1 R2 R4 R5 + + */ +TEST_F(LinearTreeSanity, CONSTRUCT_3_JOIN_TREES) { + auto t1 = JoinTree(R1, R2); + auto t2 = JoinTree(R4, R5); + auto t3 = JoinTree(R3); + auto tt = JoinTree(JoinTree(t1, t2), t3); + ASSERT_EQ(tt.expr(), "(((R1⋈R2)⋈(R4⋈R5))⋈(R3))"); +} + +/** + x + / \ + ⋈ R3 + / \ + / \ + / \ + x ⋈ + / \ / \ + R1 R2 R4 R5 + + */ +TEST_F(LinearTreeSanity, CONSTRUCT_3_1_JOIN_TREES) { + auto t1 = JoinTree(R1, R2, JoinType::CROSS); + auto t2 = JoinTree(R4, R5, JoinType::BOWTIE); + auto t3 = JoinTree(R3); + auto tt = JoinTree(JoinTree(t1, t2), t3, JoinType::CROSS); + ASSERT_EQ(tt.expr(), "(((R1xR2)⋈(R4⋈R5))x(R3))"); +} + +/** + * + * +------------------+---------+ + * | | C_{out} | + * +------------------+---------+ + * | R1 ⋈ R2 | 100 | + * | R2 ⋈ R3 | 20000 | + * | R1 x R3 | 10000 | + * | ((R1 ⋈ R2) ⋈ R3) | 20100 | + * | ((R2 ⋈ R3) ⋈ R1) | 40000 | + * | (R1 x R3) ⋈ R2 | 30000 | + * +------------------+---------+ + * + * ref: 82/637 + */ +TEST_F(LinearTreeCost1, SAMPLE_COST_CALC_1) { + auto t1 = JoinTree(R1, R2, JoinType::BOWTIE); + auto t2 = JoinTree(R2, R3, JoinType::BOWTIE); + auto t3 = JoinTree(R1, R3, JoinType::CROSS); + + auto t4 = JoinTree(t1, JoinTree(R3), JoinType::BOWTIE); + auto t5 = JoinTree(t2, JoinTree(R1), JoinType::BOWTIE); + auto t6 = JoinTree(JoinTree(R1, R3, JoinType::CROSS), JoinTree(R2), + JoinType::BOWTIE); + + ASSERT_EQ(JoinOrdering::Cost::Cout(t1, cardinalities, 
selectivities), 100); + ASSERT_EQ(JoinOrdering::Cost::Cout(t2, cardinalities, selectivities), 20000); + ASSERT_EQ(JoinOrdering::Cost::Cout(t3, cardinalities, selectivities), 10000); + ASSERT_EQ(JoinOrdering::Cost::Cout(t4, cardinalities, selectivities), 20100); + ASSERT_EQ(JoinOrdering::Cost::Cout(t5, cardinalities, selectivities), 40000); + ASSERT_EQ(JoinOrdering::Cost::Cout(t6, cardinalities, selectivities), 30000); +} + +/** + * + * +------------------+---------+ + * | | C_{out} | + * +------------------+---------+ + * | R1 ⋈ R2 | 200 | + * | R2 x R3 | 4 | + * | R1 ⋈ R3 | 200 | + * | ((R1 ⋈ R2) ⋈ R3) | 240 | + * | ((R2 x R3) ⋈ R1) | 44 | + * | (R1 ⋈ R3) ⋈ R2 | 240 | + * +------------------+---------+ + * + * ref: 83/637 + */ +TEST_F(LinearTreeCost2, SAMPLE_COST_CALC_2) { + auto t1 = JoinTree(R1, R2, JoinType::BOWTIE); + auto t2 = JoinTree(R2, R3, JoinType::CROSS); + auto t3 = JoinTree(R1, R3, JoinType::BOWTIE); + + auto t4 = JoinTree(t1, JoinTree(R3), JoinType::BOWTIE); + auto t5 = JoinTree(t2, JoinTree(R1), JoinType::CROSS); + auto t6 = JoinTree(JoinTree(R1, R3, JoinType::BOWTIE), JoinTree(R2), + JoinType::BOWTIE); + + ASSERT_EQ(JoinOrdering::Cost::Cout(t1, cardinalities, selectivities), 200); + ASSERT_EQ(JoinOrdering::Cost::Cout(t2, cardinalities, selectivities), 4); + ASSERT_EQ(JoinOrdering::Cost::Cout(t3, cardinalities, selectivities), 200); + ASSERT_EQ(JoinOrdering::Cost::Cout(t4, cardinalities, selectivities), 240); + ASSERT_EQ(JoinOrdering::Cost::Cout(t5, cardinalities, selectivities), 44); + ASSERT_EQ(JoinOrdering::Cost::Cout(t6, cardinalities, selectivities), 240); +} + +/** + * +-----------------------+---------+ + * | | C_{out} | + * +-----------------------+---------+ + * | R1 ⋈ R2 | 2 | + * | R2 ⋈ R3 | 200 | + * | R3 ⋈ R4 | 2 | + * | ((R1 ⋈ R2) ⋈ R3) ⋈ R4 | 24 | + * | ((R2 x R3) ⋈ R1) ⋈ R4 | 222 | + * | (R1 ⋈ R2) ⋈ (R3 ⋈ R4) | 6 | + * +-----------------------+---------+ + * + * ref: 84/637 + */ +TEST_F(LinearTreeCost3, SAMPLE_COST_CALC_3) { + auto 
t1 = JoinTree(R1, R2, JoinType::BOWTIE); + auto t2 = JoinTree(R2, R3, JoinType::BOWTIE); + auto t3 = JoinTree(R3, R4, JoinType::BOWTIE); + + auto t4 = + JoinTree(JoinTree(t1, JoinTree(R3), JoinType::BOWTIE), JoinTree(R4)); + + auto t5 = JoinTree(JoinTree(JoinTree(R2, R3, JoinType::CROSS), JoinTree(R1), + JoinType::BOWTIE), + JoinTree(R4), JoinType::BOWTIE); + + auto t6 = JoinTree(JoinTree(R1, R2), JoinTree(R3, R4), JoinType::BOWTIE); + + ASSERT_EQ(JoinOrdering::Cost::Cout(t1, cardinalities, selectivities), 2); + ASSERT_EQ(JoinOrdering::Cost::Cout(t2, cardinalities, selectivities), 200); + ASSERT_EQ(JoinOrdering::Cost::Cout(t3, cardinalities, selectivities), 2); + ASSERT_EQ(JoinOrdering::Cost::Cout(t4, cardinalities, selectivities), 24); + ASSERT_EQ(JoinOrdering::Cost::Cout(t5, cardinalities, selectivities), 222); + ASSERT_EQ(JoinOrdering::Cost::Cout(t6, cardinalities, selectivities), 6); +}