Skip to content

Commit

Permalink
feat(tesseract): Support multiple join paths within single query
Browse files Browse the repository at this point in the history
  • Loading branch information
paveltiunov committed Dec 16, 2024
1 parent a56462e commit 278fef1
Show file tree
Hide file tree
Showing 20 changed files with 555 additions and 72 deletions.
13 changes: 9 additions & 4 deletions packages/cubejs-schema-compiler/src/adapter/BaseQuery.js
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,10 @@ export class BaseQuery {
}).filter(R.identity).map(this.newTimeDimension.bind(this));
this.allFilters = this.timeDimensions.concat(this.segments).concat(this.filters);

this.join = this.joinGraph.buildJoin(this.allJoinHints);
if (!getEnv('nativeSqlPlanner')) {
// Tesseract doesn't require join to be prebuilt and there's a case where single join can't be built for multi-fact query
this.join = this.joinGraph.buildJoin(this.allJoinHints);
}
this.cubeAliasPrefix = this.options.cubeAliasPrefix;
this.preAggregationsSchemaOption = this.options.preAggregationsSchema ?? DEFAULT_PREAGGREGATIONS_SCHEMA;
this.externalQueryClass = this.options.externalQueryClass;
Expand Down Expand Up @@ -349,7 +352,8 @@ export class BaseQuery {
initUngrouped() {
this.ungrouped = this.options.ungrouped;
if (this.ungrouped) {
if (!this.options.allowUngroupedWithoutPrimaryKey) {
// this.join is not defined for Tesseract
if (!this.options.allowUngroupedWithoutPrimaryKey && !getEnv('nativeSqlPlanner')) {
const cubes = R.uniq([this.join.root].concat(this.join.joins.map(j => j.originalTo)));
const primaryKeyNames = cubes.flatMap(c => this.primaryKeyNames(c));
const missingPrimaryKeys = primaryKeyNames.filter(key => !this.dimensions.find(d => d.dimension === key));
Expand Down Expand Up @@ -616,7 +620,6 @@ export class BaseQuery {
dimensions: this.options.dimensions,
timeDimensions: this.options.timeDimensions,
timezone: this.options.timezone,
joinRoot: this.join.root,
joinGraph: this.joinGraph,
cubeEvaluator: this.cubeEvaluator,
order,
Expand Down Expand Up @@ -3312,6 +3315,7 @@ export class BaseQuery {
always_true: '1 = 1'

},
operators: {},
quotes: {
identifiers: '"',
escape: '""'
Expand All @@ -3321,7 +3325,8 @@ export class BaseQuery {
},
join_types: {
inner: 'INNER',
left: 'LEFT'
left: 'LEFT',
full: 'FULL',
},
window_frame_types: {
rows: 'ROWS',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,7 @@ export class BigqueryQuery extends BaseQuery {
templates.types.double = 'FLOAT64';
templates.types.decimal = 'BIGDECIMAL({{ precision }},{{ scale }})';
templates.types.binary = 'BYTES';
templates.operators.is_not_distinct_from = 'IS NOT DISTINCT FROM';
return templates;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ export class PostgresQuery extends BaseQuery {
templates.types.float = 'REAL';
templates.types.double = 'DOUBLE PRECISION';
templates.types.binary = 'BYTEA';
templates.operators.is_not_distinct_from = 'IS NOT DISTINCT FROM';
return templates;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ export class SnowflakeQuery extends BaseQuery {
templates.expressions.extract = 'EXTRACT({{ date_part }} FROM {{ expr }})';
templates.expressions.interval = 'INTERVAL \'{{ interval }}\'';
templates.expressions.timestamp_literal = '\'{{ value }}\'::timestamp_tz';
templates.operators.is_not_distinct_from = 'IS NOT DISTINCT FROM';
delete templates.types.interval;
return templates;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import { PostgresQuery } from '../../../src/adapter/PostgresQuery';
import { prepareCompiler } from '../../unit/PrepareCompiler';
import { dbRunner } from './PostgresDBRunner';
import {

Check failure on line 4 in packages/cubejs-schema-compiler/test/integration/postgres/multi-fact-join.test.ts

View workflow job for this annotation

GitHub Actions / lint

`@cubejs-backend/shared` import should occur before import of `../../../src/adapter/PostgresQuery`
getEnv,
} from '@cubejs-backend/shared';

describe('Multi-fact join', () => {
jest.setTimeout(200000);

const { compiler, joinGraph, cubeEvaluator } = prepareCompiler(`
cube(\`orders\`, {
sql: \`
SELECT 79 AS id, 1 AS amount, 1 AS city_id UNION ALL
SELECT 80 AS id, 2 AS amount, 1 AS city_id UNION ALL
SELECT 81 AS id, 3 AS amount, 1 AS city_id UNION ALL
SELECT 82 AS id, 4 AS amount, 2 AS city_id UNION ALL
SELECT 83 AS id, 5 AS amount, 2 AS city_id UNION ALL
SELECT 84 AS id, 6 AS amount, 3 AS city_id
\`,
joins: {
city: {
relationship: \`many_to_one\`,
sql: \`\${orders}.city_id = \${city}.id\`,
},
},
measures: {
amount: {
sql: \`amount\`,
type: 'sum'
}
},
dimensions: {
id: {
sql: \`id\`,
type: \`number\`,
primaryKey: true,
},
},
});
cube(\`shipments\`, {
sql: \`
SELECT 100 AS id, 1 AS foo_id, 1 AS city_id UNION ALL
SELECT 101 AS id, 2 AS foo_id, 2 AS city_id UNION ALL
SELECT 102 AS id, 3 AS foo_id, 2 AS city_id UNION ALL
SELECT 103 AS id, 4 AS foo_id, 2 AS city_id UNION ALL
SELECT 104 AS id, 5 AS foo_id, 4 AS city_id
\`,
joins: {
city: {
relationship: \`many_to_one\`,
sql: \`\${shipments}.city_id = \${city}.id\`,
},
},
measures: {
count: {
type: \`count\`
},
},
dimensions: {
id: {
sql: \`id\`,
type: \`number\`,
primaryKey: true,
shown: true
},
}
});
cube(\`city\`, {
sql: \`
SELECT 1 AS id, 'San Francisco' AS name UNION ALL
SELECT 2 AS id, 'New York City' AS name
\`,
dimensions: {
id: {
sql: \`id\`,
type: \`number\`,
primaryKey: true,
},
name: {
sql: \`\${CUBE}.name\`,
type: \`string\`,
},
},
});
`);

/**
 * Builds a PostgresQuery for `q`, executes it through the DB runner and
 * asserts that the returned rows equal `expectedResult`.
 *
 * Does nothing unless the `nativeSqlPlanner` env flag is set.
 */
async function runQueryTest(q, expectedResult) {
  const nativePlannerEnabled = getEnv('nativeSqlPlanner');
  if (nativePlannerEnabled) {
    await compiler.compile();
    const pgQuery = new PostgresQuery({ joinGraph, cubeEvaluator, compiler }, q);

    // Log the generated SQL and params.
    console.log(pgQuery.buildSqlAndParams());

    const rows = await dbRunner.testQuery(pgQuery.buildSqlAndParams());
    console.log(JSON.stringify(rows));

    expect(rows).toEqual(expectedResult);
  }
}

// Queries one measure from each fact cube (orders, shipments) grouped by the shared
// city.name dimension. Expected values follow from the fixture rows above:
//   orders.amount by city_id:   1 -> 1+2+3 = 6, 2 -> 4+5 = 9, 3 -> 6
//   shipments.count by city_id: 1 -> 1,         2 -> 3,       4 -> 1
// Only city ids 1 (San Francisco) and 2 (New York City) exist in the city cube, so
// orders for city_id 3 and shipments for city_id 4 both have a null city name and
// are expected to end up together in the single null row.
it('two regular sub-queries', async () => runQueryTest({
measures: ['orders.amount', 'shipments.count'],
dimensions: [
'city.name'
],
order: [{ id: 'city.name' }]
}, [{
city__name: 'New York City',
orders__amount: '9',
shipments__count: '3',
}, {
city__name: 'San Francisco',
orders__amount: '6',
shipments__count: '1',
}, {
city__name: null,
orders__amount: '6',
shipments__count: '1',
}]));
});
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use std::any::Any;
use std::marker::PhantomData;
use std::rc::Rc;

#[derive(Serialize, Deserialize, Debug)]
#[derive(Serialize, Deserialize, Debug, Clone, Eq, PartialEq, Hash)]
pub struct JoinItemStatic {
pub from: String,
pub to: String,
Expand Down
41 changes: 29 additions & 12 deletions rust/cubesqlplanner/cubesqlplanner/src/plan/builder/join.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::plan::join::JoinType;
use crate::plan::{Join, JoinCondition, JoinItem, QueryPlan, Schema, Select, SingleAliasedSource};
use crate::planner::BaseCube;
use std::rc::Rc;
Expand Down Expand Up @@ -41,15 +42,19 @@ impl JoinBuilder {
}

pub fn left_join_subselect(&mut self, subquery: Rc<Select>, alias: String, on: JoinCondition) {
self.join_subselect(subquery, alias, on, false)
self.join_subselect(subquery, alias, on, JoinType::Left)
}

pub fn inner_join_subselect(&mut self, subquery: Rc<Select>, alias: String, on: JoinCondition) {
self.join_subselect(subquery, alias, on, true)
self.join_subselect(subquery, alias, on, JoinType::Inner)
}

/// Joins `subquery` under `alias` using `on` as the join condition, with
/// [`JoinType::Full`] as the join type.
pub fn full_join_subselect(&mut self, subquery: Rc<Select>, alias: String, on: JoinCondition) {
self.join_subselect(subquery, alias, on, JoinType::Full)
}

pub fn left_join_cube(&mut self, cube: Rc<BaseCube>, alias: Option<String>, on: JoinCondition) {
self.join_cube(cube, alias, on, false)
self.join_cube(cube, alias, on, JoinType::Left)
}

pub fn inner_join_cube(
Expand All @@ -58,7 +63,7 @@ impl JoinBuilder {
alias: Option<String>,
on: JoinCondition,
) {
self.join_cube(cube, alias, on, true)
self.join_cube(cube, alias, on, JoinType::Inner)
}

pub fn left_join_table_reference(
Expand All @@ -68,7 +73,7 @@ impl JoinBuilder {
alias: Option<String>,
on: JoinCondition,
) {
self.join_table_reference(reference, schema, alias, on, false)
self.join_table_reference(reference, schema, alias, on, JoinType::Left)
}

pub fn inner_join_table_reference(
Expand All @@ -78,7 +83,7 @@ impl JoinBuilder {
alias: Option<String>,
on: JoinCondition,
) {
self.join_table_reference(reference, schema, alias, on, true)
self.join_table_reference(reference, schema, alias, on, JoinType::Inner)
}

pub fn build(self) -> Rc<Join> {
Expand All @@ -93,22 +98,30 @@ impl JoinBuilder {
subquery: Rc<Select>,
alias: String,
on: JoinCondition,
is_inner: bool,
join_type: JoinType,
) {
let subquery = Rc::new(QueryPlan::Select(subquery));
let from = SingleAliasedSource::new_from_subquery(subquery, alias);
self.joins.push(JoinItem { from, on, is_inner })
self.joins.push(JoinItem {
from,
on,
join_type,
})
}

fn join_cube(
&mut self,
cube: Rc<BaseCube>,
alias: Option<String>,
on: JoinCondition,
is_inner: bool,
join_type: JoinType,
) {
let from = SingleAliasedSource::new_from_cube(cube, alias);
self.joins.push(JoinItem { from, on, is_inner })
self.joins.push(JoinItem {
from,
on,
join_type,
})
}

fn join_table_reference(
Expand All @@ -117,9 +130,13 @@ impl JoinBuilder {
schema: Rc<Schema>,
alias: Option<String>,
on: JoinCondition,
is_inner: bool,
join_type: JoinType,
) {
let from = SingleAliasedSource::new_from_table_reference(reference, schema, alias);
self.joins.push(JoinItem { from, on, is_inner })
self.joins.push(JoinItem {
from,
on,
join_type,
})
}
}
18 changes: 18 additions & 0 deletions rust/cubesqlplanner/cubesqlplanner/src/plan/filter.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::planner::filter::BaseFilter;
use crate::planner::sql_evaluator::MemberSymbol;
use crate::planner::sql_templates::PlanSqlTemplates;
use crate::planner::VisitorContext;
use cubenativeutils::CubeError;
Expand Down Expand Up @@ -79,6 +80,23 @@ impl FilterItem {
};
Ok(res)
}

/// Returns the member evaluators of every leaf filter reachable from this item.
///
/// Allocates a fresh vector and fills it via `find_all_member_evaluators`;
/// nested groups are walked recursively and no de-duplication is performed.
pub fn all_member_evaluators(&self) -> Vec<Rc<MemberSymbol>> {
    let mut evaluators = vec![];
    self.find_all_member_evaluators(&mut evaluators);
    evaluators
}

/// Appends the member evaluators found under this item to `result`.
///
/// A leaf item contributes a clone of its own evaluator handle; a group
/// recurses into each of its children in order.
pub fn find_all_member_evaluators(&self, result: &mut Vec<Rc<MemberSymbol>>) {
    match self {
        FilterItem::Item(leaf) => result.push(leaf.member_evaluator().clone()),
        FilterItem::Group(group) => group
            .items
            .iter()
            .for_each(|child| child.find_all_member_evaluators(result)),
    }
}
}

impl Filter {
Expand Down
10 changes: 8 additions & 2 deletions rust/cubesqlplanner/cubesqlplanner/src/plan/join.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,14 +179,20 @@ impl JoinCondition {
pub struct JoinItem {
pub from: SingleAliasedSource,
pub on: JoinCondition,
pub is_inner: bool,
pub join_type: JoinType,
}

/// A join chain: a `root` source plus the items joined onto it.
pub struct Join {
/// Source the join starts from.
pub root: SingleAliasedSource,
/// Joined sources, each carrying its own join condition and join type.
pub joins: Vec<JoinItem>,
}

/// Kind of join used when attaching a [`JoinItem`] to a [`Join`].
///
/// The standard traits are derived so the value can be debug-printed,
/// compared and passed around by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum JoinType {
    /// Inner join.
    Inner,
    /// Left join.
    Left,
    /// Full join.
    Full,
}

impl JoinItem {
pub fn to_sql(
&self,
Expand All @@ -197,7 +203,7 @@ impl JoinItem {
let result = templates.join(
&self.from.to_sql(templates, context)?,
&on_sql,
self.is_inner,
&self.join_type,
)?;
Ok(result)
}
Expand Down
Loading

0 comments on commit 278fef1

Please sign in to comment.