Skip to content

Commit

Permalink
nested pipelines generate bad order by (#2022)
Browse files Browse the repository at this point in the history
* nested pipelines generate bad order by

* ordering an reduce is now enforced at translation time

* plus, minus, whatever ....

* mark default orderings to make lenses work
* add tests for default orderings
* delete pre 4.0 TOP N BY because it became costly

* keep default order by sane

* add canOrderB

* slightly better order by copmputation

* use first, not last, field for default sort

* ignore analytic fields in order by

---------

Co-authored-by: Michael Toy <66150587+mtoy-googly-moogly@users.noreply.github.com>
  • Loading branch information
lloydtabb and mtoy-googly-moogly authored Nov 27, 2024
1 parent 7ab1d30 commit 3740e17
Show file tree
Hide file tree
Showing 12 changed files with 275 additions and 224 deletions.
4 changes: 3 additions & 1 deletion packages/malloy/src/dialect/dialect.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import {
LeafAtomicTypeDef,
isRawCast,
isLeafAtomic,
OrderBy,
} from '../model/malloy_types';
import {DialectFunctionOverloadDef} from './functions';

Expand Down Expand Up @@ -237,7 +238,8 @@ export abstract class Dialect {

abstract sqlCreateFunctionCombineLastStage(
lastStageName: string,
fieldList: DialectFieldList
fieldList: DialectFieldList,
orderBy: OrderBy[] | undefined
): string;
abstract sqlCreateTableAsSelect(tableName: string, sql: string): string;

Expand Down
22 changes: 20 additions & 2 deletions packages/malloy/src/dialect/duckdb/duckdb.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import {
MeasureTimeExpr,
LeafAtomicTypeDef,
TD,
OrderBy,
} from '../../model/malloy_types';
import {indent} from '../../model/utils';
import {
Expand Down Expand Up @@ -247,11 +248,28 @@ export class DuckDBDialect extends PostgresBase {

sqlCreateFunctionCombineLastStage(
lastStageName: string,
dialectFieldList: DialectFieldList
dialectFieldList: DialectFieldList,
orderBy: OrderBy[] | undefined
): string {
let o = '';
if (orderBy) {
const clauses: string[] = [];
for (const c of orderBy) {
if (typeof c.field === 'string') {
clauses.push(`${c.field} ${c.dir || 'asc'}`);
} else {
clauses.push(
`${dialectFieldList[c.field].sqlOutputName} ${c.dir || 'asc'}`
);
}
}
if (clauses.length > 0) {
o = ` ORDER BY ${clauses.join(', ')}`;
}
}
return `SELECT LIST(STRUCT_PACK(${dialectFieldList
.map(d => this.sqlMaybeQuoteIdentifier(d.sqlOutputName))
.join(',')})) FROM ${lastStageName}\n`;
.join(',')})${o}) FROM ${lastStageName}\n`;
}

sqlSelectAliasAsStruct(
Expand Down
1 change: 0 additions & 1 deletion packages/malloy/src/lang/ast/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ export * from './query-properties/ordering';
export * from './query-properties/project-statement';
export * from './query-properties/qop-desc';
export * from './query-properties/sampling';
export * from './query-properties/top';
export * from './source-elements/named-source';
export * from './source-elements/query-source';
export * from './source-elements/sql-source';
Expand Down
114 changes: 76 additions & 38 deletions packages/malloy/src/lang/ast/query-builders/reduce-builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,18 +26,23 @@ import {
FilterCondition,
PartialSegment,
PipeSegment,
QueryFieldDef,
QuerySegment,
ReduceSegment,
canOrderBy,
expressionIsAggregate,
expressionIsAnalytic,
hasExpression,
isPartialSegment,
isQuerySegment,
isReduceSegment,
isTemporalField,
} from '../../../model/malloy_types';

import {ErrorFactory} from '../error-factory';
import {SourceFieldSpace} from '../types/field-space';
import {FieldName, SourceFieldSpace} from '../types/field-space';
import {Limit} from '../query-properties/limit';
import {Ordering} from '../query-properties/ordering';
import {Top} from '../query-properties/top';
import {QueryProperty} from '../types/query-property';
import {QueryBuilder} from '../types/query-builder';
import {
Expand All @@ -52,8 +57,15 @@ import {
mergeCompositeFieldUsage,
} from '../../../model/composite_source_utils';

function queryFieldName(qf: QueryFieldDef): string {
if (qf.type === 'fieldref') {
return qf.path[qf.path.length - 1];
}
return qf.name;
}

export abstract class QuerySegmentBuilder implements QueryBuilder {
order?: Top | Ordering;
order?: Ordering;
limit?: number;
alwaysJoins: string[] = [];
abstract inputFS: QueryInputSpace;
Expand All @@ -68,25 +80,6 @@ export abstract class QuerySegmentBuilder implements QueryBuilder {
}
if (qp instanceof DefinitionList) {
this.resultFS.pushFields(...qp.list);
} else if (qp instanceof Top) {
if (this.limit) {
qp.logError(
'limit-already-specified',
'Query operation already limited'
);
} else {
this.limit = qp.limit;
}
if (qp.by) {
if (this.order) {
qp.logError(
'ordering-already-specified',
'Query operation is already sorted'
);
} else {
this.order = qp;
}
}
} else if (qp instanceof Limit) {
if (this.limit) {
qp.logError(
Expand Down Expand Up @@ -116,30 +109,21 @@ export abstract class QuerySegmentBuilder implements QueryBuilder {

refineFrom(from: PipeSegment | undefined, to: QuerySegment): void {
if (from && from.type !== 'index' && from.type !== 'raw') {
if (!this.order) {
if (from.orderBy) {
to.orderBy = from.orderBy;
} else if (from.by) {
to.by = from.by;
}
if (!this.limit && from.orderBy && !from.defaultOrderBy) {
to.orderBy = from.orderBy;
}
if (!this.limit && from.limit) {
to.limit = from.limit;
}
}

if (this.limit) {
to.limit = this.limit;
if (this.order) {
to.orderBy = this.order.getOrderBy(this.inputFS);
delete to.defaultOrderBy;
}

if (this.order instanceof Top) {
const topBy = this.order.getBy(this.inputFS);
if (topBy) {
to.by = topBy;
}
}
if (this.order instanceof Ordering) {
to.orderBy = this.order.getOrderBy(this.inputFS);
if (this.limit) {
to.limit = this.limit;
}

const oldFilters = from?.filterList || [];
Expand Down Expand Up @@ -196,6 +180,60 @@ export class ReduceBuilder extends QuerySegmentBuilder implements QueryBuilder {
const reduceSegment = this.resultFS.getQuerySegment(from);
this.refineFrom(from, reduceSegment);

if (reduceSegment.orderBy) {
// In the modern world, we will ONLY allow names and not numbers in order by lists
for (const by of reduceSegment.orderBy) {
if (typeof by.field === 'number') {
const by_field = reduceSegment.queryFields[by.field - 1];
by.field = queryFieldName(by_field);
}
}
}
if (reduceSegment.orderBy === undefined || reduceSegment.defaultOrderBy) {
// In the modern world, we will order all reduce segments with the default ordering
let usableDefaultOrderField: string | undefined;
for (const field of reduceSegment.queryFields) {
let fieldAggregate = false;
let fieldAnalytic = false;
let fieldType: string;
const fieldName = queryFieldName(field);
if (field.type === 'fieldref') {
const lookupPath = field.path.map(el => new FieldName(el));
const refField = this.inputFS.lookup(lookupPath).found;
if (refField) {
const typeDesc = refField.typeDesc();
fieldType = typeDesc.type;
fieldAggregate = expressionIsAggregate(typeDesc.expressionType);
fieldAnalytic = expressionIsAnalytic(typeDesc.expressionType);
} else {
continue;
}
} else {
fieldType = field.type;
fieldAggregate =
hasExpression(field) && expressionIsAggregate(field.expressionType);
fieldAnalytic =
hasExpression(field) && expressionIsAnalytic(field.expressionType);
}
if (isTemporalField(fieldType) || fieldAggregate) {
reduceSegment.defaultOrderBy = true;
reduceSegment.orderBy = [{field: fieldName, dir: 'desc'}];
usableDefaultOrderField = undefined;
break;
}
if (
canOrderBy(fieldType) &&
!fieldAnalytic &&
!usableDefaultOrderField
) {
usableDefaultOrderField = fieldName;
}
}
if (usableDefaultOrderField) {
reduceSegment.defaultOrderBy = true;
reduceSegment.orderBy = [{field: usableDefaultOrderField, dir: 'asc'}];
}
}
return reduceSegment;
}
}
97 changes: 0 additions & 97 deletions packages/malloy/src/lang/ast/query-properties/top.ts

This file was deleted.

10 changes: 3 additions & 7 deletions packages/malloy/src/lang/ast/view-elements/refine-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,9 @@ export function refine(
}

if (from.type !== 'index' && to.type !== 'index' && from.type !== 'raw') {
if (from.orderBy !== undefined || from.by !== undefined) {
if (to.orderBy === undefined && to.by === undefined) {
if (from.orderBy) {
to.orderBy = from.orderBy;
} else if (from.by) {
to.by = from.by;
}
if (from.orderBy !== undefined && !from.defaultOrderBy) {
if (to.orderBy === undefined || to.defaultOrderBy) {
to.orderBy = from.orderBy;
} else {
logTo.logError(
'ordering-overridden-in-refinement',
Expand Down
2 changes: 1 addition & 1 deletion packages/malloy/src/lang/grammar/MalloyParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,7 @@ bySpec
;

topStatement
: TOP INTEGER_LITERAL bySpec?
: TOP INTEGER_LITERAL
;

indexElement
Expand Down
25 changes: 2 additions & 23 deletions packages/malloy/src/lang/malloy-to-ast.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1090,30 +1090,9 @@ export class MalloyToAST
return this.astAt(new ast.Ordering(orderList), pcx);
}

visitTopStatement(pcx: parse.TopStatementContext): ast.Top {
const byCx = pcx.bySpec();
visitTopStatement(pcx: parse.TopStatementContext): ast.Limit {
const topN = this.getNumber(pcx.INTEGER_LITERAL());
let top: ast.Top | undefined;
if (byCx) {
this.m4advisory(
byCx,
'top-by',
'by clause of top statement unupported. Use order_by instead'
);
const nameCx = byCx.fieldName();
if (nameCx) {
const name = this.getFieldName(nameCx);
top = new ast.Top(topN, name);
}
const exprCx = byCx.fieldExpr();
if (exprCx) {
top = new ast.Top(topN, this.getFieldExpr(exprCx));
}
}
if (!top) {
top = new ast.Top(topN, undefined);
}
return this.astAt(top, pcx);
return this.astAt(new ast.Limit(topN), pcx);
}

visitTopLevelQueryDefs(
Expand Down
Loading

0 comments on commit 3740e17

Please sign in to comment.