Skip to content

Commit a9267b4

Browse files
authored
Miscounting one_to_many... (#1585)
* Interim check-in * Skip the leafy test for now * Add more complete tests, write the fix * Wow, concat in postgres sucks. * Weird order of precedence with jsonb * A better fix for the problem.
1 parent 449c6c0 commit a9267b4

File tree

5 files changed

+162
-38
lines changed

5 files changed

+162
-38
lines changed

packages/malloy/src/dialect/postgres/postgres.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -248,16 +248,16 @@ export class PostgresDialect extends Dialect {
248248
isNested: boolean,
249249
_isArray: boolean
250250
): string {
251-
let ret = `${alias}->>'${fieldName}'`;
251+
let ret = `(${alias}->>'${fieldName}')`;
252252
if (isNested) {
253253
switch (fieldType) {
254254
case 'string':
255255
break;
256256
case 'number':
257-
ret = `(${ret})::double precision`;
257+
ret = `${ret}::double precision`;
258258
break;
259259
case 'struct':
260-
ret = `(${ret})::jsonb`;
260+
ret = `${ret}::jsonb`;
261261
break;
262262
}
263263
return ret;

packages/malloy/src/lang/ast/expressions/expr-aggregate-function.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
import {
2525
AggregateFragment,
26+
AggregateFunctionType,
2627
expressionIsAggregate,
2728
FieldDef,
2829
FieldValueType,
@@ -50,7 +51,7 @@ export abstract class ExprAggregateFunction extends ExpressionDef {
5051
explicitSource?: boolean;
5152
legalChildTypes = [FT.numberT];
5253
constructor(
53-
readonly func: string,
54+
readonly func: AggregateFunctionType,
5455
expr?: ExpressionDef,
5556
explicitSource?: boolean
5657
) {

packages/malloy/src/model/malloy_query.ts

Lines changed: 88 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import {Dialect, DialectFieldList, getDialect} from '../dialect';
2525
import {StandardSQLDialect} from '../dialect/standardsql/standardsql';
2626
import {
2727
AggregateFragment,
28+
AggregateFunctionType,
2829
CompiledQuery,
2930
DialectFragment,
3031
Expr,
@@ -127,6 +128,21 @@ interface OutputPipelinedSQL {
127128
pipelineSQL: string;
128129
}
129130

131+
// Track which kinds of aggregate use might require a unique key on a join
132+
type UniqueKeyPossibleUse = AggregateFunctionType | 'generic_aggregate';
133+
134+
class UniqueKeyUse extends Set<UniqueKeyPossibleUse> {
135+
add_use(k: UniqueKeyPossibleUse | undefined) {
136+
if (k !== undefined) {
137+
return this.add(k);
138+
}
139+
}
140+
141+
hasAsymetricFunctions(): boolean {
142+
return this.has('sum') || this.has('avg') || this.has('count');
143+
}
144+
}
145+
130146
class StageWriter {
131147
withs: string[] = [];
132148
udfs: string[] = [];
@@ -307,8 +323,8 @@ class QueryField extends QueryNode {
307323
this.fieldDef = fieldDef;
308324
}
309325

310-
mayNeedUniqueKey(): boolean {
311-
return false;
326+
uniqueKeyPossibleUse(): UniqueKeyPossibleUse | undefined {
327+
return undefined;
312328
}
313329

314330
getJoinableParent(): QueryStruct {
@@ -750,16 +766,31 @@ class QueryField extends QueryNode {
750766
): string {
751767
let func = 'COUNT(';
752768
let thing = '1';
753-
const distinctKeySQL = this.generateDistinctKeyIfNecessary(
754-
resultSet,
755-
context,
756-
expr.structPath
757-
);
758-
if (distinctKeySQL) {
769+
770+
let struct = context;
771+
if (expr.structPath) {
772+
struct = this.parent.root().getStructByName(expr.structPath);
773+
}
774+
const joinName = struct.getJoinableParent().getIdentifier();
775+
const join = resultSet.root().joins.get(joinName);
776+
if (!join) {
777+
throw new Error(`Join ${joinName} not found in result set`);
778+
}
779+
if (!join.leafiest || join.makeUniqueKey) {
759780
func = 'COUNT(DISTINCT';
760-
thing = distinctKeySQL;
781+
thing = struct.getDistinctKey().generateExpression(resultSet);
761782
}
762783

784+
// const distinctKeySQL = this.generateDistinctKeyIfNecessary(
785+
// resultSet,
786+
// context,
787+
// expr.structPath
788+
// );
789+
// if (distinctKeySQL) {
790+
// func = 'COUNT(DISTINCT';
791+
// thing = distinctKeySQL;
792+
// }
793+
763794
// find the structDef and return the path to the field...
764795
if (state.whereSQL) {
765796
return `${func} CASE WHEN ${state.whereSQL} THEN ${thing} END)`;
@@ -1149,13 +1180,17 @@ class QueryFieldDistinctKey extends QueryAtomicField {
11491180
const parentKey = this.parent.parent
11501181
?.getDistinctKey()
11511182
.generateExpression(resultSet);
1152-
return `CONCAT(${parentKey}, 'x', ${this.parent.dialect.sqlFieldReference(
1153-
this.parent.getIdentifier(),
1154-
'__row_id',
1155-
'string',
1156-
true,
1157-
false
1158-
)})`;
1183+
return this.parent.dialect.concat(
1184+
parentKey || '', // shouldn't have to do this...
1185+
"'x'",
1186+
this.parent.dialect.sqlFieldReference(
1187+
this.parent.getIdentifier(),
1188+
'__row_id',
1189+
'string',
1190+
true,
1191+
false
1192+
)
1193+
);
11591194
} else {
11601195
// return this.parent.getIdentifier() + "." + "__distinct_key";
11611196
return this.parent.dialect.sqlFieldReference(
@@ -1499,7 +1534,7 @@ class FieldInstanceResult implements FieldInstance {
14991534
addStructToJoin(
15001535
qs: QueryStruct,
15011536
query: QueryQuery,
1502-
mayNeedUniqueKey: boolean,
1537+
uniqueKeyPossibleUse: UniqueKeyPossibleUse | undefined,
15031538
joinStack: string[]
15041539
): void {
15051540
const name = qs.getIdentifier();
@@ -1509,9 +1544,9 @@ class FieldInstanceResult implements FieldInstance {
15091544
return;
15101545
}
15111546

1512-
let join;
1547+
let join: JoinInstance | undefined;
15131548
if ((join = this.root().joins.get(name))) {
1514-
join.mayNeedUniqueKey ||= mayNeedUniqueKey;
1549+
join.uniqueKeyPossibleUses.add_use(uniqueKeyPossibleUse);
15151550
return;
15161551
}
15171552

@@ -1520,7 +1555,7 @@ class FieldInstanceResult implements FieldInstance {
15201555
const parentStruct = qs.parent?.getJoinableParent();
15211556
if (parentStruct) {
15221557
// add dependent expressions first...
1523-
this.addStructToJoin(parentStruct, query, false, joinStack);
1558+
this.addStructToJoin(parentStruct, query, undefined, joinStack);
15241559
parent = this.root().joins.get(parentStruct.getIdentifier());
15251560
}
15261561

@@ -1542,15 +1577,15 @@ class FieldInstanceResult implements FieldInstance {
15421577
join = new JoinInstance(qs, name, parent);
15431578
this.root().joins.set(name, join);
15441579
}
1545-
join.mayNeedUniqueKey ||= mayNeedUniqueKey;
1580+
join.uniqueKeyPossibleUses.add_use(uniqueKeyPossibleUse);
15461581
}
15471582

15481583
findJoins(query: QueryQuery) {
15491584
for (const dim of this.fields()) {
15501585
this.addStructToJoin(
15511586
dim.f.getJoinableParent(),
15521587
query,
1553-
dim.f.mayNeedUniqueKey(),
1588+
dim.f.uniqueKeyPossibleUse(),
15541589
[]
15551590
);
15561591
}
@@ -1667,7 +1702,7 @@ class FieldInstanceResultRoot extends FieldInstanceResult {
16671702
// look at all the fields again in the structs in the query
16681703

16691704
calculateSymmetricAggregates() {
1670-
let leafiest;
1705+
let leafiest: string | undefined;
16711706
for (const [name, join] of this.joins) {
16721707
// first join is by default the
16731708
const relationship = join.parentRelationship();
@@ -1702,8 +1737,23 @@ class FieldInstanceResultRoot extends FieldInstanceResult {
17021737
// Nested unique keys are dependent on the primary key of the parent
17031738
// and the table.
17041739
for (const [_name, join] of this.joins) {
1705-
// don't need keys on leafiest
1706-
if (!join.leafiest && join.mayNeedUniqueKey) {
1740+
// In a one_to_many join we need a key to count, because there may be a
1741+
// failed match in the left join (a parent row with no joined rows), e.g.
1742+
// users -> {
1743+
// group_by: user_id
1744+
// aggregate: order_count is orders.count()
1745+
if (join.leafiest) {
1746+
if (
1747+
join.parent !== null &&
1748+
join.uniqueKeyPossibleUses.has('count') &&
1749+
!join.queryStruct.primaryKey()
1750+
) {
1751+
join.makeUniqueKey = true;
1752+
}
1753+
} else if (
1754+
!join.leafiest &&
1755+
join.uniqueKeyPossibleUses.hasAsymetricFunctions()
1756+
) {
17071757
let j: JoinInstance | undefined = join;
17081758
while (j) {
17091759
if (!j.queryStruct.primaryKey()) {
@@ -1721,7 +1771,7 @@ class FieldInstanceResultRoot extends FieldInstanceResult {
17211771
}
17221772

17231773
class JoinInstance {
1724-
mayNeedUniqueKey = false;
1774+
uniqueKeyPossibleUses: UniqueKeyUse = new UniqueKeyUse();
17251775
makeUniqueKey = false;
17261776
leafiest = false;
17271777
joinFilterConditions?: QueryFieldBoolean[];
@@ -2133,7 +2183,7 @@ class QueryQuery extends QueryField {
21332183
resultStruct: FieldInstanceResult,
21342184
context: QueryStruct,
21352185
path: string,
2136-
mayNeedUniqueKey: boolean,
2186+
uniqueKeyPossibleUse: UniqueKeyPossibleUse | undefined,
21372187
joinStack: string[]
21382188
) {
21392189
const node = context.getFieldByName(path);
@@ -2150,7 +2200,7 @@ class QueryQuery extends QueryField {
21502200
.addStructToJoin(
21512201
struct.getJoinableParent(),
21522202
this,
2153-
mayNeedUniqueKey,
2203+
uniqueKeyPossibleUse,
21542204
joinStack
21552205
);
21562206
}
@@ -2202,7 +2252,7 @@ class QueryQuery extends QueryField {
22022252
.addStructToJoin(
22032253
field.parent.getJoinableParent(),
22042254
this,
2205-
false,
2255+
undefined,
22062256
joinStack
22072257
);
22082258
// this.addDependantPath(resultStruct, field.parent, expr.path, false);
@@ -2261,12 +2311,17 @@ class QueryQuery extends QueryField {
22612311
resultStruct,
22622312
context,
22632313
expr.structPath,
2264-
true,
2314+
expr.function,
22652315
joinStack
22662316
);
22672317
} else {
22682318
// we are doing a sum in the root. It may need symmetric aggregates
2269-
resultStruct.addStructToJoin(context, this, true, joinStack);
2319+
resultStruct.addStructToJoin(
2320+
context,
2321+
this,
2322+
expr.function,
2323+
joinStack
2324+
);
22702325
}
22712326
}
22722327
this.addDependantExpr(resultStruct, context, expr.e, joinStack);
@@ -2276,7 +2331,7 @@ class QueryQuery extends QueryField {
22762331
resultStruct,
22772332
context,
22782333
expr.structPath,
2279-
true,
2334+
'generic_aggregate',
22802335
joinStack
22812336
);
22822337
}
@@ -2408,7 +2463,7 @@ class QueryQuery extends QueryField {
24082463
prepare(_stageWriter: StageWriter | undefined) {
24092464
if (!this.prepared) {
24102465
this.expandFields(this.rootResult);
2411-
this.rootResult.addStructToJoin(this.parent, this, false, []);
2466+
this.rootResult.addStructToJoin(this.parent, this, undefined, []);
24122467
this.rootResult.findJoins(this);
24132468
this.rootResult.calculateSymmetricAggregates();
24142469
this.prepared = true;

packages/malloy/src/model/malloy_types.ts

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,9 +186,17 @@ export function isDialectFragment(f: Fragment): f is DialectFragment {
186186
return (f as DialectFragment)?.type === 'dialect';
187187
}
188188

189+
export type AggregateFunctionType =
190+
| 'sum'
191+
| 'avg'
192+
| 'count'
193+
| 'count_distinct'
194+
| 'max'
195+
| 'min';
196+
189197
export interface AggregateFragment {
190198
type: 'aggregate';
191-
function: string;
199+
function: AggregateFunctionType;
192200
e: Expr;
193201
structPath?: string;
194202
}

test/src/databases/all/nomodel.spec.ts

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,66 @@ runtimes.runtimeMap.forEach((runtime, databaseName) => {
410410
});
411411
});
412412

413+
it(`leafy count - ${databaseName}`, async () => {
414+
// when the joined table is the leafiest join,
415+
// we need to make sure we don't count rows that
416+
// don't match the join.
417+
await expect(`
418+
source: am_states is ${databaseName}.table('malloytest.state_facts') -> {
419+
select: *
420+
where: state ~ r'^(A|M)'
421+
}
422+
423+
source: states is ${databaseName}.table('malloytest.state_facts') extend {
424+
join_many: am_states on state=am_states.state
425+
}
426+
427+
run: states -> {
428+
where: state = 'CA'
429+
aggregate:
430+
leafy_count is am_states.count()
431+
root_count is count()
432+
}
433+
`).malloyResultMatches(runtime, {
434+
leafy_count: 0,
435+
root_count: 1,
436+
});
437+
});
438+
439+
it(`leafy nested count - ${databaseName}`, async () => {
440+
// when the joined table is the leafiest join,
441+
// we need to make sure we don't count rows that
442+
// don't match the join.
443+
await expect(`
444+
source: am_states is ${databaseName}.table('malloytest.state_facts') -> {
445+
group_by: state
446+
where: state ~ r'^(A|M)'
447+
nest: nested_state is {
448+
group_by: state
449+
}
450+
}
451+
452+
source: states is ${databaseName}.table('malloytest.state_facts') extend {
453+
join_many: am_states on state=am_states.state
454+
}
455+
456+
run: states -> {
457+
where: state = 'CA'
458+
group_by:
459+
state
460+
am_state is am_states.state
461+
aggregate:
462+
leafy_count is am_states.nested_state.count()
463+
root_count is count()
464+
}
465+
`).malloyResultMatches(runtime, {
466+
leafy_count: 0,
467+
root_count: 1,
468+
state: 'CA',
469+
am_state: null,
470+
});
471+
});
472+
413473
it(`basic index - ${databaseName}`, async () => {
414474
// Make sure basic indexing works.
415475
await expect(`

0 commit comments

Comments
 (0)