Skip to content

Commit f694255

Browse files
authored
SQL: Support GROUP BY and ORDER BY for NULL types. (#16252)
* SQL: Support GROUP BY and ORDER BY for NULL types. Treats SQL NULL types as strings at the native layer. * Update tests. * Fixes. * Fix tests. * Fix tests. * Fix test. * Add back NOT_ENOUGH_RULES. * Fix test. * Fix test. * Update test case. * Update quidem test.
1 parent 1db2fb7 commit f694255

File tree

12 files changed

+271
-49
lines changed

12 files changed

+271
-49
lines changed

extensions-core/datasketches/src/test/java/org/apache/druid/query/aggregation/datasketches/tuple/sql/ArrayOfDoublesSketchSqlAggregatorTest.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -399,16 +399,16 @@ public void testNullInputs()
399399
ImmutableList.of(
400400
new ArrayOfDoublesSketchToMetricsSumEstimatePostAggregator(
401401
"p1",
402-
expressionPostAgg("p0", "null", null)
402+
expressionPostAgg("p0", "null", ColumnType.STRING)
403403
),
404404
new ArrayOfDoublesSketchSetOpPostAggregator(
405405
"p4",
406406
ArrayOfDoublesSketchOperations.Operation.UNION.name(),
407407
null,
408408
null,
409409
ImmutableList.of(
410-
expressionPostAgg("p2", "null", null),
411-
expressionPostAgg("p3", "null", null)
410+
expressionPostAgg("p2", "null", ColumnType.STRING),
411+
expressionPostAgg("p3", "null", ColumnType.STRING)
412412
)
413413
),
414414
new ArrayOfDoublesSketchSetOpPostAggregator(
@@ -417,7 +417,7 @@ public void testNullInputs()
417417
null,
418418
null,
419419
ImmutableList.of(
420-
expressionPostAgg("p5", "null", null),
420+
expressionPostAgg("p5", "null", ColumnType.STRING),
421421
new FieldAccessPostAggregator("p6", "a1")
422422
)
423423
),
@@ -428,7 +428,7 @@ public void testNullInputs()
428428
null,
429429
ImmutableList.of(
430430
new FieldAccessPostAggregator("p8", "a1"),
431-
expressionPostAgg("p9", "null", null)
431+
expressionPostAgg("p9", "null", ColumnType.STRING)
432432
)
433433
)
434434
)

sql/src/main/java/org/apache/druid/sql/calcite/expression/Expressions.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -717,6 +717,11 @@ private static DimFilter toSimpleLeafFilter(
717717

718718
// Numeric lhs needs a numeric comparison.
719719
final StringComparator comparator = Calcites.getStringComparatorForRelDataType(lhs.getType());
720+
if (comparator == null) {
721+
// Type is not comparable.
722+
return null;
723+
}
724+
720725
final BoundRefKey boundRefKey = new BoundRefKey(column, extractionFn, comparator);
721726
final DimFilter filter;
722727

sql/src/main/java/org/apache/druid/sql/calcite/planner/Calcites.java

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ public static ColumnType getValueTypeForRelDataTypeFull(final RelDataType type)
206206
return ColumnType.DOUBLE;
207207
} else if (isLongType(sqlTypeName)) {
208208
return ColumnType.LONG;
209-
} else if (isStringType(sqlTypeName)) {
209+
} else if (isStringType(sqlTypeName) || sqlTypeName == SqlTypeName.NULL) {
210210
return ColumnType.STRING;
211211
} else if (SqlTypeName.OTHER == sqlTypeName) {
212212
if (type instanceof RowSignatures.ComplexSqlType) {
@@ -247,12 +247,22 @@ public static boolean isLongType(SqlTypeName sqlTypeName)
247247
}
248248

249249
/**
250-
* Returns the natural StringComparator associated with the RelDataType
250+
* Returns the natural StringComparator associated with the RelDataType, or null if the type is not convertible to
251+
* {@link ColumnType} by {@link #getColumnTypeForRelDataType(RelDataType)}.
251252
*/
253+
@Nullable
252254
public static StringComparator getStringComparatorForRelDataType(RelDataType dataType)
253255
{
254-
final ColumnType valueType = getColumnTypeForRelDataType(dataType);
255-
return getStringComparatorForValueType(valueType);
256+
if (dataType.getSqlTypeName() == SqlTypeName.NULL) {
257+
return StringComparators.NATURAL;
258+
} else {
259+
final ColumnType valueType = getColumnTypeForRelDataType(dataType);
260+
if (valueType == null) {
261+
return null;
262+
}
263+
264+
return getStringComparatorForValueType(valueType);
265+
}
256266
}
257267

258268
/**

sql/src/test/java/org/apache/druid/sql/calcite/CalciteArraysQueryTest.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7331,7 +7331,13 @@ public void testNullArray()
73317331
newScanQueryBuilder()
73327332
.dataSource(CalciteTests.ARRAYS_DATASOURCE)
73337333
.intervals(querySegmentSpec(Filtration.eternity()))
7334-
.virtualColumns(expressionVirtualColumn("v0", "(\"arrayLongNulls\" == array(null,null))", ColumnType.LONG))
7334+
.virtualColumns(
7335+
expressionVirtualColumn(
7336+
"v0",
7337+
"(\"arrayLongNulls\" == CAST(array(null,null), 'ARRAY<LONG>'))",
7338+
ColumnType.LONG
7339+
)
7340+
)
73357341
.columns("v0")
73367342
.columnTypes(ColumnType.LONG)
73377343
.resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)

sql/src/test/java/org/apache/druid/sql/calcite/CalciteQueryTest.java

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6198,6 +6198,114 @@ public void testCountStarWithTimeInIntervalFilterLosAngeles()
61986198
);
61996199
}
62006200

6201+
@Test
6202+
public void testGroupByNullType()
6203+
{
6204+
// Cannot vectorize due to null constant expression.
6205+
cannotVectorize();
6206+
testQuery(
6207+
"SELECT NULL as nullcol, COUNT(*) FROM druid.foo GROUP BY 1",
6208+
ImmutableList.of(
6209+
new GroupByQuery.Builder()
6210+
.setDataSource(CalciteTests.DATASOURCE1)
6211+
.setInterval(querySegmentSpec(Filtration.eternity()))
6212+
.setGranularity(Granularities.ALL)
6213+
.setVirtualColumns(expressionVirtualColumn("v0", "null", ColumnType.STRING))
6214+
.setDimensions(dimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.STRING)))
6215+
.setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0")))
6216+
.setContext(QUERY_CONTEXT_DEFAULT)
6217+
.build()
6218+
),
6219+
ImmutableList.of(
6220+
new Object[]{null, 6L}
6221+
)
6222+
);
6223+
}
6224+
6225+
@Test
6226+
public void testOrderByNullType()
6227+
{
6228+
testQuery(
6229+
// Order on subquery, since the native engine doesn't currently support ordering when selecting directly
6230+
// from a table.
6231+
"SELECT dim1, NULL as nullcol FROM (SELECT DISTINCT dim1 FROM druid.foo LIMIT 1) ORDER BY 2",
6232+
ImmutableList.of(
6233+
WindowOperatorQueryBuilder
6234+
.builder()
6235+
.setDataSource(
6236+
new TopNQueryBuilder()
6237+
.dataSource(CalciteTests.DATASOURCE1)
6238+
.intervals(querySegmentSpec(Filtration.eternity()))
6239+
.dimension(new DefaultDimensionSpec("dim1", "d0", ColumnType.STRING))
6240+
.threshold(1)
6241+
.metric(new DimensionTopNMetricSpec(null, StringComparators.LEXICOGRAPHIC))
6242+
.postAggregators(expressionPostAgg("s0", "null", ColumnType.STRING))
6243+
.context(QUERY_CONTEXT_DEFAULT)
6244+
.build()
6245+
)
6246+
.setSignature(
6247+
RowSignature.builder()
6248+
.add("d0", ColumnType.STRING)
6249+
.add("s0", ColumnType.STRING)
6250+
.build()
6251+
)
6252+
.setOperators(
6253+
OperatorFactoryBuilders.naiveSortOperator("s0", ColumnWithDirection.Direction.ASC)
6254+
)
6255+
.setLeafOperators(
6256+
OperatorFactoryBuilders
6257+
.scanOperatorFactoryBuilder()
6258+
.setOffsetLimit(0, Long.MAX_VALUE)
6259+
.setProjectedColumns("d0", "s0")
6260+
.build()
6261+
)
6262+
.build()
6263+
),
6264+
ImmutableList.of(
6265+
new Object[]{"", null}
6266+
)
6267+
);
6268+
}
6269+
6270+
@Test
6271+
public void testGroupByOrderByNullType()
6272+
{
6273+
// Cannot vectorize due to null constant expression.
6274+
cannotVectorize();
6275+
6276+
testQuery(
6277+
"SELECT NULL as nullcol, COUNT(*) FROM druid.foo GROUP BY 1 ORDER BY 1",
6278+
ImmutableList.of(
6279+
new GroupByQuery.Builder()
6280+
.setDataSource(CalciteTests.DATASOURCE1)
6281+
.setInterval(querySegmentSpec(Filtration.eternity()))
6282+
.setGranularity(Granularities.ALL)
6283+
.setVirtualColumns(expressionVirtualColumn("v0", "null", ColumnType.STRING))
6284+
.setDimensions(dimensions(new DefaultDimensionSpec("v0", "d0", ColumnType.STRING)))
6285+
.setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0")))
6286+
.setLimitSpec(
6287+
queryFramework().engine().featureAvailable(EngineFeature.GROUPBY_IMPLICITLY_SORTS)
6288+
? NoopLimitSpec.instance()
6289+
: new DefaultLimitSpec(
6290+
ImmutableList.of(
6291+
new OrderByColumnSpec(
6292+
"d0",
6293+
Direction.ASCENDING,
6294+
StringComparators.NATURAL
6295+
)
6296+
),
6297+
Integer.MAX_VALUE
6298+
)
6299+
)
6300+
.setContext(QUERY_CONTEXT_DEFAULT)
6301+
.build()
6302+
),
6303+
ImmutableList.of(
6304+
new Object[]{null, 6L}
6305+
)
6306+
);
6307+
}
6308+
62016309
@Test
62026310
public void testCountStarWithTimeInIntervalFilterInvalidInterval()
62036311
{

sql/src/test/java/org/apache/druid/sql/calcite/CalciteSelectQueryTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ public void testValuesContainingNull()
168168
ImmutableList.of(new Object[]{null, "United States"}),
169169
RowSignature
170170
.builder()
171-
.add("EXPR$0", null)
171+
.add("EXPR$0", ColumnType.STRING)
172172
.add("EXPR$1", ColumnType.STRING)
173173
.build()
174174
)

sql/src/test/java/org/apache/druid/sql/calcite/CalciteTableAppendTest.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,7 @@ public void testUnion()
4747
.columnTypes(ColumnType.STRING, ColumnType.STRING)
4848
.context(QUERY_CONTEXT_DEFAULT)
4949
.resultFormat(ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
50-
.virtualColumns(
51-
expressionVirtualColumn("v0", "null", null)
52-
)
50+
.virtualColumns(expressionVirtualColumn("v0", "null", ColumnType.STRING))
5351
.build(),
5452
Druids.newScanQueryBuilder()
5553
.dataSource(CalciteTests.DATASOURCE3)

sql/src/test/java/org/apache/druid/sql/calcite/DecoupledPlanningCalciteQueryTest.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
package org.apache.druid.sql.calcite;
2121

2222
import org.apache.druid.sql.calcite.NotYetSupported.NotYetSupportedProcessor;
23+
import org.junit.jupiter.api.Test;
2324
import org.junit.jupiter.api.extension.ExtendWith;
2425
import org.junit.jupiter.api.extension.RegisterExtension;
2526

@@ -34,4 +35,12 @@ protected QueryTestBuilder testBuilder()
3435
{
3536
return decoupledExtension.testBuilder();
3637
}
38+
39+
@Override
40+
@Test
41+
@NotYetSupported(NotYetSupported.Modes.NOT_ENOUGH_RULES)
42+
public void testOrderByNullType()
43+
{
44+
super.testOrderByNullType();
45+
}
3746
}

sql/src/test/java/org/apache/druid/sql/calcite/NotYetSupported.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@
7777
enum Modes
7878
{
7979
// @formatter:off
80+
NOT_ENOUGH_RULES(DruidException.class, "There are not enough rules to produce a node"),
8081
DISTINCT_AGGREGATE_NOT_SUPPORTED(DruidException.class, "DISTINCT is not supported"),
8182
EXPRESSION_NOT_GROUPED(DruidException.class, "Expression '[a-z]+' is not being grouped"),
8283
NULLS_FIRST_LAST(DruidException.class, "NULLS (FIRST|LAST)"),

sql/src/test/java/org/apache/druid/sql/calcite/expression/ExpressionsTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2800,7 +2800,7 @@ public void testCalciteLiteralToDruidLiteral()
28002800
);
28012801

28022802
assertDruidLiteral(
2803-
new DruidLiteral(null, null),
2803+
new DruidLiteral(ExpressionType.STRING, null),
28042804
Expressions.calciteLiteralToDruidLiteral(
28052805
plannerContext,
28062806
rexBuilder.makeNullLiteral(rexBuilder.getTypeFactory().createSqlType(SqlTypeName.NULL))

0 commit comments

Comments
 (0)