Skip to content

Commit

Permalink
[CALCITE-6350] Unexpected result from UNION with literals expression
Browse files Browse the repository at this point in the history
Signed-off-by: Mihai Budiu <mbudiu@feldera.com>
  • Loading branch information
mihaibudiu committed Nov 5, 2024
1 parent 7fb0fc2 commit c7cace6
Show file tree
Hide file tree
Showing 25 changed files with 203 additions and 160 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.calcite.schema.SchemaPlus;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlWriter;
import org.apache.calcite.sql.dialect.BigQuerySqlDialect;
import org.apache.calcite.sql.parser.SqlParser;
import org.apache.calcite.sql.parser.babel.SqlBabelParserImpl;
import org.apache.calcite.sql.pretty.SqlPrettyWriter;
Expand Down Expand Up @@ -116,6 +117,8 @@ public static void main(String[] args) throws Exception {
.with(CalciteConnectionProperty.CONFORMANCE,
SqlConformanceEnum.BABEL)
.with(CalciteConnectionProperty.LENIENT_OPERATOR_LOOKUP, true)
.with(CalciteConnectionProperty.TYPE_SYSTEM,
BigQuerySqlDialect.class.getName() + "#TYPE_SYSTEM")
.with(
ConnectionFactories.addType("DATETIME", typeFactory ->
typeFactory.createSqlType(SqlTypeName.TIMESTAMP)))
Expand Down
24 changes: 12 additions & 12 deletions babel/src/test/resources/sql/big-query.iq
Original file line number Diff line number Diff line change
Expand Up @@ -1034,9 +1034,9 @@ FROM

SELECT
email,
REGEXP_CONTAINS(email, '^([\w.+-]+@foo\.com|[\w.+-]+@bar\.org)\s+$')
REGEXP_CONTAINS(email, '^([\w.+-]+@foo\.com|[\w.+-]+@bar\.org)\s*$')
AS valid_email_address,
REGEXP_CONTAINS(email, '^[\w.+-]+@foo\.com|[\w.+-]+@bar\.org\s+$')
REGEXP_CONTAINS(email, '^[\w.+-]+@foo\.com|[\w.+-]+@bar\.org\s*$')
AS without_parentheses
FROM
(SELECT
Expand All @@ -1046,11 +1046,11 @@ FROM
+----------------+---------------------+---------------------+
| email | valid_email_address | without_parentheses |
+----------------+---------------------+---------------------+
| a@foo.com | true | true |
| a@foo.computer | false | true |
| b@bar.org | true | true |
| !b@bar.org | false | true |
| c@buz.net | false | false |
| !b@bar.org | false | true |
| a@foo.com | true | true |
| b@bar.org | true | true |
+----------------+---------------------+---------------------+
(5 rows)

Expand Down Expand Up @@ -2133,13 +2133,13 @@ WITH Recipes AS
SELECT 'Ham scramble', 'Steak avocado salad', 'Tomato pasta' UNION ALL
SELECT 'Avocado toast', 'Tomato soup', 'Blueberry salmon' UNION ALL
SELECT 'Corned beef hash', 'Lentil potato soup', 'Glazed ham')
SELECT * FROM Recipes WHERE CONTAINS_SUBSTR((Lunch, Dinner), 'potato');
+--------------------+-------------------------+------------------+
| Breakfast | Lunch | Dinner |
+--------------------+-------------------------+------------------+
| Blueberry pancakes | Egg salad sandwich | Potato dumplings |
| Corned beef hash | Lentil potato soup | Glazed ham |
+--------------------+-------------------------+------------------+
SELECT *, LENGTH(lunch) AS LEN FROM Recipes WHERE CONTAINS_SUBSTR((Lunch, Dinner), 'potato');
+--------------------+--------------------+------------------+-----+
| Breakfast | Lunch | Dinner | LEN |
+--------------------+--------------------+------------------+-----+
| Blueberry pancakes | Egg salad sandwich | Potato dumplings | 18 |
| Corned beef hash | Lentil potato soup | Glazed ham | 18 |
+--------------------+--------------------+------------------+-----+
(2 rows)

!ok
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -553,9 +553,12 @@ private static Expression checkExpressionPadTruncate(
<= 0) {
truncate = false;
}
// If this is a widening cast, no need to pad.
if (SqlTypeUtil.comparePrecision(sourcePrecision, targetPrecision)
>= 0) {
// If this is a narrowing cast, no need to pad.
// However, conversion from VARCHAR(N) to CHAR(N) still requires padding,
// because VARCHAR(N) does not represent the spaces explicitly,
// whereas CHAR(N) does.
if ((SqlTypeUtil.comparePrecision(sourcePrecision, targetPrecision) >= 0)
&& (sourceType.getSqlTypeName() != SqlTypeName.VARCHAR)) {
pad = false;
}
// fall through
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import org.apache.calcite.avatica.util.TimeUnit;
import org.apache.calcite.config.Lex;
import org.apache.calcite.config.NullCollation;
import org.apache.calcite.rel.type.DelegatingTypeSystem;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeSystem;
import org.apache.calcite.rex.RexCall;
Expand Down Expand Up @@ -76,6 +77,15 @@ public class BigQuerySqlDialect extends SqlDialect {

public static final SqlDialect DEFAULT = new BigQuerySqlDialect(DEFAULT_CONTEXT);

// The BigQuery type system differs from the DEFAULT type system in this respect,
// as evidenced by tests in big-query.iq
public static final RelDataTypeSystem TYPE_SYSTEM =
new DelegatingTypeSystem(RelDataTypeSystem.DEFAULT) {
@Override public boolean shouldConvertRaggedUnionTypesToVarying() {
return true;
}
};

private static final List<String> RESERVED_KEYWORDS =
ImmutableList.copyOf(
Arrays.asList("ALL", "AND", "ANY", "ARRAY", "AS", "ASC",
Expand Down Expand Up @@ -115,6 +125,10 @@ public BigQuerySqlDialect(SqlDialect.Context context) {
&& !SqlTypeUtil.isNumeric(call.type);
}

@Override public RelDataTypeSystem getTypeSystem() {
return TYPE_SYSTEM;
}

@Override public boolean supportsApproxCountDistinct() {
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,19 +110,20 @@ public class SetopOperandTypeChecker implements SqlOperandTypeChecker {

final RelDataType type =
callBinding.getTypeFactory().leastRestrictive(columnIthTypes);
if (type == null) {
boolean coerced = false;
if (callBinding.isTypeCoercionEnabled()) {
for (int j = 0; j < callBinding.getOperandCount(); j++) {
TypeCoercion typeCoercion = validator.getTypeCoercion();
RelDataType widenType = typeCoercion.getWiderTypeFor(columnIthTypes, true);
if (null != widenType) {
coerced =
typeCoercion.rowTypeCoercion(callBinding.getScope(),
callBinding.operand(j), i, widenType) || coerced;
}
// If any of the types is different we need to insert a coercion.
boolean coerced = false;
if (callBinding.isTypeCoercionEnabled()) {
for (int j = 0; j < callBinding.getOperandCount(); j++) {
TypeCoercion typeCoercion = validator.getTypeCoercion();
RelDataType widenType = typeCoercion.getWiderTypeFor(columnIthTypes, true);
if (null != widenType) {
coerced =
typeCoercion.rowTypeCoercion(callBinding.getScope(),
callBinding.operand(j), i, widenType) || coerced;
}
}
}
if (type == null) {
if (!coerced) {
if (throwOnFailure) {
SqlNode field =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -274,9 +274,10 @@ protected boolean needToCast(SqlValidatorScope scope, SqlNode node,
return false;
}

// No need to cast between char and varchar.
// No need to cast between char and unlimited varchar.
if (SqlTypeUtil.isCharacter(toType)
&& SqlTypeUtil.isCharacter(fromType)) {
&& SqlTypeUtil.isCharacter(fromType)
&& toType.getPrecision() == RelDataType.PRECISION_NOT_SPECIFIED) {
return false;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4395,7 +4395,7 @@ private SqlDialect nonOrdinalDialect() {
+ "FROM (SELECT \"product_id\", \"net_weight\"\n"
+ "FROM \"foodmart\".\"product\"\n"
+ "UNION ALL\n"
+ "SELECT \"product_id\", 0 AS \"net_weight\"\n"
+ "SELECT \"product_id\", 0E0 AS \"net_weight\"\n"
+ "FROM \"foodmart\".\"sales_fact_1997\") AS \"t1\"";
sql(query).ok(expected);
}
Expand Down Expand Up @@ -6545,7 +6545,7 @@ private void checkLiteral2(String expression, String expected) {
+ "UNION ALL\n"
+ "SELECT 2 `a`, 'yy' `b`) `t`";
final String expectedBigQuery = "SELECT a\n"
+ "FROM (SELECT 1 AS a, 'x ' AS b\n"
+ "FROM (SELECT 1 AS a, 'x' AS b\n"
+ "UNION ALL\n"
+ "SELECT 2 AS a, 'yy' AS b)";
final String expectedFirebolt = expectedPostgresql;
Expand Down Expand Up @@ -7658,9 +7658,9 @@ private void checkLiteral2(String expression, String expected) {
+ "SELECT \"product\".\"product_id\" AS \"account_id\", "
+ "CAST(NULL AS INTEGER) AS \"account_parent\", CAST(NULL AS VARCHAR"
+ "(30) CHARACTER SET \"ISO-8859-1\") AS \"account_description\", "
+ "CAST(\"product\".\"product_id\" AS VARCHAR CHARACTER SET "
+ "CAST(\"product\".\"product_id\" AS VARCHAR(30) CHARACTER SET "
+ "\"ISO-8859-1\") AS \"account_type\", "
+ "CAST(\"sales_fact_1997\".\"store_id\" AS VARCHAR CHARACTER SET \"ISO-8859-1\") AS "
+ "CAST(\"sales_fact_1997\".\"store_id\" AS VARCHAR(30) CHARACTER SET \"ISO-8859-1\") AS "
+ "\"account_rollup\", "
+ "CAST(NULL AS VARCHAR(255) CHARACTER SET \"ISO-8859-1\") AS \"Custom_Members\"\n"
+ "FROM \"foodmart\".\"product\"\n"
Expand Down Expand Up @@ -7777,7 +7777,7 @@ private void checkLiteral2(String expression, String expected) {
final String expectedDefault = "INSERT INTO \"SCOTT\".\"DEPT\""
+ " (\"DEPTNO\", \"DNAME\", \"LOC\")\n"
+ "VALUES (1, 'Fred', 'San Francisco'),\n"
+ "(2, 'Eric', 'Washington ')";
+ "(2, 'Eric', 'Washington')";
final String expectedDefaultX = "INSERT INTO \"SCOTT\".\"DEPT\""
+ " (\"DEPTNO\", \"DNAME\", \"LOC\")\n"
+ "SELECT 1, 'Fred', 'San Francisco'\n"
Expand All @@ -7787,15 +7787,15 @@ private void checkLiteral2(String expression, String expected) {
+ "FROM (VALUES (0)) AS \"t\" (\"ZERO\")";
final String expectedHive = "INSERT INTO `SCOTT`.`DEPT` (`DEPTNO`, `DNAME`, `LOC`)\n"
+ "VALUES (1, 'Fred', 'San Francisco'),\n"
+ "(2, 'Eric', 'Washington ')";
+ "(2, 'Eric', 'Washington')";
final String expectedHiveX = "INSERT INTO `SCOTT`.`DEPT` (`DEPTNO`, `DNAME`, `LOC`)\n"
+ "SELECT 1, 'Fred', 'San Francisco'\n"
+ "UNION ALL\n"
+ "SELECT 2, 'Eric', 'Washington'";
final String expectedMysql = "INSERT INTO `SCOTT`.`DEPT`"
+ " (`DEPTNO`, `DNAME`, `LOC`)\n"
+ "VALUES (1, 'Fred', 'San Francisco'),\n"
+ "(2, 'Eric', 'Washington ')";
+ "(2, 'Eric', 'Washington')";
final String expectedMysqlX = "INSERT INTO `SCOTT`.`DEPT`"
+ " (`DEPTNO`, `DNAME`, `LOC`)\n"
+ "SELECT 1, 'Fred', 'San Francisco'\n"
Expand All @@ -7804,7 +7804,7 @@ private void checkLiteral2(String expression, String expected) {
final String expectedOracle = "INSERT INTO \"SCOTT\".\"DEPT\""
+ " (\"DEPTNO\", \"DNAME\", \"LOC\")\n"
+ "VALUES (1, 'Fred', 'San Francisco'),\n"
+ "(2, 'Eric', 'Washington ')";
+ "(2, 'Eric', 'Washington')";
final String expectedOracleX = "INSERT INTO \"SCOTT\".\"DEPT\""
+ " (\"DEPTNO\", \"DNAME\", \"LOC\")\n"
+ "SELECT 1, 'Fred', 'San Francisco'\n"
Expand All @@ -7815,7 +7815,7 @@ private void checkLiteral2(String expression, String expected) {
final String expectedMssql = "INSERT INTO [SCOTT].[DEPT]"
+ " ([DEPTNO], [DNAME], [LOC])\n"
+ "VALUES (1, 'Fred', 'San Francisco'),\n"
+ "(2, 'Eric', 'Washington ')";
+ "(2, 'Eric', 'Washington')";
final String expectedMssqlX = "INSERT INTO [SCOTT].[DEPT]"
+ " ([DEPTNO], [DNAME], [LOC])\n"
+ "SELECT 1, 'Fred', 'San Francisco'\n"
Expand All @@ -7826,7 +7826,7 @@ private void checkLiteral2(String expression, String expected) {
final String expectedCalcite = "INSERT INTO \"SCOTT\".\"DEPT\""
+ " (\"DEPTNO\", \"DNAME\", \"LOC\")\n"
+ "VALUES (1, 'Fred', 'San Francisco'),\n"
+ "(2, 'Eric', 'Washington ')";
+ "(2, 'Eric', 'Washington')";
final String expectedCalciteX = "INSERT INTO \"SCOTT\".\"DEPT\""
+ " (\"DEPTNO\", \"DNAME\", \"LOC\")\n"
+ "SELECT 1, 'Fred', 'San Francisco'\n"
Expand Down Expand Up @@ -8010,7 +8010,7 @@ private void checkLiteral2(String expression, String expected) {
+ "WHEN NOT MATCHED THEN INSERT (\"DEPTNO\", \"DNAME\", \"LOC\") "
+ "VALUES CAST(\"DEPT\".\"DEPTNO\" + 1 AS TINYINT),\n"
+ "'abc',\n"
+ "LOWER(\"DEPT\".\"DNAME\")";
+ "CAST(LOWER(\"DEPT\".\"DNAME\") AS VARCHAR(13) CHARACTER SET \"ISO-8859-1\")";
sql(sql3)
.schema(CalciteAssert.SchemaSpec.JDBC_SCOTT)
.ok(expected3);
Expand Down Expand Up @@ -8083,8 +8083,8 @@ private void checkLiteral2(String expression, String expected) {
+ "WHEN MATCHED THEN UPDATE SET \"DNAME\" = 'abc'\n"
+ "WHEN NOT MATCHED THEN INSERT (\"DEPTNO\", \"DNAME\", \"LOC\") "
+ "VALUES CAST(\"t0\".\"EXPR$0\" + 1 AS TINYINT),\n"
+ "LOWER(\"t0\".\"EXPR$1\"),\n"
+ "UPPER(\"t0\".\"EXPR$2\")";
+ "CAST(LOWER(\"t0\".\"EXPR$1\") AS VARCHAR(14) CHARACTER SET \"ISO-8859-1\"),\n"
+ "CAST(UPPER(\"t0\".\"EXPR$2\") AS VARCHAR(13) CHARACTER SET \"ISO-8859-1\")";
sql(sql7)
.schema(CalciteAssert.SchemaSpec.JDBC_SCOTT)
.ok(expected7);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ public final RelOptFixture sql(String sql) {

final String sql = "(select name from dept union select ename from emp)\n"
+ "intersect (select fname from customer.contact)";
sql(sql).withPlanner(planner).check();
sql(sql).withPlanner(planner).checkUnchanged();
}

@Test void testRuleDescription() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,15 +127,15 @@ class JdbcAdapterTest {
+ " JdbcFilter(condition=[<($0, 10)])\n"
+ " JdbcTableScan(table=[[foodmart, store]])\n"
+ " JdbcToEnumerableConverter\n"
+ " JdbcProject(ENAME=[$1])\n"
+ " JdbcProject(EXPR$0=[CAST($1):VARCHAR(30)])\n"
+ " JdbcFilter(condition=[>(CAST($0):INTEGER NOT NULL, 10)])\n"
+ " JdbcTableScan(table=[[SCOTT, EMP]])")
.runs()
.enable(CalciteAssert.DB == CalciteAssert.DatabaseInstance.HSQLDB)
.planHasSql("SELECT \"store_name\"\n"
+ "FROM \"foodmart\".\"store\"\n"
+ "WHERE \"store_id\" < 10")
.planHasSql("SELECT \"ENAME\"\n"
.planHasSql("SELECT CAST(\"ENAME\" AS VARCHAR(30))\n"
+ "FROM \"SCOTT\".\"EMP\"\n"
+ "WHERE CAST(\"EMPNO\" AS INTEGER) > 10");
}
Expand Down Expand Up @@ -1344,7 +1344,8 @@ private LockWrapper exclusiveCleanDb(Connection c) throws SQLException {
+ " JdbcTableModify(table=[[foodmart, expense_fact]], operation=[MERGE],"
+ " updateColumnList=[[amount]], flattened=[false])\n"
+ " JdbcProject(STORE_ID=[$0], $f1=[666], $f2=[1997-01-01 00:00:00], $f3=[666],"
+ " $f4=['666'], $f5=[666], AMOUNT=[CAST($1):DECIMAL(10, 4) NOT NULL], store_id=[$2],"
+ " $f4=['666':VARCHAR(30)], $f5=[666], AMOUNT=[CAST($1):DECIMAL(10, 4) NOT NULL],"
+ " store_id=[$2],"
+ " account_id=[$3], exp_date=[$4], time_id=[$5], category_id=[$6], currency_id=[$7],"
+ " amount=[$8], AMOUNT0=[$1])\n"
+ " JdbcJoin(condition=[=($2, $0)], joinType=[left])\n"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,8 @@ final RelMetadataFixture sql(String sql) {
.assertColumnOriginIsEmpty();
}

@Test void testColumnOriginsUnion() {
@Test @Disabled("Plan contains casts, which inhibit metadata propagation")
void testColumnOriginsUnion() {
sql("select name from dept union all select ename from emp")
.assertColumnOriginDouble("DEPT", "NAME", "EMP", "ENAME", false);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3219,7 +3219,10 @@ private void checkPushJoinThroughUnionOnRightDoesNotMatchSemiOrAntiJoin(JoinRelT
+ "select *\n"
+ "from (values (5)) as t(y)";
sql(sql)
.withRule(CoreRules.PROJECT_REMOVE, CoreRules.UNION_TO_VALUES)
.withRule(
CoreRules.PROJECT_REMOVE,
CoreRules.PROJECT_VALUES_MERGE,
CoreRules.UNION_TO_VALUES)
.withInSubQueryThreshold(0)
.check();
}
Expand Down Expand Up @@ -3267,7 +3270,9 @@ private void checkPushJoinThroughUnionOnRightDoesNotMatchSemiOrAntiJoin(JoinRelT
+ "select *\n"
+ "from (values (5)) as t(y)";
sql(sql)
.withRule(CoreRules.PROJECT_REMOVE, CoreRules.UNION_TO_VALUES)
.withRule(CoreRules.PROJECT_REMOVE,
CoreRules.PROJECT_VALUES_MERGE,
CoreRules.UNION_TO_VALUES)
.withInSubQueryThreshold(0)
.check();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,13 @@ public static void checkActualAndReferenceFiles() {
return LOCAL_FIXTURE;
}

/** Test case for <a href="https://issues.apache.org/jira/browse/CALCITE-6350">[CALCITE-6350]
* Unexpected result from UNION with literals expression</a>. */
@Test void testUnionLiterals() {
final String sql = "select * from (select 'word' i union all select 'w' i) t1 where i='w'";
sql(sql).ok();
}

@Test void testDotLiteralAfterNestedRow() {
final String sql = "select ((1,2),(3,4,5)).\"EXPR$1\".\"EXPR$2\" from emp";
sql(sql).ok();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4399,7 +4399,7 @@ private void checkNegWindow(String s, String msg) {
expr("(1,2) in ((1,2), (3,4))")
.columnType("BOOLEAN NOT NULL");
expr("'medium' in (cast(null as varchar(10)), 'bc')")
.columnType("BOOLEAN");
.columnType("BOOLEAN NOT NULL");

// nullability depends on nullability of both sides
sql("select empno in (1, 2) from emp")
Expand Down Expand Up @@ -8752,7 +8752,7 @@ void testGroupExpressionEquivalenceParams() {
.withValidatorColumnReferenceExpansion(true)
.rewritesTo("SELECT `DEPT`.`NAME`\n"
+ "FROM `CATALOG`.`SALES`.`DEPT` AS `DEPT`\n"
+ "WHERE `DEPT`.`NAME` = 'Moonracer'\n"
+ "WHERE `DEPT`.`NAME` = CAST('Moonracer' AS VARCHAR(10) CHARACTER SET `ISO-8859-1`)\n"
+ "GROUP BY `DEPT`.`NAME`\n"
+ "HAVING SUM(`DEPT`.`DEPTNO`) > 3\n"
+ "ORDER BY `NAME`");
Expand All @@ -8771,7 +8771,7 @@ void testGroupExpressionEquivalenceParams() {
+ " `EMP`.`MGR`, `EMP`.`HIREDATE`, `EMP`.`SAL`, `EMP`.`COMM`,"
+ " `EMP`.`DEPTNO`, `EMP`.`SLACKER`\n"
+ "FROM `CATALOG`.`SALES`.`EMP` AS `EMP`) AS `E`\n"
+ "WHERE `E`.`ENAME` = 'Moonracer'\n"
+ "WHERE `E`.`ENAME` = CAST('Moonracer' AS VARCHAR(20) CHARACTER SET `ISO-8859-1`)\n"
+ "GROUP BY `E`.`ENAME`, `E`.`DEPTNO`, `E`.`SAL`\n"
+ "HAVING SUM(`E`.`DEPTNO`) > 3\n"
+ "ORDER BY `ENAME`, `E`.`DEPTNO`, `E`.`SAL`";
Expand All @@ -8789,7 +8789,7 @@ void testGroupExpressionEquivalenceParams() {
+ " order by unexpanded.deptno";
final String expectedSql = "SELECT `DEPT`.`DEPTNO`\n"
+ "FROM `CATALOG`.`SALES`.`DEPT` AS `DEPT`\n"
+ "WHERE `DEPT`.`NAME` = 'Moonracer'\n"
+ "WHERE `DEPT`.`NAME` = CAST('Moonracer' AS VARCHAR(10) CHARACTER SET `ISO-8859-1`)\n"
+ "GROUP BY `DEPT`.`DEPTNO`\n"
+ "HAVING SUM(`DEPT`.`DEPTNO`) > 0\n"
+ "ORDER BY `DEPT`.`DEPTNO`";
Expand Down
Loading

0 comments on commit c7cace6

Please sign in to comment.