From e8ed914180e9193c22a1e8842aedf964ef61e946 Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Tue, 27 Dec 2022 17:35:10 +0800 Subject: [PATCH 1/3] Initial commit --- .../utils/velox/VeloxTestSettings.scala | 2 ++ .../org/apache/spark/sql/GlutenTestsTrait.scala | 12 ++---------- .../GlutenDateExpressionsSuite.scala | 17 +---------------- .../GlutenRegexpExpressionsSuite.scala | 15 --------------- 4 files changed, 5 insertions(+), 41 deletions(-) diff --git a/gluten-ut/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala b/gluten-ut/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala index d8a6513fb00e..8f45930f03d3 100644 --- a/gluten-ut/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala @@ -111,6 +111,8 @@ object VeloxTestSettings extends BackendTestSettings { enableSuite[GlutenHashExpressionsSuite] enableSuite[GlutenCollectionExpressionsSuite] enableSuite[GlutenDateExpressionsSuite] + // Has exception in fallback execution when we use resultDF.collect in evaluation. 
+ .exclude("DATE_FROM_UNIX_DATE", "TIMESTAMP_MICROS") enableSuite[GlutenDecimalExpressionSuite] enableSuite[GlutenStringFunctionsSuite] enableSuite[GlutenRegexpExpressionsSuite] diff --git a/gluten-ut/src/test/scala/org/apache/spark/sql/GlutenTestsTrait.scala b/gluten-ut/src/test/scala/org/apache/spark/sql/GlutenTestsTrait.scala index ae9e4bfc50e8..6dbe3233a1d9 100644 --- a/gluten-ut/src/test/scala/org/apache/spark/sql/GlutenTestsTrait.scala +++ b/gluten-ut/src/test/scala/org/apache/spark/sql/GlutenTestsTrait.scala @@ -148,7 +148,7 @@ trait GlutenTestsTrait extends GlutenTestsCommonTrait { def glutenCheckExpression(expression: Expression, expected: Any, - inputRow: InternalRow, justEvalExpr: Boolean = false): Unit = { + inputRow: InternalRow): Unit = { val df = if (inputRow != EmptyRow && inputRow != InternalRow.empty) { convertInternalRowToDataFrame(inputRow) } else { @@ -158,15 +158,7 @@ trait GlutenTestsTrait extends GlutenTestsCommonTrait { _spark.createDataFrame(_spark.sparkContext.parallelize(empData), schema) } val resultDF = df.select(Column(expression)) - val result = if (justEvalExpr) { - try { - expression.eval(inputRow) - } catch { - case e: Exception => fail(s"Exception evaluating $expression", e) - } - } else { - resultDF.collect() - } + val result = resultDF.collect() if (checkDataTypeSupported(expression) && expression.children.forall(checkDataTypeSupported)) { val projectTransformer = resultDF.queryExecution.executedPlan.collect { diff --git a/gluten-ut/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala b/gluten-ut/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala index d2e24b61e0d4..abae83c92736 100644 --- a/gluten-ut/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala +++ b/gluten-ut/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenDateExpressionsSuite.scala @@ -17,22 +17,7 @@ package 
org.apache.spark.sql.catalyst.expressions -import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} -import org.apache.spark.sql.catalyst.analysis.ResolveTimeZone -import org.apache.spark.sql.{GlutenTestConstants, GlutenTestsTrait} +import org.apache.spark.sql.GlutenTestsTrait class GlutenDateExpressionsSuite extends DateExpressionsSuite with GlutenTestsTrait { - - override protected def checkEvaluation(expression: => Expression, - expected: Any, - inputRow: InternalRow = EmptyRow): Unit = { - val resolver = ResolveTimeZone - val expr = resolver.resolveTimeZones(expression) - assert(expr.resolved) - - val catalystValue = CatalystTypeConverters.convertToCatalyst(expected) - // Consistent with the evaluation approach in vanilla spark UT to avoid overflow issue - // in resultDF.collect() for some corner cases. - glutenCheckExpression(expr, catalystValue, inputRow, true) - } } diff --git a/gluten-ut/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenRegexpExpressionsSuite.scala b/gluten-ut/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenRegexpExpressionsSuite.scala index 9eeddada56e8..03f7b60888d3 100644 --- a/gluten-ut/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenRegexpExpressionsSuite.scala +++ b/gluten-ut/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenRegexpExpressionsSuite.scala @@ -17,23 +17,8 @@ package org.apache.spark.sql.catalyst.expressions -import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} -import org.apache.spark.sql.catalyst.analysis.ResolveTimeZone import org.apache.spark.sql.GlutenTestsTrait class GlutenRegexpExpressionsSuite extends RegexpExpressionsSuite with GlutenTestsTrait { - override protected def checkEvaluation(expression: => Expression, - expected: Any, - inputRow: InternalRow = EmptyRow): Unit = { - val resolver = ResolveTimeZone - val expr = resolver.resolveTimeZones(expression) - assert(expr.resolved) - - val catalystValue = 
CatalystTypeConverters.convertToCatalyst(expected) - // Consistent with the evaluation approach in vanilla spark UT to avoid overflow issue - // in resultDF.collect() for some corner cases. - glutenCheckExpression(expr, catalystValue, inputRow, justEvalExpr = true) - } - } From 9744b49901c143e237fcb87ac1e3275ed34c2b9d Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Fri, 30 Dec 2022 14:30:19 +0800 Subject: [PATCH 2/3] Exclude the test for Quarter in CH --- .../glutenproject/utils/clickhouse/ClickHouseTestSettings.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gluten-ut/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala index c857fb9a2d4b..35e1eea79f3b 100644 --- a/gluten-ut/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala @@ -77,7 +77,7 @@ object ClickHouseTestSettings extends BackendTestSettings { enableSuite[GlutenDateExpressionsSuite] .include( - "Quarter", + // "Quarter", // CH backend does not support casting 'yyyy-MM-dd HH:mm:ss' to date32 "date_add", "date_sub", "datediff" From 054dce534ed9301a98a87440278aecff55e3edea Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Tue, 3 Jan 2023 09:28:58 +0800 Subject: [PATCH 3/3] Exclude two tests for CH --- .../utils/clickhouse/ClickHouseTestSettings.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gluten-ut/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala index 35e1eea79f3b..397e3e944660 100644 --- a/gluten-ut/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala @@ -127,11 +127,11 @@ object
ClickHouseTestSettings extends BackendTestSettings { "SPARK-34920: error class" ) - enableSuite[GlutenRegexpExpressionsSuite] - .include( - "SPLIT", - "RLIKE Regular Expression" - ) +// enableSuite[GlutenRegexpExpressionsSuite] +// .include( +// "SPLIT", +// "RLIKE Regular Expression" +// ) enableSuite[GlutenStringExpressionsSuite] .include(