diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/BinaryComparison.java b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/BinaryComparison.java index 415f4cc64bd..54e9648d85b 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/BinaryComparison.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/BinaryComparison.java @@ -18,6 +18,8 @@ import java.util.Comparator; +import io.delta.kernel.internal.expressions.CastingComparator; + /** * A {@link BinaryOperator} that compares the left and right {@link Expression}s and evaluates to a * boolean value. diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/CastingComparator.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/expressions/CastingComparator.java similarity index 55% rename from kernel/kernel-api/src/main/java/io/delta/kernel/expressions/CastingComparator.java rename to kernel/kernel-api/src/main/java/io/delta/kernel/internal/expressions/CastingComparator.java index 92fade0f439..a3f979dbf54 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/expressions/CastingComparator.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/expressions/CastingComparator.java @@ -14,34 +14,49 @@ * limitations under the License. */ -package io.delta.kernel.expressions; +package io.delta.kernel.internal.expressions; import java.util.Comparator; +import io.delta.kernel.types.BinaryType; import io.delta.kernel.types.BooleanType; +import io.delta.kernel.types.ByteType; import io.delta.kernel.types.DataType; +import io.delta.kernel.types.DateType; +import io.delta.kernel.types.DoubleType; import io.delta.kernel.types.IntegerType; import io.delta.kernel.types.FloatType; +import io.delta.kernel.types.LongType; +import io.delta.kernel.types.ShortType; import io.delta.kernel.types.StringType; +import io.delta.kernel.types.TimestampType; -// TODO: exclude from public interfaces (move to "internal" somewhere?) public class CastingComparator> implements Comparator { public static Comparator forDataType(DataType dataType) { - if (dataType instanceof IntegerType) { - return new CastingComparator(); - } - if (dataType instanceof BooleanType) { return new CastingComparator(); - } - - if (dataType instanceof FloatType) { + } else if (dataType instanceof ByteType) { + return new CastingComparator(); + } else if (dataType instanceof ShortType) { + return new CastingComparator(); + } else if (dataType instanceof IntegerType) { + return new CastingComparator(); + } else if (dataType instanceof LongType) { return new CastingComparator(); - } - - if (dataType instanceof StringType) { + } else if (dataType instanceof FloatType) { + return new CastingComparator(); + } else if (dataType instanceof DoubleType) { + return new CastingComparator(); + } else if (dataType instanceof StringType) { return new CastingComparator(); + } else if (dataType instanceof DateType) { + // Date value is accessed as integer (number of days since epoch). + // This may change in the future. + return new CastingComparator(); + } else if (dataType instanceof TimestampType) { + // Timestamp value is accessed as long (epoch seconds). This may change in the future. + return new CastingComparator(); } throw new IllegalArgumentException( diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/TableSchemaSerDe.java b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/types/TableSchemaSerDe.java similarity index 59% rename from kernel/kernel-api/src/main/java/io/delta/kernel/types/TableSchemaSerDe.java rename to kernel/kernel-api/src/main/java/io/delta/kernel/internal/types/TableSchemaSerDe.java index d87dbf52399..7dc5c382426 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/TableSchemaSerDe.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/internal/types/TableSchemaSerDe.java @@ -13,11 +13,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package io.delta.kernel.types; +package io.delta.kernel.internal.types; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.function.Function; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; @@ -26,6 +27,17 @@ import io.delta.kernel.data.ColumnVector; import io.delta.kernel.data.ColumnarBatch; import io.delta.kernel.data.Row; +import io.delta.kernel.types.ArrayType; +import io.delta.kernel.types.BasePrimitiveType; +import io.delta.kernel.types.BooleanType; +import io.delta.kernel.types.DataType; +import io.delta.kernel.types.DecimalType; +import io.delta.kernel.types.MapType; +import io.delta.kernel.types.MixedDataType; +import io.delta.kernel.types.StringType; +import io.delta.kernel.types.StructField; +import io.delta.kernel.types.StructType; +import io.delta.kernel.utils.CloseableIterator; import io.delta.kernel.utils.Utils; /** @@ -68,12 +80,15 @@ public static StructType fromJson(JsonHandler jsonHandler, String serializedStru */ private static StructType parseStructType(JsonHandler jsonHandler, String serializedStructType) { - Row row = parse(jsonHandler, serializedStructType, STRUCT_TYPE_SCHEMA); - final List fields = row.getArray(0); - return new StructType( - fields.stream() - .map(field -> parseStructField(jsonHandler, field)) - .collect(Collectors.toList())); + Function evalMethod = (row) -> { + final List fields = row.getArray(0); + return new StructType( + fields.stream() + .map(field -> parseStructField(jsonHandler, field)) + .collect(Collectors.toList())); + }; + return parseAndEvalSingleRow( + jsonHandler, serializedStructType, STRUCT_TYPE_SCHEMA, evalMethod); } /** @@ -82,7 +97,8 @@ private static StructType parseStructType(JsonHandler jsonHandler, String serial private static StructField parseStructField(JsonHandler jsonHandler, Row row) { String name = row.getString(0); - DataType type = parseDataType(jsonHandler, row, 1); + String serializedDataType = row.getString(1); + DataType type = parseDataType(jsonHandler, serializedDataType); boolean nullable = row.getBoolean(2); Map metadata = row.getMap(3); @@ -92,24 +108,23 @@ private static StructField parseStructField(JsonHandler jsonHandler, Row row) /** * Utility method to parse the data type from the {@link Row}. */ - private static DataType parseDataType(JsonHandler jsonHandler, Row row, int ordinal) + private static DataType parseDataType(JsonHandler jsonHandler, String serializedDataType) { - final String typeName = row.getString(ordinal); - - if (BasePrimitiveType.isPrimitiveType(typeName)) { - return BasePrimitiveType.createPrimitive(typeName); + if (BasePrimitiveType.isPrimitiveType(serializedDataType)) { + return BasePrimitiveType.createPrimitive(serializedDataType); } // Check if it is decimal type - if (typeName.startsWith("decimal")) { - if (typeName.equalsIgnoreCase("decimal")) { + if (serializedDataType.startsWith("decimal")) { + if (serializedDataType.equalsIgnoreCase("decimal")) { return DecimalType.USER_DEFAULT; } // parse the precision and scale - Matcher matcher = DECIMAL_TYPE_PATTERN.matcher(typeName); + Matcher matcher = DECIMAL_TYPE_PATTERN.matcher(serializedDataType); if (!matcher.matches()) { - throw new IllegalArgumentException("Invalid decimal type format: " + typeName); + throw new IllegalArgumentException( + "Invalid decimal type format: " + serializedDataType); } return new DecimalType( Integer.valueOf(matcher.group("precision")), @@ -117,67 +132,83 @@ private static DataType parseDataType(JsonHandler jsonHandler, Row row, int ordi } // This must be a complex type which is described as an JSON object. - Optional arrayType = parseAsArrayType(jsonHandler, typeName); + Optional arrayType = parseAsArrayType(jsonHandler, serializedDataType); if (arrayType.isPresent()) { return arrayType.get(); } - Optional mapType = parseAsMapType(jsonHandler, typeName); + Optional mapType = parseAsMapType(jsonHandler, serializedDataType); if (mapType.isPresent()) { return mapType.get(); } - return parseStructType(jsonHandler, typeName); + return parseStructType(jsonHandler, serializedDataType); } private static Optional parseAsArrayType(JsonHandler jsonHandler, String json) { - Row row = parse(jsonHandler, json, ARRAY_TYPE_SCHEMA); - if (!"array".equalsIgnoreCase(row.getString(0))) { - return Optional.empty(); - } + Function> evalMethod = (row) -> { + if (!"array".equalsIgnoreCase(row.getString(0))) { + return Optional.empty(); + } - if (row.isNullAt(1) || row.isNullAt(2)) { - throw new IllegalArgumentException("invalid array serialized format: " + json); - } + if (row.isNullAt(1) || row.isNullAt(2)) { + throw new IllegalArgumentException("invalid array serialized format: " + json); + } + + // Now parse the element type and create an array data type object + DataType elementType = parseDataType(jsonHandler, row.getString(1)); + boolean containsNull = row.getBoolean(2); - // Now parse the element type and create an array data type object - DataType elementType = parseDataType(jsonHandler, row, 1); - boolean containsNull = row.getBoolean(2); + return Optional.of(new ArrayType(elementType, containsNull)); + }; - return Optional.of(new ArrayType(elementType, containsNull)); + return parseAndEvalSingleRow(jsonHandler, json, ARRAY_TYPE_SCHEMA, evalMethod); } private static Optional parseAsMapType(JsonHandler jsonHandler, String json) { - Row row = parse(jsonHandler, json, MAP_TYPE_SCHEMA); - if (!"map".equalsIgnoreCase(row.getString(0))) { - return Optional.empty(); - } + Function> evalMethod = (row -> { + if (!"map".equalsIgnoreCase(row.getString(0))) { + return Optional.empty(); + } - if (row.isNullAt(1) || row.isNullAt(2) || row.isNullAt(3)) { - throw new IllegalArgumentException("invalid map serialized format: " + json); - } + if (row.isNullAt(1) || row.isNullAt(2) || row.isNullAt(3)) { + throw new IllegalArgumentException("invalid map serialized format: " + json); + } - // Now parse the key and value types and create a map data type object - DataType keyType = parseDataType(jsonHandler, row, 1); - DataType valueType = parseDataType(jsonHandler, row, 2); - boolean valueContainsNull = row.getBoolean(3); + // Now parse the key and value types and create a map data type object + DataType keyType = parseDataType(jsonHandler, row.getString(1)); + DataType valueType = parseDataType(jsonHandler, row.getString(2)); + boolean valueContainsNull = row.getBoolean(3); - return Optional.of(new MapType(keyType, valueType, valueContainsNull)); + return Optional.of(new MapType(keyType, valueType, valueContainsNull)); + }); + + return parseAndEvalSingleRow(jsonHandler, json, MAP_TYPE_SCHEMA, evalMethod); } /** * Helper method to parse a single json string */ - private static Row parse(JsonHandler jsonHandler, String jsonString, StructType outputSchema) + private static R parseAndEvalSingleRow( + JsonHandler jsonHandler, + String jsonString, + StructType outputSchema, + Function evalFunction) { ColumnVector columnVector = Utils.singletonColumnVector(jsonString); ColumnarBatch result = jsonHandler.parseJson(columnVector, outputSchema); assert result.getSize() == 1; - return result.getRows().next(); + CloseableIterator rows = result.getRows(); + try { + return evalFunction.apply(rows.next()); + } + finally { + Utils.safeClose(rows); + } } /** @@ -200,17 +231,17 @@ private static Row parse(JsonHandler jsonHandler, String jsonString, StructType /** * Example Array Type in serialized format * { - * "type" : "array", - * "elementType" : { - * "type" : "struct", - * "fields" : [ { - * "name" : "d", - * "type" : "integer", - * "nullable" : false, - * "metadata" : { } - * } ] - * }, - * "containsNull" : true + * "type" : "array", + * "elementType" : { + * "type" : "struct", + * "fields" : [ { + * "name" : "d", + * "type" : "integer", + * "nullable" : false, + * "metadata" : { } + * } ] + * }, + * "containsNull" : true * } */ private static StructType ARRAY_TYPE_SCHEMA = diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/BasePrimitiveType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/BasePrimitiveType.java index 11227eeb44a..ddc3dbfe38a 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/BasePrimitiveType.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/BasePrimitiveType.java @@ -24,7 +24,7 @@ /** * Base class for all primitive types {@link DataType}. */ -class BasePrimitiveType extends DataType +public abstract class BasePrimitiveType extends DataType { /** * Create a primitive type {@link DataType} @@ -32,7 +32,7 @@ class BasePrimitiveType extends DataType * @param primitiveTypeName Primitive type name. * @return */ - protected static DataType createPrimitive(String primitiveTypeName) + public static DataType createPrimitive(String primitiveTypeName) { return Optional.ofNullable(nameToPrimitiveTypeMap.get(primitiveTypeName)) .orElseThrow( @@ -42,13 +42,13 @@ protected static DataType createPrimitive(String primitiveTypeName) /** * Is the given type name a primitive type? */ - protected static boolean isPrimitiveType(String typeName) + public static boolean isPrimitiveType(String typeName) { return nameToPrimitiveTypeMap.containsKey(typeName); } /** For testing only */ - protected static List getAllPrimitiveTypes() { + public static List getAllPrimitiveTypes() { return nameToPrimitiveTypeMap.values().stream().collect(Collectors.toList()); } diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/DecimalType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/DecimalType.java index ec699f416e6..a01cb803548 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/DecimalType.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/DecimalType.java @@ -35,6 +35,11 @@ public final class DecimalType extends DataType public DecimalType(int precision, int scale) { + if (precision < 0 || precision > 38 || scale < 0 || scale > 38 || scale > precision) { + throw new IllegalArgumentException(String.format( + "Invalid precision and scale combo (%d, %d). They should be in the range [0, 38] " + + "and scale can not be more than the precision.", precision, scale)); + } this.precision = precision; this.scale = scale; } diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/types/MixedDataType.java b/kernel/kernel-api/src/main/java/io/delta/kernel/types/MixedDataType.java index 607107101ce..ca754b70049 100644 --- a/kernel/kernel-api/src/main/java/io/delta/kernel/types/MixedDataType.java +++ b/kernel/kernel-api/src/main/java/io/delta/kernel/types/MixedDataType.java @@ -36,7 +36,7 @@ * } * *

- * `map` type column schema is serailized as: + * `struct` type column schema is serialized as: *

  *   {
  *     "type" : "struct",
diff --git a/kernel/kernel-api/src/main/java/io/delta/kernel/utils/Utils.java b/kernel/kernel-api/src/main/java/io/delta/kernel/utils/Utils.java
index 3af1561cab4..16b57a3becc 100644
--- a/kernel/kernel-api/src/main/java/io/delta/kernel/utils/Utils.java
+++ b/kernel/kernel-api/src/main/java/io/delta/kernel/utils/Utils.java
@@ -125,7 +125,7 @@ public String getString(int rowId) {
 
     /**
      * Utility method to get the physical schema from the scan state {@link Row} returned by
-     * {@link Scan#getScanState(TableClient)}}.
+     * {@link Scan#getScanState(TableClient)}.
      *
      * @param scanState Scan state {@link Row}
      * @return Physical schema to read from the data files.
@@ -148,4 +148,16 @@ public static FileStatus getFileStatus(Row scanFileInfo) {
 
         return FileStatus.of(path, size, 0);
     }
+
+    /**
+     * Close the iterator.
+     * @param i1
+     */
+    public static void safeClose(CloseableIterator i1) {
+        try {
+            i1.close();
+        } catch (IOException ioe) {
+            throw new RuntimeException(ioe);
+        }
+    }
 }
diff --git a/kernel/kernel-api/src/test/java/io/delta/kernel/types/JsonHandlerTestImpl.java b/kernel/kernel-api/src/test/java/io/delta/kernel/internal/types/JsonHandlerTestImpl.java
similarity index 94%
rename from kernel/kernel-api/src/test/java/io/delta/kernel/types/JsonHandlerTestImpl.java
rename to kernel/kernel-api/src/test/java/io/delta/kernel/internal/types/JsonHandlerTestImpl.java
index 8da444692c3..25e43141080 100644
--- a/kernel/kernel-api/src/test/java/io/delta/kernel/types/JsonHandlerTestImpl.java
+++ b/kernel/kernel-api/src/test/java/io/delta/kernel/internal/types/JsonHandlerTestImpl.java
@@ -1,4 +1,4 @@
-package io.delta.kernel.types;
+package io.delta.kernel.internal.types;
 
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
@@ -17,6 +17,16 @@
 import io.delta.kernel.data.FileDataReadResult;
 import io.delta.kernel.data.Row;
 import io.delta.kernel.expressions.Expression;
+import io.delta.kernel.types.ArrayType;
+import io.delta.kernel.types.BooleanType;
+import io.delta.kernel.types.DataType;
+import io.delta.kernel.types.IntegerType;
+import io.delta.kernel.types.LongType;
+import io.delta.kernel.types.MapType;
+import io.delta.kernel.types.MixedDataType;
+import io.delta.kernel.types.StringType;
+import io.delta.kernel.types.StructField;
+import io.delta.kernel.types.StructType;
 import io.delta.kernel.utils.CloseableIterator;
 import io.delta.kernel.utils.Utils;
 
diff --git a/kernel/kernel-api/src/test/java/io/delta/kernel/types/TestTableSchemaSerDe.java b/kernel/kernel-api/src/test/java/io/delta/kernel/internal/types/TestTableSchemaSerDe.java
similarity index 86%
rename from kernel/kernel-api/src/test/java/io/delta/kernel/types/TestTableSchemaSerDe.java
rename to kernel/kernel-api/src/test/java/io/delta/kernel/internal/types/TestTableSchemaSerDe.java
index 5c649193445..ffe82a30dae 100644
--- a/kernel/kernel-api/src/test/java/io/delta/kernel/types/TestTableSchemaSerDe.java
+++ b/kernel/kernel-api/src/test/java/io/delta/kernel/internal/types/TestTableSchemaSerDe.java
@@ -13,7 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package io.delta.kernel.types;
+package io.delta.kernel.internal.types;
 
 import java.util.ArrayList;
 import java.util.Collections;
@@ -23,6 +23,22 @@
 import static org.junit.Assert.assertEquals;
 import org.junit.Test;
 
+import io.delta.kernel.types.ArrayType;
+import io.delta.kernel.types.BasePrimitiveType;
+import io.delta.kernel.types.BinaryType;
+import io.delta.kernel.types.BooleanType;
+import io.delta.kernel.types.DataType;
+import io.delta.kernel.types.DateType;
+import io.delta.kernel.types.DecimalType;
+import io.delta.kernel.types.FloatType;
+import io.delta.kernel.types.IntegerType;
+import io.delta.kernel.types.MapType;
+import io.delta.kernel.types.StructField;
+import io.delta.kernel.types.StructType;
+
+import io.delta.kernel.internal.types.JsonHandlerTestImpl;
+import io.delta.kernel.internal.types.TableSchemaSerDe;
+
 public class TestTableSchemaSerDe
 {
     @Test
@@ -110,4 +126,4 @@ private Map sampleMetadata()
         metadata.put("key2", "value2");
         return metadata;
     }
-}
\ No newline at end of file
+}