From e993a0f0985c22d7ef334472956578e027b0df75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Tue, 19 Nov 2024 17:08:19 +0100 Subject: [PATCH 01/42] Adding Value Type Null logic --- .../src/Internal/Value_Type_Helpers.enso | 142 +++++++++--------- .../Table/0.0.0-dev/src/Value_Type.enso | 49 ++++-- 2 files changed, 107 insertions(+), 84 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Value_Type_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Value_Type_Helpers.enso index 8bcdbb60339e..7d0a8fd6a757 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Value_Type_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Value_Type_Helpers.enso @@ -23,9 +23,9 @@ polyglot java import org.enso.table.data.column.storage.type.IntegerType for integers it will return 64-bit integers even if the value could fit in a smaller one; and for Text values variable-length text will be preferred over fixed-length. -most_specific_value_type : Any -> Boolean -> Value_Type -most_specific_value_type value use_smallest=False = +most_specific_value_type value:Any use_smallest:Boolean=False -> Value_Type = case value of + Nothing -> Value_Type.Null _ : Float -> Value_Type.Float Bits.Bits_64 _ : Decimal -> Value_Type.Decimal _ : Boolean -> Value_Type.Boolean @@ -58,59 +58,60 @@ most_specific_value_type value use_smallest=False = ## PRIVATE Finds a type that can fit both a current type and a new type. -reconcile_types current new = case current of - Value_Type.Mixed -> Value_Type.Mixed - Value_Type.Integer size -> case new of - Value_Type.Integer new_size -> - Value_Type.Integer (max_size size new_size) - Value_Type.Byte -> current - # If we unify integers with floats, we select the default Float 64 regardless of the input sizes. - Value_Type.Float _ -> Value_Type.Float - Value_Type.Decimal _ _ -> new - _ -> Value_Type.Mixed - Value_Type.Float size -> case new of - Value_Type.Float new_size -> - Value_Type.Float (max_size size new_size) - # If we unify integers with floats, we select the default Float 64 regardless of the input sizes. - Value_Type.Integer _ -> Value_Type.Float - Value_Type.Byte -> Value_Type.Float - Value_Type.Decimal _ _ -> Value_Type.Float - _ -> Value_Type.Mixed - Value_Type.Byte -> case new of - Value_Type.Byte -> Value_Type.Byte - Value_Type.Integer _ -> new - Value_Type.Float _ -> Value_Type.Float - Value_Type.Decimal _ _ -> new - _ -> Value_Type.Mixed - Value_Type.Decimal precision scale -> case new of - Value_Type.Decimal new_precision new_scale -> - if (precision == new_precision) && (scale == new_scale) then new else - # TODO at some point we may want a more clever merging of precision and scale, for now we don't use them too much anyway so we just default to Nothing if they do not agree - Value_Type.Decimal - Value_Type.Integer _ -> Value_Type.Decimal precision scale - Value_Type.Byte -> Value_Type.Decimal precision scale - Value_Type.Float _ -> Value_Type.Float - _ -> Value_Type.Mixed - Value_Type.Char current_size current_variable -> case new of - Value_Type.Char new_size new_variable -> - result_variable = current_variable || new_variable || current_size != new_size - result_size = max_size current_size new_size - Value_Type.Char result_size result_variable - _ -> Value_Type.Mixed - Value_Type.Binary current_size current_variable -> case new of - Value_Type.Binary new_size new_variable -> - result_variable = current_variable || new_variable || current_size != new_size - result_size = max_size current_size new_size - Value_Type.Binary result_size result_variable - _ -> Value_Type.Mixed - _ -> - if current == new then current else Value_Type.Mixed +reconcile_types current:Value_Type new:Value_Type -> Value_Type = + if current == Value_Type.Null then new else if new == Value_Type.Null then current else case current of + Value_Type.Mixed -> Value_Type.Mixed + Value_Type.Integer size -> case new of + Value_Type.Integer new_size -> + Value_Type.Integer (max_size size new_size) + Value_Type.Byte -> current + # If we unify integers with floats, we select the default Float 64 regardless of the input sizes. + Value_Type.Float _ -> Value_Type.Float + Value_Type.Decimal _ _ -> new + _ -> Value_Type.Mixed + Value_Type.Float size -> case new of + Value_Type.Float new_size -> + Value_Type.Float (max_size size new_size) + # If we unify integers with floats, we select the default Float 64 regardless of the input sizes. + Value_Type.Integer _ -> Value_Type.Float + Value_Type.Byte -> Value_Type.Float + Value_Type.Decimal _ _ -> Value_Type.Float + _ -> Value_Type.Mixed + Value_Type.Byte -> case new of + Value_Type.Byte -> Value_Type.Byte + Value_Type.Integer _ -> new + Value_Type.Float _ -> Value_Type.Float + Value_Type.Decimal _ _ -> new + _ -> Value_Type.Mixed + Value_Type.Decimal precision scale -> case new of + Value_Type.Decimal new_precision new_scale -> + if (precision == new_precision) && (scale == new_scale) then new else + # TODO at some point we may want a more clever merging of precision and scale, for now we don't use them too much anyway so we just default to Nothing if they do not agree + Value_Type.Decimal + Value_Type.Integer _ -> Value_Type.Decimal precision scale + Value_Type.Byte -> Value_Type.Decimal precision scale + Value_Type.Float _ -> Value_Type.Float + _ -> Value_Type.Mixed + Value_Type.Char current_size current_variable -> case new of + Value_Type.Char new_size new_variable -> + result_variable = current_variable || new_variable || current_size != new_size + result_size = max_size current_size new_size + Value_Type.Char result_size result_variable + _ -> Value_Type.Mixed + Value_Type.Binary current_size current_variable -> case new of + Value_Type.Binary new_size new_variable -> + result_variable = current_variable || new_variable || current_size != new_size + result_size = max_size current_size new_size + Value_Type.Binary result_size result_variable + _ -> Value_Type.Mixed + _ -> + if current == new then current else Value_Type.Mixed ## PRIVATE Reconciles two size parameters. If either of them is `Nothing` (meaning unbounded), returns `Nothing`. If both are bounded, the larger one is returned. -max_size a b = +max_size (a : Integer | Nothing) (b : Integer | Nothing) -> Integer | Nothing = if a.is_nothing || b.is_nothing then Nothing else if a < b then b else a @@ -118,19 +119,19 @@ max_size a b = Finds the most specific value type that will fit all the provided types. Arguments: - - types: a vector of types to unify. It must not be empty. + - types: a vector of types to unify. - strict: A flag determining how strict the unification is. If `False`, if no common type can be found, `Mixed` is used as a generic fallback. If `True`, `Nothing` is returned if no common type can be found and `Mixed` is only returned if any of the input types was already `Mixed`. find_common_type (types : Vector Value_Type) (strict : Boolean) -> Value_Type | Nothing = - assert types.not_empty - most_generic_type = (types.drop 1).fold types.first reconcile_types - if strict.not || most_generic_type != Value_Type.Mixed then most_generic_type else - ## We return the Mixed type only if the input contained Mixed. - Otherwise we report failure to find common type. - if types.contains Value_Type.Mixed then Value_Type.Mixed else - Nothing + if types.is_empty then Value_Type.Null else + most_generic_type = (types.drop 1).fold types.first reconcile_types + if strict.not || most_generic_type != Value_Type.Mixed then most_generic_type else + ## We return the Mixed type only if the input contained Mixed. + Otherwise we report failure to find common type. + if types.contains Value_Type.Mixed then Value_Type.Mixed else + Nothing ## PRIVATE An extra helper function that reconciles Date_Time types with varying timezone @@ -194,8 +195,7 @@ find_common_numeric_boolean_type (types : Vector Value_Type) -> Value_Type | Not method returns `Nothing` if the value is `Nothing` - so the caller can try to treat this value as fitting any type, or accordingly to specific semantics of each method. -find_argument_type : Any -> Boolean -> Value_Type | Nothing -find_argument_type value infer_precise_type=True = if Nothing == value then Nothing else +find_argument_type value:Any infer_precise_type:Boolean=True -> Value_Type = case is_column value of False -> most_specific_value_type value use_smallest=True True -> @@ -217,10 +217,10 @@ type Addition_Kind resolve_addition_kind arg1 arg2 = type_1 = find_argument_type arg1 type_2 = find_argument_type arg2 - if type_1.is_numeric && (type_2.is_nothing || type_2.is_numeric) then Addition_Kind.Numeric_Add else - if type_1.is_text && (type_2.is_nothing || type_2.is_text) then Addition_Kind.Text_Concat else + if type_1.is_numeric && (type_2.is_null || type_2.is_numeric) then Addition_Kind.Numeric_Add else + if type_1.is_text && (type_2.is_null || type_2.is_text) then Addition_Kind.Text_Concat else Error.throw <| Illegal_Argument.Error <| - if type_2.is_nothing then "Cannot perform addition on a value of type " + type_1.to_display_text + ". Addition can only be performed if the column is of some numeric type or is text." else + if type_2.is_null then "Cannot perform addition on a value of type " + type_1.to_display_text + ". Addition can only be performed if the column is of some numeric type or is text." else "Cannot perform addition on a pair of values of types " + type_1.to_display_text + " and " + type_2.to_display_text + ". Addition can only be performed if both columns are of some numeric type or are both are text." ## PRIVATE @@ -239,10 +239,10 @@ resolve_subtraction_kind arg1 arg2 = type_1 = find_argument_type arg1 type_2 = find_argument_type arg2 - if type_1.is_numeric && (type_2.is_nothing || type_2.is_numeric) then Subtraction_Kind.Numeric_Subtract else + if type_1.is_numeric && (type_2.is_null || type_2.is_numeric) then Subtraction_Kind.Numeric_Subtract else case type_1.is_date_or_time of True -> - if type_2.is_nothing || (type_2 == type_1) then Subtraction_Kind.Date_Time_Difference else + if type_2.is_null || (type_2 == type_1) then Subtraction_Kind.Date_Time_Difference else raise_unexpected_type type_1 arg2 False -> raise_unexpected_type "numeric or date/time" arg1 @@ -292,10 +292,8 @@ raise_unexpected_type expected_type argument = find_common_type_for_arguments : Vector Any -> Value_Type | Nothing ! No_Common_Type find_common_type_for_arguments arguments = # Here we do not want to infer the more precise type for a Mixed column, because we want it to stay Mixed. - types = arguments.map (find_argument_type infer_precise_type=False) . filter Filter_Condition.Not_Nothing - case types.is_empty of - True -> Nothing - False -> case find_common_type types strict=True of - common_type : Value_Type -> common_type - Nothing -> Error.throw <| - No_Common_Type.Error types related_column_name=Nothing + types = arguments.map (find_argument_type infer_precise_type=False) + case find_common_type types strict=True of + common_type : Value_Type -> common_type + Nothing -> Error.throw <| + No_Common_Type.Error types related_column_name=Nothing diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Value_Type.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Value_Type.enso index 4f0dda8cda83..8e98bc2dc489 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Value_Type.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Value_Type.enso @@ -152,6 +152,31 @@ type Value_Type In-Memory and SQLite tables support this. Mixed + ## A special type indicating no values present. + + It can be used when a column does not have any values, so no proper type + could be inferred for it. Columns of this type cannot hold any values + other than `Nothing`. + + Because the column with type `Null` contains no values, it can be used in + any operation. + + ? Implementation Note + + The `is_*` methods will return `False` for this type, as they require + more precision. E.g. `Value_Type.Null.is_boolean` will return `False`. + However, the sibling `expect_*` methods will accept a `Value_Type.Null` + column. + Null + + ## GROUP Standard.Base.Metadata + ICON metadata + Checks if the `Value_Type` represents a null type. + is_null : Boolean + is_null self = case self of + Value_Type.Null -> True + _ -> False + ## GROUP Standard.Base.Metadata ICON metadata Checks if the `Value_Type` represents a boolean type. @@ -273,6 +298,7 @@ type Value_Type Value_Type.Date -> True Value_Type.Date_Time _ -> True Value_Type.Time -> True + Value_Type.Null -> True ## Not all mixed types are ordered, but some can, so we allow ordering for mixed columns. Value_Type.Mixed -> True @@ -287,7 +313,7 @@ type Value_Type integers can be widened into floats and big enough integers may not be able to be exactly represented in floats. can_be_widened_to : Value_Type -> Boolean - can_be_widened_to self target_type = if (self == target_type) || (target_type == Value_Type.Mixed) then True else case self of + can_be_widened_to self target_type = if (self == target_type) || (target_type == Value_Type.Mixed) || (self == Value_Type.Null) then True else case self of Value_Type.Integer self_bits -> case target_type of Value_Type.Integer target_bits -> target_bits >= self_bits Value_Type.Float _ -> True @@ -339,10 +365,11 @@ type Value_Type buckets.index_of bucket-> bucket.contains ctor_name - bucket_1 = find_bucket type_1 - bucket_2 = find_bucket type_2 - if bucket_1.is_nothing || bucket_2.is_nothing then False else - if (type_1 == Value_Type.Mixed) || (type_2 == Value_Type.Mixed) then True else + comparable_with_everything = [Value_Type.Mixed, Value_Type.Null] + if (comparable_with_everything.contains type_1) || (comparable_with_everything.contains type_2) then True else + bucket_1 = find_bucket type_1 + bucket_2 = find_bucket type_2 + if bucket_1.is_nothing || bucket_2.is_nothing then False else bucket_1 == bucket_2 @@ -418,13 +445,10 @@ type Value_Type ## PRIVATE A helper for generating the `Value_Type.expect_` checks. expect_type : Any -> (Value_Type -> Boolean) -> Text|Value_Type -> Any -> Any ! Invalid_Value_Type - expect_type value predicate type_kind ~action = case value of - # Special handling for `Nothing`. Likely, can be removed with #6281. - Nothing -> action - _ -> - typ = Value_Type_Helpers.find_argument_type value - if predicate typ then action else - Value_Type_Helpers.raise_unexpected_type type_kind value + expect_type value predicate type_kind ~action = + typ = Value_Type_Helpers.find_argument_type value + if (typ == Value_Type.Null) || predicate typ then action else + Value_Type_Helpers.raise_unexpected_type type_kind value ## PRIVATE Provides a text representation of the `Value_Type` meant for @@ -459,6 +483,7 @@ type Value_Type Nothing -> "Unsupported_Data_Type" _ : Text -> "Unsupported_Data_Type (" + type_name + ")" Value_Type.Mixed -> "Mixed" + Value_Type.Null -> "Null" ## PRIVATE Provides a JS object representation for use in visualizations. From 69271764b5211d42e35b97f9f552d055072e687d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Tue, 19 Nov 2024 20:59:36 +0100 Subject: [PATCH 02/42] fixes --- .../src/Internal/Value_Type_Helpers.enso | 2 +- .../Standard/Table/0.0.0-dev/src/Value_Type.enso | 16 +++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Value_Type_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Value_Type_Helpers.enso index 7d0a8fd6a757..1eafe4789028 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Value_Type_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Value_Type_Helpers.enso @@ -111,7 +111,7 @@ reconcile_types current:Value_Type new:Value_Type -> Value_Type = Reconciles two size parameters. If either of them is `Nothing` (meaning unbounded), returns `Nothing`. If both are bounded, the larger one is returned. -max_size (a : Integer | Nothing) (b : Integer | Nothing) -> Integer | Nothing = +max_size a b = if a.is_nothing || b.is_nothing then Nothing else if a < b then b else a diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Value_Type.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Value_Type.enso index 8e98bc2dc489..360d93316986 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Value_Type.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Value_Type.enso @@ -360,17 +360,19 @@ type Value_Type are_comparable : Value_Type -> Value_Type -> Boolean are_comparable type_1 type_2 = find_bucket typ = typ.if_not_error <| - buckets = [["Integer", "Byte", "Float", "Decimal"], ["Char"], ["Date"], ["Date_Time"], ["Time"], ["Boolean"], ["Mixed"]] + buckets = [["Integer", "Byte", "Float", "Decimal"], ["Char"], ["Date"], ["Date_Time"], ["Time"], ["Boolean"], ["Mixed"], ["Null"]] ctor_name = Meta.meta typ . constructor . name buckets.index_of bucket-> bucket.contains ctor_name - comparable_with_everything = [Value_Type.Mixed, Value_Type.Null] - if (comparable_with_everything.contains type_1) || (comparable_with_everything.contains type_2) then True else - bucket_1 = find_bucket type_1 - bucket_2 = find_bucket type_2 - if bucket_1.is_nothing || bucket_2.is_nothing then False else - bucket_1 == bucket_2 + bucket_1 = find_bucket type_1 + bucket_2 = find_bucket type_2 + either_type_is_not_ordered = bucket_1.is_nothing || bucket_2.is_nothing + if either_type_is_not_ordered then False else + comparable_with_everything = [Value_Type.Mixed, Value_Type.Null] + if (comparable_with_everything.contains type_1) || (comparable_with_everything.contains type_2) then True else + if bucket_1.is_nothing || bucket_2.is_nothing then False else + bucket_1 == bucket_2 ## PRIVATE From b2b606e611e40128f502a93ad554ae3f376b185c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 20 Nov 2024 11:38:01 +0100 Subject: [PATCH 03/42] wip --- .../org/enso/table/data/column/storage/type/StorageType.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/type/StorageType.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/type/StorageType.java index 9855836b7531..59e43f95de92 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/type/StorageType.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/type/StorageType.java @@ -6,6 +6,7 @@ import java.time.LocalDateTime; import java.time.LocalTime; import java.time.ZonedDateTime; + import org.enso.base.polyglot.NumericConverter; /** @@ -21,6 +22,7 @@ public sealed interface StorageType DateType, FloatType, IntegerType, + NullType, TextType, TimeOfDayType { /** From f647799c0d43db0469f54722fcd043b209815414 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Thu, 21 Nov 2024 11:39:37 +0100 Subject: [PATCH 04/42] forgot to commit file --- .../data/column/storage/type/NullType.java | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 std-bits/table/src/main/java/org/enso/table/data/column/storage/type/NullType.java diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/type/NullType.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/type/NullType.java new file mode 100644 index 000000000000..253ab2b0e6cd --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/type/NullType.java @@ -0,0 +1,23 @@ +package org.enso.table.data.column.storage.type; + +public record NullType() implements StorageType { + public static final NullType INSTANCE = new NullType(); + + @Override + public boolean isNumeric() { + // TODO ? + return false; + } + + @Override + public boolean hasDate() { + // TODO ? + return false; + } + + @Override + public boolean hasTime() { + // TODO ? + return false; + } +} From 9c72ebbc954ec80b065562ccb6ce6be90dad5181 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 29 Nov 2024 19:42:05 +0100 Subject: [PATCH 05/42] NullStorag --- .../Table/0.0.0-dev/src/Internal/Storage.enso | 2 + .../data/column/builder/NullBuilder.java | 57 +++++++++++++ .../data/column/storage/NullStorage.java | 84 +++++++++++++++++++ .../data/column/storage/type/NullType.java | 9 +- 4 files changed, 146 insertions(+), 6 deletions(-) create mode 100644 std-bits/table/src/main/java/org/enso/table/data/column/builder/NullBuilder.java create mode 100644 std-bits/table/src/main/java/org/enso/table/data/column/storage/NullStorage.java diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Storage.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Storage.enso index 99adac591ea4..1d47ab9851e2 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Storage.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Storage.enso @@ -21,6 +21,7 @@ polyglot java import org.enso.table.data.column.storage.type.DateTimeType polyglot java import org.enso.table.data.column.storage.type.DateType polyglot java import org.enso.table.data.column.storage.type.FloatType polyglot java import org.enso.table.data.column.storage.type.IntegerType +polyglot java import org.enso.table.data.column.storage.type.NullType polyglot java import org.enso.table.data.column.storage.type.StorageType polyglot java import org.enso.table.data.column.storage.type.TextType polyglot java import org.enso.table.data.column.storage.type.TimeOfDayType @@ -70,6 +71,7 @@ closest_storage_type value_type = case value_type of Value_Type.Decimal _ scale -> is_integer = scale.is_nothing.not && scale <= 0 if is_integer then BigIntegerType.INSTANCE else BigDecimalType.INSTANCE + Value_Type.Null -> NullType.INSTANCE _ -> Error.throw (Illegal_Argument.Error "Columns of type "+value_type.to_display_text+" are currently not supported in the in-memory backend.") diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/NullBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/NullBuilder.java new file mode 100644 index 000000000000..9b49fd895d76 --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/NullBuilder.java @@ -0,0 +1,57 @@ +package org.enso.table.data.column.builder; + +import org.enso.table.data.column.storage.NullStorage; +import org.enso.table.data.column.storage.Storage; +import org.enso.table.data.column.storage.type.NullType; +import org.enso.table.data.column.storage.type.StorageType; + +public class NullBuilder extends Builder { + private int length = 0; + + @Override + public void appendNoGrow(Object o) { + if (o != null) { + throw new IllegalArgumentException("NullBuilder can only append nulls, but got "+o); + } + } + + @Override + public void append(Object o) { + appendNoGrow(o); + length++; + } + + @Override + public void appendNulls(int count) { + length += count; + } + + @Override + public void appendBulkStorage(Storage storage) { + // For any storage that is not all-null, check if non-null values are present + if (!(storage instanceof NullStorage)) { + for (int i = 0; i < storage.size(); i++) { + if (!storage.isNothing(i)) { + throw new IllegalArgumentException("NullBuilder can only append nulls, but got "+storage.getItemBoxed(i)); + } + } + } + + length += storage.size(); + } + + @Override + public int getCurrentSize() { + return length; + } + + @Override + public Storage seal() { + return new NullStorage(length); + } + + @Override + public StorageType getType() { + return NullType.INSTANCE; + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/NullStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/NullStorage.java new file mode 100644 index 000000000000..986bb87d4993 --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/NullStorage.java @@ -0,0 +1,84 @@ +package org.enso.table.data.column.storage; + +import org.enso.table.data.column.operation.map.MapOperationProblemAggregator; +import org.enso.table.data.column.storage.type.NullType; +import org.enso.table.data.column.storage.type.StorageType; +import org.enso.table.data.mask.OrderMask; +import org.enso.table.data.mask.SliceRange; + +import java.util.BitSet; +import java.util.List; + +/** A specialized storage that can be used by columns that contain only null values. */ +public class NullStorage extends Storage { + private final int size; + + public NullStorage(int size) { + this.size = size; + } + + @Override + public int size() { + return size; + } + + @Override + public StorageType getType() { + return NullType.INSTANCE; + } + + @Override + public boolean isNothing(long index) { + return true; + } + + @Override + public Void getItemBoxed(int idx) { + return null; + } + + @Override + public boolean isBinaryOpVectorized(String name) { + return false; + } + + @Override + public Storage runVectorizedBinaryMap(String name, Object argument, MapOperationProblemAggregator problemAggregator) { + throw new IllegalArgumentException("Operation "+name+" is not vectorized for NullStorage"); + } + + @Override + public Storage runVectorizedZip(String name, Storage argument, MapOperationProblemAggregator problemAggregator) { + throw new IllegalArgumentException("Operation "+name+" is not vectorized for NullStorage"); + } + + @Override + public Storage fillMissingFromPrevious(BoolStorage missingIndicator) { + return this; + } + + @Override + public Storage applyFilter(BitSet filterMask, int newLength) { + return new NullStorage(newLength); + } + + @Override + public Storage applyMask(OrderMask mask) { + return new NullStorage(mask.length()); + } + + @Override + public Storage slice(int offset, int limit) { + return new NullStorage(limit - offset); + } + + @Override + public Storage appendNulls(int count) { + return new NullStorage(size + count); + } + + @Override + public Storage slice(List ranges) { + return new NullStorage(SliceRange.totalLength(ranges)); + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/type/NullType.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/type/NullType.java index 253ab2b0e6cd..bf1ed466b777 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/type/NullType.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/type/NullType.java @@ -5,19 +5,16 @@ public record NullType() implements StorageType { @Override public boolean isNumeric() { - // TODO ? - return false; + return true; } @Override public boolean hasDate() { - // TODO ? - return false; + return true; } @Override public boolean hasTime() { - // TODO ? - return false; + return true; } } From fc63d398c3080794a5e1a05958fa448dfa366c58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 29 Nov 2024 20:05:53 +0100 Subject: [PATCH 06/42] WIP integrating NullStorage --- .../0.0.0-dev/src/Internal/SQLite/SQLite_Type_Mapping.enso | 4 ++++ .../lib/Standard/Table/0.0.0-dev/src/Internal/Storage.enso | 1 + .../java/org/enso/table/data/column/builder/Builder.java | 6 +++++- .../org/enso/table/data/column/builder/InferredBuilder.java | 4 +++- .../org/enso/table/data/column/builder/NullBuilder.java | 6 ++++++ 5 files changed, 19 insertions(+), 2 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Type_Mapping.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Type_Mapping.enso index 110ddd1c6828..2dc16d761ac3 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Type_Mapping.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Type_Mapping.enso @@ -60,6 +60,7 @@ type SQLite_Type_Mapping Value_Type.Date -> unsupported_date_time Value_Type.Date_Time _ -> unsupported_date_time Value_Type.Binary _ _ -> SQLite_Types.blob + Value_Type.Null -> SQLite_Types.null Value_Type.Mixed -> ## The best we could do would be to store mixed values and report invalid type or coerce the values to Text. Both seem @@ -246,6 +247,9 @@ type SQLite_Types The artificial 6th affinity that is used to distinguish boolean columns. boolean = SQL_Type.Value Types.BOOLEAN "BOOLEAN" + ## PRIVATE + null = SQL_Type.Value Types.NULL "NULL" + ## PRIVATE default_text = Value_Type.Char size=Nothing variable_length=True diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Storage.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Storage.enso index 1d47ab9851e2..23f5666fd0dc 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Storage.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Storage.enso @@ -47,6 +47,7 @@ to_value_type storage_type = case storage_type of _ : TimeOfDayType -> Value_Type.Time _ : BigDecimalType -> Value_Type.Decimal _ : BigIntegerType -> Value_Type.Decimal scale=0 + _ : NullType -> Value_Type.Null _ : AnyObjectType -> Value_Type.Mixed ## PRIVATE diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/Builder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/Builder.java index 8fb6296d8d2a..87ac6c8c6b0e 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/Builder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/Builder.java @@ -9,11 +9,14 @@ import org.enso.table.data.column.storage.type.DateType; import org.enso.table.data.column.storage.type.FloatType; import org.enso.table.data.column.storage.type.IntegerType; +import org.enso.table.data.column.storage.type.NullType; import org.enso.table.data.column.storage.type.StorageType; import org.enso.table.data.column.storage.type.TextType; import org.enso.table.data.column.storage.type.TimeOfDayType; import org.enso.table.problems.ProblemAggregator; +import java.util.Objects; + /** A builder for creating columns dynamically. */ public abstract class Builder { /** @@ -41,9 +44,10 @@ public static Builder getForType( case TextType textType -> new StringBuilder(size, textType); case BigDecimalType x -> new BigDecimalBuilder(size); case BigIntegerType x -> new BigIntegerBuilder(size, problemAggregator); + case NullType x -> new NullBuilder(size); case null -> new InferredBuilder(size, problemAggregator); }; - assert java.util.Objects.equals(builder.getType(), type); + assert Objects.equals(builder.getType(), type); return builder; } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/InferredBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/InferredBuilder.java index 829e049622e9..d8c5b344419b 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/InferredBuilder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/InferredBuilder.java @@ -8,6 +8,7 @@ import java.util.List; import org.enso.base.polyglot.NumericConverter; import org.enso.base.polyglot.Polyglot_Utils; +import org.enso.table.data.column.storage.NullStorage; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.column.storage.type.*; import org.enso.table.problems.ProblemAggregator; @@ -194,7 +195,8 @@ public int getCurrentSize() { @Override public Storage seal() { if (currentBuilder == null) { - initBuilderFor(null); + // If all values that the builder got were nulls, we can return a special null storage. + return new NullStorage(currentSize); } return currentBuilder.seal(); } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/NullBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/NullBuilder.java index 9b49fd895d76..e00ca220893e 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/NullBuilder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/NullBuilder.java @@ -8,6 +8,12 @@ public class NullBuilder extends Builder { private int length = 0; + public NullBuilder() {} + + public NullBuilder(int initialCapacity) { + length = initialCapacity; + } + @Override public void appendNoGrow(Object o) { if (o != null) { From bc43d3d6eb03981c7517b12fe121f12b76ac21fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 29 Nov 2024 20:31:45 +0100 Subject: [PATCH 07/42] fixing edge cases with Null type --- .../src/Internal/SQLite/SQLite_Dialect.enso | 17 +++++++++-------- .../enso/table/data/column/builder/Builder.java | 2 +- .../table/data/column/builder/NullBuilder.java | 7 ++----- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso index 9e0bcca0a1fb..c9c9bed4c445 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso @@ -192,14 +192,12 @@ type SQLite_Dialect adapt_unified_column : Internal_Column -> Value_Type -> (SQL_Expression -> SQL_Type_Reference) -> Internal_Column adapt_unified_column self column approximate_result_type infer_result_type_from_database_callback = _ = infer_result_type_from_database_callback - # TODO [RW] This may be revisited with #6281. - case approximate_result_type of - Nothing -> column - _ -> - sql_type = self.get_type_mapping.value_type_to_sql approximate_result_type Problem_Behavior.Ignore - new_expression = self.make_cast_expression column sql_type - new_sql_type_reference = SQL_Type_Reference.from_constant sql_type - Internal_Column.Value column.name new_sql_type_reference new_expression + needs_cast = approximate_result_type != Value_Type.Null + if needs_cast.not then column else + sql_type = self.get_type_mapping.value_type_to_sql approximate_result_type Problem_Behavior.Ignore + new_expression = self.make_cast_expression column sql_type + new_sql_type_reference = SQL_Type_Reference.from_constant sql_type + Internal_Column.Value column.name new_sql_type_reference new_expression ## PRIVATE Add an extra cast to adjust the output type of certain operations with @@ -628,6 +626,9 @@ make_custom_cast column target_value_type type_mapping = result = Ref.new Nothing column_type = type_mapping.sql_type_to_value_type column.sql_type_reference.get + if target_value_type == Value_Type.Null then + result.put column.expression + if target_value_type.is_text && (column_type == Value_Type.Boolean) then expr = SQL_Expression.Operation "IIF" [column.expression, SQL_Expression.Literal "'true'", SQL_Expression.Literal "'false'"] result.put expr diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/Builder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/Builder.java index 87ac6c8c6b0e..c66eda484540 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/Builder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/Builder.java @@ -44,7 +44,7 @@ public static Builder getForType( case TextType textType -> new StringBuilder(size, textType); case BigDecimalType x -> new BigDecimalBuilder(size); case BigIntegerType x -> new BigIntegerBuilder(size, problemAggregator); - case NullType x -> new NullBuilder(size); + case NullType x -> new NullBuilder(); case null -> new InferredBuilder(size, problemAggregator); }; assert Objects.equals(builder.getType(), type); diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/NullBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/NullBuilder.java index e00ca220893e..4fc6f0344efe 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/NullBuilder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/NullBuilder.java @@ -10,21 +10,18 @@ public class NullBuilder extends Builder { public NullBuilder() {} - public NullBuilder(int initialCapacity) { - length = initialCapacity; - } - @Override public void appendNoGrow(Object o) { if (o != null) { throw new IllegalArgumentException("NullBuilder can only append nulls, but got "+o); } + + length++; } @Override public void append(Object o) { appendNoGrow(o); - length++; } @Override From 55dfc5470fa01c18f76e15f6d3741be8509844eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 29 Nov 2024 20:42:15 +0100 Subject: [PATCH 08/42] updating tests --- test/Table_Tests/src/IO/Read_Many_Spec.enso | 19 ++++++------------- .../Table_Tests/src/In_Memory/Table_Spec.enso | 2 +- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/test/Table_Tests/src/IO/Read_Many_Spec.enso b/test/Table_Tests/src/IO/Read_Many_Spec.enso index f5ab2edc7ec8..27925e0dc03f 100644 --- a/test/Table_Tests/src/IO/Read_Many_Spec.enso +++ b/test/Table_Tests/src/IO/Read_Many_Spec.enso @@ -168,15 +168,14 @@ add_specs suite_builder = # We transform the Path to just file name rows = (r.set (r.at "Path" . map .name) "Path").rows.map .to_vector - # TODO Once 6281 is done we should replace 100 with "100" etc. null = Nothing r.column_names . should_equal ["Path", "a", "b", "c", "Value", "d"] rows.at 0 . should_equal ["1_js_object.json", 1, 2, null, null, null] rows.at 1 . should_equal ["2_js_array.json", 30, 40, "foobar", null, null] rows.at 2 . should_equal ["2_js_array.json", 50, 60, null, null, null] - rows.at 3 . should_equal ["3_js_numbers.json", null, null, null, 100, null] - rows.at 4 . should_equal ["3_js_numbers.json", null, null, null, 200, null] - rows.at 5 . should_equal ["3_js_numbers.json", null, null, null, 300, null] + rows.at 3 . should_equal ["3_js_numbers.json", null, null, null, "100", null] + rows.at 4 . should_equal ["3_js_numbers.json", null, null, null, "200", null] + rows.at 5 . should_equal ["3_js_numbers.json", null, null, null, "300", null] rows.at 6 . should_equal ["4_table.tsv", -1, null, null, null, -4] rows.at 7 . should_equal ["4_table.tsv", -2, null, null, null, -5] rows.at 8 . should_equal ["5_plain_text.txt", null, null, null, "Hi!", null] @@ -186,13 +185,9 @@ add_specs suite_builder = r.at "a" . value_type . should_equal Value_Type.Integer r.at "b" . value_type . should_equal Value_Type.Integer r.at "c" . value_type . should_equal Value_Type.Char - # TODO change to Char once 6281 is done - r.at "Value" . value_type . should_equal Value_Type.Mixed + r.at "Value" . value_type . should_equal Value_Type.Char r.at "d" . value_type . should_equal Value_Type.Integer - # Marker to not forget - group_builder.specify "TODO" pending="Once 6281 is done we should update the test above and others." Nothing - group_builder.specify "should warn when a file loads as empty array and not include it in the As_Merged_Table result" <| # But such array should not influence the columns present: with_temp_dir base_dir-> @@ -240,8 +235,7 @@ add_specs suite_builder = r2 = Data.read_many files format=(..Delimited ',' headers=True) return=..As_Merged_Table r2.should_be_a Table - # TODO: once 6281 is done, change expect_only_warning - w2 = Problems.expect_warning No_Rows r2 + w2 = Problems.expect_only_warning No_Rows r2 w2.to_display_text . should_contain "1_empty_table.csv" w2.to_display_text . should_contain "loaded as a table with 0 rows, so it did not contribute any rows to the `As_Merged_Table` result of `read_many`." within_table r2 <| @@ -254,8 +248,7 @@ add_specs suite_builder = r2.row_count . should_equal 1 r2.at "Path" . map .name . to_vector . should_equal ["2_table.csv"] r2.at "A" . to_vector . should_equal [Nothing] - # TODO: once 6281 is done, change '1' to 1 - r2.at "B" . to_vector . should_equal ['1'] + r2.at "B" . to_vector . should_equal [1] r2.at "C" . to_vector . should_equal [2] group_builder.specify "should allow to customize how the tables are merged" <| diff --git a/test/Table_Tests/src/In_Memory/Table_Spec.enso b/test/Table_Tests/src/In_Memory/Table_Spec.enso index f3eb5a657f5a..61325967d802 100644 --- a/test/Table_Tests/src/In_Memory/Table_Spec.enso +++ b/test/Table_Tests/src/In_Memory/Table_Spec.enso @@ -174,7 +174,7 @@ add_specs suite_builder = data.varied_type_table.at "datetimes" . value_type . should_equal Value_Type.Date_Time data.varied_type_table.at "mixed" . value_type . should_equal Value_Type.Mixed data.varied_type_table.at "mixed_dates" . value_type . should_equal Value_Type.Mixed - data.varied_type_table.at "just_nulls" . value_type . should_equal Value_Type.Mixed + data.varied_type_table.at "just_nulls" . value_type . should_equal Value_Type.Null pending_python_missing = if Polyglot.is_language_installed "python" . not then "Can't run Python tests, Python is not installed." From b1b820958423d8df104aa04f674b6e2b76f0ca22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 29 Nov 2024 20:42:23 +0100 Subject: [PATCH 09/42] can cast Null to anything --- .../Standard/Table/0.0.0-dev/src/Internal/Cast_Helpers.enso | 2 +- .../data/column/operation/cast/ToBigDecimalConverter.java | 3 ++- .../data/column/operation/cast/ToBigIntegerConverter.java | 3 ++- .../data/column/operation/cast/ToBooleanStorageConverter.java | 3 ++- .../data/column/operation/cast/ToDateStorageConverter.java | 3 ++- .../data/column/operation/cast/ToDateTimeStorageConverter.java | 3 ++- .../data/column/operation/cast/ToFloatStorageConverter.java | 3 ++- .../data/column/operation/cast/ToIntegerStorageConverter.java | 3 ++- .../data/column/operation/cast/ToTextStorageConverter.java | 3 ++- .../column/operation/cast/ToTimeOfDayStorageConverter.java | 3 ++- 10 files changed, 19 insertions(+), 10 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Cast_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Cast_Helpers.enso index 98495c4fcf07..011ffbfc27b1 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Cast_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Cast_Helpers.enso @@ -13,7 +13,7 @@ polyglot java import org.enso.table.data.column.storage.type.StorageType Checks if one type can be cast into another and returns a dataflow error explaining the situation if not. check_cast_compatibility source_type target_type ~action = - are_compatible = if (target_type == Value_Type.Mixed) || (source_type == Value_Type.Mixed) || target_type.is_text || (source_type == target_type) then True else + are_compatible = if (target_type == Value_Type.Mixed) || (source_type == Value_Type.Mixed) || (source_type == Value_Type.Null) || target_type.is_text || (source_type == target_type) then True else if source_type.is_text && is_a_valid_parse_target target_type then Error.throw (Illegal_Argument.Error "To parse a text column into "+target_type.to_display_text+" type, `parse` should be used instead of `cast`.") else if source_type == Value_Type.Boolean then target_type.is_numeric else if source_type.is_numeric then target_type.is_numeric else diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToBigDecimalConverter.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToBigDecimalConverter.java index f8690b689689..9998e8115827 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToBigDecimalConverter.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToBigDecimalConverter.java @@ -10,6 +10,7 @@ import org.enso.table.data.column.storage.numeric.BigIntegerStorage; import org.enso.table.data.column.storage.numeric.DoubleStorage; import org.enso.table.data.column.storage.type.AnyObjectType; +import org.enso.table.data.column.storage.type.NullType; import org.graalvm.polyglot.Context; public class ToBigDecimalConverter implements StorageConverter { @@ -25,7 +26,7 @@ public Storage cast(Storage storage, CastProblemAggregator proble return convertBigIntegerStorage(bigIntegerStorage, problemAggregator); } else if (storage instanceof BoolStorage boolStorage) { return convertBoolStorage(boolStorage, problemAggregator); - } else if (storage.getType() instanceof AnyObjectType) { + } else if (storage.getType() instanceof AnyObjectType || storage.getType() instanceof NullType) { return castFromMixed(storage, problemAggregator); } else { throw new IllegalStateException( diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToBigIntegerConverter.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToBigIntegerConverter.java index fb26bed28f5f..cfcc0eb0fd3e 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToBigIntegerConverter.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToBigIntegerConverter.java @@ -10,6 +10,7 @@ import org.enso.table.data.column.storage.numeric.BigIntegerStorage; import org.enso.table.data.column.storage.numeric.DoubleStorage; import org.enso.table.data.column.storage.type.AnyObjectType; +import org.enso.table.data.column.storage.type.NullType; import org.graalvm.polyglot.Context; public class ToBigIntegerConverter implements StorageConverter { @@ -25,7 +26,7 @@ public Storage cast(Storage storage, CastProblemAggregator proble return convertBoolStorage(boolStorage, problemAggregator); } else if (storage instanceof BigDecimalStorage bigDecimalStorage) { return convertBigDecimalStorage(bigDecimalStorage, problemAggregator); - } else if (storage.getType() instanceof AnyObjectType) { + } else if (storage.getType() instanceof AnyObjectType || storage.getType() instanceof NullType) { return castFromMixed(storage, problemAggregator); } else { throw new IllegalStateException( diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToBooleanStorageConverter.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToBooleanStorageConverter.java index 7cd024ef61ce..d57864ec23fb 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToBooleanStorageConverter.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToBooleanStorageConverter.java @@ -4,6 +4,7 @@ import org.enso.table.data.column.storage.BoolStorage; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.column.storage.type.AnyObjectType; +import org.enso.table.data.column.storage.type.NullType; import org.graalvm.polyglot.Context; public class ToBooleanStorageConverter implements StorageConverter { @@ -11,7 +12,7 @@ public class ToBooleanStorageConverter implements StorageConverter { public Storage cast(Storage storage, CastProblemAggregator problemAggregator) { if (storage instanceof BoolStorage boolStorage) { return boolStorage; - } else if (storage.getType() instanceof AnyObjectType) { + } else if (storage.getType() instanceof AnyObjectType || storage.getType() instanceof NullType) { return castFromMixed(storage, problemAggregator); } else { throw new IllegalStateException( diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToDateStorageConverter.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToDateStorageConverter.java index 3252b6abf9dd..b23000d5f37f 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToDateStorageConverter.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToDateStorageConverter.java @@ -7,6 +7,7 @@ import org.enso.table.data.column.storage.datetime.DateStorage; import org.enso.table.data.column.storage.datetime.DateTimeStorage; import org.enso.table.data.column.storage.type.AnyObjectType; +import org.enso.table.data.column.storage.type.NullType; import org.graalvm.polyglot.Context; public class ToDateStorageConverter implements StorageConverter { @@ -16,7 +17,7 @@ public Storage cast(Storage storage, CastProblemAggregator problem return dateStorage; } else if (storage instanceof DateTimeStorage dateTimeStorage) { return convertDateTimeStorage(dateTimeStorage, problemAggregator); - } else if (storage.getType() instanceof AnyObjectType) { + } else if (storage.getType() instanceof AnyObjectType || storage.getType() instanceof NullType) { return castFromMixed(storage, problemAggregator); } else { throw new IllegalStateException( diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToDateTimeStorageConverter.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToDateTimeStorageConverter.java index 949e485a3627..1f415fa2632e 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToDateTimeStorageConverter.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToDateTimeStorageConverter.java @@ -8,6 +8,7 @@ import org.enso.table.data.column.storage.datetime.DateStorage; import org.enso.table.data.column.storage.datetime.DateTimeStorage; import org.enso.table.data.column.storage.type.AnyObjectType; +import org.enso.table.data.column.storage.type.NullType; import org.graalvm.polyglot.Context; public class ToDateTimeStorageConverter implements StorageConverter { @@ -17,7 +18,7 @@ public Storage cast(Storage storage, CastProblemAggregator pro return dateTimeStorage; } else if (storage instanceof DateStorage dateStorage) { return convertDateStorage(dateStorage, problemAggregator); - } else if (storage.getType() instanceof AnyObjectType) { + } else if (storage.getType() instanceof AnyObjectType || storage.getType() instanceof NullType) { return castFromMixed(storage, problemAggregator); } else { throw new IllegalStateException( diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToFloatStorageConverter.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToFloatStorageConverter.java index 6a372eb5e8be..9e0cbc07b816 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToFloatStorageConverter.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToFloatStorageConverter.java @@ -14,6 +14,7 @@ import org.enso.table.data.column.storage.type.AnyObjectType; import org.enso.table.data.column.storage.type.Bits; import org.enso.table.data.column.storage.type.FloatType; +import org.enso.table.data.column.storage.type.NullType; import org.graalvm.polyglot.Context; public class ToFloatStorageConverter implements StorageConverter { @@ -36,7 +37,7 @@ public Storage cast(Storage storage, CastProblemAggregator problemAgg return convertBigIntegerStorage(bigIntegerStorage, problemAggregator); } else if (storage instanceof BigDecimalStorage bigDecimalStorage) { return convertBigDecimalStorage(bigDecimalStorage, problemAggregator); - } else if (storage.getType() instanceof AnyObjectType) { + } else if (storage.getType() instanceof AnyObjectType || storage.getType() instanceof NullType) { return castFromMixed(storage, problemAggregator); } else { throw new IllegalStateException( diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToIntegerStorageConverter.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToIntegerStorageConverter.java index ed668a94500f..087d193b4efd 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToIntegerStorageConverter.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToIntegerStorageConverter.java @@ -15,6 +15,7 @@ import org.enso.table.data.column.storage.numeric.LongStorage; import org.enso.table.data.column.storage.type.AnyObjectType; import org.enso.table.data.column.storage.type.IntegerType; +import org.enso.table.data.column.storage.type.NullType; import org.enso.table.util.BitSets; import org.graalvm.polyglot.Context; @@ -41,7 +42,7 @@ public Storage cast(Storage storage, CastProblemAggregator problemAggre return convertBigIntegerStorage(bigIntegerStorage, problemAggregator); } else if (storage instanceof BigDecimalStorage bigDecimalStorage) { return convertBigDecimalStorage(bigDecimalStorage, problemAggregator); - } else if (storage.getType() instanceof AnyObjectType) { + } else if (storage.getType() instanceof AnyObjectType || storage.getType() instanceof NullType) { return castFromMixed(storage, problemAggregator); } else { throw new IllegalStateException( diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToTextStorageConverter.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToTextStorageConverter.java index 3c1afce68cca..1efc67ddd885 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToTextStorageConverter.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToTextStorageConverter.java @@ -16,6 +16,7 @@ import org.enso.table.data.column.storage.numeric.AbstractLongStorage; import org.enso.table.data.column.storage.numeric.DoubleStorage; import org.enso.table.data.column.storage.type.AnyObjectType; +import org.enso.table.data.column.storage.type.NullType; import org.enso.table.data.column.storage.type.TextType; import org.graalvm.polyglot.Context; @@ -47,7 +48,7 @@ public Storage cast(Storage storage, CastProblemAggregator problemAgg return castDateTimeStorage(dateStorage, this::convertDate, problemAggregator); } else if (storage instanceof DateTimeStorage dateTimeStorage) { return castDateTimeStorage(dateTimeStorage, this::convertDateTime, problemAggregator); - } else if (storage.getType() instanceof AnyObjectType) { + } else if (storage.getType() instanceof AnyObjectType || storage.getType() instanceof NullType) { return castFromMixed(storage, problemAggregator); } else { throw new IllegalStateException( diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToTimeOfDayStorageConverter.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToTimeOfDayStorageConverter.java index f4650e5dbcd9..5d869efa3fdc 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToTimeOfDayStorageConverter.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToTimeOfDayStorageConverter.java @@ -7,6 +7,7 @@ import org.enso.table.data.column.storage.datetime.DateTimeStorage; import org.enso.table.data.column.storage.datetime.TimeOfDayStorage; import org.enso.table.data.column.storage.type.AnyObjectType; +import org.enso.table.data.column.storage.type.NullType; import org.graalvm.polyglot.Context; public class ToTimeOfDayStorageConverter implements StorageConverter { @@ -16,7 +17,7 @@ public Storage cast(Storage storage, CastProblemAggregator problem return timeOfDayStorage; } else if (storage instanceof DateTimeStorage dateTimeStorage) { return convertDateTimeStorage(dateTimeStorage, problemAggregator); - } else if (storage.getType() instanceof AnyObjectType) { + } else if (storage.getType() instanceof AnyObjectType || storage.getType() instanceof NullType) { return castFromMixed(storage, problemAggregator); } else { throw new IllegalStateException( From df1db004810da34c971b268240a10cd0f6918a69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 29 Nov 2024 20:46:48 +0100 Subject: [PATCH 10/42] union with null should be possible for every type --- .../org/enso/table/data/column/builder/BoolBuilder.java | 3 +++ .../enso/table/data/column/builder/DoubleBuilder.java | 3 +++ .../enso/table/data/column/builder/InferredBuilder.java | 8 ++++++-- .../data/column/builder/InferredIntegerBuilder.java | 9 +++++++-- .../org/enso/table/data/column/builder/LongBuilder.java | 3 +++ .../org/enso/table/data/column/builder/NullBuilder.java | 2 +- .../enso/table/data/column/builder/ObjectBuilder.java | 3 +++ .../enso/table/data/column/builder/TypedBuilderImpl.java | 3 +++ 8 files changed, 29 insertions(+), 5 deletions(-) diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/BoolBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/BoolBuilder.java index a92fe3751641..af01c3c93ba0 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/BoolBuilder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/BoolBuilder.java @@ -4,6 +4,7 @@ import org.enso.table.data.column.storage.BoolStorage; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.column.storage.type.BooleanType; +import org.enso.table.data.column.storage.type.NullType; import org.enso.table.data.column.storage.type.StorageType; import org.enso.table.error.ValueTypeMismatchException; import org.enso.table.util.BitSets; @@ -81,6 +82,8 @@ public void appendBulkStorage(Storage storage) { + storage + ". This is a bug in the Table library."); } + } else if (storage.getType() instanceof NullType) { + appendNulls(storage.size()); } else { throw new StorageTypeMismatchException(getType(), storage.getType()); } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/DoubleBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/DoubleBuilder.java index ecf25d02704a..b05aab2a0e79 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/DoubleBuilder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/DoubleBuilder.java @@ -16,6 +16,7 @@ import org.enso.table.data.column.storage.type.BooleanType; import org.enso.table.data.column.storage.type.FloatType; import org.enso.table.data.column.storage.type.IntegerType; +import org.enso.table.data.column.storage.type.NullType; import org.enso.table.data.column.storage.type.StorageType; import org.enso.table.error.ValueTypeMismatchException; import org.enso.table.problems.ProblemAggregator; @@ -156,6 +157,8 @@ public void appendBulkStorage(Storage storage) { + storage + ". This is a bug in the Table library."); } + } else if (storage.getType() instanceof NullType) { + appendNulls(storage.size()); } else { throw new StorageTypeMismatchException(getType(), storage.getType()); } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/InferredBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/InferredBuilder.java index d8c5b344419b..bab2b07106a6 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/InferredBuilder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/InferredBuilder.java @@ -105,8 +105,12 @@ public void appendNulls(int count) { @Override public void appendBulkStorage(Storage storage) { - for (int i = 0; i < storage.size(); i++) { - append(storage.getItemBoxed(i)); + if (storage.getType() instanceof NullType) { + appendNulls(storage.size()); + } else { + for (int i = 0; i < storage.size(); i++) { + append(storage.getItemBoxed(i)); + } } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/InferredIntegerBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/InferredIntegerBuilder.java index 5ec3d1e3c8f7..dc4773ea1780 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/InferredIntegerBuilder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/InferredIntegerBuilder.java @@ -5,6 +5,7 @@ import org.enso.table.data.column.storage.Storage; import org.enso.table.data.column.storage.type.BigIntegerType; import org.enso.table.data.column.storage.type.IntegerType; +import org.enso.table.data.column.storage.type.NullType; import org.enso.table.data.column.storage.type.StorageType; import org.enso.table.problems.ProblemAggregator; @@ -90,8 +91,12 @@ public void appendNulls(int count) { @Override public void appendBulkStorage(Storage storage) { - for (int i = 0; i < storage.size(); i++) { - append(storage.getItemBoxed(i)); + if (storage.getType() instanceof NullType) { + appendNulls(storage.size()); + } else { + for (int i = 0; i < storage.size(); i++) { + append(storage.getItemBoxed(i)); + } } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/LongBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/LongBuilder.java index e56e4b86722b..fa716d716e94 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/LongBuilder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/LongBuilder.java @@ -12,6 +12,7 @@ import org.enso.table.data.column.storage.type.BooleanType; import org.enso.table.data.column.storage.type.FloatType; import org.enso.table.data.column.storage.type.IntegerType; +import org.enso.table.data.column.storage.type.NullType; import org.enso.table.data.column.storage.type.StorageType; import org.enso.table.problems.ProblemAggregator; import org.enso.table.util.BitSets; @@ -109,6 +110,8 @@ public void appendBulkStorage(Storage storage) { data[currentSize++] = ToIntegerStorageConverter.booleanAsLong(boolStorage.getItem(i)); } } + } else if (storage.getType() instanceof NullType) { + appendNulls(storage.size()); } else { throw new IllegalStateException( "Unexpected storage implementation for type BOOLEAN: " diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/NullBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/NullBuilder.java index 4fc6f0344efe..ecced32b5501 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/NullBuilder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/NullBuilder.java @@ -32,7 +32,7 @@ public void appendNulls(int count) { @Override public void appendBulkStorage(Storage storage) { // For any storage that is not all-null, check if non-null values are present - if (!(storage instanceof NullStorage)) { + if (!(storage.getType() instanceof NullType)) { for (int i = 0; i < storage.size(); i++) { if (!storage.isNothing(i)) { throw new IllegalArgumentException("NullBuilder can only append nulls, but got "+storage.getItemBoxed(i)); diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/ObjectBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/ObjectBuilder.java index 2ad32dee0b23..11d296df200a 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/ObjectBuilder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/ObjectBuilder.java @@ -5,6 +5,7 @@ import org.enso.table.data.column.storage.SpecializedStorage; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.column.storage.type.AnyObjectType; +import org.enso.table.data.column.storage.type.NullType; import org.enso.table.data.column.storage.type.StorageType; /** A builder for boxed object columns. */ @@ -72,6 +73,8 @@ public void appendBulkStorage(Storage storage) { if (storage instanceof SpecializedStorage specializedStorage) { System.arraycopy(specializedStorage.getData(), 0, data, currentSize, storage.size()); currentSize += storage.size(); + } else if (storage.getType() instanceof NullType) { + appendNulls(storage.size()); } else { int n = storage.size(); for (int i = 0; i < n; i++) { diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/TypedBuilderImpl.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/TypedBuilderImpl.java index 9f4118cf02eb..b50de168d3e4 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/TypedBuilderImpl.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/TypedBuilderImpl.java @@ -5,6 +5,7 @@ import org.enso.table.data.column.storage.SpecializedStorage; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.column.storage.type.AnyObjectType; +import org.enso.table.data.column.storage.type.NullType; import org.enso.table.data.column.storage.type.StorageType; public abstract class TypedBuilderImpl extends TypedBuilder { @@ -70,6 +71,8 @@ public void appendBulkStorage(Storage storage) { + storage + ". This is a bug in the Table library."); } + } else if (storage.getType() instanceof NullType) { + appendNulls(storage.size()); } else { throw new StorageTypeMismatchException(getType(), storage.getType()); } From 6100b86610b899fd2ce6c4ee45d26863433dca93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 29 Nov 2024 20:55:59 +0100 Subject: [PATCH 11/42] updating tests, check that Null is accepted in Data.read_many --- test/Table_Tests/src/IO/Read_Many_Spec.enso | 47 +++++++++++++++++++-- 1 file changed, 43 insertions(+), 4 deletions(-) diff --git a/test/Table_Tests/src/IO/Read_Many_Spec.enso b/test/Table_Tests/src/IO/Read_Many_Spec.enso index 27925e0dc03f..880678db9f1f 100644 --- a/test/Table_Tests/src/IO/Read_Many_Spec.enso +++ b/test/Table_Tests/src/IO/Read_Many_Spec.enso @@ -235,12 +235,9 @@ add_specs suite_builder = r2 = Data.read_many files format=(..Delimited ',' headers=True) return=..As_Merged_Table r2.should_be_a Table - w2 = Problems.expect_only_warning No_Rows r2 - w2.to_display_text . should_contain "1_empty_table.csv" - w2.to_display_text . should_contain "loaded as a table with 0 rows, so it did not contribute any rows to the `As_Merged_Table` result of `read_many`." within_table r2 <| ## But it DOES influence the columns present. - That is because the column structure is a 'structural' property, + That is because the column structure is a 'structural' property, we want the structure of the result to be the same regardless if it has 0 or more rows. If the workflow is run next time with this table having some rows, it is better that the structure is preserved. Otherwise, a workflow that is running fine could stop working once a file is changed to contain no rows, as some column could no longer be found. @@ -251,6 +248,10 @@ add_specs suite_builder = r2.at "B" . to_vector . should_equal [1] r2.at "C" . to_vector . should_equal [2] + w2 = Problems.expect_only_warning No_Rows r2 + w2.to_display_text . should_contain "1_empty_table.csv" + w2.to_display_text . should_contain "loaded as a table with 0 rows, so it did not contribute any rows to the `As_Merged_Table` result of `read_many`." + group_builder.specify "should allow to customize how the tables are merged" <| with_temp_dir base_dir-> '{"a": 1, "b": 2}'.write (base_dir / "1_js_object.json") @@ -319,6 +320,44 @@ add_specs suite_builder = t3.row_count . should_equal 0 t3.column_names . should_equal ["Path", "Value"] + group_builder.specify "works correctly if Nothing entries are encountered in the input column" <| + with_temp_dir base_dir-> + f = base_dir / "1.csv" + 'x,y,z\n1,2,3'.write f . should_succeed + t = Table.new [["A", [1, 2, 3, 4]], ["Path", [Nothing, f, f, Nothing]]] + r = Data.read_many t return=..As_Merged_Table + r.should_be_a Table + within_table r <| + r.column_names . should_equal ["Path", "A", "x", "y", "z"] + r.at "Path" . map (p-> p.if_not_nothing p.name) . to_vector . should_equal [Nothing, "1.csv", "1.csv", Nothing] + r.at "A" . to_vector . should_equal [1, 2, 3, 4] + r.at "x" . to_vector . should_equal [Nothing, 1, 1, Nothing] + r.at "y" . to_vector . should_equal [Nothing, 2, 2, Nothing] + r.at "z" . to_vector . should_equal [Nothing, 3, 3, Nothing] + + r2 = Data.read_many (t.at "Path") return=..As_Merged_Table + r2.should_be_a Table + within_table r2 <| + r2.column_names . should_equal ["Path", "x", "y", "z"] + r2.at "Path" . map .name . to_vector . should_equal ["1.csv", "1.csv"] + r.at "Path" . map (p-> p.if_not_nothing p.name) . to_vector . should_equal [Nothing, "1.csv", "1.csv", Nothing] + r2.at "x" . to_vector . should_equal [Nothing, 1, 1, Nothing] + + v3 = Data.read_many (t.at "Path") return=..As_Vector + v3.at 0 . should_equal Nothing + v3.at 1 . should_be_a Table + v3.at 2 . should_be_a Table + v3.at 3 . should_equal Nothing + + all_null = Column.from_vector "C" [Nothing] + r4 = Data.read_many all_null return=..As_Vector + r4.at 0 . should_equal Nothing + + r5 = Data.read_many all_null return=..As_Merged_Table + r5.should_be_a Table + r5.column_names . should_equal ["C"] + r5.at "C" . to_vector . should_equal [Nothing] + group_builder.specify "should have sane behaviour if all files are weird" <| with_temp_dir base_dir-> '{}'.write (base_dir / "1_js_object.json") From f268d1bbedd088d145df404f33dd5caf35c1fc1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 29 Nov 2024 20:56:07 +0100 Subject: [PATCH 12/42] allow null col --- .../Table/0.0.0-dev/src/Internal/Read_Many_Helpers.enso | 2 ++ 1 file changed, 2 insertions(+) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Read_Many_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Read_Many_Helpers.enso index e98b52338088..40feb019f4aa 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Read_Many_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Read_Many_Helpers.enso @@ -30,6 +30,8 @@ ensure_column_type_valid_to_be_files_list (column : Column) ~action = Value_Type.Mixed -> True # Columns containing paths as Text will be Char Value_Type.Char _ _ -> True + # Empty columns are also valid + Value_Type.Null -> True _ -> False if is_expected_type then action else Error.throw (Invalid_Value_Type.Column "Text or Mixed" column.value_type column.name) From 98a5aa8ed8797e91788f380af178cfcddfda3454 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 29 Nov 2024 20:57:04 +0100 Subject: [PATCH 13/42] typo --- test/Table_Tests/src/IO/Read_Many_Spec.enso | 48 ++++++++++----------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/test/Table_Tests/src/IO/Read_Many_Spec.enso b/test/Table_Tests/src/IO/Read_Many_Spec.enso index 880678db9f1f..51557df19f1f 100644 --- a/test/Table_Tests/src/IO/Read_Many_Spec.enso +++ b/test/Table_Tests/src/IO/Read_Many_Spec.enso @@ -324,30 +324,30 @@ add_specs suite_builder = with_temp_dir base_dir-> f = base_dir / "1.csv" 'x,y,z\n1,2,3'.write f . should_succeed - t = Table.new [["A", [1, 2, 3, 4]], ["Path", [Nothing, f, f, Nothing]]] - r = Data.read_many t return=..As_Merged_Table - r.should_be_a Table - within_table r <| - r.column_names . should_equal ["Path", "A", "x", "y", "z"] - r.at "Path" . map (p-> p.if_not_nothing p.name) . to_vector . should_equal [Nothing, "1.csv", "1.csv", Nothing] - r.at "A" . to_vector . should_equal [1, 2, 3, 4] - r.at "x" . to_vector . should_equal [Nothing, 1, 1, Nothing] - r.at "y" . to_vector . should_equal [Nothing, 2, 2, Nothing] - r.at "z" . to_vector . should_equal [Nothing, 3, 3, Nothing] - - r2 = Data.read_many (t.at "Path") return=..As_Merged_Table - r2.should_be_a Table - within_table r2 <| - r2.column_names . should_equal ["Path", "x", "y", "z"] - r2.at "Path" . map .name . to_vector . should_equal ["1.csv", "1.csv"] - r.at "Path" . map (p-> p.if_not_nothing p.name) . to_vector . should_equal [Nothing, "1.csv", "1.csv", Nothing] - r2.at "x" . to_vector . should_equal [Nothing, 1, 1, Nothing] - - v3 = Data.read_many (t.at "Path") return=..As_Vector - v3.at 0 . should_equal Nothing - v3.at 1 . should_be_a Table - v3.at 2 . should_be_a Table - v3.at 3 . should_equal Nothing + t = Table.new [["A", [1, 2, 3, 4]], ["Path", [Nothing, f, f, Nothing]]] + r = Data.read_many t return=..As_Merged_Table + r.should_be_a Table + within_table r <| + r.column_names . should_equal ["Path", "A", "x", "y", "z"] + r.at "Path" . map (p-> p.if_not_nothing p.name) . to_vector . should_equal [Nothing, "1.csv", "1.csv", Nothing] + r.at "A" . to_vector . should_equal [1, 2, 3, 4] + r.at "x" . to_vector . should_equal [Nothing, 1, 1, Nothing] + r.at "y" . to_vector . should_equal [Nothing, 2, 2, Nothing] + r.at "z" . to_vector . should_equal [Nothing, 3, 3, Nothing] + + r2 = Data.read_many (t.at "Path") return=..As_Merged_Table + r2.should_be_a Table + within_table r2 <| + r2.column_names . should_equal ["Path", "x", "y", "z"] + r2.at "Path" . map .name . to_vector . should_equal ["1.csv", "1.csv"] + r.at "Path" . map (p-> p.if_not_nothing p.name) . to_vector . should_equal [Nothing, "1.csv", "1.csv", Nothing] + r2.at "x" . to_vector . should_equal [Nothing, 1, 1, Nothing] + + v3 = Data.read_many (t.at "Path") return=..As_Vector + v3.at 0 . should_equal Nothing + v3.at 1 . should_be_a Table + v3.at 2 . should_be_a Table + v3.at 3 . should_equal Nothing all_null = Column.from_vector "C" [Nothing] r4 = Data.read_many all_null return=..As_Vector From d8068b9d954b44a346a1785ad21a1f2b28c24458 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 29 Nov 2024 21:03:58 +0100 Subject: [PATCH 14/42] postgres --- .../0.0.0-dev/src/Internal/Postgres/Postgres_Type_Mapping.enso | 2 ++ 1 file changed, 2 insertions(+) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Type_Mapping.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Type_Mapping.enso index eac2d1fd9e76..23729eed7e12 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Type_Mapping.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Type_Mapping.enso @@ -64,6 +64,8 @@ type Postgres_Type_Mapping but we may also consider using the standard SQL `bit(n)` and `bit varying(n)` types. See: https://www.postgresql.org/docs/current/datatype-bit.html SQL_Type.Value Types.BINARY "bytea" precision=max_precision + Value_Type.Null -> + SQL_Type.Value Types.NULL "null" Value_Type.Mixed -> Error.throw (Unsupported_Database_Type.Error "Mixed" "Postgres") Value_Type.Unsupported_Data_Type type_name underlying_type -> From 95e4c73841c32b16654b98670b67a5cc79dc4862 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 29 Nov 2024 21:07:21 +0100 Subject: [PATCH 15/42] re enable some union tests --- .../src/Common_Table_Operations/Join/Union_Spec.enso | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/Table_Tests/src/Common_Table_Operations/Join/Union_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Join/Union_Spec.enso index 1f7ce44523bf..b5807b118f34 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Join/Union_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Join/Union_Spec.enso @@ -531,7 +531,7 @@ run_union_tests group_builder setup call_union = Problems.test_problem_handling action problems tester # This test is not run on DB as all-null columns will not be Mixed there. - if setup.is_database.not then group_builder.specify "if no common type can be found, will fall back to converting all types to text and warn (all-null columns edge case)" pending="TODO Value_Type.Null #6281" <| + if setup.is_database.not then group_builder.specify "if no common type can be found, will fall back to converting all types to text and warn (all-null columns edge case)" <| # If a column is all-null, it will often have Mixed type. But that should not prevent the union from falling into the mixed type. t1 = table_builder [["A", [Nothing]]] t2 = table_builder [["A", [2, 3]]] @@ -547,7 +547,7 @@ run_union_tests group_builder setup call_union = t.at "A" . to_vector . should_equal [Nothing, '2', '3', Nothing, Nothing, 'a', 'b', Nothing] t.at "A" . value_type . is_text . should_be_true - if setup.is_database.not then group_builder.specify "all-Nothing column should not influence result type, unless it had a type explicitly given to it" pending="TODO Value_Type.Null #6281" <| + if setup.is_database.not then group_builder.specify "all-Nothing column should not influence result type, unless it had a type explicitly given to it" <| t1 = table_builder [["A", [Nothing]]] t2 = table_builder [["A", [2, 3]]] From 539a8fdcbdbca327e525eb54129dfb747efb5847 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Fri, 29 Nov 2024 21:07:27 +0100 Subject: [PATCH 16/42] re enable some agg tests --- .../src/In_Memory/Aggregate_Column_Spec.enso | 31 ++++++------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/test/Table_Tests/src/In_Memory/Aggregate_Column_Spec.enso b/test/Table_Tests/src/In_Memory/Aggregate_Column_Spec.enso index 99e51abe26e7..3f728d0a8442 100644 --- a/test/Table_Tests/src/In_Memory/Aggregate_Column_Spec.enso +++ b/test/Table_Tests/src/In_Memory/Aggregate_Column_Spec.enso @@ -66,50 +66,43 @@ add_specs suite_builder = suite_builder.group "Aggregate Columns" group_builder- test_aggregator data.simple_table (Count_Empty -1) "Count Empty text" 3 test_aggregator data.simple_table (Count_Empty -1 test_name) test_name 3 test_aggregator data.simple_table (Count_Empty "text" test_name) test_name 3 - # TODO [RW] Re-enable this once #6281 is implemented. - # test_aggregator data.empty_table (Count_Empty 0 test_name) test_name data.empty_table.row_count + test_aggregator data.empty_table (Count_Empty 0 test_name) test_name data.empty_table.row_count group_builder.specify "should be able to count non empties in a set of Texts" <| test_aggregator data.simple_table (Count_Not_Empty -1) "Count Not Empty text" 2 test_aggregator data.simple_table (Count_Not_Empty -1 test_name) test_name 2 test_aggregator data.simple_table (Count_Not_Empty "text" test_name) test_name 2 - # TODO [RW] Re-enable this once #6281 is implemented. - # test_aggregator data.empty_table (Count_Not_Empty 0 test_name) test_name data.empty_table.row_count + test_aggregator data.empty_table (Count_Not_Empty 0 test_name) test_name data.empty_table.row_count group_builder.specify "should be able to total a set of values" <| test_aggregator data.simple_table (Sum -2) "Sum float" 12.1 test_aggregator data.simple_table (Sum -2 test_name) test_name 12.1 test_aggregator data.simple_table (Sum "float" test_name) test_name 12.1 - # TODO [RW] Re-enable this once #6281 is implemented. - # test_aggregator data.empty_table (Sum 0 test_name) test_name Nothing + test_aggregator data.empty_table (Sum 0 test_name) test_name Nothing group_builder.specify "should be able to average a set of values" <| test_aggregator data.simple_table (Average -2) "Average float" 3.025 0.000001 test_aggregator data.simple_table (Average -2 test_name) test_name 3.025 0.000001 test_aggregator data.simple_table (Average "float" test_name) test_name 3.025 0.000001 - # TODO [RW] Re-enable this once #6281 is implemented. - # test_aggregator data.empty_table (Average 0 test_name) test_name Nothing + test_aggregator data.empty_table (Average 0 test_name) test_name Nothing group_builder.specify "should be able to compute standard deviation a set of values" <| test_aggregator data.simple_table (Standard_Deviation -2) "Standard Deviation float" 1.977161 0.000001 test_aggregator data.simple_table (Standard_Deviation -2 test_name) test_name 1.977161 0.000001 test_aggregator data.simple_table (Standard_Deviation "float" test_name) test_name 1.977161 0.000001 - # TODO [RW] Re-enable this once #6281 is implemented. - # test_aggregator data.empty_table (Standard_Deviation 0 test_name) test_name Nothing + test_aggregator data.empty_table (Standard_Deviation 0 test_name) test_name Nothing group_builder.specify "should be able to compute standard deviation of a population a set of values" <| test_aggregator data.simple_table (Standard_Deviation -2 population=True) "Standard Deviation float" 1.712271 0.000001 test_aggregator data.simple_table (Standard_Deviation -2 test_name population=True) test_name 1.712271 0.000001 test_aggregator data.simple_table (Standard_Deviation "float" test_name population=True) test_name 1.712271 0.000001 - # TODO [RW] Re-enable this once #6281 is implemented. - # test_aggregator data.empty_table (Standard_Deviation 0 test_name population=True) test_name Nothing + test_aggregator data.empty_table (Standard_Deviation 0 test_name population=True) test_name Nothing group_builder.specify "should be able to compute median a set of values" <| test_aggregator data.simple_table (Median -2) "Median float" 2.75 0.000001 test_aggregator data.simple_table (Median -2 test_name) test_name 2.75 0.000001 test_aggregator data.simple_table (Median "float" test_name) test_name 2.75 0.000001 - # TODO [RW] Re-enable this once #6281 is implemented. - # test_aggregator data.empty_table (Median 0 test_name) test_name Nothing + test_aggregator data.empty_table (Median 0 test_name) test_name Nothing group_builder.specify "should be able to compute first of a set of values including missing" <| test_aggregator data.simple_table (First 1 ignore_nothing=False) "First is_valid" Nothing @@ -139,8 +132,7 @@ add_specs suite_builder = suite_builder.group "Aggregate Columns" group_builder- test_aggregator data.simple_table (Concatenate -1 "" ',' '[' ']' '"') "Concatenate text" '[A,"",,"B,C",]' test_aggregator data.simple_table (Concatenate -1 test_name) test_name 'AB,C' test_aggregator data.simple_table (Concatenate "text" test_name ',') test_name 'A,,,B,C,' - # TODO [RW] Re-enable this once #6281 is implemented. - # test_aggregator data.empty_table (Concatenate 0 test_name) test_name Nothing + test_aggregator data.empty_table (Concatenate 0 test_name) test_name Nothing group_builder.specify "should be able to count distinct items on a single set of values" <| test_aggregator data.simple_table (Count_Distinct 0) "Count Distinct count" 4 @@ -171,15 +163,13 @@ add_specs suite_builder = suite_builder.group "Aggregate Columns" group_builder- test_aggregator data.simple_table (Shortest -1) "Shortest text" "" test_aggregator data.simple_table (Shortest -1 test_name) test_name "" test_aggregator data.simple_table (Shortest "text" test_name) test_name "" - # TODO [RW] Re-enable this once #6281 is implemented. - # test_aggregator data.empty_table (Shortest 0 test_name) test_name Nothing + test_aggregator data.empty_table (Shortest 0 test_name) test_name Nothing group_builder.specify "should be able to get the longest of a set of texts" <| test_aggregator data.simple_table (Longest -1) "Longest text" "B,C" test_aggregator data.simple_table (Longest -1 test_name) test_name "B,C" test_aggregator data.simple_table (Longest "text" test_name) test_name "B,C" - # TODO [RW] Re-enable this once #6281 is implemented. - # test_aggregator data.empty_table (Longest 0 test_name) test_name Nothing + test_aggregator data.empty_table (Longest 0 test_name) test_name Nothing group_builder.specify "should be able to get the mode of a set of numbers" <| mode_table = Table.new [["tests", [1,2,3,4,2,4,1,2,3,4,2,1,3,5,2,1,2,4,5,2,1,2,3,5,6,1,2,2]]] @@ -200,4 +190,3 @@ main filter=Nothing = suite = Test.build suite_builder-> add_specs suite_builder suite.run_with_filter filter - From bebe7f2e473e101b4731f4389d54be0c4abee9ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 2 Dec 2024 14:59:48 +0100 Subject: [PATCH 17/42] fix a tset --- .../src/Common_Table_Operations/Join/Union_Spec.enso | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Table_Tests/src/Common_Table_Operations/Join/Union_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Join/Union_Spec.enso index b5807b118f34..f318d8ccf454 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Join/Union_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Join/Union_Spec.enso @@ -539,7 +539,7 @@ run_union_tests group_builder setup call_union = t4 = table_builder [["A", ['a', 'b']]] t5 = table_builder [["A", [Nothing]]] - t1.at "A" . value_type . should_equal Value_Type.Mixed + t1.at "A" . value_type . should_equal Value_Type.Null setup.expect_integer_type <| t2.at "A" t = call_union [t1, t2, t3, t4, t5] From ce8bcdaca8baceb37a47d72b22aa5caa642298d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 2 Dec 2024 14:59:59 +0100 Subject: [PATCH 18/42] fix a typo - else in wrong place... --- .../java/org/enso/table/data/column/builder/LongBuilder.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/LongBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/LongBuilder.java index fa716d716e94..de4a5f677ede 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/LongBuilder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/LongBuilder.java @@ -110,14 +110,14 @@ public void appendBulkStorage(Storage storage) { data[currentSize++] = ToIntegerStorageConverter.booleanAsLong(boolStorage.getItem(i)); } } - } else if (storage.getType() instanceof NullType) { - appendNulls(storage.size()); } else { throw new IllegalStateException( "Unexpected storage implementation for type BOOLEAN: " + storage + ". This is a bug in the Table library."); } + } else if (storage.getType() instanceof NullType) { + appendNulls(storage.size()); } else { throw new StorageTypeMismatchException(getType(), storage.getType()); } From b4cde03e6ae9da89e0fdef314b55b33524408382 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 2 Dec 2024 15:00:18 +0100 Subject: [PATCH 19/42] fmt --- .../enso/table/data/column/builder/Builder.java | 3 +-- .../table/data/column/builder/NullBuilder.java | 5 +++-- .../operation/cast/ToBigDecimalConverter.java | 3 ++- .../operation/cast/ToBigIntegerConverter.java | 3 ++- .../operation/cast/ToBooleanStorageConverter.java | 3 ++- .../operation/cast/ToDateStorageConverter.java | 3 ++- .../cast/ToDateTimeStorageConverter.java | 3 ++- .../operation/cast/ToFloatStorageConverter.java | 3 ++- .../operation/cast/ToIntegerStorageConverter.java | 3 ++- .../operation/cast/ToTextStorageConverter.java | 3 ++- .../cast/ToTimeOfDayStorageConverter.java | 3 ++- .../table/data/column/storage/NullStorage.java | 15 ++++++++------- .../data/column/storage/type/StorageType.java | 1 - 13 files changed, 30 insertions(+), 21 deletions(-) diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/Builder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/Builder.java index c66eda484540..800999b43e0f 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/Builder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/Builder.java @@ -1,5 +1,6 @@ package org.enso.table.data.column.builder; +import java.util.Objects; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.column.storage.type.AnyObjectType; import org.enso.table.data.column.storage.type.BigDecimalType; @@ -15,8 +16,6 @@ import org.enso.table.data.column.storage.type.TimeOfDayType; import org.enso.table.problems.ProblemAggregator; -import java.util.Objects; - /** A builder for creating columns dynamically. */ public abstract class Builder { /** diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/NullBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/NullBuilder.java index ecced32b5501..b74c75f1a1d4 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/NullBuilder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/NullBuilder.java @@ -13,7 +13,7 @@ public NullBuilder() {} @Override public void appendNoGrow(Object o) { if (o != null) { - throw new IllegalArgumentException("NullBuilder can only append nulls, but got "+o); + throw new IllegalArgumentException("NullBuilder can only append nulls, but got " + o); } length++; @@ -35,7 +35,8 @@ public void appendBulkStorage(Storage storage) { if (!(storage.getType() instanceof NullType)) { for (int i = 0; i < storage.size(); i++) { if (!storage.isNothing(i)) { - throw new IllegalArgumentException("NullBuilder can only append nulls, but got "+storage.getItemBoxed(i)); + throw new IllegalArgumentException( + "NullBuilder can only append nulls, but got " + storage.getItemBoxed(i)); } } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToBigDecimalConverter.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToBigDecimalConverter.java index 9998e8115827..d3cb6035323e 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToBigDecimalConverter.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToBigDecimalConverter.java @@ -26,7 +26,8 @@ public Storage cast(Storage storage, CastProblemAggregator proble return convertBigIntegerStorage(bigIntegerStorage, problemAggregator); } else if (storage instanceof BoolStorage boolStorage) { return convertBoolStorage(boolStorage, problemAggregator); - } else if (storage.getType() instanceof AnyObjectType || storage.getType() instanceof NullType) { + } else if (storage.getType() instanceof AnyObjectType + || storage.getType() instanceof NullType) { return castFromMixed(storage, problemAggregator); } else { throw new IllegalStateException( diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToBigIntegerConverter.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToBigIntegerConverter.java index cfcc0eb0fd3e..352d8e44de7b 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToBigIntegerConverter.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToBigIntegerConverter.java @@ -26,7 +26,8 @@ public Storage cast(Storage storage, CastProblemAggregator proble return convertBoolStorage(boolStorage, problemAggregator); } else if (storage instanceof BigDecimalStorage bigDecimalStorage) { return convertBigDecimalStorage(bigDecimalStorage, problemAggregator); - } else if (storage.getType() instanceof AnyObjectType || storage.getType() instanceof NullType) { + } else if (storage.getType() instanceof AnyObjectType + || storage.getType() instanceof NullType) { return castFromMixed(storage, problemAggregator); } else { throw new IllegalStateException( diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToBooleanStorageConverter.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToBooleanStorageConverter.java index d57864ec23fb..253c9233d8b1 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToBooleanStorageConverter.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToBooleanStorageConverter.java @@ -12,7 +12,8 @@ public class ToBooleanStorageConverter implements StorageConverter { public Storage cast(Storage storage, CastProblemAggregator problemAggregator) { if (storage instanceof BoolStorage boolStorage) { return boolStorage; - } else if (storage.getType() instanceof AnyObjectType || storage.getType() instanceof NullType) { + } else if (storage.getType() instanceof AnyObjectType + || storage.getType() instanceof NullType) { return castFromMixed(storage, problemAggregator); } else { throw new IllegalStateException( diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToDateStorageConverter.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToDateStorageConverter.java index b23000d5f37f..b183543914ce 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToDateStorageConverter.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToDateStorageConverter.java @@ -17,7 +17,8 @@ public Storage cast(Storage storage, CastProblemAggregator problem return dateStorage; } else if (storage instanceof DateTimeStorage dateTimeStorage) { return convertDateTimeStorage(dateTimeStorage, problemAggregator); - } else if (storage.getType() instanceof AnyObjectType || storage.getType() instanceof NullType) { + } else if (storage.getType() instanceof AnyObjectType + || storage.getType() instanceof NullType) { return castFromMixed(storage, problemAggregator); } else { throw new IllegalStateException( diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToDateTimeStorageConverter.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToDateTimeStorageConverter.java index 1f415fa2632e..ba859de444d3 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToDateTimeStorageConverter.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToDateTimeStorageConverter.java @@ -18,7 +18,8 @@ public Storage cast(Storage storage, CastProblemAggregator pro return dateTimeStorage; } else if (storage instanceof DateStorage dateStorage) { return convertDateStorage(dateStorage, problemAggregator); - } else if (storage.getType() instanceof AnyObjectType || storage.getType() instanceof NullType) { + } else if (storage.getType() instanceof AnyObjectType + || storage.getType() instanceof NullType) { return castFromMixed(storage, problemAggregator); } else { throw new IllegalStateException( diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToFloatStorageConverter.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToFloatStorageConverter.java index 9e0cbc07b816..d7902f4d08a1 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToFloatStorageConverter.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToFloatStorageConverter.java @@ -37,7 +37,8 @@ public Storage cast(Storage storage, CastProblemAggregator problemAgg return convertBigIntegerStorage(bigIntegerStorage, problemAggregator); } else if (storage instanceof BigDecimalStorage bigDecimalStorage) { return convertBigDecimalStorage(bigDecimalStorage, problemAggregator); - } else if (storage.getType() instanceof AnyObjectType || storage.getType() instanceof NullType) { + } else if (storage.getType() instanceof AnyObjectType + || storage.getType() instanceof NullType) { return castFromMixed(storage, problemAggregator); } else { throw new IllegalStateException( diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToIntegerStorageConverter.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToIntegerStorageConverter.java index 087d193b4efd..4be10abd6e0c 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToIntegerStorageConverter.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToIntegerStorageConverter.java @@ -42,7 +42,8 @@ public Storage cast(Storage storage, CastProblemAggregator problemAggre return convertBigIntegerStorage(bigIntegerStorage, problemAggregator); } else if (storage instanceof BigDecimalStorage bigDecimalStorage) { return convertBigDecimalStorage(bigDecimalStorage, problemAggregator); - } else if (storage.getType() instanceof AnyObjectType || storage.getType() instanceof NullType) { + } else if (storage.getType() instanceof AnyObjectType + || storage.getType() instanceof NullType) { return castFromMixed(storage, problemAggregator); } else { throw new IllegalStateException( diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToTextStorageConverter.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToTextStorageConverter.java index 1efc67ddd885..e7f6317b759a 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToTextStorageConverter.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToTextStorageConverter.java @@ -48,7 +48,8 @@ public Storage cast(Storage storage, CastProblemAggregator problemAgg return castDateTimeStorage(dateStorage, this::convertDate, problemAggregator); } else if (storage instanceof DateTimeStorage dateTimeStorage) { return castDateTimeStorage(dateTimeStorage, this::convertDateTime, problemAggregator); - } else if (storage.getType() instanceof AnyObjectType || storage.getType() instanceof NullType) { + } else if (storage.getType() instanceof AnyObjectType + || storage.getType() instanceof NullType) { return castFromMixed(storage, problemAggregator); } else { throw new IllegalStateException( diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToTimeOfDayStorageConverter.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToTimeOfDayStorageConverter.java index 5d869efa3fdc..58a1b664ea08 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToTimeOfDayStorageConverter.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToTimeOfDayStorageConverter.java @@ -17,7 +17,8 @@ public Storage cast(Storage storage, CastProblemAggregator problem return timeOfDayStorage; } else if (storage instanceof DateTimeStorage dateTimeStorage) { return convertDateTimeStorage(dateTimeStorage, problemAggregator); - } else if (storage.getType() instanceof AnyObjectType || storage.getType() instanceof NullType) { + } else if (storage.getType() instanceof AnyObjectType + || storage.getType() instanceof NullType) { return castFromMixed(storage, problemAggregator); } else { throw new IllegalStateException( diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/NullStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/NullStorage.java index 986bb87d4993..0df9c0722258 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/NullStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/NullStorage.java @@ -1,14 +1,13 @@ package org.enso.table.data.column.storage; +import java.util.BitSet; +import java.util.List; import org.enso.table.data.column.operation.map.MapOperationProblemAggregator; import org.enso.table.data.column.storage.type.NullType; import org.enso.table.data.column.storage.type.StorageType; import org.enso.table.data.mask.OrderMask; import org.enso.table.data.mask.SliceRange; -import java.util.BitSet; -import java.util.List; - /** A specialized storage that can be used by columns that contain only null values. */ public class NullStorage extends Storage { private final int size; @@ -43,13 +42,15 @@ public boolean isBinaryOpVectorized(String name) { } @Override - public Storage runVectorizedBinaryMap(String name, Object argument, MapOperationProblemAggregator problemAggregator) { - throw new IllegalArgumentException("Operation "+name+" is not vectorized for NullStorage"); + public Storage runVectorizedBinaryMap( + String name, Object argument, MapOperationProblemAggregator problemAggregator) { + throw new IllegalArgumentException("Operation " + name + " is not vectorized for NullStorage"); } @Override - public Storage runVectorizedZip(String name, Storage argument, MapOperationProblemAggregator problemAggregator) { - throw new IllegalArgumentException("Operation "+name+" is not vectorized for NullStorage"); + public Storage runVectorizedZip( + String name, Storage argument, MapOperationProblemAggregator problemAggregator) { + throw new IllegalArgumentException("Operation " + name + " is not vectorized for NullStorage"); } @Override diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/type/StorageType.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/type/StorageType.java index 59e43f95de92..bf8abe23ceab 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/type/StorageType.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/type/StorageType.java @@ -6,7 +6,6 @@ import java.time.LocalDateTime; import java.time.LocalTime; import java.time.ZonedDateTime; - import org.enso.base.polyglot.NumericConverter; /** From 6d016f05f6b3a378b4a458508a78aea23cc73d80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 2 Dec 2024 17:16:33 +0100 Subject: [PATCH 20/42] fix parser - empty column gets Null type --- .../java/org/enso/table/parsing/TypeInferringParser.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/std-bits/table/src/main/java/org/enso/table/parsing/TypeInferringParser.java b/std-bits/table/src/main/java/org/enso/table/parsing/TypeInferringParser.java index 51691148abc1..75b1243808a6 100644 --- a/std-bits/table/src/main/java/org/enso/table/parsing/TypeInferringParser.java +++ b/std-bits/table/src/main/java/org/enso/table/parsing/TypeInferringParser.java @@ -2,6 +2,7 @@ import org.enso.table.data.column.builder.Builder; import org.enso.table.data.column.operation.CountNothing; +import org.enso.table.data.column.storage.NullStorage; import org.enso.table.data.column.storage.Storage; import org.enso.table.parsing.problems.CommonParseProblemAggregator; import org.enso.table.parsing.problems.ParseProblemAggregator; @@ -43,10 +44,10 @@ public Object parseSingleValue(String text, ParseProblemAggregator problemAggreg public Storage parseColumn( Storage sourceStorage, CommonParseProblemAggregator problemAggregator) { // If there are no values, the Auto parser would guess some random type (the first one that is - // checked). Instead, we just return the empty column unchanged. + // checked). Instead, we return a Null-type column. boolean hasNoValues = (sourceStorage.size() == 0) || CountNothing.allNothing(sourceStorage); if (hasNoValues) { - return fallbackParser.parseColumn(sourceStorage, problemAggregator); + return new NullStorage(sourceStorage.size()); } Context context = Context.getCurrent(); From a1d17745dc094d3ce9e501880dd1f517f46b7283 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 2 Dec 2024 19:57:51 +0100 Subject: [PATCH 21/42] WIP read many edge cases --- .../lib/Standard/Base/0.0.0-dev/src/Data.enso | 10 ++- .../src/Data/Read/Many_Files_List.enso | 9 +++ .../0.0.0-dev/src/Data/Read/Return_As.enso | 18 ++++- .../Base/0.0.0-dev/src/Errors/Common.enso | 10 +++ .../Read_Many_As_Merged_Table_Strategy.enso | 10 ++- .../src/Internal/Read_Many_Helpers.enso | 7 +- .../Base_Tests/src/System/File_Read_Spec.enso | 13 ++-- .../Conversion_Spec.enso | 5 ++ test/Table_Tests/src/IO/Read_Many_Spec.enso | 73 ++++++++++--------- 9 files changed, 104 insertions(+), 51 deletions(-) diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data.enso index 64a3c3cdcbf9..f510d4cdfc03 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data.enso @@ -4,10 +4,12 @@ import project.Data.Read.Many_Files_List.Many_Files_List import project.Data.Read.Return_As.Return_As import project.Data.Text.Encoding.Encoding import project.Data.Text.Text +import project.Data.Vector.No_Wrap import project.Data.Vector.Vector import project.Enso_Cloud.Data_Link.Data_Link import project.Enso_Cloud.Data_Link_Helpers import project.Error.Error +import project.Errors.Common.Failed_To_Load import project.Errors.Common.Missing_Argument import project.Errors.File_Error.File_Error import project.Errors.Illegal_Argument.Illegal_Argument @@ -144,9 +146,11 @@ read path=(Missing_Argument.throw "path") format=Auto_Detect (on_problems : Prob read_many : Many_Files_List -> File_Format -> Return_As -> Problem_Behavior -> Any ! File_Error read_many (paths : Many_Files_List = Missing_Argument.throw "paths") format=Auto_Detect return=..As_Merged_Table (on_problems : Problem_Behavior = ..Report_Warning) = return_as = Return_As.resolve return - loaded_objects = paths.paths_to_load.map on_problems=on_problems path-> - Data.read path format on_problems - return_as.make_return paths loaded_objects on_problems + if paths.paths_to_load.contains Nothing then Error.throw (Illegal_Argument.Error "The list of paths to load should not contain Nothing. Use `filter` with `..Not_Nothing` to filter out missing entries before calling `read_many`.") else + loaded_objects = paths.paths_to_load.map on_problems=No_Wrap.Value path-> + Data.read path format on_problems . catch Any error-> + Failed_To_Load.Warning path error + return_as.make_return paths loaded_objects on_problems ## ALIAS load text, open text GROUP Input diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Read/Many_Files_List.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Read/Many_Files_List.enso index 22606fe6c4a9..dd9ef4c76b06 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Read/Many_Files_List.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Read/Many_Files_List.enso @@ -7,6 +7,15 @@ import project.Data.Vector.Vector used in `Data.read_many`. type Many_Files_List ## PRIVATE + Arguments: + - original_value: The original value that represents a list of files. + Some return modes may use it as it contains more information than just + list of files (e.g. it can be a source table). + - paths_to_load: A vector of paths to load. This is the fallback that can + be used by any return mode if it does not recognize the original value. + The vector is expected to contain values that can be passed into + `Data.read` (so it can be Text, File, URI or any other kind of file - + e.g. S3_File). Value original_value paths_to_load:Vector ## PRIVATE diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Read/Return_As.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Read/Return_As.enso index eee1b60c541b..b6560c916462 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Read/Return_As.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Read/Return_As.enso @@ -3,10 +3,12 @@ import project.Data.Read.Many_Files_List.Many_Files_List import project.Data.Text.Text import project.Data.Vector.Vector import project.Error.Error +import project.Errors.Common.Failed_To_Load import project.Errors.Common.Type_Error import project.Errors.Illegal_Argument.Illegal_Argument import project.Errors.Problem_Behavior.Problem_Behavior import project.Function.Function +import project.Meta import project.Metadata.Display import project.Metadata.Widget import project.Nothing.Nothing @@ -33,7 +35,7 @@ type Return_As to_display_text self -> Text = self.underlying.to_display_text ## PRIVATE - make_return self (input : Many_Files_List) (objects : Vector Any) (on_problems : Problem_Behavior) = + make_return self (input : Many_Files_List) (objects : Vector (Any | Failed_To_Load)) (on_problems : Problem_Behavior) = self.underlying.make_return input objects on_problems ## PRIVATE @@ -77,9 +79,17 @@ type Return_As_Base Panic.catch Type_Error (value:Return_As_Base) _->Nothing ## PRIVATE - make_return self (input : Many_Files_List) (objects : Vector Any) (on_problems : Problem_Behavior) = - _ = [input, on_problems] - objects + make_return self (input : Many_Files_List) (objects : Vector (Any | Failed_To_Load)) (on_problems : Problem_Behavior) = + _ = input + replace_with_nothing_and_propagate objects on_problems + +## PRIVATE + A helper method that takes a Vector and replaces `Failed_To_Load` with `Nothing`, raising them as warnings. +replace_with_nothing_and_propagate (vector : Vector (Any | Failed_To_Load)) (on_problems : Problem_Behavior) = + failed = vector.filter o-> o.is_a Failed_To_Load + if failed.is_empty then vector else + on_problems.attach_problems_before failed <| + vector.map o-> if o.is_a Failed_To_Load then Nothing else o ## PRIVATE Return_As.from (that : Return_As_Base) = diff --git a/distribution/lib/Standard/Base/0.0.0-dev/src/Errors/Common.enso b/distribution/lib/Standard/Base/0.0.0-dev/src/Errors/Common.enso index 9ee140e3320b..2fd69d7f2879 100644 --- a/distribution/lib/Standard/Base/0.0.0-dev/src/Errors/Common.enso +++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Errors/Common.enso @@ -607,3 +607,13 @@ type Floating_Point_Equality "(Error (location = "+location+"))" Floating_Point_Equality.Used_As_Dictionary_Key value -> "(Used_As_Dictionary_Key (value = "+value.to_text+"))" + +## A warning indicating that a file failed to be loaded. +type Failed_To_Load + ## PRIVATE + Warning path cause + + ## PRIVATE + Create a human-readable version of the error. + to_display_text : Text + to_display_text self = "Failed to load file at path: "+self.path.to_display_text+": "+self.cause.to_display_text diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Read_Many_As_Merged_Table_Strategy.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Read_Many_As_Merged_Table_Strategy.enso index 3c7dd6e1f047..31909b00002e 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Read_Many_As_Merged_Table_Strategy.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Read_Many_As_Merged_Table_Strategy.enso @@ -1,4 +1,5 @@ from Standard.Base import all +import Standard.Base.Errors.Common.Failed_To_Load import Standard.Base.Errors.Illegal_Argument.Illegal_Argument import project.Columns_To_Keep.Columns_To_Keep @@ -39,11 +40,18 @@ Read_Many_As_Merged_Table_Strategy.from (that : Vector) = (No_Rows.Warning "The "+path.to_display_text+" loaded as an empty array, so it is not included in the `As_Merged_Table` result of `read_many`.") Read_Many_As_Merged_Table_Strategy.Value callback +## PRIVATE +Read_Many_As_Merged_Table_Strategy.from (that : Failed_To_Load) = + callback path _ _ on_problems = + on_problems.attach_problem_after Read_Many_As_Table_Result.No_Data <| + (No_Rows.Warning "The "+path.to_display_text+" failed to load, so it is not included in the `As_Merged_Table` result of `read_many`. Caused by: "+that.cause.to_display_text) + Read_Many_As_Merged_Table_Strategy.Value callback + ## PRIVATE The fallback strategy for converting a generic object into a table. Custom data types may implement a conversion to override this strategy, like above. Read_Many_As_Merged_Table_Strategy.from (that : Any) = - callback path _ _ on_problems = + callback path _ _ on_problems = Read_Many_As_Table_Result.Table metadata=Nothing data=(_interpret_as_table that path on_problems) Read_Many_As_Merged_Table_Strategy.Value callback diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Read_Many_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Read_Many_Helpers.enso index 40feb019f4aa..e665033bb56b 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Read_Many_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Read_Many_Helpers.enso @@ -2,7 +2,9 @@ private from Standard.Base import all import Standard.Base.Data.Read.Many_Files_List.Many_Files_List +import Standard.Base.Errors.Common.Failed_To_Load import Standard.Base.Errors.Illegal_Argument.Illegal_Argument +from Standard.Base.Data.Read.Return_As import replace_with_nothing_and_propagate import project.Column.Column import project.Errors.Invalid_Value_Type @@ -36,11 +38,12 @@ ensure_column_type_valid_to_be_files_list (column : Column) ~action = if is_expected_type then action else Error.throw (Invalid_Value_Type.Column "Text or Mixed" column.value_type column.name) -make_return (return_shape : Return_As_Table) (input : Many_Files_List) (objects : Vector Any) (on_problems : Problem_Behavior) -> Table = +make_return (return_shape : Return_As_Table) (input : Many_Files_List) (objects : Vector (Any | Failed_To_Load)) (on_problems : Problem_Behavior) -> Table = base_table = _input_as_table input case return_shape of Return_As_Table.With_New_Column -> - _add_objects_column base_table objects + replaced = replace_with_nothing_and_propagate objects on_problems + _add_objects_column base_table replaced Return_As_Table.As_Merged_Table columns_to_keep match_columns -> tables = input.paths_to_load.zip objects path-> object-> strategy = Read_Many_As_Merged_Table_Strategy.from object diff --git a/test/Base_Tests/src/System/File_Read_Spec.enso b/test/Base_Tests/src/System/File_Read_Spec.enso index 96ea66ddce6a..77bbdf31091f 100644 --- a/test/Base_Tests/src/System/File_Read_Spec.enso +++ b/test/Base_Tests/src/System/File_Read_Spec.enso @@ -1,5 +1,6 @@ from Standard.Base import all import Standard.Base.Data.Vector.Map_Error +import Standard.Base.Errors.Common.Failed_To_Load import Standard.Base.Errors.Encoding_Error.Encoding_Error import Standard.Base.Errors.File_Error.File_Error import Standard.Base.Errors.Illegal_Argument.Illegal_Argument @@ -126,11 +127,7 @@ add_specs suite_builder = group_builder.specify "should allow to Report_Error if any file fails to load" <| r1 = Data.read_many three_files return=..As_Vector on_problems=..Report_Error # The error reports as File_Error - r1.should_fail_with File_Error - # But it's actually Map_Error with index metadata - r1.should_fail_with unwrap_errors=False Map_Error - r1.catch.index . should_equal 1 - r1.catch.inner_error.should_be_a File_Error.Not_Found + r1.should_fail_with Failed_To_Load group_builder.specify "should allow to Ignore errors if any file fails to load" <| r1 = Data.read_many three_files return=..As_Vector on_problems=..Ignore @@ -140,13 +137,17 @@ add_specs suite_builder = group_builder.specify "should allow to continue loading if errors are encountered, but report them as warnings" <| r1 = Data.read_many three_files return=..As_Vector on_problems=..Report_Warning r1.should_equal [js_object, Nothing, "Hello World!"] - Problems.expect_only_warning File_Error r1 + Problems.expect_only_warning Failed_To_Load r1 group_builder.specify "should return empty vector if no files were provided" <| r1 = Data.read_many [] return=..As_Vector r1.should_equal [] Problems.assume_no_problems r1 + group_builder.specify "should raise an error if input vector contains Nothing" <| + r1 = Data.read_many [Nothing] return=..As_Vector + r1.should_fail_with Illegal_Argument + main filter=Nothing = suite = Test.build suite_builder-> add_specs suite_builder diff --git a/test/Table_Tests/src/Common_Table_Operations/Conversion_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Conversion_Spec.enso index fb90cec709ea..d1f675752d68 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Conversion_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Conversion_Spec.enso @@ -352,6 +352,11 @@ add_conversion_specs suite_builder setup = r1 = t.at "X" . cast Value_Type.Boolean r1.should_fail_with Illegal_Argument + group_builder.specify "does not allow to cast a column to Null type" <| + t = table_builder [["X", [1, 2]]] + r1 = t.at "X" . cast Value_Type.Null + r1.should_fail_with Illegal_Argument + group_builder.specify "should report an error pointing to the Table.parse method where applicable" <| t = table_builder [["X", ["1", "2", "3"]]] r1 = t.at "X" . cast Value_Type.Integer diff --git a/test/Table_Tests/src/IO/Read_Many_Spec.enso b/test/Table_Tests/src/IO/Read_Many_Spec.enso index 51557df19f1f..66196ce01623 100644 --- a/test/Table_Tests/src/IO/Read_Many_Spec.enso +++ b/test/Table_Tests/src/IO/Read_Many_Spec.enso @@ -1,4 +1,5 @@ from Standard.Base import all +import Standard.Base.Errors.Common.Failed_To_Load import Standard.Base.Errors.Illegal_Argument.Illegal_Argument from Standard.Table import all @@ -160,7 +161,6 @@ add_specs suite_builder = '"str"'.write (base_dir / "7_js_string.json") files = Data.list base_dir . sort on=(.name) - IO.println (Meta.type_of files.first) r = Data.read_many files r.should_be_a Table @@ -320,43 +320,46 @@ add_specs suite_builder = t3.row_count . should_equal 0 t3.column_names . should_equal ["Path", "Value"] - group_builder.specify "works correctly if Nothing entries are encountered in the input column" <| + group_builder.specify "does not allow Nothing in Path column" <| + t = Table.new [["A", [1, 2]], ["Path", [Nothing, Nothing]]] + r1 = Data.read_many t return=..As_Merged_Table + r1.should_fail_with Illegal_Argument + + r2 = Data.read_many t return=..With_New_Column + r2.should_fail_with Illegal_Argument + + r3 = Data.read_many t return=..As_Vector + r3.should_fail_with Illegal_Argument + + r4 = Data.read_many (t.at "Path") + r4.should_fail_with Illegal_Argument + + r5 = Data.read_many [Nothing] + r5.should_fail_with Illegal_Argument + + group_builder.specify "in As_Merged_Table mode, will discard rows associated with files that fail to load" <| with_temp_dir base_dir-> - f = base_dir / "1.csv" - 'x,y,z\n1,2,3'.write f . should_succeed - t = Table.new [["A", [1, 2, 3, 4]], ["Path", [Nothing, f, f, Nothing]]] - r = Data.read_many t return=..As_Merged_Table + 'A,B\n1,2'.write (base_dir / "1_table.csv") + + files = [base_dir / "1_table.csv", base_dir / "nonexistent.csv"] + r = Data.read_many files r.should_be_a Table - within_table r <| - r.column_names . should_equal ["Path", "A", "x", "y", "z"] - r.at "Path" . map (p-> p.if_not_nothing p.name) . to_vector . should_equal [Nothing, "1.csv", "1.csv", Nothing] - r.at "A" . to_vector . should_equal [1, 2, 3, 4] - r.at "x" . to_vector . should_equal [Nothing, 1, 1, Nothing] - r.at "y" . to_vector . should_equal [Nothing, 2, 2, Nothing] - r.at "z" . to_vector . should_equal [Nothing, 3, 3, Nothing] - - r2 = Data.read_many (t.at "Path") return=..As_Merged_Table + r.row_count . should_equal 1 + r.at "Path" . map .name . to_vector . should_equal ["1_table.csv"] + + w = Problems.expect_only_warning No_Rows r + w.to_display_text . should_contain "nonexistent.csv" + w.to_display_text . should_contain "does not exist." + + r2 = Data.read_many files return=..With_New_Column r2.should_be_a Table - within_table r2 <| - r2.column_names . should_equal ["Path", "x", "y", "z"] - r2.at "Path" . map .name . to_vector . should_equal ["1.csv", "1.csv"] - r.at "Path" . map (p-> p.if_not_nothing p.name) . to_vector . should_equal [Nothing, "1.csv", "1.csv", Nothing] - r2.at "x" . to_vector . should_equal [Nothing, 1, 1, Nothing] - - v3 = Data.read_many (t.at "Path") return=..As_Vector - v3.at 0 . should_equal Nothing - v3.at 1 . should_be_a Table - v3.at 2 . should_be_a Table - v3.at 3 . should_equal Nothing - - all_null = Column.from_vector "C" [Nothing] - r4 = Data.read_many all_null return=..As_Vector - r4.at 0 . should_equal Nothing - - r5 = Data.read_many all_null return=..As_Merged_Table - r5.should_be_a Table - r5.column_names . should_equal ["C"] - r5.at "C" . to_vector . should_equal [Nothing] + r2.row_count . should_equal 2 + r2.at "Path" . map .name . to_vector . should_equal ["1_table.csv", "nonexistent.csv"] + r2.at "Value" . at 0 . should_be_a Table + r2.at "Value" . at 1 . should_equal Nothing + + w2 = Problems.expect_only_warning Failed_To_Load r2 + w2.to_display_text . should_contain "nonexistent.csv" group_builder.specify "should have sane behaviour if all files are weird" <| with_temp_dir base_dir-> From 643e3247b5812fb6cb4594c19a458ad22a7e409a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 2 Dec 2024 20:07:56 +0100 Subject: [PATCH 22/42] update spec of null ops? --- .../Expression_Spec.enso | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso index d01ff5e0df13..d966dd75c350 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso @@ -167,17 +167,18 @@ add_expression_specs suite_builder detailed setup = group_builder.specify "a null column may fail typechecks that expect a concrete type" <| t = table_builder [["X", [1, 2, 3]]] c = t.evaluate_expression "null" - (c + c) . should_fail_with Illegal_Argument - (c - c) . should_fail_with Invalid_Value_Type - (c.starts_with "X") . should_fail_with Invalid_Value_Type - c.not . should_fail_with Invalid_Value_Type - - t.evaluate_expression "not(Nothing)" . should_fail_with Invalid_Value_Type - t.evaluate_expression "Nothing + Nothing" . should_fail_with Illegal_Argument - t.evaluate_expression "Nothing * Nothing" . should_fail_with Invalid_Value_Type - - t.evaluate_expression "[X] + Nothing" . to_vector . should_equal [Nothing, Nothing, Nothing] - t.evaluate_expression "Nothing + [X]" . should_fail_with Illegal_Argument + nulls = [Nothing, Nothing, Nothing] + c.not . to_vector . should_equal nulls + (c + c) . to_vector . should_equal nulls + (c - c) . to_vector . should_equal nulls + (c.starts_with "X") . to_vector . should_equal nulls + + t.evaluate_expression "not(Nothing)" . to_vector . should_equal nulls + t.evaluate_expression "Nothing + Nothing" . to_vector . should_equal nulls + t.evaluate_expression "Nothing * Nothing" . to_vector . should_equal nulls + + t.evaluate_expression "[X] + Nothing" . to_vector . should_equal nulls + t.evaluate_expression "Nothing + [X]" . to_vector . should_equal nulls suite_builder.group prefix+"Expression Date and Time literals" group_builder-> specify_test "should be able to add a date or time column" group_builder pending=pending_datetime expression_test-> From 536068f1c9ef49b010debaffdbd0b1d37fc08707 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Mon, 2 Dec 2024 20:08:17 +0100 Subject: [PATCH 23/42] update some tests --- .../Table_Tests/src/Formatting/Parse_Values_Spec.enso | 11 +++++------ .../src/In_Memory/Split_Tokenize_Spec.enso | 10 +++++----- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso b/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso index a7f0669b17a4..efb095606b27 100644 --- a/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso +++ b/test/Table_Tests/src/Formatting/Parse_Values_Spec.enso @@ -613,7 +613,7 @@ add_specs suite_builder = r6 = (c6.drop 1).parse r6.to_vector . should_equal [] - r6.value_type . should_equal Value_Type.Char + r6.value_type . should_equal Value_Type.Null Problems.assume_no_problems r6 r7 = c7.parse @@ -622,7 +622,7 @@ add_specs suite_builder = Problems.assume_no_problems r7 r8 = c8.drop 1 . parse - r8.value_type . should_equal Value_Type.Char + r8.value_type . should_equal Value_Type.Null r8.to_vector . should_equal [Nothing, Nothing, Nothing] Problems.assume_no_problems r8 @@ -647,12 +647,12 @@ add_specs suite_builder = c1 = Column.from_vector "A" ["1", "2", "3"] Test.expect_panic Type_Error (c1.parse type=Nothing) - group_builder.specify "should return unchanged if all are Nothing or no rows" <| + group_builder.specify "should return Null column if all are Nothing or no rows" <| c1 = Column.from_vector "A" [Nothing, Nothing, Nothing] Value_Type.Char - c1.parse.value_type . should_equal Value_Type.Char + c1.parse.value_type . should_equal Value_Type.Null c2 = Column.from_vector "A" [] Value_Type.Char - c2.parse.value_type . should_equal Value_Type.Char + c2.parse.value_type . should_equal Value_Type.Null group_builder.specify "should error if the input column is not text" <| c1 = Column.from_vector "A" [1, 2, 3] @@ -666,4 +666,3 @@ main filter=Nothing = suite = Test.build suite_builder-> add_specs suite_builder suite.run_with_filter filter - diff --git a/test/Table_Tests/src/In_Memory/Split_Tokenize_Spec.enso b/test/Table_Tests/src/In_Memory/Split_Tokenize_Spec.enso index bfdb332f3199..c1085b840203 100644 --- a/test/Table_Tests/src/In_Memory/Split_Tokenize_Spec.enso +++ b/test/Table_Tests/src/In_Memory/Split_Tokenize_Spec.enso @@ -390,19 +390,19 @@ add_specs suite_builder = group_builder.specify "empty table" <| t = Table.from_rows ["foo", "bar", "baz"] [["x", "a", "y"]] . take 0 - expected = Table.from_rows ["foo", "bar", "baz"] [["x", "a", "y"]] . take 0 + expected = Table.from_rows ["foo", "bar", "baz"] [] actual = t.parse_to_columns "bar" "\d+" actual.should_equal expected group_builder.specify "empty table, with regex groups" <| t = Table.from_rows ["foo", "bar", "baz"] [["x", "a", "y"]] . take 0 - expected = Table.from_rows ["foo", "bar 1", "bar 2", "baz"] [["x", "a", "a", "y"]] . take 0 + expected = Table.from_rows ["foo", "bar 1", "bar 2", "baz"] [] actual = t.parse_to_columns "bar" "(\d)(\d)" actual.should_equal expected group_builder.specify "empty table, with named and unnamed regex groups" <| t = Table.from_rows ["foo", "bar", "baz"] [["x", "a", "y"]] . take 0 - expected = Table.from_rows ["foo", "quux", "bar 1", "foo 1", "bar 2", "baz"] [["x", "a", "a", "a", "a", "y"]] . take 0 + expected = Table.from_rows ["foo", "quux", "bar 1", "foo 1", "bar 2", "baz"] [] actual = t.parse_to_columns "bar" "(?)(\d)(?\d)(\d)" actual.should_equal expected @@ -419,13 +419,13 @@ add_specs suite_builder = group_builder.specify "input with no matches, with regex groups" <| t = Table.from_rows ["foo", "bar", "baz"] [["x", "a", "y"]] - expected = Table.from_rows ["foo", "bar 1", "bar 2", "baz"] [["x", Nothing, Nothing, "y"], ["", "", "", ""]] . take 1 + expected = Table.from_rows ["foo", "bar 1", "bar 2", "baz"] [["x", Nothing, Nothing, "y"]] actual = t.parse_to_columns "bar" "(\d)(\d)" actual.should_equal expected group_builder.specify "input with no matches, with named and unnamed regex groups" <| t = Table.from_rows ["foo", "bar", "baz"] [["x", "a", "y"]] - expected = Table.from_rows ["foo", "quux", "bar 1", "foo 1", "bar 2", "baz"] [["x", Nothing, Nothing, Nothing, Nothing, "y"], ["", "", "", "", "", ""]] . take 1 + expected = Table.from_rows ["foo", "quux", "bar 1", "foo 1", "bar 2", "baz"] [["x", Nothing, Nothing, Nothing, Nothing, "y"]] actual = t.parse_to_columns "bar" "(?)(\d)(?\d)(\d)" actual.should_equal expected From f8be248a4614710eb4013aa6e73af73f8f2a862e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Tue, 3 Dec 2024 14:20:03 +0100 Subject: [PATCH 24/42] from_repeated_item test --- test/Table_Tests/src/In_Memory/Column_Spec.enso | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/Table_Tests/src/In_Memory/Column_Spec.enso b/test/Table_Tests/src/In_Memory/Column_Spec.enso index 025c47abb05f..5f89236f3c83 100644 --- a/test/Table_Tests/src/In_Memory/Column_Spec.enso +++ b/test/Table_Tests/src/In_Memory/Column_Spec.enso @@ -258,6 +258,17 @@ add_specs suite_builder = r2 = Column.from_vector "X" [] (Value_Type.Char size=0 variable_length=True) r2.should_fail_with Illegal_Argument + group_builder.specify "should allow to create a column of repeated items" <| + r1 = Column.from_repeated_item "X" 123 3 + r1.length . should_equal 3 + r1.to_vector . should_equal [123, 123, 123] + r1.value_type . should_equal Value_Type.Integer + + r2 = Column.from_repeated_item "X" Nothing 3 + r2.length . should_equal 3 + r2.to_vector . should_equal [Nothing, Nothing, Nothing] + r2.value_type . should_equal Value_Type.Null + group_builder.specify "should be able to serialize to Enso code" <| c1 = Column.from_vector "X" [1, 2] Value_Type.Float c1.pretty . should_equal 'Column.from_vector \'X\' [1.0, 2.0]' From 12ad7472c85f936b532f9c4db92e3165ab31b772 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Tue, 3 Dec 2024 16:20:17 +0100 Subject: [PATCH 25/42] Null type for null column from repeated --- .../src/main/java/org/enso/table/data/table/Column.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/Column.java b/std-bits/table/src/main/java/org/enso/table/data/table/Column.java index c5d868d1772b..af48a0930ed2 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/table/Column.java +++ b/std-bits/table/src/main/java/org/enso/table/data/table/Column.java @@ -5,7 +5,7 @@ import org.enso.base.polyglot.Polyglot_Utils; import org.enso.table.data.column.builder.Builder; import org.enso.table.data.column.builder.InferredBuilder; -import org.enso.table.data.column.builder.MixedBuilder; +import org.enso.table.data.column.storage.NullStorage; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.column.storage.type.StorageType; import org.enso.table.data.mask.OrderMask; @@ -172,9 +172,7 @@ public static Column fromRepeatedItem( Object converted = Polyglot_Utils.convertPolyglotValue(item); if (converted == null) { - Builder builder = new MixedBuilder(repeat); - builder.appendNulls(repeat); - return new Column(name, builder.seal()); + return new Column(name, new NullStorage(repeat)); } StorageType storageType = StorageType.forBoxedItem(converted); From 11b8bbf37dc9890eea229a0b0353c4ccb4653456 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Tue, 3 Dec 2024 16:22:08 +0100 Subject: [PATCH 26/42] add test for Null column edge cases --- .../src/In_Memory/Column_Spec.enso | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/test/Table_Tests/src/In_Memory/Column_Spec.enso b/test/Table_Tests/src/In_Memory/Column_Spec.enso index 5f89236f3c83..c023c48ee73d 100644 --- a/test/Table_Tests/src/In_Memory/Column_Spec.enso +++ b/test/Table_Tests/src/In_Memory/Column_Spec.enso @@ -258,6 +258,29 @@ add_specs suite_builder = r2 = Column.from_vector "X" [] (Value_Type.Char size=0 variable_length=True) r2.should_fail_with Illegal_Argument + group_builder.specify "should allow to create all-null column" <| + # Empty column with no type is also Null + r0 = Column.from_vector "X" [] + r0.length . should_equal 0 + r0.to_vector . should_equal [] + r0.value_type . should_equal Value_Type.Null + + r1 = Column.from_vector "X" [Nothing] + r1.length . should_equal 1 + r1.to_vector . should_equal [Nothing] + r1.value_type . should_equal Value_Type.Null + + # But if a type is specified, it will be of that type. + r2 = Column.from_vector "X" [] Value_Type.Integer + r2.length . should_equal 0 + r2.to_vector . should_equal [] + r2.value_type . should_equal Value_Type.Integer + + r3 = Column.from_vector "X" [Nothing] Value_Type.Float + r3.length . should_equal 1 + r3.to_vector . should_equal [Nothing] + r3.value_type . should_equal Value_Type.Float + group_builder.specify "should allow to create a column of repeated items" <| r1 = Column.from_repeated_item "X" 123 3 r1.length . should_equal 3 From af34a4b416220ee48e8ef1463530d5f3fdcd8e6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Tue, 3 Dec 2024 16:57:40 +0100 Subject: [PATCH 27/42] not --- .../table/data/column/operation/unary/NotOperation.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/unary/NotOperation.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/unary/NotOperation.java index 124ddb87893b..aa7c999471a4 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/unary/NotOperation.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/unary/NotOperation.java @@ -6,7 +6,9 @@ import org.enso.table.data.column.storage.BoolStorage; import org.enso.table.data.column.storage.ColumnBooleanStorage; import org.enso.table.data.column.storage.ColumnStorage; +import org.enso.table.data.column.storage.NullStorage; import org.enso.table.data.column.storage.type.BooleanType; +import org.enso.table.data.column.storage.type.NullType; public class NotOperation extends AbstractUnaryBooleanOperation { public static final String NAME = "not"; @@ -19,7 +21,7 @@ private NotOperation() { @Override public boolean canApply(ColumnStorage storage) { - return storage.getType() instanceof BooleanType; + return storage.getType() instanceof BooleanType || storage.getType() instanceof NullType; } @Override @@ -29,6 +31,10 @@ public ColumnStorage apply( return boolStorage.makeNegated(); } + if (storage.getType() instanceof NullType) { + return new NullStorage(Math.toIntExact(storage.getSize())); + } + var builder = createBuilder(storage, problemAggregator); if (storage instanceof ColumnBooleanStorage booleanStorage) { UnaryOperation.applyOverBooleanStorage( From a4b901e867a90b963bfad589621f84e7a2ac9088 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Tue, 3 Dec 2024 17:15:03 +0100 Subject: [PATCH 28/42] reconciling Null types in various operations --- .../Database/0.0.0-dev/src/DB_Column.enso | 14 +++--- .../Postgres/Postgres_Type_Mapping.enso | 2 +- .../Internal/SQLite/SQLite_Type_Mapping.enso | 2 +- .../src/Internal/Value_Type_Helpers.enso | 44 +++++++++++-------- .../Column_Operations_Spec.enso | 7 ++- .../Expression_Spec.enso | 7 ++- 6 files changed, 43 insertions(+), 33 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Column.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Column.enso index 167c2e1d3674..e136be528c3c 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Column.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/DB_Column.enso @@ -595,12 +595,13 @@ type DB_Column cause a hard error, the value may be truncated or wrap-around etc. + : DB_Column | Any -> DB_Column + self other = - op = case Value_Type_Helpers.resolve_addition_kind self other of - Value_Type_Helpers.Addition_Kind.Numeric_Add -> "ADD_NUMBER" - Value_Type_Helpers.Addition_Kind.Text_Concat -> "ADD_TEXT" - op.if_not_error <| - new_name = self.naming_helper.binary_operation_name "+" self other - self.make_binary_op op other new_name + new_name = self.naming_helper.binary_operation_name "+" self other + case Value_Type_Helpers.resolve_addition_kind self other of + Value_Type_Helpers.Addition_Kind.Numeric_Add -> + self.make_binary_op "ADD_NUMBER" other new_name + Value_Type_Helpers.Addition_Kind.Text_Concat -> + self.make_binary_op "ADD_TEXT" other new_name + Nothing -> self.const Nothing ## ALIAS minus, subtract, time difference GROUP Standard.Base.Operators @@ -630,6 +631,7 @@ type DB_Column self.make_binary_op "-" other Value_Type_Helpers.Subtraction_Kind.Date_Time_Difference -> Error.throw (Unsupported_Database_Operation.Error "Subtracting date/time value") + Nothing -> self.const Nothing ## ALIAS multiply, product, times GROUP Standard.Base.Operators diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Type_Mapping.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Type_Mapping.enso index 23729eed7e12..d3f25cb97151 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Type_Mapping.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Type_Mapping.enso @@ -148,7 +148,7 @@ type Postgres_Type_Mapping simple_types_map = Dictionary.from_vector <| ints = [[Types.SMALLINT, Value_Type.Integer Bits.Bits_16], [Types.BIGINT, Value_Type.Integer Bits.Bits_64], [Types.INTEGER, Value_Type.Integer Bits.Bits_32]] floats = [[Types.DOUBLE, Value_Type.Float Bits.Bits_64], [Types.REAL, Value_Type.Float Bits.Bits_32]] - other = [[Types.DATE, Value_Type.Date], [Types.TIME, Value_Type.Time]] + other = [[Types.DATE, Value_Type.Date], [Types.TIME, Value_Type.Time], [Types.NULL, Value_Type.Null]] ints + floats + other ## PRIVATE diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Type_Mapping.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Type_Mapping.enso index 2dc16d761ac3..1ce4016be368 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Type_Mapping.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Type_Mapping.enso @@ -152,7 +152,7 @@ simple_types_map = Dictionary.from_vector <| numerics = [Types.DECIMAL, Types.NUMERIC] . map x-> [x, default_float] strings = [Types.CHAR, Types.VARCHAR] . map x-> [x, default_text] blobs = [Types.BINARY, Types.BLOB, Types.CLOB] . map x-> [x, Value_Type.Binary] - special_types = [[Types.BOOLEAN, Value_Type.Boolean]] + special_types = [[Types.BOOLEAN, Value_Type.Boolean], [Types.NULL, Value_Type.Null]] ints + floats + numerics + strings + blobs + special_types ## PRIVATE diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Value_Type_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Value_Type_Helpers.enso index 1eafe4789028..226bd5fe3cc0 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Value_Type_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Value_Type_Helpers.enso @@ -210,18 +210,27 @@ type Addition_Kind ## PRIVATE Text_Concat +private resolve_operation_kind arg1:Any arg2:Any operation_name:Text find_operation_kind:(Value_Type -> Any) -> Any | Nothing = + kinds = [arg1, arg2] + . map on_problems=No_Wrap.Value find_operation_kind + . filter ..Not_Nothing + . distinct + if kinds.is_empty then Nothing else + if kinds.length > 1 then Error.throw (Illegal_Argument.Error ("Cannot perform "+operation_name+" on a pair of values of types " + (find_argument_type arg1).to_display_text + " and " + (find_argument_type arg2).to_display_text + ".")) else + kinds.first + ## PRIVATE A helper which resolves if numeric addition or string concatenation should be used when the a `+` operator is used with the two provided types. It will return an error if the provided types are incompatible. -resolve_addition_kind arg1 arg2 = - type_1 = find_argument_type arg1 - type_2 = find_argument_type arg2 - if type_1.is_numeric && (type_2.is_null || type_2.is_numeric) then Addition_Kind.Numeric_Add else - if type_1.is_text && (type_2.is_null || type_2.is_text) then Addition_Kind.Text_Concat else - Error.throw <| Illegal_Argument.Error <| - if type_2.is_null then "Cannot perform addition on a value of type " + type_1.to_display_text + ". Addition can only be performed if the column is of some numeric type or is text." else - "Cannot perform addition on a pair of values of types " + type_1.to_display_text + " and " + type_2.to_display_text + ". Addition can only be performed if both columns are of some numeric type or are both are text." +resolve_addition_kind arg1 arg2 -> Addition_Kind | Nothing = + find_addition_kind arg = + typ = find_argument_type arg + if typ.is_null then Nothing else + if typ.is_text then Addition_Kind.Text_Concat else + if typ.is_numeric then Addition_Kind.Numeric_Add else + raise_unexpected_type "numeric or text" arg + resolve_operation_kind arg1 arg2 "addition" find_addition_kind ## PRIVATE type Subtraction_Kind @@ -235,17 +244,14 @@ type Subtraction_Kind A helper which resolves if numeric subtraction or date-time difference should be used when the a `-` operator is used with the two provided types. It will return an error if the provided types are incompatible. -resolve_subtraction_kind arg1 arg2 = - type_1 = find_argument_type arg1 - type_2 = find_argument_type arg2 - - if type_1.is_numeric && (type_2.is_null || type_2.is_numeric) then Subtraction_Kind.Numeric_Subtract else - case type_1.is_date_or_time of - True -> - if type_2.is_null || (type_2 == type_1) then Subtraction_Kind.Date_Time_Difference else - raise_unexpected_type type_1 arg2 - False -> - raise_unexpected_type "numeric or date/time" arg1 +resolve_subtraction_kind arg1 arg2 -> Subtraction_Kind | Nothing = + find_subtraction_kind arg = + typ = find_argument_type arg + if typ.is_null then Nothing else + if typ.is_numeric then Subtraction_Kind.Numeric_Subtract else + if typ.is_date_or_time then Subtraction_Kind.Date_Time_Difference else + raise_unexpected_type "numeric or date/time" arg + resolve_operation_kind arg1 arg2 "subtraction" find_subtraction_kind ## PRIVATE Checks that both provided arguments have numeric type and runs the action diff --git a/test/Table_Tests/src/Common_Table_Operations/Column_Operations_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Column_Operations_Spec.enso index 9717de67cffe..a99e9d68459d 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Column_Operations_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Column_Operations_Spec.enso @@ -1903,13 +1903,12 @@ add_column_operation_specs suite_builder setup = c = empty.make_constant_column 42 setup.expect_integer_type c - nulls_db_pending = if setup.is_database then "Empty NULL columns are unsupported in the database backends" - group_builder.specify "Should create a column of the correct type on a table with no rows" pending=nulls_db_pending <| + group_builder.specify "Should create a column of the correct type on a table with no rows" <| t = table_builder [["x", ["1", "2", "3"]]] empty = t.take 0 c = empty.make_constant_column Nothing - c.value_type . should_equal Value_Type.Mixed - (empty.set c).at c.name . value_type . should_equal Value_Type.Mixed + c.value_type . should_equal Value_Type.Null + (empty.set c).at c.name . value_type . should_equal Value_Type.Null decimal_db_pending = if setup.is_database then "Decimals are currently not implemented for the Database backend." suite_builder.group prefix+"(Column_Operations_Spec) Decimal" pending=decimal_db_pending group_builder-> diff --git a/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso index d966dd75c350..7448403ff131 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso @@ -175,11 +175,14 @@ add_expression_specs suite_builder detailed setup = t.evaluate_expression "not(Nothing)" . to_vector . should_equal nulls t.evaluate_expression "Nothing + Nothing" . to_vector . should_equal nulls - t.evaluate_expression "Nothing * Nothing" . to_vector . should_equal nulls - t.evaluate_expression "[X] + Nothing" . to_vector . should_equal nulls t.evaluate_expression "Nothing + [X]" . to_vector . should_equal nulls + ## Currently some databases (e.g. Postgres) fail on this with "Operator is not unique" so we accept it may fail + r1 = t.evaluate_expression "Nothing * Nothing" . to_vector + if r1.is_error && setup.is_database then r1.should_fail_with SQL_Error else + r1.should_equal nulls + suite_builder.group prefix+"Expression Date and Time literals" group_builder-> specify_test "should be able to add a date or time column" group_builder pending=pending_datetime expression_test-> expression_test "#2020-12-23#" (Date.new 2020 12 23) From 8cbed1bd2842b833f0a9e993794212e1d20d2677 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Tue, 3 Dec 2024 17:55:49 +0100 Subject: [PATCH 29/42] implement stub ops for NullStorage so that operations on Null columns can work --- .../Standard/Table/0.0.0-dev/src/Column.enso | 1 + .../src/Internal/Value_Type_Helpers.enso | 2 + .../operation/cast/StorageConverter.java | 2 + .../data/column/storage/NullStorage.java | 176 +++++++++++++++++- .../table/data/column/storage/Storage.java | 29 +++ .../org/enso/table/data/table/Column.java | 21 +-- 6 files changed, 206 insertions(+), 25 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Column.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Column.enso index 5e8775aa933e..b6199328f733 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Column.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Column.enso @@ -536,6 +536,7 @@ type Column run_binary_op self fn other new_name _ -> run_vectorized_binary_op self Java_Storage.Maps.SUB other + Nothing -> self.const Nothing ## ALIAS multiply, product, times GROUP Standard.Base.Operators diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Value_Type_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Value_Type_Helpers.enso index 226bd5fe3cc0..27e602284943 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Value_Type_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Value_Type_Helpers.enso @@ -210,6 +210,8 @@ type Addition_Kind ## PRIVATE Text_Concat +## Returns the operation kind based on types of the inputs. + If both inputs are Null, it is impossible to tell the kind, so Nothing is returned and the caller may decide what to do. private resolve_operation_kind arg1:Any arg2:Any operation_name:Text find_operation_kind:(Value_Type -> Any) -> Any | Nothing = kinds = [arg1, arg2] . map on_problems=No_Wrap.Value find_operation_kind diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/StorageConverter.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/StorageConverter.java index 17a9835ac0de..020c012f6a2a 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/StorageConverter.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/StorageConverter.java @@ -9,6 +9,7 @@ import org.enso.table.data.column.storage.type.DateType; import org.enso.table.data.column.storage.type.FloatType; import org.enso.table.data.column.storage.type.IntegerType; +import org.enso.table.data.column.storage.type.NullType; import org.enso.table.data.column.storage.type.StorageType; import org.enso.table.data.column.storage.type.TextType; import org.enso.table.data.column.storage.type.TimeOfDayType; @@ -31,6 +32,7 @@ static StorageConverter fromStorageType(StorageType storageType) { case TimeOfDayType timeOfDayType -> new ToTimeOfDayStorageConverter(); case BigIntegerType bigIntegerType -> new ToBigIntegerConverter(); case BigDecimalType bigDecimalType -> new ToBigDecimalConverter(); + case NullType nullType -> throw new IllegalArgumentException("Cannot cast to Null type."); }; } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/NullStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/NullStorage.java index 0df9c0722258..130b054dbc26 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/NullStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/NullStorage.java @@ -2,15 +2,22 @@ import java.util.BitSet; import java.util.List; +import org.enso.table.data.column.builder.BoolBuilder; +import org.enso.table.data.column.operation.map.BinaryMapOperation; import org.enso.table.data.column.operation.map.MapOperationProblemAggregator; +import org.enso.table.data.column.operation.map.MapOperationStorage; import org.enso.table.data.column.storage.type.NullType; import org.enso.table.data.column.storage.type.StorageType; import org.enso.table.data.mask.OrderMask; import org.enso.table.data.mask.SliceRange; +import org.enso.table.error.UnexpectedColumnTypeException; +import org.enso.table.error.UnexpectedTypeException; +import org.graalvm.polyglot.Value; /** A specialized storage that can be used by columns that contain only null values. */ public class NullStorage extends Storage { private final int size; + private final MapOperationStorage ops = buildOps(); public NullStorage(int size) { this.size = size; @@ -36,26 +43,66 @@ public Void getItemBoxed(int idx) { return null; } + private static MapOperationStorage buildOps() { + MapOperationStorage ops = new MapOperationStorage<>(); + ops.add(new NullOp(Maps.EQ)); + ops.add(new NullOp(Maps.LT)); + ops.add(new NullOp(Maps.LTE)); + ops.add(new NullOp(Maps.GT)); + ops.add(new NullOp(Maps.GTE)); + + ops.add(new NullOp(Maps.MUL)); + ops.add(new NullOp(Maps.ADD)); + ops.add(new NullOp(Maps.SUB)); + ops.add(new NullOp(Maps.DIV)); + ops.add(new NullOp(Maps.MOD)); + ops.add(new NullOp(Maps.POWER)); + + ops.add(new NullAndOp()); + ops.add(new NullOrOp()); + + ops.add(new NullOp(Maps.STARTS_WITH)); + ops.add(new NullOp(Maps.ENDS_WITH)); + ops.add(new NullOp(Maps.CONTAINS)); + ops.add(new NullOp(Maps.LIKE)); + ops.add(new NullOp(Maps.TEXT_LEFT)); + ops.add(new NullOp(Maps.TEXT_RIGHT)); + + ops.add(new CoalescingNullOp(Maps.MIN)); + ops.add(new CoalescingNullOp(Maps.MAX)); + + return ops; + } + @Override public boolean isBinaryOpVectorized(String name) { - return false; + return ops.isSupportedBinary(name); } @Override public Storage runVectorizedBinaryMap( String name, Object argument, MapOperationProblemAggregator problemAggregator) { - throw new IllegalArgumentException("Operation " + name + " is not vectorized for NullStorage"); + return ops.runBinaryMap(name, this, argument, problemAggregator); } @Override public Storage runVectorizedZip( String name, Storage argument, MapOperationProblemAggregator problemAggregator) { - throw new IllegalArgumentException("Operation " + name + " is not vectorized for NullStorage"); + return ops.runZip(name, this, argument, problemAggregator); } @Override - public Storage fillMissingFromPrevious(BoolStorage missingIndicator) { - return this; + public boolean isTernaryOpVectorized(String name) { + return ops.isSupportedTernary(name); + } + + @Override + public Storage runVectorizedTernaryMap( + String name, + Object argument0, + Object argument1, + MapOperationProblemAggregator problemAggregator) { + return ops.runTernaryMap(name, this, argument0, argument1, problemAggregator); } @Override @@ -82,4 +129,123 @@ public Storage appendNulls(int count) { public Storage slice(List ranges) { return new NullStorage(SliceRange.totalLength(ranges)); } + + @Override + public Storage fillMissingFromPrevious(BoolStorage missingIndicator) { + return this; + } + + /** A binary operation that always returns null. */ + private static class NullOp extends BinaryMapOperation { + public NullOp(String name) { + super(name); + } + + @Override + public Storage runBinaryMap( + NullStorage storage, Object arg, MapOperationProblemAggregator problemAggregator) { + // We return the same storage as-is, because all lhs arguments are guaranteed to be null. + return storage; + } + + @Override + public Storage runZip( + NullStorage storage, Storage arg, MapOperationProblemAggregator problemAggregator) { + // We return the same storage as-is, because all lhs arguments are guaranteed to be null. + return storage; + } + } + + /** + * A binary operation that always returns the other argument. + * + *

Useful for implementing operations that should return the other argument when the left-hand + * side is null, e.g. min. + */ + private static class CoalescingNullOp extends BinaryMapOperation { + public CoalescingNullOp(String name) { + super(name); + } + + @Override + public Storage runBinaryMap( + NullStorage storage, Object arg, MapOperationProblemAggregator problemAggregator) { + return Storage.fromRepeatedItem(Value.asValue(arg), storage.size(), problemAggregator); + } + + @Override + public Storage runZip( + NullStorage storage, Storage arg, MapOperationProblemAggregator problemAggregator) { + return arg; + } + } + + private abstract static class BoolAndNullOp extends BinaryMapOperation { + public BoolAndNullOp(String name) { + super(name); + } + + protected abstract Boolean doBool(boolean a); + + @Override + public Storage runBinaryMap( + NullStorage storage, Object arg, MapOperationProblemAggregator problemAggregator) { + if (arg == null) { + return new NullStorage(storage.size()); + } else if (arg instanceof Boolean b) { + return Storage.fromRepeatedItem( + Value.asValue(doBool(b)), storage.size(), problemAggregator); + } else { + throw new UnexpectedTypeException("Boolean", arg.toString()); + } + } + + @Override + public Storage runZip( + NullStorage storage, Storage arg, MapOperationProblemAggregator problemAggregator) { + if (arg instanceof BoolStorage boolStorage) { + BoolBuilder builder = new BoolBuilder(storage.size()); + for (int i = 0; i < storage.size(); i++) { + if (boolStorage.isNothing(i)) { + builder.appendNulls(1); + } else { + builder.append(doBool(boolStorage.getItem(i))); + } + } + return builder.seal(); + } else { + throw new UnexpectedColumnTypeException("Boolean"); + } + } + } + + private static class NullAndOp extends BoolAndNullOp { + public NullAndOp() { + super(Maps.AND); + } + + @Override + protected Boolean doBool(boolean a) { + if (a) { + return null; + } else { + return false; + } + } + } + + private static class NullOrOp extends BoolAndNullOp { + public NullOrOp() { + super(Maps.OR); + } + + @Override + protected Boolean doBool(boolean a) { + if (a) { + return true; + } else { + return null; + } + } + } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java index c9c8c258fdad..52cbc3165594 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java @@ -9,6 +9,7 @@ import org.enso.table.data.column.operation.cast.CastProblemAggregator; import org.enso.table.data.column.operation.cast.StorageConverter; import org.enso.table.data.column.operation.map.MapOperationProblemAggregator; +import org.enso.table.data.column.storage.numeric.LongConstantStorage; import org.enso.table.data.column.storage.numeric.LongStorage; import org.enso.table.data.column.storage.type.IntegerType; import org.enso.table.data.column.storage.type.StorageType; @@ -445,4 +446,32 @@ public final Storage cast( public Object getItemAsObject(long index) { return getItemBoxed((int) index); } + + /** Creates a storage containing a single repeated item. */ + public static Storage fromRepeatedItem( + Value item, int repeat, ProblemAggregator problemAggregator) { + if (repeat < 0) { + throw new IllegalArgumentException("Repeat count must be non-negative."); + } + + Object converted = Polyglot_Utils.convertPolyglotValue(item); + + if (converted == null) { + return new NullStorage(repeat); + } + + if (converted instanceof Long longValue) { + return new LongConstantStorage(longValue, repeat); + } + + StorageType storageType = StorageType.forBoxedItem(converted); + Builder builder = Builder.getForType(storageType, repeat, problemAggregator); + Context context = Context.getCurrent(); + for (int i = 0; i < repeat; i++) { + builder.appendNoGrow(converted); + context.safepoint(); + } + + return builder.seal(); + } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/Column.java b/std-bits/table/src/main/java/org/enso/table/data/table/Column.java index af48a0930ed2..c0f77bead4b8 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/table/Column.java +++ b/std-bits/table/src/main/java/org/enso/table/data/table/Column.java @@ -5,7 +5,6 @@ import org.enso.base.polyglot.Polyglot_Utils; import org.enso.table.data.column.builder.Builder; import org.enso.table.data.column.builder.InferredBuilder; -import org.enso.table.data.column.storage.NullStorage; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.column.storage.type.StorageType; import org.enso.table.data.mask.OrderMask; @@ -165,25 +164,7 @@ public static Column fromItemsNoDateConversion( */ public static Column fromRepeatedItem( String name, Value item, int repeat, ProblemAggregator problemAggregator) { - if (repeat < 0) { - throw new IllegalArgumentException("Repeat count must be non-negative."); - } - - Object converted = Polyglot_Utils.convertPolyglotValue(item); - - if (converted == null) { - return new Column(name, new NullStorage(repeat)); - } - - StorageType storageType = StorageType.forBoxedItem(converted); - Builder builder = Builder.getForType(storageType, repeat, problemAggregator); - Context context = Context.getCurrent(); - for (int i = 0; i < repeat; i++) { - builder.appendNoGrow(converted); - context.safepoint(); - } - - return new Column(name, builder.seal()); + return new Column(name, Storage.fromRepeatedItem(item, repeat, problemAggregator)); } /** From 724ca472369c08ee7d20ee21cdaf7128c9afd954 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Tue, 3 Dec 2024 18:22:23 +0100 Subject: [PATCH 30/42] fixing tests --- .../src/Internal/Value_Type_Helpers.enso | 9 +++++++- .../data/column/storage/NullStorage.java | 21 ++++++++----------- .../Column_Operations_Spec.enso | 4 ++-- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Value_Type_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Value_Type_Helpers.enso index 27e602284943..56ce69fc0bb0 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Value_Type_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Value_Type_Helpers.enso @@ -253,7 +253,14 @@ resolve_subtraction_kind arg1 arg2 -> Subtraction_Kind | Nothing = if typ.is_numeric then Subtraction_Kind.Numeric_Subtract else if typ.is_date_or_time then Subtraction_Kind.Date_Time_Difference else raise_unexpected_type "numeric or date/time" arg - resolve_operation_kind arg1 arg2 "subtraction" find_subtraction_kind + kind = resolve_operation_kind arg1 arg2 "subtraction" find_subtraction_kind + case kind of + # Additional special logic needed: + Subtraction_Kind.Date_Time_Difference -> + types = [arg1, arg2].map find_argument_type . filter (!= Value_Type.Null) . distinct + # If both types are date-time but they differ, we say second one should be the same as first. + if types.length > 1 then raise_unexpected_type types.first arg2 else kind + _ -> kind ## PRIVATE Checks that both provided arguments have numeric type and runs the action diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/NullStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/NullStorage.java index 130b054dbc26..07bf0a7e3b59 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/NullStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/NullStorage.java @@ -10,7 +10,6 @@ import org.enso.table.data.column.storage.type.StorageType; import org.enso.table.data.mask.OrderMask; import org.enso.table.data.mask.SliceRange; -import org.enso.table.error.UnexpectedColumnTypeException; import org.enso.table.error.UnexpectedTypeException; import org.graalvm.polyglot.Value; @@ -203,19 +202,17 @@ public Storage runBinaryMap( @Override public Storage runZip( NullStorage storage, Storage arg, MapOperationProblemAggregator problemAggregator) { - if (arg instanceof BoolStorage boolStorage) { - BoolBuilder builder = new BoolBuilder(storage.size()); - for (int i = 0; i < storage.size(); i++) { - if (boolStorage.isNothing(i)) { - builder.appendNulls(1); - } else { - builder.append(doBool(boolStorage.getItem(i))); - } + BoolBuilder builder = new BoolBuilder(storage.size()); + for (int i = 0; i < storage.size(); i++) { + if (arg.isNothing(i)) { + builder.appendNulls(1); + } else if (arg.getItemBoxed(i) instanceof Boolean bool) { + builder.append(doBool(bool)); + } else { + throw new UnexpectedTypeException("Boolean", arg.getItemBoxed(i).toString()); } - return builder.seal(); - } else { - throw new UnexpectedColumnTypeException("Boolean"); } + return builder.seal(); } } diff --git a/test/Table_Tests/src/Common_Table_Operations/Column_Operations_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Column_Operations_Spec.enso index a99e9d68459d..8aef02b733cd 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Column_Operations_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Column_Operations_Spec.enso @@ -902,8 +902,8 @@ add_column_operation_specs suite_builder setup = y = t.at "Y" z = t.at "Z" - (x + z) . should_fail_with Illegal_Argument - (x + False) . should_fail_with Illegal_Argument + (x + z) . should_fail_with Invalid_Value_Type + (x + False) . should_fail_with Invalid_Value_Type # Mixing text and integers should not be allowed (x + y) . should_fail_with Illegal_Argument From 7b6ce9f8122a48661693cf8ad4b2316153cfa902 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Tue, 3 Dec 2024 18:32:22 +0100 Subject: [PATCH 31/42] type mappings for Postgres & SQLServer --- .../0.0.0-dev/src/Internal/SQLServer_Type_Mapping.enso | 2 ++ .../0.0.0-dev/src/Internal/Snowflake_Type_Mapping.enso | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/distribution/lib/Standard/Microsoft/0.0.0-dev/src/Internal/SQLServer_Type_Mapping.enso b/distribution/lib/Standard/Microsoft/0.0.0-dev/src/Internal/SQLServer_Type_Mapping.enso index e17772517a6f..8caa93925872 100644 --- a/distribution/lib/Standard/Microsoft/0.0.0-dev/src/Internal/SQLServer_Type_Mapping.enso +++ b/distribution/lib/Standard/Microsoft/0.0.0-dev/src/Internal/SQLServer_Type_Mapping.enso @@ -62,6 +62,7 @@ type SQLServer_Type_Mapping True -> SQL_Type.Value Types.BINARY "VARBINARY" size False -> SQL_Type.Value Types.BINARY "BINARY" size Value_Type.Mixed -> Error.throw (Unsupported_Database_Type.Error "Mixed" "SQLServer") + Value_Type.Null -> SQL_Type.Value Types.NULL "NULL" Value_Type.Unsupported_Data_Type type_name underlying_type -> underlying_type.if_nothing <| Error.throw <| Illegal_Argument.Error <| "An unsupported SQL type ["+type_name.to_text+"] cannot be converted into an SQL type because it did not contain the SQL metadata needed to reconstruct it." @@ -101,6 +102,7 @@ type SQLServer_Type_Mapping "varbinary" -> Value_Type.Binary size=sql_type.precision variable_length=True "binary" -> Value_Type.Binary size=sql_type.precision variable_length=False _ -> on_unknown_type sql_type + Types.NULL -> Value_Type.Null _ -> case sql_type.name of "datetimeoffset" -> Value_Type.Date_Time with_timezone=True _ -> on_unknown_type sql_type diff --git a/distribution/lib/Standard/Snowflake/0.0.0-dev/src/Internal/Snowflake_Type_Mapping.enso b/distribution/lib/Standard/Snowflake/0.0.0-dev/src/Internal/Snowflake_Type_Mapping.enso index 678e9fbfe1f3..8d3ebdffb93e 100644 --- a/distribution/lib/Standard/Snowflake/0.0.0-dev/src/Internal/Snowflake_Type_Mapping.enso +++ b/distribution/lib/Standard/Snowflake/0.0.0-dev/src/Internal/Snowflake_Type_Mapping.enso @@ -63,6 +63,7 @@ type Snowflake_Type_Mapping True -> SQL_Type.Value Types.BINARY "binary" False -> SQL_Type.Value Types.BINARY "binary" size Value_Type.Mixed -> Error.throw (Unsupported_Database_Type.Error "Mixed" "Snowflake") + Value_Type.Null -> SQL_Type.Value Types.NULL "NULL" Value_Type.Unsupported_Data_Type type_name underlying_type -> underlying_type.if_nothing <| Error.throw <| Illegal_Argument.Error <| "An unsupported SQL type ["+type_name.to_text+"] cannot be converted into an SQL type because it did not contain the SQL metadata needed to reconstruct it." @@ -159,7 +160,7 @@ type Snowflake_Type_Mapping ## PRIVATE simple_types_map = Dictionary.from_vector <| floats = [[Types.DOUBLE, Value_Type.Float Bits.Bits_64], [Types.REAL, Value_Type.Float Bits.Bits_64]] - other = [[Types.DATE, Value_Type.Date], [Types.TIME, Value_Type.Time], [Types.BOOLEAN, Value_Type.Boolean]] + other = [[Types.DATE, Value_Type.Date], [Types.TIME, Value_Type.Time], [Types.BOOLEAN, Value_Type.Boolean], [Types.NULL, Value_Type.Null]] floats + other ## PRIVATE From ddea3e4064269b45e22814c0c4dc623c0fc5adee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Tue, 3 Dec 2024 18:36:13 +0100 Subject: [PATCH 32/42] test a few more edge cases --- .../Expression_Spec.enso | 27 +++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso index 7448403ff131..260dbca3af09 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso @@ -169,19 +169,28 @@ add_expression_specs suite_builder detailed setup = c = t.evaluate_expression "null" nulls = [Nothing, Nothing, Nothing] c.not . to_vector . should_equal nulls + + ## Currently some databases (e.g. Postgres) fail on this with "Operator is not unique" so we accept it may fail + null_or_sql_fail x = + if x.is_error && setup.is_database then x.should_fail_with SQL_Error else + x.should_equal nulls + (c + c) . to_vector . should_equal nulls (c - c) . to_vector . should_equal nulls - (c.starts_with "X") . to_vector . should_equal nulls + (c * c) . to_vector |> null_or_sql_fail + (c / c) . to_vector |> null_or_sql_fail + (c ^ c) . to_vector |> null_or_sql_fail + (c % c) . to_vector |> null_or_sql_fail + (c.starts_with c) . to_vector . should_equal nulls + (c.ends_with c) . to_vector . should_equal nulls + (c.contains c) . to_vector . should_equal nulls t.evaluate_expression "not(Nothing)" . to_vector . should_equal nulls t.evaluate_expression "Nothing + Nothing" . to_vector . should_equal nulls t.evaluate_expression "[X] + Nothing" . to_vector . should_equal nulls t.evaluate_expression "Nothing + [X]" . to_vector . should_equal nulls - ## Currently some databases (e.g. Postgres) fail on this with "Operator is not unique" so we accept it may fail - r1 = t.evaluate_expression "Nothing * Nothing" . to_vector - if r1.is_error && setup.is_database then r1.should_fail_with SQL_Error else - r1.should_equal nulls + t.evaluate_expression "Nothing * Nothing" . to_vector |> null_or_sql_fail suite_builder.group prefix+"Expression Date and Time literals" group_builder-> specify_test "should be able to add a date or time column" group_builder pending=pending_datetime expression_test-> @@ -348,6 +357,9 @@ add_expression_specs suite_builder detailed setup = expression_test "True && TRUE" True expression_test "True && Nothing" Nothing expression_test "True AND False" False + expression_test "Nothing && False" False + expression_test "Nothing && True" Nothing + expression_test "True && [Bad]] Name]" [True, False, True, False, True] expression_test "False AND [Bad]] Name]" False @@ -355,6 +367,9 @@ add_expression_specs suite_builder detailed setup = expression_test "True || TRUE" True expression_test "True OR False" True expression_test "False OR False" False + expression_test "Nothing || False" Nothing + expression_test "Nothing || True" True + expression_test "True OR [Bad]] Name]" True expression_test "False || [Bad]] Name]" [True, False, True, False, True] @@ -377,7 +392,9 @@ add_expression_specs suite_builder detailed setup = specify_test "should be able to call a variable args function" group_builder expression_test-> expression_test "min(10, 3, 8)" 3 + expression_test "min(Nothing, 2)" 2 expression_test "max([A], [B], 3)" [3, 3, 3, 4, 6] + expression_test "max(Nothing, [A], [B], 3)" [3, 3, 3, 4, 6] specify_test "should be able to use functions with constants" group_builder expression_test-> expression_test "truncate(3.3)" 3 From dfe300b2fea5b806018f21e824ff84a9c13d62f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Tue, 3 Dec 2024 18:43:41 +0100 Subject: [PATCH 33/42] comment, a few more tests --- .../src/Common_Table_Operations/Expression_Spec.enso | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso index 260dbca3af09..86afd6b98fc5 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso @@ -170,7 +170,8 @@ add_expression_specs suite_builder detailed setup = nulls = [Nothing, Nothing, Nothing] c.not . to_vector . should_equal nulls - ## Currently some databases (e.g. Postgres) fail on this with "Operator is not unique" so we accept it may fail + ## TODO fix behaviour in Postgres and other DBs + https://github.com/enso-org/enso/issues/11751 null_or_sql_fail x = if x.is_error && setup.is_database then x.should_fail_with SQL_Error else x.should_equal nulls @@ -191,6 +192,9 @@ add_expression_specs suite_builder detailed setup = t.evaluate_expression "Nothing + [X]" . to_vector . should_equal nulls t.evaluate_expression "Nothing * Nothing" . to_vector |> null_or_sql_fail + t.evaluate_expression "Nothing / Nothing" . to_vector |> null_or_sql_fail + t.evaluate_expression "Nothing ^ Nothing" . to_vector |> null_or_sql_fail + t.evaluate_expression "Nothing % Nothing" . to_vector |> null_or_sql_fail suite_builder.group prefix+"Expression Date and Time literals" group_builder-> specify_test "should be able to add a date or time column" group_builder pending=pending_datetime expression_test-> From 9039bbdffd3a85eccd4874dbfc63655338e1af41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 4 Dec 2024 10:44:08 +0100 Subject: [PATCH 34/42] CR --- .../Table/0.0.0-dev/src/Internal/Value_Type_Helpers.enso | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Value_Type_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Value_Type_Helpers.enso index 56ce69fc0bb0..4b1abbbbbb5c 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Value_Type_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Value_Type_Helpers.enso @@ -210,7 +210,7 @@ type Addition_Kind ## PRIVATE Text_Concat -## Returns the operation kind based on types of the inputs. +## Returns the operation kind based on types of the inputs, or an error if the inferred kinds are incompatible. If both inputs are Null, it is impossible to tell the kind, so Nothing is returned and the caller may decide what to do. private resolve_operation_kind arg1:Any arg2:Any operation_name:Text find_operation_kind:(Value_Type -> Any) -> Any | Nothing = kinds = [arg1, arg2] From 351b0685ea253923ec3013355218ebe5cc7deba5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 4 Dec 2024 11:40:47 +0100 Subject: [PATCH 35/42] cannot create NULL columns in DBs --- .../0.0.0-dev/src/Internal/Postgres/Postgres_Type_Mapping.enso | 2 +- .../0.0.0-dev/src/Internal/SQLServer_Type_Mapping.enso | 2 +- .../0.0.0-dev/src/Internal/Snowflake_Type_Mapping.enso | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Type_Mapping.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Type_Mapping.enso index d3f25cb97151..339ee213c52b 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Type_Mapping.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Postgres/Postgres_Type_Mapping.enso @@ -65,7 +65,7 @@ type Postgres_Type_Mapping See: https://www.postgresql.org/docs/current/datatype-bit.html SQL_Type.Value Types.BINARY "bytea" precision=max_precision Value_Type.Null -> - SQL_Type.Value Types.NULL "null" + Error.throw (Unsupported_Database_Type.Error "Null" "Postgres") Value_Type.Mixed -> Error.throw (Unsupported_Database_Type.Error "Mixed" "Postgres") Value_Type.Unsupported_Data_Type type_name underlying_type -> diff --git a/distribution/lib/Standard/Microsoft/0.0.0-dev/src/Internal/SQLServer_Type_Mapping.enso b/distribution/lib/Standard/Microsoft/0.0.0-dev/src/Internal/SQLServer_Type_Mapping.enso index 8caa93925872..f0a6ea0b9bcc 100644 --- a/distribution/lib/Standard/Microsoft/0.0.0-dev/src/Internal/SQLServer_Type_Mapping.enso +++ b/distribution/lib/Standard/Microsoft/0.0.0-dev/src/Internal/SQLServer_Type_Mapping.enso @@ -62,7 +62,7 @@ type SQLServer_Type_Mapping True -> SQL_Type.Value Types.BINARY "VARBINARY" size False -> SQL_Type.Value Types.BINARY "BINARY" size Value_Type.Mixed -> Error.throw (Unsupported_Database_Type.Error "Mixed" "SQLServer") - Value_Type.Null -> SQL_Type.Value Types.NULL "NULL" + Value_Type.Null -> Error.throw (Unsupported_Database_Type.Error "Null" "SQLServer") Value_Type.Unsupported_Data_Type type_name underlying_type -> underlying_type.if_nothing <| Error.throw <| Illegal_Argument.Error <| "An unsupported SQL type ["+type_name.to_text+"] cannot be converted into an SQL type because it did not contain the SQL metadata needed to reconstruct it." diff --git a/distribution/lib/Standard/Snowflake/0.0.0-dev/src/Internal/Snowflake_Type_Mapping.enso b/distribution/lib/Standard/Snowflake/0.0.0-dev/src/Internal/Snowflake_Type_Mapping.enso index 8d3ebdffb93e..295096f3035c 100644 --- a/distribution/lib/Standard/Snowflake/0.0.0-dev/src/Internal/Snowflake_Type_Mapping.enso +++ b/distribution/lib/Standard/Snowflake/0.0.0-dev/src/Internal/Snowflake_Type_Mapping.enso @@ -63,7 +63,7 @@ type Snowflake_Type_Mapping True -> SQL_Type.Value Types.BINARY "binary" False -> SQL_Type.Value Types.BINARY "binary" size Value_Type.Mixed -> Error.throw (Unsupported_Database_Type.Error "Mixed" "Snowflake") - Value_Type.Null -> SQL_Type.Value Types.NULL "NULL" + Value_Type.Null -> Error.throw (Unsupported_Database_Type.Error "Null" "Snowflake") Value_Type.Unsupported_Data_Type type_name underlying_type -> underlying_type.if_nothing <| Error.throw <| Illegal_Argument.Error <| "An unsupported SQL type ["+type_name.to_text+"] cannot be converted into an SQL type because it did not contain the SQL metadata needed to reconstruct it." From 71b509c002fe1c2b2773c29eb02a2730b7d5c742 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 4 Dec 2024 11:41:07 +0100 Subject: [PATCH 36/42] disable tests, TODO https://github.com/enso-org/enso/issues/11757 --- .../Expression_Spec.enso | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso index 86afd6b98fc5..ea6410085ac5 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso @@ -25,7 +25,7 @@ add_specs suite_builder detailed setup = if setup.is_feature_supported Feature.Column_Operations then (add_expression_specs suite_builder detailed setup) add_expression_specs suite_builder detailed setup = - prefix = setup.prefix + prefix = setup.prefix+"(Expression_Spec) " table_builder = build_sorted_table setup column_a_description = ["A", [1, 2, 3, 4, 5]] column_odd_description = ["Bad] Name", [True, False, True, False, True]] @@ -141,7 +141,7 @@ add_expression_specs suite_builder detailed setup = c.name.should_equal "[X] == '\0'" c.to_vector.should_equal [True, False, False] - suite_builder.group prefix+"Expression Nothing literals" group_builder-> + suite_builder.group prefix+"Expression Nothing literals" pending=(if setup.is_database && prefix.contains "SQLite" . not then "TODO #11757 - Nothing in expressions in DB") group_builder-> specify_test "should be able to add an nothing column" group_builder expression_test-> expression_test "null" Nothing expression_test "nUlL" Nothing @@ -160,11 +160,7 @@ add_expression_specs suite_builder detailed setup = c.is_nothing.to_vector.should_equal [True, True, True] - ## TODO that may not necessarily be good, I think we may need to - introduce a Value_Type.Null and make it accepted by all - `Value_Type.expect_*` checks. - See: https://github.com/enso-org/enso/issues/6281 - group_builder.specify "a null column may fail typechecks that expect a concrete type" <| + group_builder.specify "a null column should work with operations on any type" <| t = table_builder [["X", [1, 2, 3]]] c = t.evaluate_expression "null" nulls = [Nothing, Nothing, Nothing] @@ -182,9 +178,9 @@ add_expression_specs suite_builder detailed setup = (c / c) . to_vector |> null_or_sql_fail (c ^ c) . to_vector |> null_or_sql_fail (c % c) . to_vector |> null_or_sql_fail - (c.starts_with c) . to_vector . should_equal nulls - (c.ends_with c) . to_vector . should_equal nulls - (c.contains c) . to_vector . should_equal nulls + (c.starts_with c) . to_vector |> null_or_sql_fail + (c.ends_with c) . to_vector |> null_or_sql_fail + (c.contains c) . to_vector |> null_or_sql_fail t.evaluate_expression "not(Nothing)" . to_vector . should_equal nulls t.evaluate_expression "Nothing + Nothing" . to_vector . should_equal nulls @@ -396,9 +392,7 @@ add_expression_specs suite_builder detailed setup = specify_test "should be able to call a variable args function" group_builder expression_test-> expression_test "min(10, 3, 8)" 3 - expression_test "min(Nothing, 2)" 2 expression_test "max([A], [B], 3)" [3, 3, 3, 4, 6] - expression_test "max(Nothing, [A], [B], 3)" [3, 3, 3, 4, 6] specify_test "should be able to use functions with constants" group_builder expression_test-> expression_test "truncate(3.3)" 3 From 2fe99067ff470d55a26463973987143141b2f1c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 4 Dec 2024 11:48:36 +0100 Subject: [PATCH 37/42] better message --- .../0.0.0-dev/src/Internal/Upload/Helpers/SQL_Helpers.enso | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Upload/Helpers/SQL_Helpers.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Upload/Helpers/SQL_Helpers.enso index 07eddec1d993..a1d179a528b8 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Upload/Helpers/SQL_Helpers.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Upload/Helpers/SQL_Helpers.enso @@ -2,6 +2,7 @@ private from Standard.Base import all import Standard.Base.Data.Vector.No_Wrap +import Standard.Base.Errors.Illegal_Argument.Illegal_Argument import project.Column_Description.Column_Description import project.Connection.Connection.Connection @@ -10,6 +11,7 @@ import project.Internal.IR.Query.Query import project.Internal.IR.SQL_Expression.SQL_Expression import project.SQL_Query.SQL_Query import project.SQL_Statement.SQL_Statement +from project.Errors import Unsupported_Database_Type ## PRIVATE make_batched_insert_template : Connection -> Text -> Vector (Vector Text) -> SQL_Query @@ -32,6 +34,9 @@ prepare_create_table_statement connection table_name columns primary_key tempora type_mapping = connection.dialect.get_type_mapping column_descriptors = columns.map on_problems=No_Wrap.Value def-> sql_type = type_mapping.value_type_to_sql def.value_type on_problems + . catch Unsupported_Database_Type error-> + Error.throw (Illegal_Argument.Error "Column definition for column ["+def.name+"] is invalid: "+error.to_display_text) + sql_type_text = type_mapping.sql_type_to_text sql_type Create_Column_Descriptor.Value def.name sql_type_text def.constraints connection.dialect.generate_sql <| From cbe47f7e11604908e64d3d19aae7b6b56c45430f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 4 Dec 2024 11:57:11 +0100 Subject: [PATCH 38/42] more tests for https://github.com/enso-org/enso/issues/11757 --- .../Expression_Spec.enso | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso index ea6410085ac5..7210a1a35091 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Expression_Spec.enso @@ -94,6 +94,8 @@ add_expression_specs suite_builder detailed setup = _ -> group_builder.specify label Nothing pending + db_null_pending = if setup.is_database && prefix.contains "SQLite" . not then "TODO #11757 - Nothing in expressions in DB" + suite_builder.group prefix+"Expression Integer literals" group_builder-> specify_test "should be able to add an integer column" group_builder expression_test-> expression_test "1" 1 @@ -141,7 +143,7 @@ add_expression_specs suite_builder detailed setup = c.name.should_equal "[X] == '\0'" c.to_vector.should_equal [True, False, False] - suite_builder.group prefix+"Expression Nothing literals" pending=(if setup.is_database && prefix.contains "SQLite" . not then "TODO #11757 - Nothing in expressions in DB") group_builder-> + suite_builder.group prefix+"Expression Nothing literals" pending=db_null_pending group_builder-> specify_test "should be able to add an nothing column" group_builder expression_test-> expression_test "null" Nothing expression_test "nUlL" Nothing @@ -321,13 +323,14 @@ add_expression_specs suite_builder detailed setup = specify_test "should be able to check null" group_builder expression_test-> expression_test "1 IS NULL" False expression_test "1 IS NoTHing" False - expression_test "Nothing IS NULL" True expression_test "1 IS NOT NULL" True - expression_test "Nothing IS NOT NULL" False expression_test "[A] IS NULL" [False, False, False, False, False] expression_test "[C] IS NULL" [False, False, False, False, True] expression_test "[A] IS NOT NULL" [True, True, True, True, True] expression_test "[C] IS NOT NULL" [True, True, True, True, False] + if db_null_pending.is_nothing then + expression_test "Nothing IS NULL" True + expression_test "Nothing IS NOT NULL" False specify_test "should be able to check empty" group_builder expression_test-> expression_test "'Hello World' IS EMPTY" False @@ -355,10 +358,12 @@ add_expression_specs suite_builder detailed setup = suite_builder.group prefix+"Expression Boolean Operators" group_builder-> specify_test "should be able to AND booleans" group_builder expression_test-> expression_test "True && TRUE" True - expression_test "True && Nothing" Nothing expression_test "True AND False" False - expression_test "Nothing && False" False - expression_test "Nothing && True" Nothing + + if db_null_pending.is_nothing then + expression_test "True && Nothing" Nothing + expression_test "Nothing && False" False + expression_test "Nothing && True" Nothing expression_test "True && [Bad]] Name]" [True, False, True, False, True] expression_test "False AND [Bad]] Name]" False @@ -367,8 +372,10 @@ add_expression_specs suite_builder detailed setup = expression_test "True || TRUE" True expression_test "True OR False" True expression_test "False OR False" False - expression_test "Nothing || False" Nothing - expression_test "Nothing || True" True + + if db_null_pending.is_nothing then + expression_test "Nothing || False" Nothing + expression_test "Nothing || True" True expression_test "True OR [Bad]] Name]" True expression_test "False || [Bad]] Name]" [True, False, True, False, True] From 2d0659ca55b5ffd6622833d387794b60c8c64418 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 4 Dec 2024 12:08:09 +0100 Subject: [PATCH 39/42] yet another one for https://github.com/enso-org/enso/issues/11757 --- .../src/Common_Table_Operations/Column_Operations_Spec.enso | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/Table_Tests/src/Common_Table_Operations/Column_Operations_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Column_Operations_Spec.enso index 8aef02b733cd..dcb94062a526 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Column_Operations_Spec.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Column_Operations_Spec.enso @@ -1903,7 +1903,8 @@ add_column_operation_specs suite_builder setup = c = empty.make_constant_column 42 setup.expect_integer_type c - group_builder.specify "Should create a column of the correct type on a table with no rows" <| + db_null_pending = if setup.is_database && prefix.contains "SQLite" . not then "TODO #11757 - Nothing in expressions in DB" + group_builder.specify "Should create a column of the correct type on a table with no rows" pending=db_null_pending <| t = table_builder [["x", ["1", "2", "3"]]] empty = t.take 0 c = empty.make_constant_column Nothing From 2e49995852116780934c158660bf18ec94101fcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 4 Dec 2024 12:08:19 +0100 Subject: [PATCH 40/42] update error to one with more info --- .../0.0.0-dev/src/Internal/Upload/Helpers/SQL_Helpers.enso | 2 +- test/Table_Tests/src/Database/Upload_Spec.enso | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Upload/Helpers/SQL_Helpers.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Upload/Helpers/SQL_Helpers.enso index a1d179a528b8..4e5823ae4321 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Upload/Helpers/SQL_Helpers.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Upload/Helpers/SQL_Helpers.enso @@ -35,7 +35,7 @@ prepare_create_table_statement connection table_name columns primary_key tempora column_descriptors = columns.map on_problems=No_Wrap.Value def-> sql_type = type_mapping.value_type_to_sql def.value_type on_problems . catch Unsupported_Database_Type error-> - Error.throw (Illegal_Argument.Error "Column definition for column ["+def.name+"] is invalid: "+error.to_display_text) + Error.throw (Illegal_Argument.Error "Definition for column ["+def.name+"] is invalid: "+error.to_display_text) sql_type_text = type_mapping.sql_type_to_text sql_type Create_Column_Descriptor.Value def.name sql_type_text def.constraints diff --git a/test/Table_Tests/src/Database/Upload_Spec.enso b/test/Table_Tests/src/Database/Upload_Spec.enso index dce15a1d1fbb..e8e50cea3d68 100644 --- a/test/Table_Tests/src/Database/Upload_Spec.enso +++ b/test/Table_Tests/src/Database/Upload_Spec.enso @@ -134,7 +134,9 @@ add_specs suite_builder setup make_new_connection persistent_connector=True = group_builder.specify "should fail if an unsupported type is specified" <| run_with_and_without_output <| r1 = data.connection.create_table (Name_Generator.random_name "creating-table") structure=[Column_Description.Value "X" Value_Type.Integer, Column_Description.Value "Y" Value_Type.Mixed] temporary=True - r1.should_fail_with Unsupported_Database_Type + r1.should_fail_with Illegal_Argument + r1.to_display_text . should_contain "Definition for column [Y] is invalid" + r1.to_display_text . should_contain "do not support Mixed types" group_builder.specify "should fail if empty structure is provided" <| run_with_and_without_output <| From 2b94b24cb3a76ecd272a0e2e22278e9aaa87a27e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Wed, 4 Dec 2024 12:16:56 +0100 Subject: [PATCH 41/42] update test --- .../src/Database/Types/SQLite_Type_Mapping_Spec.enso | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/Table_Tests/src/Database/Types/SQLite_Type_Mapping_Spec.enso b/test/Table_Tests/src/Database/Types/SQLite_Type_Mapping_Spec.enso index 1353e7ec50cc..6bb59b3955fe 100644 --- a/test/Table_Tests/src/Database/Types/SQLite_Type_Mapping_Spec.enso +++ b/test/Table_Tests/src/Database/Types/SQLite_Type_Mapping_Spec.enso @@ -1,4 +1,5 @@ from Standard.Base import all +import Standard.Base.Errors.Illegal_Argument.Illegal_Argument from Standard.Table import Aggregate_Column, Value_Type, Table, Bits from Standard.Table.Errors import Invalid_Value_Type, Inexact_Type_Coercion @@ -119,7 +120,9 @@ add_specs suite_builder = group_builder.specify "does not support creating tables with date/time values" <| t = Table.new [["a", [Date.today]], ["b", [Time_Of_Day.now]], ["c", [Date_Time.now]]] r1 = t.select_into_database_table data.connection table_name=(Name_Generator.random_name "date-time-table") temporary=True - r1.should_fail_with Unsupported_Database_Type + r1.should_fail_with Illegal_Argument + r1.to_display_text . should_contain "Definition for column [a] is invalid" + r1.to_display_text . should_contain "do not support Date/time types" group_builder.specify "should be able to infer types for all supported operations" <| dialect = Dialect.sqlite @@ -127,4 +130,3 @@ add_specs suite_builder = operation_type_mapping = SQLite_Type_Mapping.operations_dict operation_type_mapping.keys.sort . should_equal internal_mapping.keys.sort - From fad11797272768bf1cbc27c3b78a2fbe5305daeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rados=C5=82aw=20Wa=C5=9Bko?= Date: Thu, 19 Dec 2024 17:41:10 +0100 Subject: [PATCH 42/42] make custom aggregates work with Null input --- .../java/org/enso/table/aggregations/Mean.java | 18 ++++++++++++++++++ .../java/org/enso/table/aggregations/Sum.java | 16 ++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Mean.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Mean.java index e5cea57fcca5..0ba19e73f052 100644 --- a/std-bits/table/src/main/java/org/enso/table/aggregations/Mean.java +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Mean.java @@ -12,6 +12,7 @@ import org.enso.table.data.column.storage.type.BigIntegerType; import org.enso.table.data.column.storage.type.FloatType; import org.enso.table.data.column.storage.type.IntegerType; +import org.enso.table.data.column.storage.type.NullType; import org.enso.table.data.column.storage.type.StorageType; import org.enso.table.data.table.Column; import org.enso.table.data.table.problems.InvalidAggregation; @@ -41,6 +42,7 @@ private static StorageType resultTypeFromInput(Storage inputStorage) { case IntegerType integerType -> FloatType.FLOAT_64; case BigIntegerType bigIntegerType -> BigDecimalType.INSTANCE; case BigDecimalType bigDecimalType -> BigDecimalType.INSTANCE; + case NullType nullType -> nullType; default -> throw new IllegalStateException( "Unexpected input type for Mean aggregate: " + inputType); }; @@ -59,6 +61,7 @@ private MeanAccumulator makeAccumulator() { return switch (getType()) { case FloatType floatType -> new FloatMeanAccumulator(); case BigDecimalType bigDecimalType -> new BigDecimalMeanAccumulator(); + case NullType nullType -> new NullAccumulator(); default -> throw new IllegalStateException( "Unexpected output type in Mean aggregate: " + getType()); }; @@ -155,4 +158,19 @@ Object summarize() { return count == 0 ? null : total.divide(BigDecimal.valueOf(count), MathContext.DECIMAL128); } } + + /** A special case for a null input column. */ + private static final class NullAccumulator extends MeanAccumulator { + + @Override + void accumulate( + List indexes, Storage storage, ProblemAggregator problemAggregator) { + assert storage.getType() instanceof NullType; + } + + @Override + Object summarize() { + return null; + } + } } diff --git a/std-bits/table/src/main/java/org/enso/table/aggregations/Sum.java b/std-bits/table/src/main/java/org/enso/table/aggregations/Sum.java index 116d75ec1cc8..6fd3f5adf514 100644 --- a/std-bits/table/src/main/java/org/enso/table/aggregations/Sum.java +++ b/std-bits/table/src/main/java/org/enso/table/aggregations/Sum.java @@ -7,6 +7,7 @@ import org.enso.table.data.column.builder.Builder; import org.enso.table.data.column.builder.DoubleBuilder; import org.enso.table.data.column.builder.InferredIntegerBuilder; +import org.enso.table.data.column.builder.NullBuilder; import org.enso.table.data.column.operation.map.MapOperationProblemAggregator; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.column.storage.numeric.AbstractLongStorage; @@ -15,6 +16,7 @@ import org.enso.table.data.column.storage.type.BigIntegerType; import org.enso.table.data.column.storage.type.FloatType; import org.enso.table.data.column.storage.type.IntegerType; +import org.enso.table.data.column.storage.type.NullType; import org.enso.table.data.column.storage.type.StorageType; import org.enso.table.data.table.Column; import org.enso.table.problems.ProblemAggregator; @@ -37,6 +39,7 @@ public Builder makeBuilder(int size, ProblemAggregator problemAggregator) { case IntegerType integerType -> new InferredIntegerBuilder(size, problemAggregator); case BigIntegerType bigIntegerType -> new BigIntegerBuilder(size, problemAggregator); case FloatType floatType -> DoubleBuilder.createDoubleBuilder(size, problemAggregator); + case NullType nullType -> new NullBuilder(); default -> throw new IllegalStateException( "Unexpected input type for Sum aggregate: " + inputType); }; @@ -56,6 +59,7 @@ private SumAccumulator makeAccumulator() { case IntegerType integerType -> new IntegerSumAccumulator(); case BigIntegerType bigIntegerType -> new IntegerSumAccumulator(); case FloatType floatType -> new FloatSumAccumulator(); + case NullType nullType -> new NullAccumulator(); default -> throw new IllegalStateException( "Unexpected input type for Sum aggregate: " + inputType); }; @@ -199,4 +203,16 @@ Double summarize() { return accumulator; } } + + private static final class NullAccumulator extends SumAccumulator { + @Override + void accumulate(List indexes, Storage storage) { + assert storage.getType() instanceof NullType; + } + + @Override + Object summarize() { + return null; + } + } }