diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc
new file mode 100644
index 0000000000000..ac4670ae5bb22
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc
@@ -0,0 +1,100 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/matlab/array/proxy/timestamp_array.h"
+
+#include "arrow/matlab/error/error.h"
+#include "arrow/matlab/bit/pack.h"
+#include "arrow/matlab/bit/unpack.h"
+
+#include "arrow/matlab/type/time_unit.h"
+#include "arrow/util/utf8.h"
+#include "arrow/type.h"
+#include "arrow/builder.h"
+
+
+namespace arrow::matlab::array::proxy {
+
+    namespace {
+        // Returns a pointer to the first element of the bool array, viewed as
+        // bytes, so it can be handed to the builder as the validity mask.
+        const uint8_t* getUnpackedValidityBitmap(const ::matlab::data::TypedArray<bool>& valid_elements) {
+            const auto valid_elements_iterator(valid_elements.cbegin());
+            return reinterpret_cast<const uint8_t*>(valid_elements_iterator.operator->());
+        }
+    } // anonymous namespace
+
+    // Creates a TimestampArray proxy from the struct in constructor_arguments[0],
+    // reading its "MatlabArray", "Valid", "TimeZone", and "TimeUnit" fields.
+    // Failures are reported with the error IDs passed to the MATLAB_* macros
+    // below (unicode conversion, unknown time unit, append values, build array).
+    libmexclass::proxy::MakeResult TimestampArray::make(const libmexclass::proxy::FunctionArguments& constructor_arguments) {
+        namespace mda = ::matlab::data;
+
+        mda::StructArray opts = constructor_arguments[0];
+
+        // Get the mxArray from constructor arguments
+        const mda::TypedArray<int64_t> timestamp_mda = opts[0]["MatlabArray"];
+        const mda::TypedArray<bool> validity_bitmap_mda = opts[0]["Valid"];
+
+        const mda::TypedArray<mda::MATLABString> timezone_mda = opts[0]["TimeZone"];
+        const mda::TypedArray<mda::MATLABString> units_mda = opts[0]["TimeUnit"];
+
+        // extract the time zone string
+        MATLAB_ASSIGN_OR_ERROR(const auto timezone, arrow::util::UTF16StringToUTF8(timezone_mda[0]),
+                               error::UNICODE_CONVERSION_ERROR_ID);
+
+        // extract the time unit
+        MATLAB_ASSIGN_OR_ERROR(const auto time_unit, arrow::matlab::type::timeUnitFromString(units_mda[0]),
+                               error::UKNOWN_TIME_UNIT_ERROR_ID);
+
+        // create the timestamp_type
+        auto data_type = arrow::timestamp(time_unit, timezone);
+        arrow::TimestampBuilder builder(data_type, arrow::default_memory_pool());
+
+        // Get raw pointer of mxArray
+        auto it(timestamp_mda.cbegin());
+        auto dt = it.operator->();
+
+        // Get the validity flags as unpacked bytes (this does not pack them into a bitmap).
+        const uint8_t* valid_mask = getUnpackedValidityBitmap(validity_bitmap_mda);
+        const auto num_elements = timestamp_mda.getNumberOfElements();
+
+        // Append values
+        MATLAB_ERROR_IF_NOT_OK(builder.AppendValues(dt, num_elements, valid_mask), error::APPEND_VALUES_ERROR_ID);
+        MATLAB_ASSIGN_OR_ERROR(auto timestamp_array, builder.Finish(), error::BUILD_ARRAY_ERROR_ID);
+
+        return std::make_shared<arrow::matlab::array::proxy::TimestampArray>(timestamp_array);
+    }
+
+    // Copies the raw int64 values into a num_elements-by-1 MATLAB array and stores
+    // it in context.outputs[0]. Validity is not consulted here.
+    void TimestampArray::toMATLAB(libmexclass::proxy::method::Context& context) {
+        namespace mda = ::matlab::data;
+
+        const auto num_elements = static_cast<size_t>(array->length());
+        const auto timestamp_array = std::static_pointer_cast<arrow::TimestampArray>(array);
+        const int64_t* const data_begin = timestamp_array->raw_values();
+        const int64_t* const data_end = data_begin + num_elements;
+
+        mda::ArrayFactory factory;
+
+        // Constructs a TypedArray from the raw values. Makes a copy.
+        mda::TypedArray<int64_t> result = factory.createArray({num_elements, 1}, data_begin, data_end);
+        context.outputs[0] = result;
+    }
+}
diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.h b/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.h
new file mode 100644
index 0000000000000..ec67245564beb
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/array/proxy/timestamp_array.h
@@ -0,0 +1,42 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.
See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/array.h"
+
+#include "arrow/matlab/array/proxy/array.h"
+
+#include "libmexclass/proxy/Proxy.h"
+
+namespace arrow::matlab::array::proxy {
+// C++ proxy registered in factory.cc for the MATLAB class arrow.array.proxy.TimestampArray.
+class TimestampArray : public arrow::matlab::array::proxy::Array {
+    public:
+        TimestampArray(const std::shared_ptr<arrow::Array> timestamp_array)
+            : arrow::matlab::array::proxy::Array() {
+            array = timestamp_array;
+        }
+
+        static libmexclass::proxy::MakeResult make(const libmexclass::proxy::FunctionArguments& constructor_arguments);
+
+    protected:
+
+        void toMATLAB(libmexclass::proxy::method::Context& context) override;
+};
+
+}
diff --git a/matlab/src/cpp/arrow/matlab/error/error.h b/matlab/src/cpp/arrow/matlab/error/error.h
index 4102054b88134..598db363f3a53 100644
--- a/matlab/src/cpp/arrow/matlab/error/error.h
+++ b/matlab/src/cpp/arrow/matlab/error/error.h
@@ -168,4 +168,6 @@ namespace arrow::matlab::error {
     static const char* SCHEMA_BUILDER_FINISH_ERROR_ID = "arrow:matlab:tabular:proxy:SchemaBuilderAddFields";
     static const char* SCHEMA_BUILDER_ADD_FIELDS_ERROR_ID = "arrow:matlab:tabular:proxy:SchemaBuilderFinish";
     static const char* UNICODE_CONVERSION_ERROR_ID = "arrow:matlab:unicode:UnicodeConversion";
+    static const char* UKNOWN_TIME_UNIT_ERROR_ID = "arrow:matlab:UnknownTimeUnit"; // NOTE(review): "UKNOWN" is misspelled; kept because timestamp_array.cc uses this name.
+
 }
diff --git a/matlab/src/cpp/arrow/matlab/proxy/factory.cc b/matlab/src/cpp/arrow/matlab/proxy/factory.cc
index afa9038c849e1..94ee1ca89245f 100644
--- a/matlab/src/cpp/arrow/matlab/proxy/factory.cc
+++ b/matlab/src/cpp/arrow/matlab/proxy/factory.cc
@@ -18,6 +18,7 @@
 #include "arrow/matlab/array/proxy/boolean_array.h"
 #include "arrow/matlab/array/proxy/numeric_array.h"
 #include "arrow/matlab/tabular/proxy/record_batch.h"
+#include "arrow/matlab/array/proxy/timestamp_array.h"
 #include "arrow/matlab/error/error.h"
 
 #include "factory.h"
@@ -25,24 +26,20 @@ namespace arrow::matlab::proxy {
 
 libmexclass::proxy::MakeResult Factory::make_proxy(const ClassName& class_name, const FunctionArguments& constructor_arguments) {
 
     // Register MATLAB Proxy classes with corresponding C++ Proxy classes.
-    REGISTER_PROXY(arrow.array.proxy.Float32Array, arrow::matlab::array::proxy::NumericArray<float>);
-    REGISTER_PROXY(arrow.array.proxy.Float64Array, arrow::matlab::array::proxy::NumericArray<double>);
-    // Register MATLAB Proxy classes for unsigned integer arrays
-    REGISTER_PROXY(arrow.array.proxy.UInt8Array  , arrow::matlab::array::proxy::NumericArray<uint8_t>);
-    REGISTER_PROXY(arrow.array.proxy.UInt16Array , arrow::matlab::array::proxy::NumericArray<uint16_t>);
-    REGISTER_PROXY(arrow.array.proxy.UInt32Array , arrow::matlab::array::proxy::NumericArray<uint32_t>);
-    REGISTER_PROXY(arrow.array.proxy.UInt64Array , arrow::matlab::array::proxy::NumericArray<uint64_t>);
-    // Register MATLAB Proxy classes for signed integer arrays
-    REGISTER_PROXY(arrow.array.proxy.Int8Array   , arrow::matlab::array::proxy::NumericArray<int8_t>);
-    REGISTER_PROXY(arrow.array.proxy.Int16Array  , arrow::matlab::array::proxy::NumericArray<int16_t>);
-    REGISTER_PROXY(arrow.array.proxy.Int32Array  , arrow::matlab::array::proxy::NumericArray<int32_t>);
-    REGISTER_PROXY(arrow.array.proxy.Int64Array  , arrow::matlab::array::proxy::NumericArray<int64_t>);
-    // Register MATLAB Proxy class for boolean arrays
-    REGISTER_PROXY(arrow.array.proxy.BooleanArray, arrow::matlab::array::proxy::BooleanArray);
-
-    REGISTER_PROXY(arrow.tabular.proxy.RecordBatch , arrow::matlab::tabular::proxy::RecordBatch);
-
+    REGISTER_PROXY(arrow.array.proxy.Float32Array  , arrow::matlab::array::proxy::NumericArray<float>);
+    REGISTER_PROXY(arrow.array.proxy.Float64Array  , arrow::matlab::array::proxy::NumericArray<double>);
+    REGISTER_PROXY(arrow.array.proxy.UInt8Array    , arrow::matlab::array::proxy::NumericArray<uint8_t>);
+    REGISTER_PROXY(arrow.array.proxy.UInt16Array   , arrow::matlab::array::proxy::NumericArray<uint16_t>);
+    REGISTER_PROXY(arrow.array.proxy.UInt32Array   , arrow::matlab::array::proxy::NumericArray<uint32_t>);
+    REGISTER_PROXY(arrow.array.proxy.UInt64Array   , arrow::matlab::array::proxy::NumericArray<uint64_t>);
+    REGISTER_PROXY(arrow.array.proxy.Int8Array     , arrow::matlab::array::proxy::NumericArray<int8_t>);
+    REGISTER_PROXY(arrow.array.proxy.Int16Array    , arrow::matlab::array::proxy::NumericArray<int16_t>);
+    REGISTER_PROXY(arrow.array.proxy.Int32Array    , arrow::matlab::array::proxy::NumericArray<int32_t>);
+    REGISTER_PROXY(arrow.array.proxy.Int64Array    , arrow::matlab::array::proxy::NumericArray<int64_t>);
+    REGISTER_PROXY(arrow.array.proxy.BooleanArray  , arrow::matlab::array::proxy::BooleanArray);
+    REGISTER_PROXY(arrow.array.proxy.TimestampArray, arrow::matlab::array::proxy::TimestampArray);
+    REGISTER_PROXY(arrow.tabular.proxy.RecordBatch , arrow::matlab::tabular::proxy::RecordBatch);
     return libmexclass::error::Error{error::UNKNOWN_PROXY_ERROR_ID, "Did not find matching C++ proxy for " + class_name};
 };
 
diff --git a/matlab/src/cpp/arrow/matlab/type/time_unit.cc b/matlab/src/cpp/arrow/matlab/type/time_unit.cc
new file mode 100644
index 0000000000000..15ebfcfc0c06b
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/type/time_unit.cc
@@ -0,0 +1,38 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "arrow/matlab/type/time_unit.h"
+#include "arrow/util/utf8.h"
+
+namespace arrow::matlab::type {
+    // Converts a UTF-16 time unit name to the matching arrow::TimeUnit::type value.
+    arrow::Result<arrow::TimeUnit::type> timeUnitFromString(const std::u16string& unit_str) {
+        if (unit_str == u"Second") {
+            return arrow::TimeUnit::type::SECOND;
+        } else if (unit_str == u"Millisecond") {
+            return arrow::TimeUnit::type::MILLI;
+        } else if (unit_str == u"Microsecond") {
+            return arrow::TimeUnit::type::MICRO;
+        } else if (unit_str == u"Nanosecond") {
+            return arrow::TimeUnit::type::NANO;
+        } else {
+            auto maybe_utf8 = arrow::util::UTF16StringToUTF8(unit_str); // used only to echo the bad name in the message
+            auto msg = maybe_utf8.ok() ? "Unknown time unit string: " + *maybe_utf8 : "Unknown time unit string";
+            return arrow::Status::Invalid(msg);
+        }
+    }
+}
diff --git a/matlab/src/cpp/arrow/matlab/type/time_unit.h b/matlab/src/cpp/arrow/matlab/type/time_unit.h
new file mode 100644
index 0000000000000..cf3248d77b967
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/type/time_unit.h
@@ -0,0 +1,32 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/type_fwd.h"
+#include "arrow/result.h"
+
+#include <string>
+
+namespace arrow::matlab::type {
+
+    // Maps "Second", "Millisecond", "Microsecond", or "Nanosecond" (exact,
+    // case-sensitive match) to the corresponding arrow::TimeUnit::type.
+    // Any other string yields arrow::Status::Invalid.
+    arrow::Result<arrow::TimeUnit::type> timeUnitFromString(const std::u16string& unit_str);
+
+}
diff --git a/matlab/src/matlab/+arrow/+array/TimestampArray.m b/matlab/src/matlab/+arrow/+array/TimestampArray.m
new file mode 100644
index 0000000000000..0aa76beb99c7a
--- /dev/null
+++ b/matlab/src/matlab/+arrow/+array/TimestampArray.m
@@ -0,0 +1,78 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef TimestampArray < arrow.array.Array
+% arrow.array.TimestampArray  Arrow timestamp array built from a MATLAB datetime.
+
+    properties(Access=private)
+        NullSubstitutionValue = NaT;
+    end
+
+    properties(SetAccess=private, GetAccess=public)
+        Type = arrow.type.TimestampType % temporary default value
+    end
+
+    methods
+        function obj = TimestampArray(data, opts)
+            arguments
+                data
+                opts.TimeUnit(1, 1) arrow.type.TimeUnit = arrow.type.TimeUnit.Microsecond
+                opts.InferNulls(1, 1) logical = true
+                opts.Valid
+            end
+            arrow.args.validateTypeAndShape(data, "datetime");
+            validElements = arrow.args.parseValidElements(data, opts);
+            ptime = arrow.array.TimestampArray.convertToEpochTime(data, opts.TimeUnit);
+            timezone = string(data.TimeZone);
+            % These field names are the ones the C++ proxy's make() reads.
+            args = struct(MatlabArray=ptime, Valid=validElements, TimeZone=timezone, TimeUnit=string(opts.TimeUnit));
+            obj@arrow.array.Array("Name", "arrow.array.proxy.TimestampArray", "ConstructorArguments", {args});
+            obj.Type = arrow.type.TimestampType(TimeUnit=opts.TimeUnit, TimeZone=timezone);
+        end
+
+        function dates = toMATLAB(obj)
+            time = obj.Proxy.toMATLAB();
+            % time holds int64 tick counts; interpret them relative to the UTC epoch.
+            epoch = datetime(1970, 1, 1, TimeZone="UTC");
+
+            tz = obj.Type.TimeZone;
+            ticsPerSecond = obj.Type.TimeUnit.TicksPerSecond;
+
+            dates = datetime(time, ConvertFrom="epochtime", Epoch=epoch, ...
+                TimeZone=tz, TicksPerSecond=ticsPerSecond);
+            % Null elements are reported as NaT.
+            dates(~obj.Valid) = obj.NullSubstitutionValue;
+        end
+
+        function dates = datetime(obj) % same result as toMATLAB(obj)
+            dates = toMATLAB(obj);
+        end
+    end
+
+    methods (Static, Access = private)
+        function time = convertToEpochTime(dates, units)
+            % Convert dates to int64 epoch ticks in the given units; NaT entries stay 0.
+            time = zeros(size(dates), "int64");
+            indices = ~isnat(dates);
+
+            % convertTo uses Jan-1-1970 as the default epoch. If the input
+            % datetime array has a TimeZone, the epoch is Jan-1-1970 UTC.
+            %
+            % TODO: convertTo may error if the datetime is 2^63-1 before or
+            % after the epoch. We should throw a custom error in this case.
+            time(indices) = convertTo(dates(indices), "epochtime", TicksPerSecond=units.TicksPerSecond);
+        end
+    end
+end
\ No newline at end of file
diff --git a/matlab/src/matlab/+arrow/+type/ID.m b/matlab/src/matlab/+arrow/+type/ID.m
index 057ca399c0c1a..0450fe8aea453 100644
--- a/matlab/src/matlab/+arrow/+type/ID.m
+++ b/matlab/src/matlab/+arrow/+type/ID.m
@@ -28,6 +28,12 @@
         % Float16 (10) not yet supported
         Float32 (11)
         Float64 (12)
+        % String (13)
+        % Binary (14)
+        % FixedSizeBinary (15)
+        % Date32 (16)
+        % Date64 (17)
+        Timestamp (18)
     end
 
     methods
@@ -42,7 +48,7 @@
                     bitWidth = 16;
                 case {ID.UInt32, ID.Int32, ID.Float32}
                     bitWidth = 32;
-                case {ID.UInt64, ID.Int64, ID.Float64}
+                case {ID.UInt64, ID.Int64, ID.Float64, ID.Timestamp}
                     bitWidth = 64;
                 otherwise
                     bitWidth = NaN;
diff --git a/matlab/src/matlab/+arrow/+type/TimeUnit.m b/matlab/src/matlab/+arrow/+type/TimeUnit.m
new file mode 100644
index 0000000000000..3ec8bf44d104f
--- /dev/null
+++ b/matlab/src/matlab/+arrow/+type/TimeUnit.m
@@ -0,0 +1,45 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+classdef TimeUnit
+% Enumeration class representing Time Units.
+
+    enumeration
+        Second
+        Millisecond
+        Microsecond
+        Nanosecond
+    end
+
+    properties (Dependent)
+        TicksPerSecond
+    end
+
+    % TicksPerSecond is 1, 1e3, 1e6, or 1e9 for Second through Nanosecond.
+    methods
+        function ticksPerSecond = get.TicksPerSecond(obj)
+            import arrow.type.TimeUnit
+            switch obj
+                case TimeUnit.Second
+                    ticksPerSecond = 1;
+                case TimeUnit.Millisecond
+                    ticksPerSecond = 1e3;
+                case TimeUnit.Microsecond
+                    ticksPerSecond = 1e6;
+                case TimeUnit.Nanosecond
+                    ticksPerSecond = 1e9;
+            end
+        end
+    end
+end
\ No newline at end of file
diff --git a/matlab/src/matlab/+arrow/+type/TimestampType.m b/matlab/src/matlab/+arrow/+type/TimestampType.m
new file mode 100644
index 0000000000000..99ac4a7b769f7
--- /dev/null
+++ b/matlab/src/matlab/+arrow/+type/TimestampType.m
@@ -0,0 +1,41 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef TimestampType < arrow.type.PrimitiveType
+%TIMESTAMPTYPE Type class for timestamp data.
+%   TimeUnit defaults to Microsecond and TimeZone defaults to "".
+
+    properties(SetAccess=private)
+        TimeZone(1, 1) string
+        TimeUnit(1, 1) arrow.type.TimeUnit
+    end
+
+    properties(SetAccess = protected)
+        ID = arrow.type.ID.Timestamp
+    end
+
+    methods
+        function obj = TimestampType(opts)
+        %TIMESTAMPTYPE Construct an instance of this class
+            arguments
+                opts.TimeUnit(1, 1) arrow.type.TimeUnit = arrow.type.TimeUnit.Microsecond
+                opts.TimeZone(1, 1) string {mustBeNonmissing} = ""
+            end
+            obj.TimeUnit = opts.TimeUnit;
+            obj.TimeZone = opts.TimeZone;
+        end
+    end
+end
+
diff --git a/matlab/test/arrow/array/tTimestampArray.m b/matlab/test/arrow/array/tTimestampArray.m
new file mode 100644
index 0000000000000..b0a902f319bf1
--- /dev/null
+++ b/matlab/test/arrow/array/tTimestampArray.m
@@ -0,0 +1,196 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef tTimestampArray < matlab.unittest.TestCase
+% Tests for arrow.array.TimestampArray
+
+    properties(TestParameter)
+        TimeZone = {"" "America/New_York"}
+        TimeUnit = {arrow.type.TimeUnit.Second arrow.type.TimeUnit.Millisecond
+                    arrow.type.TimeUnit.Microsecond arrow.type.TimeUnit.Nanosecond}
+    end
+
+    methods(Test)
+        function Basic(tc, TimeZone)
+            dates = datetime(2023, 6, 22, TimeZone=TimeZone) + days(0:4);
+            arrowArray = arrow.array.TimestampArray(dates);
+            className = string(class(arrowArray));
+            tc.verifyEqual(className, "arrow.array.TimestampArray");
+        end
+
+        function TestLength(testCase, TimeZone)
+        % Verify the Length property.
+            import arrow.array.TimestampArray
+
+            dates = datetime.empty(0, 1);
+            dates.TimeZone = TimeZone;
+            arrowArray = TimestampArray(dates);
+            testCase.verifyEqual(arrowArray.Length, int64(0));
+
+            dates = datetime(2023, 6, 22, TimeZone=TimeZone);
+            arrowArray = TimestampArray(dates);
+            testCase.verifyEqual(arrowArray.Length, int64(1));
+
+            dates = datetime(2023, 6, 22, TimeZone=TimeZone) + days(0:4);
+            arrowArray = TimestampArray(dates);
+            testCase.verifyEqual(arrowArray.Length, int64(5));
+        end
+
+        function TestDefaultTimestampType(testCase, TimeZone)
+        % Verify the TimestampArray's units is Microsecond by default and
+        % its TimeZone value is taken from the input datetime.
+            import arrow.array.TimestampArray
+
+            dates = datetime(2023, 6, 22, TimeZone=TimeZone) + days(0:4);
+            arrowArray = TimestampArray(dates);
+            testCase.verifyTimestampType(arrowArray.Type, arrow.type.TimeUnit.Microsecond, TimeZone);
+        end
+
+        function TestSupplyTimeUnit(testCase, TimeZone)
+        % Supply the TimeUnit name-value pair at construction.
+            import arrow.array.TimestampArray
+
+            dates = datetime(2023, 6, 22, TimeZone=TimeZone) + days(0:4);
+
+            arrowArray = TimestampArray(dates, TimeUnit="Second");
+            testCase.verifyTimestampType(arrowArray.Type, arrow.type.TimeUnit.Second, TimeZone);
+
+            arrowArray = TimestampArray(dates, TimeUnit="Millisecond");
+            testCase.verifyTimestampType(arrowArray.Type, arrow.type.TimeUnit.Millisecond, TimeZone);
+
+            arrowArray = TimestampArray(dates, TimeUnit="Microsecond");
+            testCase.verifyTimestampType(arrowArray.Type, arrow.type.TimeUnit.Microsecond, TimeZone);
+
+            arrowArray = TimestampArray(dates, TimeUnit="Nanosecond");
+            testCase.verifyTimestampType(arrowArray.Type, arrow.type.TimeUnit.Nanosecond, TimeZone);
+        end
+
+        function TestToMATLAB(testCase, TimeUnit, TimeZone)
+        % Verify toMATLAB() round-trips the original datetime array.
+            import arrow.array.TimestampArray
+
+            dates = datetime(2023, 6, 22, TimeZone=TimeZone) + days(0:4);
+
+            arrowArray = arrow.array.TimestampArray(dates, TimeUnit=TimeUnit);
+            values = toMATLAB(arrowArray);
+            testCase.verifyEqual(values, dates');
+        end
+
+        function TestDatetime(testCase, TimeUnit, TimeZone)
+        % Verify datetime() round-trips the original datetime array.
+            import arrow.array.TimestampArray
+
+            dates = datetime(2023, 6, 22, TimeZone=TimeZone) + days(0:4);
+            arrowArray = arrow.array.TimestampArray(dates, TimeUnit=TimeUnit);
+            values = datetime(arrowArray);
+            testCase.verifyEqual(values, dates');
+        end
+
+        function TestValid(testCase, TimeZone)
+        % Verify the Valid property returns the expected logical vector.
+            import arrow.array.TimestampArray
+            dates = datetime(2023, 6, 22, TimeZone=TimeZone) + days(0:4);
+            dates([2 4]) = NaT;
+            arrowArray = arrow.array.TimestampArray(dates);
+            testCase.verifyEqual(arrowArray.Valid, [true; false; true; false; true]);
+            testCase.verifyEqual(toMATLAB(arrowArray), dates');
+            testCase.verifyEqual(datetime(arrowArray), dates');
+        end
+
+        function TestInferNulls(testCase, TimeUnit, TimeZone)
+            import arrow.array.TimestampArray
+
+            dates = datetime(2023, 6, 22, TimeZone=TimeZone) + days(0:4);
+            dates([2 4]) = NaT;
+
+            % Verify NaT is treated as a null value if InferNulls=true.
+            expectedDates = dates';
+            arrowArray = arrow.array.TimestampArray(dates, TimeUnit=TimeUnit, InferNulls=true);
+            testCase.verifyEqual(arrowArray.Valid, [true; false; true; false; true]);
+            testCase.verifyEqual(toMATLAB(arrowArray), expectedDates);
+
+            % Verify NaT is not treated as a null value if InferNulls=false.
+            % The NaT values are mapped to int64(0).
+            arrowArray = arrow.array.TimestampArray(dates, TimeUnit=TimeUnit, InferNulls=false);
+            testCase.verifyEqual(arrowArray.Valid, [true; true; true; true; true]);
+
+            % If the TimestampArray is zoned, int64(0) may not correspond
+            % to Jan-1-1970. getFillValue takes into account the TimeZone.
+            fill = getFillValue(TimeZone);
+            expectedDates([2 4]) = fill;
+            testCase.verifyEqual(toMATLAB(arrowArray), expectedDates);
+        end
+
+        function TestValidNVPair(testCase, TimeUnit, TimeZone)
+            import arrow.array.TimestampArray
+
+            dates = datetime(2023, 6, 22, TimeZone=TimeZone) + days(0:4);
+            dates([2 4]) = NaT;
+
+            % Supply the Valid name-value pair as vector of indices.
+            arrowArray = arrow.array.TimestampArray(dates, TimeUnit=TimeUnit, Valid=[1 2 5]);
+            testCase.verifyEqual(arrowArray.Valid, [true; true; false; false; true]);
+            expectedDates = dates';
+            expectedDates(2) = getFillValue(TimeZone);
+            expectedDates([3 4]) = NaT;
+            testCase.verifyEqual(toMATLAB(arrowArray), expectedDates);
+
+            % Supply the Valid name-value pair as a logical scalar.
+            arrowArray = arrow.array.TimestampArray(dates, TimeUnit=TimeUnit, Valid=false);
+            testCase.verifyEqual(arrowArray.Valid, [false; false; false; false; false]);
+            expectedDates(:) = NaT;
+            testCase.verifyEqual(toMATLAB(arrowArray), expectedDates);
+        end
+
+        function ErrorIfNonVector(testCase)
+            import arrow.array.TimestampArray
+
+            dates = datetime(2023, 6, 2) + days(0:11);
+            dates = reshape(dates, 2, 6);
+            fcn = @() TimestampArray(dates);
+            testCase.verifyError(fcn, "MATLAB:expectedVector");
+
+            dates = reshape(dates, 3, 2, 2);
+            fcn = @() TimestampArray(dates);
+            testCase.verifyError(fcn, "MATLAB:expectedVector");
+        end
+
+        function EmptyDatetimeVector(testCase)
+            import arrow.array.TimestampArray
+
+            dates = datetime.empty(0, 0);
+            arrowArray = TimestampArray(dates);
+            testCase.verifyEqual(arrowArray.Length, int64(0));
+            testCase.verifyEqual(arrowArray.Valid, logical.empty(0, 1));
+            testCase.verifyEqual(toMATLAB(arrowArray), datetime.empty(0, 1));
+        end
+    end
+
+    methods
+        function verifyTimestampType(testCase, type, timeUnit, timeZone) % shared check of class, TimeUnit and TimeZone
+            testCase.verifyTrue(isa(type, "arrow.type.TimestampType"));
+            testCase.verifyEqual(type.TimeUnit, timeUnit);
+            testCase.verifyEqual(type.TimeZone, timeZone);
+        end
+    end
+end
+
+function fill = getFillValue(timezone) % datetime the tests expect for a stored int64(0) in this time zone
+    fill = datetime(1970, 1, 1, TimeZone=timezone);
+    offset = tzoffset(fill);
+    if ~isnan(offset)
+        fill = fill + offset;
+    end
+end
\ No newline at end of file
diff --git a/matlab/test/arrow/gateway/tGateway.m b/matlab/test/arrow/gateway/tGateway.m
index c2b9ef9d68c99..862f83b6ac085 100644
--- a/matlab/test/arrow/gateway/tGateway.m
+++ b/matlab/test/arrow/gateway/tGateway.m
@@ -24,5 +24,25 @@ function UnknownProxyError(testCase)
             fcn = @()libmexclass.proxy.gateway("Create", "NotAProxyClass", {});
             testCase.verifyError(fcn, id);
         end
+
+        function TimestampUnknownTimeUnit(testCase)
+            % Verify the proxy constructor throws an error with the
+            % expected ID when given an unknown TimeUnit. Not hittable
+            % from arrow.array.TimestampArray.
+            proxyName = "arrow.array.proxy.TimestampArray";
+            args = struct(MatlabArray=int64(0), Valid=true, TimeZone="", TimeUnit="bad");
+            fcn = @() libmexclass.proxy.Proxy(Name=proxyName, ConstructorArguments={args});
+            testCase.verifyError(fcn, "arrow:matlab:UnknownTimeUnit");
+        end
+
+        function TimeZoneUnicodeError(testCase)
+            % Verify the proxy constructor throws an error with the
+            % expected ID when given an invalid UTF-16 string as the
+            % TimeZone. Not hittable from arrow.array.TimestampArray.
+            proxyName = "arrow.array.proxy.TimestampArray";
+            args = struct(MatlabArray=int64(0), Valid=true, TimeZone=string(char(0xD83D)), TimeUnit="Second");
+            fcn = @() libmexclass.proxy.Proxy(Name=proxyName, ConstructorArguments={args});
+            testCase.verifyError(fcn, "arrow:matlab:unicode:UnicodeConversion");
+        end
     end
 end
\ No newline at end of file
diff --git a/matlab/test/arrow/type/tTimeUnit.m b/matlab/test/arrow/type/tTimeUnit.m
new file mode 100644
index 0000000000000..b01de443443c5
--- /dev/null
+++ b/matlab/test/arrow/type/tTimeUnit.m
@@ -0,0 +1,41 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License. You may obtain a copy of the License at
+%
+% http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+classdef tTimeUnit < matlab.unittest.TestCase
+% Test class for arrow.type.TimeUnit
+
+    methods (Test)
+        function Values(testCase)
+        % Verify there are four TimeUnit enum values.
+            import arrow.type.TimeUnit
+            values = enumeration(TimeUnit.Second);
+            expectedValues = [TimeUnit.Second, TimeUnit.Millisecond, ...
+                TimeUnit.Microsecond, TimeUnit.Nanosecond]';
+            testCase.verifyEqual(values, expectedValues);
+        end
+
+        function TicksPerSecond(testCase)
+        % Verify the TicksPerSecond property has the right value for each
+        % TimeUnit value.
+            import arrow.type.TimeUnit
+            units = [TimeUnit.Second, TimeUnit.Millisecond, ...
+                TimeUnit.Microsecond, TimeUnit.Nanosecond]';
+            ticksPerSecond = [1 1e3 1e6 1e9];
+            for ii = 1:numel(units)
+                testCase.verifyEqual(units(ii).TicksPerSecond, ticksPerSecond(ii));
+            end
+        end
+    end
+end
+
diff --git a/matlab/test/arrow/type/tTimestampType.m b/matlab/test/arrow/type/tTimestampType.m
new file mode 100644
index 0000000000000..f8a9a37f32a63
--- /dev/null
+++ b/matlab/test/arrow/type/tTimestampType.m
@@ -0,0 +1,102 @@
+% Licensed to the Apache Software Foundation (ASF) under one or more
+% contributor license agreements. See the NOTICE file distributed with
+% this work for additional information regarding copyright ownership.
+% The ASF licenses this file to you under the Apache License, Version
+% 2.0 (the "License"); you may not use this file except in compliance
+% with the License.
You may obtain a copy of the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS,
+% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+% implied. See the License for the specific language governing
+% permissions and limitations under the License.
+
+classdef tTimestampType < hPrimitiveType
+% Test class for arrow.type.TimestampType
+
+    properties
+        ArrowType = arrow.type.TimestampType
+        TypeID = arrow.type.ID.Timestamp
+        BitWidth = 64;
+    end
+
+    methods(Test)
+        function DefaultTimeUnit(testCase)
+        % Verify the default TimeUnit is Microsecond
+            type = arrow.type.TimestampType;
+            actualUnit = type.TimeUnit;
+            expectedUnit = arrow.type.TimeUnit.Microsecond;
+            testCase.verifyEqual(actualUnit, expectedUnit);
+        end
+
+        function DefaultTimeZone(testCase)
+        % Verify the default TimeZone is ""
+            type = arrow.type.TimestampType;
+            actualTimezone = type.TimeZone;
+            expectedTimezone = "";
+            testCase.verifyEqual(actualTimezone, expectedTimezone);
+        end
+
+        function SupplyTimeUnitEnum(testCase)
+        % Supply TimeUnit as an enum value.
+            import arrow.type.*
+            expectedUnit = [TimeUnit.Second, TimeUnit.Millisecond, ...
+                            TimeUnit.Microsecond, TimeUnit.Nanosecond];
+
+            for unit = expectedUnit
+                type = TimestampType(TimeUnit=unit);
+                testCase.verifyEqual(type.TimeUnit, unit);
+            end
+        end
+
+        function SupplyTimeUnitString(testCase)
+        % Supply TimeUnit as a string value. Verify TimeUnit is set to
+        % the appropriate TimeUnit enum value.
+            import arrow.type.*
+            unitString = ["second", "millisecond", "microsecond", "nanosecond"];
+            expectedUnit = [TimeUnit.Second, TimeUnit.Millisecond, ...
+                            TimeUnit.Microsecond, TimeUnit.Nanosecond];
+
+            for ii = 1:numel(unitString)
+                type = TimestampType(TimeUnit=unitString(ii));
+                testCase.verifyEqual(type.TimeUnit, expectedUnit(ii));
+            end
+        end
+
+        function SupplyTimeZone(testCase)
+        % Supply the TimeZone.
+            type = arrow.type.TimestampType(TimeZone="America/New_York");
+            testCase.verifyEqual(type.TimeZone, "America/New_York");
+        end
+
+        function ErrorIfMissingStringTimeZone(testCase) % a missing TimeZone string must error
+            fcn = @() arrow.type.TimestampType(TimeZone=string(missing));
+            testCase.verifyError(fcn, "MATLAB:validators:mustBeNonmissing");
+        end
+
+        function ErrorIfTimeZoneIsNonScalar(testCase) % 1x2 and 0x0 TimeZone values must error
+            fcn = @() arrow.type.TimestampType(TimeZone=["a", "b"]);
+            testCase.verifyError(fcn, "MATLAB:validation:IncompatibleSize");
+
+            fcn = @() arrow.type.TimestampType(TimeZone=strings(0, 0));
+            testCase.verifyError(fcn, "MATLAB:validation:IncompatibleSize");
+        end
+
+        function ErrorIfAmbiguousTimeUnit(testCase) % "mi" is a prefix of both millisecond and microsecond
+            fcn = @() arrow.type.TimestampType(TimeUnit="mi");
+            testCase.verifyError(fcn, "MATLAB:validation:UnableToConvert");
+        end
+
+        function ErrorIfTimeUnitIsNonScalar(testCase) % non-scalar TimeUnit (enum or string) must error
+            units = [arrow.type.TimeUnit.Second; arrow.type.TimeUnit.Millisecond];
+            fcn = @() arrow.type.TimestampType(TimeUnit=units); % pass via TimeUnit, the argument under test
+            testCase.verifyError(fcn, "MATLAB:validation:IncompatibleSize");
+
+            units = ["second" "millisecond"];
+            fcn = @() arrow.type.TimestampType(TimeUnit=units);
+            testCase.verifyError(fcn, "MATLAB:validation:IncompatibleSize");
+        end
+    end
+end
\ No newline at end of file
diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
index ab4090f242703..f56321ea73132 100644
--- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
+++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
@@ -36,12 +36,18 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_LIBRARY_NAME arrowproxy)
 set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_LIBRARY_ROOT_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp")
set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy"
                                                       "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/bit"
-                                                      "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/error")
+                                                      "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/error"
+                                                      "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type") # arrow/matlab/type headers (time_unit.h)
+
+
 set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/array.cc"
                                                   "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/boolean_array.cc"
                                                   "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc"
+                                                  "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/array/proxy/timestamp_array.cc" # TimestampArray proxy implementation
                                                   "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/bit/pack.cc"
-                                                  "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/bit/unpack.cc")
+                                                  "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/bit/unpack.cc"
+                                                  "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/type/time_unit.cc") # time-unit helpers (see type/time_unit.h)
+
 set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_FACTORY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/proxy")
 set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_FACTORY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/proxy/factory.cc")
 set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_LIBRARY_INCLUDE_DIRS ${MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_LIBRARY_ROOT_INCLUDE_DIR}