diff --git a/c/pom.xml b/c/pom.xml
index 290cb561c1..830b519752 100644
--- a/c/pom.xml
+++ b/c/pom.xml
@@ -22,7 +22,7 @@ under the License.
org.apache.arrow
arrow-java-root
- 18.3.0
+ 18.3.0-SNAPSHOT
arrow-c-data
diff --git a/ci/scripts/jni_build.sh b/ci/scripts/jni_build.sh
index aec6fc325c..7a2b95f823 100755
--- a/ci/scripts/jni_build.sh
+++ b/ci/scripts/jni_build.sh
@@ -63,7 +63,6 @@ cmake \
-DCMAKE_PREFIX_PATH="${arrow_install_dir}" \
-DCMAKE_INSTALL_PREFIX="${prefix_dir}" \
-DCMAKE_UNITY_BUILD="${CMAKE_UNITY_BUILD:-OFF}" \
- -DProtobuf_USE_STATIC_LIBS=ON \
-GNinja \
"${EXTRA_CMAKE_OPTIONS[@]}"
cmake --build "${build_dir}"
diff --git a/ci/scripts/jni_macos_build.sh b/ci/scripts/jni_macos_build.sh
index f7543b6f7a..5c90725984 100755
--- a/ci/scripts/jni_macos_build.sh
+++ b/ci/scripts/jni_macos_build.sh
@@ -67,7 +67,7 @@ export ARROW_BUILD_TESTS
export ARROW_DATASET
: "${ARROW_GANDIVA:=ON}"
export ARROW_GANDIVA
-: "${ARROW_ORC:=ON}"
+: "${ARROW_ORC:=OFF}"
export ARROW_ORC
: "${ARROW_PARQUET:=ON}"
: "${ARROW_S3:=ON}"
@@ -125,7 +125,14 @@ if [ "${ARROW_RUN_TESTS:-}" == "ON" ]; then
github_actions_group_end
fi
-export JAVA_JNI_CMAKE_ARGS="-DProtobuf_ROOT=${build_dir}/cpp/protobuf_ep-install"
+# Pass Protobuf_ROOT only when ${build_dir}/cpp/protobuf_ep-install exists; otherwise
+# point CMake at the system protobuf under the Homebrew prefix (fallback: /usr/local).
+if [ -d "${build_dir}/cpp/protobuf_ep-install" ]; then
+  export JAVA_JNI_CMAKE_ARGS="-DProtobuf_ROOT=${build_dir}/cpp/protobuf_ep-install"
+else
+  protobuf_prefix="$(brew --prefix 2>/dev/null || echo /usr/local)"
+  export JAVA_JNI_CMAKE_ARGS="-DProtobuf_LIBRARY=${protobuf_prefix}/lib/libprotobuf.dylib -DProtobuf_PROTOC_EXECUTABLE=${protobuf_prefix}/bin/protoc"
+fi
"${source_dir}/ci/scripts/jni_build.sh" \
"${source_dir}" \
"${install_dir}" \
diff --git a/dataset/pom.xml b/dataset/pom.xml
index efbe310ea2..fba0e0ff7f 100644
--- a/dataset/pom.xml
+++ b/dataset/pom.xml
@@ -22,7 +22,7 @@ under the License.
org.apache.arrow
arrow-java-root
- 18.3.0
+ 18.3.0-SNAPSHOT
arrow-dataset
diff --git a/dataset/src/main/cpp/jni_wrapper.cc b/dataset/src/main/cpp/jni_wrapper.cc
index 49cc85251c..2380ca9dcb 100644
--- a/dataset/src/main/cpp/jni_wrapper.cc
+++ b/dataset/src/main/cpp/jni_wrapper.cc
@@ -25,9 +25,14 @@
#include "arrow/c/helpers.h"
#include "arrow/dataset/api.h"
#include "arrow/dataset/file_base.h"
+#include "arrow/dataset/file_parquet.h"
+#include "arrow/dataset/file_ipc.h"
#ifdef ARROW_CSV
#include "arrow/dataset/file_csv.h"
#endif
+#ifdef ARROW_JSON
+#include "arrow/dataset/file_json.h"
+#endif
#include "arrow/filesystem/api.h"
#include "arrow/filesystem/path_util.h"
#include "arrow/engine/substrait/util.h"
diff --git a/gandiva/pom.xml b/gandiva/pom.xml
index 95c62b58bc..98a4622628 100644
--- a/gandiva/pom.xml
+++ b/gandiva/pom.xml
@@ -22,7 +22,7 @@ under the License.
org.apache.arrow
arrow-java-root
- 18.3.0
+ 18.3.0-SNAPSHOT
org.apache.arrow.gandiva
diff --git a/gandiva/proto/gandiva/types.proto b/gandiva/proto/gandiva/types.proto
index 4ce342681d..27299f273e 100644
--- a/gandiva/proto/gandiva/types.proto
+++ b/gandiva/proto/gandiva/types.proto
@@ -85,6 +85,7 @@ message ExtGandivaType {
optional TimeUnit timeUnit = 6; // used by TIME32/TIME64
optional string timeZone = 7; // used by TIMESTAMP
optional IntervalType intervalType = 8; // used by INTERVAL
+ optional string extensionName = 9; // used by extension types (e.g., "arrow.uuid")
}
message Field {
diff --git a/gandiva/src/main/cpp/expression_registry_helper.cc b/gandiva/src/main/cpp/expression_registry_helper.cc
index 21077ff1db..647dfef3ce 100644
--- a/gandiva/src/main/cpp/expression_registry_helper.cc
+++ b/gandiva/src/main/cpp/expression_registry_helper.cc
@@ -17,6 +17,7 @@
#include
+#include
#include
#include
#include
@@ -42,6 +43,13 @@ gandiva::types::TimeUnit MapTimeUnit(arrow::TimeUnit::type& unit) {
}
void ArrowToProtobuf(DataTypePtr type, gandiva::types::ExtGandivaType* gandiva_data_type) {
+ // Handle extension types by preserving extension name and using storage type
+ if (type->id() == arrow::Type::EXTENSION) {
+ auto ext_type = std::dynamic_pointer_cast(type);
+ gandiva_data_type->set_extensionname(ext_type->extension_name());
+ type = ext_type->storage_type();
+ }
+
switch (type->id()) {
case arrow::Type::BOOL:
gandiva_data_type->set_type(gandiva::types::GandivaType::BOOL);
@@ -85,6 +93,13 @@ void ArrowToProtobuf(DataTypePtr type, gandiva::types::ExtGandivaType* gandiva_d
case arrow::Type::BINARY:
gandiva_data_type->set_type(gandiva::types::GandivaType::BINARY);
break;
+ case arrow::Type::FIXED_SIZE_BINARY: {
+ gandiva_data_type->set_type(gandiva::types::GandivaType::FIXED_SIZE_BINARY);
+ std::shared_ptr fixed_size_binary_type =
+ std::dynamic_pointer_cast(type);
+ gandiva_data_type->set_width(fixed_size_binary_type->byte_width());
+ break;
+ }
case arrow::Type::DATE32:
gandiva_data_type->set_type(gandiva::types::GandivaType::DATE32);
break;
diff --git a/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistry.java b/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistry.java
index 49625edf27..b1b7bff9f0 100644
--- a/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistry.java
+++ b/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ExpressionRegistry.java
@@ -126,6 +126,18 @@ private static Set getSupportedFunctionsFromGandiva() throws
}
private static ArrowType getArrowType(ExtGandivaType type) {
+ // Check if this is an extension type
+ if (type.hasExtensionName() && !type.getExtensionName().isEmpty()) {
+ String extensionName = type.getExtensionName();
+
+ // Handle known extension types
+ if ("arrow.uuid".equals(extensionName)) {
+ // TODO: replace with the Arrow UUID type from https://github.com/apache/arrow-java/pull/903
+ return new UuidType();
+ }
+ throw new UnsupportedOperationException("Cannot get ArrowType for unknown extension type: " + extensionName);
+ }
+
switch (type.getType().getNumber()) {
case GandivaType.BOOL_VALUE:
return ArrowType.Bool.INSTANCE;
@@ -155,6 +167,8 @@ private static ArrowType getArrowType(ExtGandivaType type) {
return new ArrowType.Utf8();
case GandivaType.BINARY_VALUE:
return new ArrowType.Binary();
+ case GandivaType.FIXED_SIZE_BINARY_VALUE:
+ return new ArrowType.FixedSizeBinary(type.getWidth());
case GandivaType.DATE32_VALUE:
return new ArrowType.Date(DateUnit.DAY);
case GandivaType.DATE64_VALUE:
@@ -171,7 +185,6 @@ private static ArrowType getArrowType(ExtGandivaType type) {
return new ArrowType.Decimal(0, 0, 128);
case GandivaType.INTERVAL_VALUE:
return new ArrowType.Interval(mapArrowIntervalUnit(type.getIntervalType()));
- case GandivaType.FIXED_SIZE_BINARY_VALUE:
case GandivaType.MAP_VALUE:
case GandivaType.DICTIONARY_VALUE:
case GandivaType.LIST_VALUE:
diff --git a/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/UuidType.java b/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/UuidType.java
new file mode 100644
index 0000000000..914bb57703
--- /dev/null
+++ b/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/UuidType.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.arrow.gandiva.evaluator;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.FixedSizeBinaryVector;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType;
+import org.apache.arrow.vector.types.pojo.FieldType;
+
+/**
+ * Arrow extension type registered under the name {@code arrow.uuid} and stored as 16-byte
+ * fixed-size binary.
+ *
+ * <p>NOTE(review): the original author flagged this class as not meant to be included;
+ * presumably it is a stand-in until a shared UUID extension type is available -- confirm
+ * before merging.
+ */
+public class UuidType extends ExtensionType {
+
+  /** Width in bytes of the fixed-size binary storage used for a UUID value. */
+  private static final int UUID_BYTE_WIDTH = 16;
+
+  /** Name reported by {@link #extensionName()}. */
+  private static final String EXTENSION_NAME = "arrow.uuid";
+
+  /** Returns the extension name, {@code "arrow.uuid"}. */
+  @Override
+  public String extensionName() {
+    return EXTENSION_NAME;
+  }
+
+  /** Returns the storage type: fixed-size binary with a byte width of 16. */
+  @Override
+  public ArrowType storageType() {
+    return new ArrowType.FixedSizeBinary(UUID_BYTE_WIDTH);
+  }
+
+  /** Returns an empty string as the serialized form of this type. */
+  @Override
+  public String serialize() {
+    return "";
+  }
+
+  /**
+   * Creates a {@code UuidType} for the given storage type.
+   *
+   * @param storageType storage type to validate against {@link #storageType()}
+   * @param serializedData ignored by this implementation
+   * @return a new {@code UuidType}
+   * @throws UnsupportedOperationException if {@code storageType} does not equal the expected
+   *     storage type
+   */
+  @Override
+  public ArrowType deserialize(ArrowType storageType, String serializedData) {
+    final boolean storageMatches = storageType.equals(storageType());
+    if (storageMatches) {
+      return new UuidType();
+    }
+    throw new UnsupportedOperationException(
+        "Cannot construct UuidType from underlying type " + storageType);
+  }
+
+  /** Treats every {@code UuidType} instance as equal to this one. */
+  @Override
+  public boolean extensionEquals(ExtensionType other) {
+    return other instanceof UuidType;
+  }
+
+  /**
+   * Creates a 16-byte {@link FixedSizeBinaryVector} to hold values of this type.
+   *
+   * @param name name given to the new vector
+   * @param fieldType not consulted by this implementation
+   * @param allocator allocator handed to the new vector
+   * @return the newly created vector
+   */
+  @Override
+  public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) {
+    return new FixedSizeBinaryVector(name, allocator, UUID_BYTE_WIDTH);
+  }
+}
diff --git a/pom.xml b/pom.xml
index 1dcda5c40a..a16134e899 100644
--- a/pom.xml
+++ b/pom.xml
@@ -28,7 +28,7 @@ under the License.
org.apache.arrow
arrow-java-root
- 18.3.0
+ 18.3.0-SNAPSHOT
pom
Apache Arrow Java Root POM