Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion c/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ under the License.
<parent>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-java-root</artifactId>
<version>18.3.0</version>
<version>18.3.0-SNAPSHOT</version>
</parent>

<artifactId>arrow-c-data</artifactId>
Expand Down
1 change: 0 additions & 1 deletion ci/scripts/jni_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ cmake \
-DCMAKE_PREFIX_PATH="${arrow_install_dir}" \
-DCMAKE_INSTALL_PREFIX="${prefix_dir}" \
-DCMAKE_UNITY_BUILD="${CMAKE_UNITY_BUILD:-OFF}" \
-DProtobuf_USE_STATIC_LIBS=ON \
-GNinja \
"${EXTRA_CMAKE_OPTIONS[@]}"
cmake --build "${build_dir}"
Expand Down
11 changes: 9 additions & 2 deletions ci/scripts/jni_macos_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ export ARROW_BUILD_TESTS
export ARROW_DATASET
: "${ARROW_GANDIVA:=ON}"
export ARROW_GANDIVA
: "${ARROW_ORC:=ON}"
: "${ARROW_ORC:=OFF}"
export ARROW_ORC
: "${ARROW_PARQUET:=ON}"
: "${ARROW_S3:=ON}"
Expand Down Expand Up @@ -125,7 +125,14 @@ if [ "${ARROW_RUN_TESTS:-}" == "ON" ]; then
github_actions_group_end
fi

export JAVA_JNI_CMAKE_ARGS="-DProtobuf_ROOT=${build_dir}/cpp/protobuf_ep-install"
# Only pass Protobuf_ROOT when the bundled protobuf install directory exists.
# Otherwise fall back to the system protobuf (explicit paths are set below).
if [ -d "${build_dir}/cpp/protobuf_ep-install" ]; then
export JAVA_JNI_CMAKE_ARGS="-DProtobuf_ROOT=${build_dir}/cpp/protobuf_ep-install"
else
# Use system protobuf - set library path explicitly
export JAVA_JNI_CMAKE_ARGS="-DProtobuf_LIBRARY=/usr/local/lib/libprotobuf.dylib -DProtobuf_PROTOC_EXECUTABLE=/usr/local/bin/protoc"
fi
"${source_dir}/ci/scripts/jni_build.sh" \
"${source_dir}" \
"${install_dir}" \
Expand Down
2 changes: 1 addition & 1 deletion dataset/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ under the License.
<parent>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-java-root</artifactId>
<version>18.3.0</version>
<version>18.3.0-SNAPSHOT</version>
</parent>

<artifactId>arrow-dataset</artifactId>
Expand Down
5 changes: 5 additions & 0 deletions dataset/src/main/cpp/jni_wrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,14 @@
#include "arrow/c/helpers.h"
#include "arrow/dataset/api.h"
#include "arrow/dataset/file_base.h"
#include "arrow/dataset/file_parquet.h"
#include "arrow/dataset/file_ipc.h"
#ifdef ARROW_CSV
#include "arrow/dataset/file_csv.h"
#endif
#ifdef ARROW_JSON
#include "arrow/dataset/file_json.h"
#endif
#include "arrow/filesystem/api.h"
#include "arrow/filesystem/path_util.h"
#include "arrow/engine/substrait/util.h"
Expand Down
2 changes: 1 addition & 1 deletion gandiva/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ under the License.
<parent>
<groupId>org.apache.arrow</groupId>
<artifactId>arrow-java-root</artifactId>
<version>18.3.0</version>
<version>18.3.0-SNAPSHOT</version>
</parent>

<groupId>org.apache.arrow.gandiva</groupId>
Expand Down
1 change: 1 addition & 0 deletions gandiva/proto/gandiva/types.proto
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ message ExtGandivaType {
optional TimeUnit timeUnit = 6; // used by TIME32/TIME64
optional string timeZone = 7; // used by TIMESTAMP
optional IntervalType intervalType = 8; // used by INTERVAL
optional string extensionName = 9; // used by extension types (e.g., "uuid")
}

message Field {
Expand Down
15 changes: 15 additions & 0 deletions gandiva/src/main/cpp/expression_registry_helper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

#include <memory>

#include <arrow/extension_type.h>
#include <arrow/util/logging.h>
#include <gandiva/arrow.h>
#include <gandiva/expression_registry.h>
Expand All @@ -42,6 +43,13 @@ gandiva::types::TimeUnit MapTimeUnit(arrow::TimeUnit::type& unit) {
}

void ArrowToProtobuf(DataTypePtr type, gandiva::types::ExtGandivaType* gandiva_data_type) {
// Handle extension types by preserving extension name and using storage type
if (type->id() == arrow::Type::EXTENSION) {
auto ext_type = std::dynamic_pointer_cast<arrow::ExtensionType>(type);
gandiva_data_type->set_extensionname(ext_type->extension_name());
type = ext_type->storage_type();
}

switch (type->id()) {
case arrow::Type::BOOL:
gandiva_data_type->set_type(gandiva::types::GandivaType::BOOL);
Expand Down Expand Up @@ -85,6 +93,13 @@ void ArrowToProtobuf(DataTypePtr type, gandiva::types::ExtGandivaType* gandiva_d
case arrow::Type::BINARY:
gandiva_data_type->set_type(gandiva::types::GandivaType::BINARY);
break;
case arrow::Type::FIXED_SIZE_BINARY: {
gandiva_data_type->set_type(gandiva::types::GandivaType::FIXED_SIZE_BINARY);
std::shared_ptr<arrow::FixedSizeBinaryType> fixed_size_binary_type =
std::dynamic_pointer_cast<arrow::FixedSizeBinaryType>(type);
gandiva_data_type->set_width(fixed_size_binary_type->byte_width());
break;
}
case arrow::Type::DATE32:
gandiva_data_type->set_type(gandiva::types::GandivaType::DATE32);
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,18 @@ private static Set<FunctionSignature> getSupportedFunctionsFromGandiva() throws
}

private static ArrowType getArrowType(ExtGandivaType type) {
// Check if this is an extension type
if (type.hasExtensionName() && !type.getExtensionName().isEmpty()) {
String extensionName = type.getExtensionName();

// Handle known extension types
if ("arrow.uuid".equals(extensionName)) {
// TODO: replace this with the upstream Arrow UUID type from https://github.com/apache/arrow-java/pull/903
return new UuidType();
}
throw new UnsupportedOperationException("Cannot get ArrowType for unknown extension type: " + extensionName);
}

switch (type.getType().getNumber()) {
case GandivaType.BOOL_VALUE:
return ArrowType.Bool.INSTANCE;
Expand Down Expand Up @@ -155,6 +167,8 @@ private static ArrowType getArrowType(ExtGandivaType type) {
return new ArrowType.Utf8();
case GandivaType.BINARY_VALUE:
return new ArrowType.Binary();
case GandivaType.FIXED_SIZE_BINARY_VALUE:
return new ArrowType.FixedSizeBinary(type.getWidth());
case GandivaType.DATE32_VALUE:
return new ArrowType.Date(DateUnit.DAY);
case GandivaType.DATE64_VALUE:
Expand All @@ -171,7 +185,6 @@ private static ArrowType getArrowType(ExtGandivaType type) {
return new ArrowType.Decimal(0, 0, 128);
case GandivaType.INTERVAL_VALUE:
return new ArrowType.Interval(mapArrowIntervalUnit(type.getIntervalType()));
case GandivaType.FIXED_SIZE_BINARY_VALUE:
case GandivaType.MAP_VALUE:
case GandivaType.DICTIONARY_VALUE:
case GandivaType.LIST_VALUE:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.arrow.gandiva.evaluator;

import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.FixedSizeBinaryVector;
import org.apache.arrow.vector.types.pojo.ArrowType;
import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType;
import org.apache.arrow.vector.types.pojo.FieldType;

/**
 * Minimal UUID extension type used by Gandiva for the {@code arrow.uuid} extension.
 *
 * <p>Values are stored as 16-byte fixed-size binary and the type carries no serialized metadata.
 *
 * <p>NOTE(review): the original author flagged this class as something that should NOT be
 * included in the final change — presumably a temporary stand-in until a shared UUID type is
 * available; confirm before merging.
 */
public class UuidType extends ExtensionType {

  /** Width, in bytes, of the fixed-size binary storage backing this type. */
  private static final int UUID_BYTE_WIDTH = 16;

  /** Name under which this extension type is identified. */
  private static final String EXTENSION_NAME = "arrow.uuid";

  /** Returns the extension name, {@code "arrow.uuid"}. */
  @Override
  public String extensionName() {
    return EXTENSION_NAME;
  }

  /** Returns the storage type: fixed-size binary of 16 bytes. */
  @Override
  public ArrowType storageType() {
    return new ArrowType.FixedSizeBinary(UUID_BYTE_WIDTH);
  }

  /** Any other {@code UuidType} instance is considered equal; the type has no parameters. */
  @Override
  public boolean extensionEquals(ExtensionType other) {
    return other instanceof UuidType;
  }

  /** This type has no parameters, so the serialized form is the empty string. */
  @Override
  public String serialize() {
    return "";
  }

  /**
   * Rebuilds the type from its storage type; {@code serializedData} is not consulted.
   *
   * @throws UnsupportedOperationException if {@code storageType} differs from {@link
   *     #storageType()}
   */
  @Override
  public ArrowType deserialize(ArrowType storageType, String serializedData) {
    if (storageType.equals(storageType())) {
      return new UuidType();
    }
    throw new UnsupportedOperationException(
        "Cannot construct UuidType from underlying type " + storageType);
  }

  /**
   * Creates a plain 16-byte {@link FixedSizeBinaryVector} with the given name and allocator.
   * The supplied {@code fieldType} is not used.
   */
  @Override
  public FieldVector getNewVector(String name, FieldType fieldType, BufferAllocator allocator) {
    return new FixedSizeBinaryVector(name, allocator, UUID_BYTE_WIDTH);
  }
}
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ under the License.

<groupId>org.apache.arrow</groupId>
<artifactId>arrow-java-root</artifactId>
<version>18.3.0</version>
<version>18.3.0-SNAPSHOT</version>
<packaging>pom</packaging>

<name>Apache Arrow Java Root POM</name>
Expand Down
Loading