Skip to content

Commit

Permalink
apacheGH-42020: [Swift] Add Arrow decoding implementation for Swift C…
Browse files Browse the repository at this point in the history
…odable (apache#42023)

### Rationale for this change
This change implements decode for the Arrow Swift Codable implementation.  This allows the data in a RecordBatch to be copied to properties in a struct/class. 

The PR is a bit longer than desired but all three container types are required in order to implement the Decoder protocol.

### What changes are included in this PR?

The ArrowDecoder class is included in this PR along with a class for each container type (keyed, unkeyed, and single).  Most of the logic is encapsulated in the ArrowDecoder with minimal logic in each container class (Most of the methods in the container classes are a single line that calls the ArrowDecoder doDecode methods)

### Are these changes tested?

Yes, a test has been added to test the three types of containers provided by the decoder.

* GitHub Issue: apache#42020

Authored-by: Alva Bandy <abandy@live.com>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
  • Loading branch information
abandy authored Jun 8, 2024
1 parent 3999384 commit 601be76
Show file tree
Hide file tree
Showing 2 changed files with 517 additions and 0 deletions.
347 changes: 347 additions & 0 deletions swift/Arrow/Sources/Arrow/ArrowDecoder.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,347 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import Foundation

public class ArrowDecoder: Decoder {
var rbIndex: UInt = 0
public var codingPath: [CodingKey] = []
public var userInfo: [CodingUserInfoKey: Any] = [:]
public let rb: RecordBatch
public let nameToCol: [String: ArrowArrayHolder]
public let columns: [ArrowArrayHolder]
public init(_ decoder: ArrowDecoder) {
self.userInfo = decoder.userInfo
self.codingPath = decoder.codingPath
self.rb = decoder.rb
self.columns = decoder.columns
self.nameToCol = decoder.nameToCol
self.rbIndex = decoder.rbIndex
}

public init(_ rb: RecordBatch) {
self.rb = rb
var colMapping = [String: ArrowArrayHolder]()
var columns = [ArrowArrayHolder]()
for index in 0..<self.rb.schema.fields.count {
let field = self.rb.schema.fields[index]
columns.append(self.rb.column(index))
colMapping[field.name] = self.rb.column(index)
}

self.columns = columns
self.nameToCol = colMapping
}

public func decode<T: Decodable>(_ type: T.Type) throws -> [T] {
var output = [T]()
for index in 0..<rb.length {
self.rbIndex = index
output.append(try type.init(from: self))
}

return output
}

public func container<Key>(keyedBy type: Key.Type
) -> KeyedDecodingContainer<Key> where Key: CodingKey {
let container = ArrowKeyedDecoding<Key>(self, codingPath: codingPath)
return KeyedDecodingContainer(container)
}

public func unkeyedContainer() -> UnkeyedDecodingContainer {
return ArrowUnkeyedDecoding(self, codingPath: codingPath)
}

public func singleValueContainer() -> SingleValueDecodingContainer {
return ArrowSingleValueDecoding(self, codingPath: codingPath)
}

func getCol(_ name: String) throws -> AnyArray {
guard let col = self.nameToCol[name] else {
throw ArrowError.invalid("Column for key \"\(name)\" not found")
}

guard let anyArray = col.array as? AnyArray else {
throw ArrowError.invalid("Unable to convert array to AnyArray")
}

return anyArray
}

func getCol(_ index: Int) throws -> AnyArray {
if index >= self.columns.count {
throw ArrowError.outOfBounds(index: Int64(index))
}

guard let anyArray = self.columns[index].array as? AnyArray else {
throw ArrowError.invalid("Unable to convert array to AnyArray")
}

return anyArray
}

func doDecode<T>(_ key: CodingKey) throws -> T? {
let array: AnyArray = try self.getCol(key.stringValue)
return array.asAny(self.rbIndex) as? T
}

func doDecode<T>(_ col: Int) throws -> T? {
let array: AnyArray = try self.getCol(col)
return array.asAny(self.rbIndex) as? T
}
}

private struct ArrowUnkeyedDecoding: UnkeyedDecodingContainer {
var codingPath: [CodingKey]
var count: Int? = 0
var isAtEnd: Bool = false
var currentIndex: Int = 0
let decoder: ArrowDecoder

init(_ decoder: ArrowDecoder, codingPath: [CodingKey]) {
self.decoder = decoder
self.codingPath = codingPath
self.count = self.decoder.columns.count
}

mutating func increment() {
self.currentIndex += 1
self.isAtEnd = self.currentIndex >= self.count!
}

mutating func decodeNil() throws -> Bool {
defer {increment()}
return try self.decoder.doDecode(self.currentIndex) == nil
}

mutating func decode<T>(_ type: T.Type) throws -> T where T: Decodable {
if type == Int8.self || type == Int16.self ||
type == Int32.self || type == Int64.self ||
type == UInt8.self || type == UInt16.self ||
type == UInt32.self || type == UInt64.self ||
type == String.self || type == Double.self ||
type == Float.self || type == Date.self {
defer {increment()}
return try self.decoder.doDecode(self.currentIndex)!
} else {
throw ArrowError.invalid("Type \(type) is currently not supported")
}
}

func nestedContainer<NestedKey>(
keyedBy type: NestedKey.Type
) throws -> KeyedDecodingContainer<NestedKey> where NestedKey: CodingKey {
throw ArrowError.invalid("Nested decoding is currently not supported.")
}

func nestedUnkeyedContainer() throws -> UnkeyedDecodingContainer {
throw ArrowError.invalid("Nested decoding is currently not supported.")
}

func superDecoder() throws -> Decoder {
throw ArrowError.invalid("super decoding is currently not supported.")
}
}

private struct ArrowKeyedDecoding<Key: CodingKey>: KeyedDecodingContainerProtocol {
var codingPath = [CodingKey]()
var allKeys = [Key]()
let decoder: ArrowDecoder

init(_ decoder: ArrowDecoder, codingPath: [CodingKey]) {
self.decoder = decoder
self.codingPath = codingPath
}

func contains(_ key: Key) -> Bool {
return self.decoder.nameToCol.keys.contains(key.stringValue)
}

func decodeNil(forKey key: Key) throws -> Bool {
return try self.decoder.doDecode(key) == nil
}

func decode(_ type: Bool.Type, forKey key: Key) throws -> Bool {
return try self.decoder.doDecode(key)!
}

func decode(_ type: String.Type, forKey key: Key) throws -> String {
return try self.decoder.doDecode(key)!
}

func decode(_ type: Double.Type, forKey key: Key) throws -> Double {
return try self.decoder.doDecode(key)!
}

func decode(_ type: Float.Type, forKey key: Key) throws -> Float {
return try self.decoder.doDecode(key)!
}

func decode(_ type: Int.Type, forKey key: Key) throws -> Int {
throw ArrowError.invalid(
"Int type is not supported (please use Int8, Int16, Int32 or Int64)")
}

func decode(_ type: Int8.Type, forKey key: Key) throws -> Int8 {
return try self.decoder.doDecode(key)!
}

func decode(_ type: Int16.Type, forKey key: Key) throws -> Int16 {
return try self.decoder.doDecode(key)!
}

func decode(_ type: Int32.Type, forKey key: Key) throws -> Int32 {
return try self.decoder.doDecode(key)!
}

func decode(_ type: Int64.Type, forKey key: Key) throws -> Int64 {
return try self.decoder.doDecode(key)!
}

func decode(_ type: UInt.Type, forKey key: Key) throws -> UInt {
throw ArrowError.invalid(
"UInt type is not supported (please use UInt8, UInt16, UInt32 or UInt64)")
}

func decode(_ type: UInt8.Type, forKey key: Key) throws -> UInt8 {
return try self.decoder.doDecode(key)!
}

func decode(_ type: UInt16.Type, forKey key: Key) throws -> UInt16 {
return try self.decoder.doDecode(key)!
}

func decode(_ type: UInt32.Type, forKey key: Key) throws -> UInt32 {
return try self.decoder.doDecode(key)!
}

func decode(_ type: UInt64.Type, forKey key: Key) throws -> UInt64 {
return try self.decoder.doDecode(key)!
}

func decode<T>(_ type: T.Type, forKey key: Key) throws -> T where T: Decodable {
if type == Date.self {
return try self.decoder.doDecode(key)!
} else {
throw ArrowError.invalid("Type \(type) is currently not supported")
}
}

func nestedContainer<NestedKey>(
keyedBy type: NestedKey.Type,
forKey key: Key
) throws -> KeyedDecodingContainer<NestedKey> where NestedKey: CodingKey {
throw ArrowError.invalid("Nested decoding is currently not supported.")
}

func nestedUnkeyedContainer(forKey key: Key) throws -> UnkeyedDecodingContainer {
throw ArrowError.invalid("Nested decoding is currently not supported.")
}

func superDecoder() throws -> Decoder {
throw ArrowError.invalid("super decoding is currently not supported.")
}

func superDecoder(forKey key: Key) throws -> Decoder {
throw ArrowError.invalid("super decoding is currently not supported.")
}
}

private struct ArrowSingleValueDecoding: SingleValueDecodingContainer {
var codingPath = [CodingKey]()
let decoder: ArrowDecoder

init(_ decoder: ArrowDecoder, codingPath: [CodingKey]) {
self.decoder = decoder
self.codingPath = codingPath
}

func decodeNil() -> Bool {
do {
return try self.decoder.doDecode(0) == nil
} catch {
return false
}
}

func decode(_ type: Bool.Type) throws -> Bool {
return try self.decoder.doDecode(0)!
}

func decode(_ type: String.Type) throws -> String {
return try self.decoder.doDecode(0)!
}

func decode(_ type: Double.Type) throws -> Double {
return try self.decoder.doDecode(0)!
}

func decode(_ type: Float.Type) throws -> Float {
return try self.decoder.doDecode(0)!
}

func decode(_ type: Int.Type) throws -> Int {
throw ArrowError.invalid(
"Int type is not supported (please use Int8, Int16, Int32 or Int64)")
}

func decode(_ type: Int8.Type) throws -> Int8 {
return try self.decoder.doDecode(0)!
}

func decode(_ type: Int16.Type) throws -> Int16 {
return try self.decoder.doDecode(0)!
}

func decode(_ type: Int32.Type) throws -> Int32 {
return try self.decoder.doDecode(0)!
}

func decode(_ type: Int64.Type) throws -> Int64 {
return try self.decoder.doDecode(0)!
}

func decode(_ type: UInt.Type) throws -> UInt {
throw ArrowError.invalid(
"UInt type is not supported (please use UInt8, UInt16, UInt32 or UInt64)")
}

func decode(_ type: UInt8.Type) throws -> UInt8 {
return try self.decoder.doDecode(0)!
}

func decode(_ type: UInt16.Type) throws -> UInt16 {
return try self.decoder.doDecode(0)!
}

func decode(_ type: UInt32.Type) throws -> UInt32 {
return try self.decoder.doDecode(0)!
}

func decode(_ type: UInt64.Type) throws -> UInt64 {
return try self.decoder.doDecode(0)!
}

func decode<T>(_ type: T.Type) throws -> T where T: Decodable {
if type == Date.self {
return try self.decoder.doDecode(0)!
} else {
throw ArrowError.invalid("Type \(type) is currently not supported")
}
}
}
Loading

0 comments on commit 601be76

Please sign in to comment.