Skip to content

Commit b8400b0

Browse files
authored
add Arrow.jl conversion methods so that Samples/SamplesInfo can be (de)serialized as Arrow values (#68)
1 parent 3323290 commit b8400b0

File tree

6 files changed

+77
-8
lines changed

6 files changed

+77
-8
lines changed

Project.toml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
name = "Onda"
22
uuid = "e853f5be-6863-11e9-128d-476edb89bfb5"
33
authors = ["Beacon Biosignals, Inc."]
4-
version = "0.13.1"
4+
version = "0.13.2"
55

66
[deps]
77
Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
@@ -17,9 +17,10 @@ TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
1717
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
1818

1919
[compat]
20-
Arrow = "~1.2.4"
20+
Arrow = "1.3"
2121
CodecZstd = "0.6, 0.7"
2222
ConstructionBase = "1.0"
23+
DataFrames = "0.22.7"
2324
MsgPack = "1.1"
2425
Tables = "1.2"
2526
TimeSpans = "0.2.2"

examples/tour.jl

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
# before and/or alongside the completion of this tour; it explains the
99
# purpose/structure of the format.
1010

11-
using Onda, TimeSpans, DataFrames, Dates, UUIDs, Test, ConstructionBase
11+
using Onda, TimeSpans, DataFrames, Dates, UUIDs, Test, ConstructionBase, Arrow
1212

1313
#####
1414
##### generate some mock data
@@ -204,6 +204,17 @@ rows = ["c3", 4, "f3"]
204204
f_channels = ["fp1", "f3","f7", "fz", "fp2", "f4", "f8"]
205205
@test eeg[r"f", span].data == view(eeg, channel.(Ref(eeg), f_channels), span_range).data
206206

207+
# Onda overloads the necessary Arrow.jl machinery to enable individual sample data
208+
# segments (specifically, `Samples` and `SamplesInfo` values) to be (de)serialized
209+
# to/from Arrow for storage or IPC purposes; see below for an example. Note that if
210+
# you wanted to use Arrow as a storage format for whole sample data files w/ Onda,
211+
# it'd make more sense to create an `AbstractLPCMFormat` subtype for your Arrow <-> LPCM
212+
# mapping (an example of this can be seen in `examples/flac.jl`).
213+
x = (a=[eeg], b=[eeg.info])
214+
y = Arrow.Table(Arrow.tobuffer(x))
215+
@test x.a == y.a
216+
@test x.b == y.b
217+
207218
# Note that `Samples` is not an `AbstractArray` subtype; the special indexing
208219
# behavior above is only defined for convenient data manipulation. It is fine
209220
# to access the sample data matrix directly via the `data` field if you need

src/Onda.jl

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,6 @@ using TimeSpans, ConstructionBase
55
using Arrow, Tables
66
using MsgPack, TranscodingStreams, CodecZstd
77

8-
function __init__()
9-
Arrow.ArrowTypes.registertype!(TimeSpan, TimeSpan)
10-
return nothing
11-
end
12-
138
#####
149
##### includes/exports
1510
#####

src/samples.jl

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -495,3 +495,31 @@ function Base.show(io::IO, samples::Samples)
495495
show(io, "text/plain", samples.data)
496496
end
497497
end
498+
499+
#####
500+
##### Arrow conversion
501+
#####
502+
503+
# Arrow-side representation of a `Samples` value: a `NamedTuple` with the fields
# `data::Vector{T}`, `info::S` (where `S` is some `SamplesInfoArrowType`) and
# `encoded::Bool`.
const SamplesArrowType{T,S} = NamedTuple{(:data, :info, :encoded),Tuple{Vector{T},S,Bool}} where {S<:SamplesInfoArrowType}
504+
505+
# Name tag for `Samples` on the Arrow side; it is what `arrowname` returns for `Samples`
# types and what the corresponding `JuliaType` method dispatches on (wrapped in `Val`).
const SAMPLES_ARROW_NAME = Symbol("JuliaLang.Samples")
506+
507+
# Every `Samples` type is tagged with the same Arrow name, `SAMPLES_ARROW_NAME`.
function Arrow.ArrowTypes.arrowname(::Type{<:Samples})
    return SAMPLES_ARROW_NAME
end
508+
509+
# Arrow-side type for `Samples{D,S}`: a `SamplesArrowType` parameterized by the element
# type of the data container `D` and by the Arrow-side type of the info type `S`.
function Arrow.ArrowTypes.ArrowType(::Type{<:Samples{D,S}}) where {D,S}
    info_arrow_type = Arrow.ArrowTypes.ArrowType(S)
    return SamplesArrowType{eltype(D),info_arrow_type}
end
510+
511+
# Convert `samples` into its Arrow-side `NamedTuple`:
# - `data` is `samples.data` flattened with `vec`,
# - `info` is `samples.info` passed through `toarrow`,
# - `encoded` is carried over unchanged.
function Arrow.ArrowTypes.toarrow(samples::Samples)
512+
return (data=vec(samples.data),
513+
info=Arrow.ArrowTypes.toarrow(samples.info),
514+
encoded=samples.encoded)
515+
end
516+
517+
# Map the `SAMPLES_ARROW_NAME` tag plus a concrete `SamplesArrowType{T,S}` back to a Julia
# type: a `Samples` whose data type is `Matrix{T}` and whose info type is whatever the
# `SAMPLES_INFO_ARROW_NAME` method of `JuliaType` returns for `S`.
function Arrow.ArrowTypes.JuliaType(::Val{SAMPLES_ARROW_NAME}, ::Type{SamplesArrowType{T,S}}) where {T,S}
518+
return Samples{Matrix{T},Arrow.ArrowTypes.JuliaType(Val(SAMPLES_INFO_ARROW_NAME), S)}
519+
end
520+
521+
# Rebuild a `Samples` value from its three Arrow fields (the inverse of `toarrow` above):
# - the info is reconstructed with `SamplesInfo(arrow_info; validate=false)`,
# - the flat `arrow_data` is reshaped so that it has `channel_count(info)` rows (the
#   second dimension is inferred via `:`),
# - `arrow_encoded` is forwarded as the third `Samples` argument.
function Arrow.ArrowTypes.fromarrow(::Type{<:Samples}, arrow_data, arrow_info, arrow_encoded)
522+
info = SamplesInfo(arrow_info; validate=false)
523+
data = reshape(arrow_data, (channel_count(info), :))
524+
return Samples(data, info, arrow_encoded)
525+
end

src/signals.jl

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,35 @@ function Signal(info::SamplesInfo; recording, file_path, file_format, span, cust
277277
info.sample_offset_in_unit, info.sample_type, info.sample_rate, custom...)
278278
end
279279

280+
#####
281+
##### Arrow conversion
282+
#####
283+
284+
# Arrow-side representation of a `SamplesInfo` value: a `NamedTuple` in which `kind`,
# `sample_unit` and `sample_type` are `String`s, `channels` is a `Vector{String}`, and the
# resolution, offset and sample-rate fields keep their own types (`R`, `O`, `SR`).
const SamplesInfoArrowType{R,O,SR} = NamedTuple{(:kind, :channels, :sample_unit, :sample_resolution_in_unit, :sample_offset_in_unit, :sample_type, :sample_rate),
285+
Tuple{String,Vector{String},String,R,O,String,SR}}
286+
287+
# Name tag for `SamplesInfo` on the Arrow side; it is what `arrowname` returns for
# `SamplesInfo` types and what the corresponding `JuliaType` method dispatches on
# (wrapped in `Val`).
const SAMPLES_INFO_ARROW_NAME = Symbol("JuliaLang.SamplesInfo")
288+
289+
# Every `SamplesInfo` type is tagged with the same Arrow name, `SAMPLES_INFO_ARROW_NAME`.
function Arrow.ArrowTypes.arrowname(::Type{<:SamplesInfo})
    return SAMPLES_INFO_ARROW_NAME
end
290+
291+
# Arrow-side type for a `SamplesInfo`: only the 4th, 5th and 7th type parameters
# (`R`, `O`, `SR`) are carried into `SamplesInfoArrowType`; the others are ignored here.
function Arrow.ArrowTypes.ArrowType(
    ::Type{<:SamplesInfo{<:Any,<:Any,<:Any,R,O,<:Any,SR}}
) where {R,O,SR}
    return SamplesInfoArrowType{R,O,SR}
end
292+
293+
# Convert `info` into its Arrow-side `NamedTuple`:
# - `kind` and `sample_unit` are converted to `String`, `channels` to `Vector{String}`,
# - `sample_type` is passed through `onda_sample_type_from_julia_type`,
# - the resolution, offset and sample-rate fields are carried over unchanged.
function Arrow.ArrowTypes.toarrow(info::SamplesInfo)
294+
return (kind=convert(String, info.kind),
295+
channels=convert(Vector{String}, info.channels),
296+
sample_unit=convert(String, info.sample_unit),
297+
sample_resolution_in_unit=info.sample_resolution_in_unit,
298+
sample_offset_in_unit=info.sample_offset_in_unit,
299+
sample_type=onda_sample_type_from_julia_type(info.sample_type),
300+
sample_rate=info.sample_rate)
301+
end
302+
303+
# Map the `SAMPLES_INFO_ARROW_NAME` tag plus a concrete `SamplesInfoArrowType{R,O,SR}` back
# to a Julia type. The result is a `SamplesInfo` type with `String`/`Vector{String}`
# text fields and the same `R`, `O`, `SR`; its sample-type parameter is left as
# `<:LPCM_SAMPLE_TYPE_UNION` rather than a single concrete type.
function Arrow.ArrowTypes.JuliaType(::Val{SAMPLES_INFO_ARROW_NAME}, ::Type{SamplesInfoArrowType{R,O,SR}}) where {R,O,SR}
304+
return SamplesInfo{String,Vector{String},String,R,O,<:LPCM_SAMPLE_TYPE_UNION,SR}
305+
end
306+
307+
# Rebuild a `SamplesInfo` from its Arrow fields by splatting them into the `SamplesInfo`
# constructor with `validate=false`.
function Arrow.ArrowTypes.fromarrow(::Type{<:SamplesInfo}, fields...)
    return SamplesInfo(fields...; validate=false)
end
308+
280309
#####
281310
##### duck-typed utilities
282311
#####

src/utilities.jl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,11 @@ TimeSpans.istimespan(::NamedTupleTimeSpan) = true
5454
TimeSpans.start(x::NamedTupleTimeSpan) = x.start
5555
TimeSpans.stop(x::NamedTupleTimeSpan) = x.stop
5656

57+
# Name tag for `TimeSpan` on the Arrow side; used by the `arrowname` and `JuliaType`
# overloads defined just below.
const TIME_SPAN_ARROW_NAME = Symbol("JuliaLang.TimeSpan")
58+
59+
# `TimeSpan` values are tagged with `TIME_SPAN_ARROW_NAME` when written to Arrow.
function Arrow.ArrowTypes.arrowname(::Type{TimeSpan})
    return TIME_SPAN_ARROW_NAME
end
60+
# Arrow values tagged with `TIME_SPAN_ARROW_NAME` are read back as `TimeSpan`. The function
# is written fully qualified (`Arrow.ArrowTypes...`), matching the `arrowname` overload
# above, so this definition does not rely on a bare `ArrowTypes` binding being in scope.
Arrow.ArrowTypes.JuliaType(::Val{TIME_SPAN_ARROW_NAME}) = TimeSpan
61+
5762
#####
5863
##### zstd_compress/zstd_decompress
5964
#####

0 commit comments

Comments
 (0)