Skip to content

Commit

Permalink
break up some minor utility functions (#71)
Browse files Browse the repository at this point in the history
  • Loading branch information
jrevels authored Mar 28, 2021
1 parent 3d0a8bd commit fa2dab8
Show file tree
Hide file tree
Showing 8 changed files with 71 additions and 30 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Onda"
uuid = "e853f5be-6863-11e9-128d-476edb89bfb5"
authors = ["Beacon Biosignals, Inc."]
version = "0.12.3"
version = "0.13.0"

[deps]
Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
Expand Down
1 change: 1 addition & 0 deletions docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ Onda.file_format_string
## Utilities

```@docs
Onda.materialize
Onda.gather
Onda.validate_on_construction
Onda.upgrade_onda_dataset_to_v0_5!
Expand Down
19 changes: 11 additions & 8 deletions src/annotations.jl
Original file line number Diff line number Diff line change
Expand Up @@ -66,22 +66,25 @@ Tables.columnnames(x::Annotation) = Tables.columnnames(getfield(x, :_row))
#####

"""
read_annotations(io_or_path; materialize::Bool=false, validate_schema::Bool=true)
read_annotations(io_or_path; validate_schema::Bool=true)
Return the `*.onda.annotations.arrow`-compliant table read from `io_or_path`.
If `validate_schema` is `true`, the table's schema will be validated to ensure it is
a `*.onda.annotations.arrow`-compliant table. An `ArgumentError` will be thrown if
any schema violation is detected.
If `materialize` is `false`, the returned table will be an `Arrow.Table` while if
`materialize` is `true`, the returned table will be a `NamedTuple` of columns. The
primary difference is that the former has a conversion-on-access behavior, while
for the latter, any potential conversion cost has been paid up front.
"""
function read_annotations(io_or_path; materialize::Bool=false, validate_schema::Bool=true)
table = read_onda_table(io_or_path; materialize)
function read_annotations(io_or_path; materialize::Union{Missing,Bool}=missing, validate_schema::Bool=true)
    # Read the table first; schema validation (when requested) always runs on
    # the table as read, before any deprecated materialization step.
    table = read_onda_table(io_or_path)
    if validate_schema
        validate_annotation_schema(Tables.schema(table))
    end
    # `materialize` is a deprecated keyword: `missing` means the caller did not
    # pass it, so no warning is emitted and the table is returned as-is.
    ismissing(materialize) && return table
    if !materialize
        @warn "`read_annotations(x; materialize=false)` is deprecated; use `read_annotations(x)` instead"
        return table
    end
    @warn "`read_annotations(x; materialize=true)` is deprecated; use `Onda.materialize(read_annotations(x))` instead"
    # The keyword argument shadows the function name here, hence the
    # module-qualified call.
    return Onda.materialize(table)
end

Expand Down
19 changes: 11 additions & 8 deletions src/signals.jl
Original file line number Diff line number Diff line change
Expand Up @@ -133,22 +133,25 @@ Tables.columnnames(x::Signal) = Tables.columnnames(getfield(x, :_row))
#####

"""
read_signals(io_or_path; materialize::Bool=false, validate_schema::Bool=false)
read_signals(io_or_path; validate_schema::Bool=false)
Return the `*.onda.signals.arrow`-compliant table read from `io_or_path`.
If `validate_schema` is `true`, the table's schema will be validated to ensure it is
a `*.onda.signals.arrow`-compliant table. An `ArgumentError` will be thrown if
any schema violation is detected.
If `materialize` is `false`, the returned table will be an `Arrow.Table` while if
`materialize` is `true`, the returned table will be a `NamedTuple` of columns. The
primary difference is that the former has a conversion-on-access behavior, while
for the latter, any potential conversion cost has been paid up front.
"""
function read_signals(io_or_path; materialize::Bool=false, validate_schema::Bool=false)
table = read_onda_table(io_or_path; materialize)
function read_signals(io_or_path; materialize::Union{Missing,Bool}=missing, validate_schema::Bool=false)
    # Read the table, then optionally validate its schema before handling the
    # deprecated `materialize` keyword.
    table = read_onda_table(io_or_path)
    if validate_schema
        validate_signal_schema(Tables.schema(table))
    end
    # `missing` means the deprecated keyword was not supplied: return quietly.
    if ismissing(materialize)
        return table
    elseif materialize
        @warn "`read_signals(x; materialize=true)` is deprecated; use `Onda.materialize(read_signals(x))` instead"
        # The keyword argument shadows the function name, so qualify the call.
        return Onda.materialize(table)
    end
    @warn "`read_signals(x; materialize=false)` is deprecated; use `read_signals(x)` instead"
    return table
end

Expand Down
49 changes: 40 additions & 9 deletions src/utilities.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,14 @@ const MINIMUM_ONDA_FORMAT_VERSION = v"0.5"

const MAXIMUM_ONDA_FORMAT_VERSION = v"0.5"

function is_supported_onda_format_version(v::VersionNumber)
min_major, min_minor = MINIMUM_ONDA_FORMAT_VERSION.major, MINIMUM_ONDA_FORMAT_VERSION.minor
max_major, max_minor = MAXIMUM_ONDA_FORMAT_VERSION.major, MAXIMUM_ONDA_FORMAT_VERSION.minor
return (min_major <= v.major <= max_major) && (min_minor <= v.minor <= max_minor)
# Return `true` if `v` lies within the inclusive range `[lo, hi]`, comparing
# only the major and minor components (patch, prerelease and build fields of
# all three versions are ignored).
#
# The `(major, minor)` pairs are compared lexicographically rather than
# component-by-component: checking `lo.minor <= v.minor <= hi.minor`
# independently of the major version wrongly rejects e.g. `v"0.7"` or `v"1.0"`
# for the range `v"0.5"`..`v"1.2"`. For ranges whose bounds share a major
# version the result is the same as a per-component check.
function is_supported_version(v::VersionNumber, lo::VersionNumber, hi::VersionNumber)
    return (lo.major, lo.minor) <= (v.major, v.minor) <= (hi.major, hi.minor)
end

# Check `v` against the Onda format version range delimited by
# `MINIMUM_ONDA_FORMAT_VERSION` and `MAXIMUM_ONDA_FORMAT_VERSION`.
function is_supported_onda_format_version(v::VersionNumber)
    return is_supported_version(v, MINIMUM_ONDA_FORMAT_VERSION, MAXIMUM_ONDA_FORMAT_VERSION)
end

const ALPHANUMERIC_SNAKE_CASE_CHARACTERS = Char['_',
'0':'9'...,
'a':'z'...]
Expand Down Expand Up @@ -100,11 +102,14 @@ write_full_path(path, bytes) = write(path, bytes)
##### tables
#####

function table_has_supported_onda_format_version(table)
function table_has_metadata(predicate, table)
m = Arrow.getmetadata(table)
return m isa Dict && is_supported_onda_format_version(VersionNumber(get(m, "onda_format_version", v"0.0.0")))
return m isa Dict && predicate(m)
end

# Return whether `table` carries metadata whose `"onda_format_version"` entry
# is a supported Onda format version. A missing entry falls back to
# `v"0.0.0"` before the support check.
function table_has_required_onda_metadata(table)
    return table_has_metadata(table) do metadata
        version = VersionNumber(get(metadata, "onda_format_version", v"0.0.0"))
        return is_supported_onda_format_version(version)
    end
end

# It would be better if Arrow.jl supported a generic API for nonstandard path-like types so that
# we can avoid potential intermediate copies here, but its documentation is explicit that it only
# supports `Union{IO,String}`.
Expand All @@ -116,10 +121,12 @@ write_arrow_table(path::String, table; kwargs...) = Arrow.write(path, table; kwa
# Write `table` to `io` via `Arrow.write`, passing `file=true` by default;
# caller-supplied `kwargs` come after it in the call and are forwarded as-is.
function write_arrow_table(io::IO, table; kwargs...)
    return Arrow.write(io, table; file=true, kwargs...)
end
# Fallback for path-like values that are neither `String` nor `IO`: serialize
# `table` into an in-memory buffer, then hand the resulting bytes to
# `write_full_path`.
function write_arrow_table(path, table; kwargs...)
    buffer = IOBuffer()
    write_arrow_table(buffer, table; kwargs...)
    return write_full_path(path, take!(buffer))
end

function read_onda_table(path; materialize::Bool=false)
function read_onda_table(path)
table = read_arrow_table(path)
table_has_supported_onda_format_version(table) || error("supported `onda_format_version` not found in annotations file")
return materialize ? map(collect, Tables.columntable(table)) : table
if !table_has_required_onda_metadata(table)
throw(ArgumentError("required Onda metadata not found in Arrow file; use `Onda.read_arrow_table` to read the file without this validation check"))
end
return table
end

function write_onda_table(path, table; kwargs...)
Expand Down Expand Up @@ -169,3 +176,27 @@ function gather(column_name, tables::Vararg{Any,N};
iters = ntuple(i -> _iterator_for_column(tables[i], column_name), N)
return Dict(id => ntuple(i -> extract(tables[i], locs[i]), N) for (id, locs) in locations(iters))
end

"""
    materialize(table)

Return a fully deserialized copy of `table`.

Use this when `table` deserializes or converts its elements on access (as
`Arrow.Table` does) and you would rather pay that cost once, up front, before
accessing the table repeatedly. For example:

```
julia> annotations = read_annotations(path_to_annotations_file);

# iterate through all elements of `annotations.span`
julia> @time foreach(identity, (span for span in annotations.span));
  0.000126 seconds (306 allocations: 6.688 KiB)

julia> materialized = Onda.materialize(annotations);

julia> @time foreach(identity, (span for span in materialized.span));
  0.000014 seconds (2 allocations: 80 bytes)
```
"""
function materialize(table)
    # Convert to a column-oriented table, then `collect` every column.
    columns = Tables.columntable(table)
    return map(collect, columns)
end
3 changes: 2 additions & 1 deletion test/annotations.jl
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ end
seekstart(io)
for roundtripped in (read_annotations(annotations_file_path; materialize=false, validate_schema=false),
read_annotations(annotations_file_path; materialize=true, validate_schema=true),
read_annotations(io; validate_schema=true))
Onda.materialize(read_annotations(io)),
read_annotations(seekstart(io); validate_schema=true))
roundtripped = collect(Tables.rows(roundtripped))
@test length(roundtripped) == length(annotations)
for (r, a) in zip(roundtripped, annotations)
Expand Down
5 changes: 3 additions & 2 deletions test/samples.jl
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@ end
@testset "`Samples` pretty printing" begin
info = SamplesInfo("eeg", ["a", "b", "c-d"], "unit", 0.25, -0.5, Int16, 50.2)
samples = Samples(rand(Random.MersenneTwister(0), info.sample_type, 3, 5), info, true)
@test sprint(show, samples, context=(:compact => true)) == "Samples(3×5 Array{Int16,2})"
M = VERSION >= v"1.6" ? "Matrix{Int16}" : "Array{Int16,2}"
@test sprint(show, samples, context=(:compact => true)) == "Samples(3×5 $M)"
@test sprint(show, samples) == """
Samples (00:00:00.099601594):
info.kind: "eeg"
Expand All @@ -131,7 +132,7 @@ end
info.sample_rate: 50.2 Hz
encoded: true
data:
3×5 Array{Int16,2}:
3×5 $M:
20032 4760 27427 -20758 24287
14240 5037 5598 -5888 21784
16885 600 20880 -32493 -19305"""
Expand Down
3 changes: 2 additions & 1 deletion test/signals.jl
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ end
seekstart(io)
for roundtripped in (read_signals(signals_file_path; materialize=false, validate_schema=false),
read_signals(signals_file_path; materialize=true, validate_schema=true),
read_signals(io; validate_schema=true))
Onda.materialize(read_signals(io)),
read_signals(seekstart(io); validate_schema=true))
roundtripped = collect(Tables.rows(roundtripped))
@test length(roundtripped) == length(signals)
for (r, s) in zip(roundtripped, signals)
Expand Down

2 comments on commit fa2dab8

@jrevels
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/33031

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.13.0 -m "<description of version>" fa2dab801be35556a4aae9a19f9300af4989cfbb
git push origin v0.13.0

Please sign in to comment.