diff --git a/Project.toml b/Project.toml index 3f334496..3930e992 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Onda" uuid = "e853f5be-6863-11e9-128d-476edb89bfb5" authors = ["Beacon Biosignals, Inc."] -version = "0.7.6" +version = "0.8.0" [deps] CodecZstd = "6b39b394-51ab-5f42-8807-6242bab2b4c2" diff --git a/docs/src/index.md b/docs/src/index.md index f3286c7d..403c7acc 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -14,7 +14,6 @@ Note that Onda.jl's API follows a specific philosophy with respect to property a Dataset samples_path create_recording! -set_duration! load store! delete! @@ -26,8 +25,10 @@ save_recordings_file ```@docs Signal signal_from_template +span Annotation Recording +set_span! annotate! ``` @@ -68,3 +69,9 @@ serialize_lpcm LPCM LPCMZst ``` + +## Upgrading Older Datasets to Newer Datasets + +```@docs +Onda.upgrade_onda_format_from_v0_2_to_v0_3! +``` diff --git a/examples/flac.jl b/examples/flac.jl index 086d600f..d9f2f742 100644 --- a/examples/flac.jl +++ b/examples/flac.jl @@ -2,7 +2,7 @@ # that it's a naive implementation - it just shells out and assumes you have # the `flac` command line utility installed and available on your system. 
-using Onda, Test, Random +using Onda, Test, Random, Dates ##### ##### FLAC @@ -69,7 +69,7 @@ end if VERSION >= v"1.1.0" @testset "FLAC example" begin - signal = Signal([:a, :b, :c], :unit, 0.25, Int16, 50, :flac, Dict(:level => 2)) + signal = Signal([:a, :b, :c], Nanosecond(0), Nanosecond(0), :unit, 0.25, 0.0, Int16, 50.0, :flac, Dict(:level => 2)) samples = encode(Samples(signal, false, rand(MersenneTwister(1), 3, 50 * 10))).data s = serializer(signal) bytes = serialize_lpcm(samples, s) diff --git a/examples/tour.jl b/examples/tour.jl index 56479989..fc05f074 100644 --- a/examples/tour.jl +++ b/examples/tour.jl @@ -22,10 +22,13 @@ eeg_signal = Signal(channel_names=[:fp1, :f3, :c3, :p3, :fz, :cz, :pz, :fp2, :f4, :c4, :p4, :f8, :t4, :t6, :o2], + start_nanosecond=Nanosecond(0), + stop_nanosecond=Nanosecond(Second(20)), sample_unit=:microvolts, sample_resolution_in_unit=0.25, + sample_offset_in_unit=0.0, sample_type=Int16, - sample_rate=256, # Hz + sample_rate=256.0, # Hz file_extension=:lpcm, file_options=nothing) @@ -33,10 +36,13 @@ ecg_signal = signal_from_template(eeg_signal; channel_names=[:avl, :avr], file_extension=Symbol("lpcm.zst")) spo2_signal = Signal(channel_names=[:spo2], + start_nanosecond=Nanosecond(Second(3)), + stop_nanosecond=Nanosecond(Second(17)), sample_unit=:percentage, sample_resolution_in_unit=(100 / typemax(UInt8)), + sample_offset_in_unit=0.0, sample_type=UInt8, - sample_rate=20, # Hz + sample_rate=20.5, # Hz file_extension=:lpcm, file_options=nothing) @@ -52,35 +58,45 @@ spo2_signal = Signal(channel_names=[:spo2], # an aside: The hypothetical person from which these hypothetical signals were # hypothetically recorded must be experiencing some pretty crazy pathologies if # their EEG/ECG are just saw waves... 
-saws(signal, seconds) = [(j + i) % 100 * signal.sample_resolution_in_unit for - i in 1:channel_count(signal), - j in 1:(signal.sample_rate * seconds)] +saws(signal) = [(j + i) % 100 * signal.sample_resolution_in_unit for + i in 1:channel_count(signal), j in 1:sample_count(signal)] # The second argument in the `Samples` constructor is a `Bool` that specifies if # the data is in its encoded representation. Here, we construct our signals as # "decoded" (i.e. in actual units, though for this toy example it doesn't really # matter) and then "encode" them according to the specified: -eeg = encode(Samples(eeg_signal, false, saws(eeg_signal, 20))) -ecg = encode(Samples(ecg_signal, false, saws(ecg_signal, 20))) -spo2 = encode(Samples(spo2_signal, false, saws(spo2_signal, 20))) +eeg = encode(Samples(eeg_signal, false, saws(eeg_signal))) +ecg = encode(Samples(ecg_signal, false, saws(ecg_signal))) +spo2 = encode(Samples(spo2_signal, false, saws(spo2_signal))) # Here are some basic functions for examining `Samples` instances: -@test sample_count(eeg) == eeg.signal.sample_rate * 20 +@test sample_count(eeg) == sample_count(eeg_signal) == 20 * eeg_signal.sample_rate @test channel_count(eeg) == channel_count(eeg_signal) == 19 @test channel(eeg, :f3) == channel(eeg_signal, :f3) == 2 @test channel(eeg, 2) == channel(eeg_signal, 2) == :f3 -@test duration(eeg) == duration(ecg) == duration(spo2) == Second(20) +@test duration(eeg) == duration(span(eeg_signal)) == Second(20) # Here are some basic indexing examples using `getindex` and `view` wherein # channel names and sample-rate-agnostic `TimeSpan`s are employed as indices: -span = TimeSpan(Second(3), Second(9)) -span_range = index_from_time(eeg.signal.sample_rate, span) -@test eeg[:, span].data == view(eeg, :, span_range).data +slice_span = TimeSpan(Second(3), Second(9)) +span_range = index_from_time(eeg.signal.sample_rate, slice_span) +@test eeg[:, slice_span].data == view(eeg, :, span_range).data @test eeg[:f3, :].data == view(eeg, 
2, :).data @test eeg[:f3, 1:10].data == view(eeg, 2, 1:10).data -@test eeg[:f3, span].data == view(eeg, 2, span_range).data +@test eeg[:f3, slice_span].data == view(eeg, 2, span_range).data @test eeg[[:f3, :c3, :p3], 1:10].data == view(eeg, 2:4, 1:10).data -@test eeg[[:c3, 4, :f3], span].data == view(eeg, [3, 4, 2], span_range).data +@test eeg[[:c3, 4, :f3], slice_span].data == view(eeg, [3, 4, 2], span_range).data + +# NOTE: Keep in mind that `duration(samples.signal)` is not generally equivalent +# to `duration(samples)`; the former is the duration of the original signal in +# the context of its parent recording, whereas the latter is the actual duration +# of `samples.data` given `signal.sample_rate`. This is similarly true for the +# `sample_count` function for the same reason! +eeg_slice = eeg[:, slice_span] +@test duration(eeg_slice) == duration(slice_span) +@test duration(eeg_slice) != duration(eeg_signal) +@test sample_count(eeg_slice) == length(span_range) +@test sample_count(eeg_slice) != sample_count(eeg_signal) # NOTE: `Samples` is not an `AbstractArray` subtype; this special indexing # behavior is only defined for convenient data manipulation. It is thus fine @@ -106,12 +122,7 @@ dataset = Dataset(joinpath(root, "example.onda"); create=true) # `dataset.recordings` dictionary before returning the pair, such that the # `recording` variable we assign here references the same `Recording` instance # stored within `dataset`. -uuid, recording = create_recording!(dataset, duration(eeg), Dict()) - -# The last argument to `create_recording!` above provided a dictionary for the -# `custom` field of the returned `Recording` instance. Let's populate it with -# some custom metadata: -recording.custom["some_custom_information"] = "hey, I think this recording might be fake" +uuid, recording = create_recording!(dataset) # Store our signals/samples for the recording in our `dataset`. 
This both serializes # sample data to disk and adds the signal metadata to the recording stored in @@ -125,7 +136,7 @@ store!(dataset, uuid, :spo2, spo2) # like in there. For example, Beacon Biosignals stores JSON snippets in # annotations. Here, let's just go the simple route and pretend we found an # epileptiform spike in our EEG/ECG/SpO2 recording: -spike_annotation = Annotation("epileptiform", "spike", TimeSpan(Millisecond(1500), Second(2))) +spike_annotation = Annotation("epileptiform_spike", TimeSpan(Millisecond(1500), Second(2))) annotate!(recording, spike_annotation) # You can add as many annotations as you'd like to a recording. Just keep in mind @@ -138,7 +149,7 @@ annotate!(recording, spike_annotation) # short 2 second epochs across the entire recording: for (i, t) in enumerate(2:2:Second(duration(recording)).value) stage = rand(["awake", "nrem1", "nrem2", "nrem3", "rem"]) - ann = Annotation("sleep_stage", "$stage", TimeSpan(Second(t - 2), Second(t))) + ann = Annotation(stage, TimeSpan(Second(t - 2), Second(t))) annotate!(recording, ann) end @@ -159,7 +170,7 @@ dataset = Dataset(joinpath(root, "example.onda")) uuid, recording = first(dataset.recordings) # Grab the first spike annotation we see... -spike_annotation = first(ann for ann in recording.annotations if ann.value == "spike") +spike_annotation = first(ann for ann in recording.annotations if ann.value == "epileptiform_spike") # ...and load that segment of the EEG from disk as a `Samples` instance! spike_segment = load(dataset, uuid, :eeg, spike_annotation) @@ -180,7 +191,7 @@ spike_segment = load(dataset, uuid, :eeg, spike_annotation) # Welp, looks like a spike to me! Let's leave an annotation to confirm we # checked it. 
Remember - `spike_annotation isa AbstractTimeSpan`, so we can # generally pass it wherever we'd pass a `TimeSpan` object: -annotate!(recording, Annotation("confirmation", "spike", spike_annotation)) +annotate!(recording, Annotation("confirmed_spike_by_me", spike_annotation)) # ...and, finally, of course, let's save our annotation! save_recordings_file(dataset) diff --git a/src/Onda.jl b/src/Onda.jl index e7d6d6a2..f07af73e 100644 --- a/src/Onda.jl +++ b/src/Onda.jl @@ -5,7 +5,7 @@ using MsgPack using TranscodingStreams using CodecZstd -const ONDA_FORMAT_VERSION = v"0.2" +const ONDA_FORMAT_VERSION = v"0.3" ##### ##### utilities @@ -56,7 +56,7 @@ export AbstractTimeSpan, TimeSpan, contains, overlaps, shortest_timespan_contain index_from_time, time_from_index, duration include("recordings.jl") -export Recording, Signal, signal_from_template, Annotation, annotate! +export Recording, Signal, signal_from_template, Annotation, annotate!, span include("serialization.jl") export AbstractLPCMSerializer, serializer, deserialize_lpcm, serialize_lpcm, @@ -66,9 +66,70 @@ include("samples.jl") export Samples, encode, encode!, decode, decode!, channel, channel_count, sample_count include("dataset.jl") -export Dataset, samples_path, create_recording!, set_duration!, load, store!, delete!, +export Dataset, samples_path, create_recording!, set_span!, load, store!, delete!, save_recordings_file include("printing.jl") +##### +##### upgrades/deprecations +##### + +@deprecate set_duration!(dataset, uuid, duration) begin + r = dataset.recordings[uuid] + set_span!(r, TimeSpan(Nanosecond(0), duration)) + r +end + +""" + Onda.upgrade_onda_format_from_v0_2_to_v0_3!(path, combine_annotation_key_value) + +Upgrade the Onda v0.2 dataset at `path` to a Onda v0.3 dataset, returning the +upgraded `Dataset`. 
This upgrade process overwrites `path/recordings.msgpack.zst` +with a v0.3-compliant version of this file; for safety's sake, the old v0.2 file +is preserved at `path/old.recordings.msgpack.zst.backup`. + +A couple of the Onda v0.2 -> v0.3 changes require some special handling: + +- The `custom` field was removed from recording objects. This function thus writes out + a file at `path/recordings_custom.msgpack.zst` that contains a map of UUIDs to + corresponding recordings' `custom` values before deleting the `custom` field. This + file can be deserialized via `MsgPack.unpack(Onda.zstd_decompress(read("recordings_custom.msgpack.zst")))`. + +- Annotations no longer have a `key` field. Thus, each annotation's existing `key` and `value` + fields are combined into the single new `value` field via the provided callback + `combine_annotation_key_value(annotation_key, annotation_value)`. +""" +function upgrade_onda_format_from_v0_2_to_v0_3!(path, combine_annotation_key_value) + file_path = joinpath(path, "recordings.msgpack.zst") + bytes = zstd_decompress(read(file_path)) + io = IOBuffer(bytes) + read(io, UInt8) == 0x92 || error("corrupt recordings.msgpack.zst") + header = MsgPack.unpack(io, Header) + v"0.2" <= header.onda_format_version < v"0.3" || error("unsupported original onda_format_version: $(header.onda_format_version)") + mv(file_path, joinpath(path, "old.recordings.msgpack.zst.backup")) # displace the original only after it has been validated + recordings = MsgPack.unpack(io, Dict{UUID,Any}) + customs = Dict{UUID,Any}(uuid => recording["custom"] for (uuid, recording) in recordings) + write(joinpath(path, "recordings_custom.msgpack.zst"), zstd_compress(MsgPack.pack(customs))) + for (uuid, recording) in recordings + signal_stop_nanosecond = recording["duration_in_nanoseconds"] + for signal in values(recording["signals"]) + signal["start_nanosecond"] = 0 + signal["stop_nanosecond"] = signal_stop_nanosecond + signal["sample_offset_in_unit"] = 0.0 + signal["sample_rate"] = float(signal["sample_rate"]) + end + for annotation
in recording["annotations"] + annotation["value"] = combine_annotation_key_value(annotation["key"], annotation["value"]) + delete!(annotation, "key") + end + delete!(recording, "duration_in_nanoseconds") + delete!(recording, "custom") + end + fixed_recordings = MsgPack.unpack(MsgPack.pack(recordings), Dict{UUID,Recording}) + dataset = Dataset(path, Header(v"0.3.0", true), fixed_recordings) + save_recordings_file(dataset) + return dataset +end + end # module diff --git a/src/dataset.jl b/src/dataset.jl index ee05bf14..f0074708 100644 --- a/src/dataset.jl +++ b/src/dataset.jl @@ -2,48 +2,39 @@ ##### `Dataset` ##### -struct Dataset{C} +struct Dataset path::String header::Header - recordings::Dict{UUID,Recording{C}} + recordings::Dict{UUID,Recording} end """ - Dataset(path, custom_type=Any; create=false, strict=()) + Dataset(path; create=false) Return a `Dataset` instance that contains all metadata necessary to read and write to the Onda dataset stored at `path`. Note that this constuctor loads all the `Recording` objects contained in `path/recordings.msgpack.zst`. -`custom_type` is the `typeof` of the `custom` value found in each `Recording` -object in the dataset. - If `create` is `true`, then an empty Onda dataset will be created at `path`. - -The `strict` keyword argument is forwarded to `MsgPack.unpack` when that -function is called while parsing `path/recordings.msgpack.zst` (see the -MsgPack documentation for details regarding `strict`). 
""" -function Dataset(path, custom_type::Type{C}=Any; - create::Bool=false, strict=()) where {C} +function Dataset(path; create::Bool=false) path = rstrip(abspath(path), '/') samples_path = joinpath(path, "samples") if create - endswith(path, ".onda") || throw(ArgumentError("cannot create dataset at $path: path does not end with .onda")) if isdir(path) isempty(readdir(path)) || throw(ArgumentError("cannot create dataset at $path: directory exists and is nonempty")) else mkdir(path) end - mkdir(samples_path) initial_header = Header(ONDA_FORMAT_VERSION, true) - initial_recordings = Dict{UUID,Recording{C}}() + initial_recordings = Dict{UUID,Recording}() write_recordings_file(path, initial_header, initial_recordings) - elseif !(isdir(path) && isdir(samples_path)) + elseif !isdir(path) throw(ArgumentError("$path is not a valid Onda dataset")) end - header, recordings = read_recordings_file(path, C, strict) - return Dataset{C}(path, header, recordings) + !isdir(samples_path) && mkdir(samples_path) + header, recordings = read_recordings_file(path) + return Dataset(path, header, recordings) end """ @@ -56,8 +47,6 @@ function save_recordings_file(dataset::Dataset) return write_recordings_file(dataset.path, dataset.header, dataset.recordings) end -Base.@deprecate overwrite_recordings(dataset) save_recordings_file(dataset) - ##### ##### `merge!` ##### @@ -119,46 +108,21 @@ end ##### """ - create_recording!(dataset::Dataset{C}, duration::Nanosecond, - custom=nothing, uuid::UUID=uuid4()) + create_recording!(dataset::Dataset, uuid::UUID=uuid4()) -Create `uuid::UUID => recording::Recording` where `recording` is constructed -via the provided `duration` and `custom` arguments, `uuid` is the provided UUID -(which is computed if not provided), add the pair to `dataset.recordings`, and -return the pair. - -The `custom` argument is passed along to the `Recording{C}` constructor, such -that `custom isa C` must hold true. 
+Create `uuid::UUID => recording::Recording`, add the pair to `dataset.recordings`, +and return the pair. """ -function create_recording!(dataset::Dataset{C}, duration::Nanosecond, - custom=nothing, uuid::UUID=uuid4()) where {C} +function create_recording!(dataset::Dataset, uuid::UUID=uuid4()) if haskey(dataset.recordings, uuid) throw(ArgumentError("recording with UUID $uuid already exists in dataset")) end - recording = Recording{C}(duration, Dict{Symbol,Signal}(), Set{Annotation}(), custom) + recording = Recording(Dict{Symbol,Signal}(), Set{Annotation}()) dataset.recordings[uuid] = recording mkpath(samples_path(dataset, uuid)) return uuid => recording end -##### -##### `set_duration!` -##### - -""" - set_duration!(dataset::Dataset{C}, uuid::UUID, duration::Nanosecond) where {C} - -Replace `dataset.recordings[uuid]` with a `Recording` instance that is the exact -same as the existing recording, but with the `duration_in_nanoseconds` field set -to the provided `duration`. Returns the newly constructed `Recording` instance. 
-""" -function set_duration!(dataset::Dataset{C}, uuid::UUID, duration::Nanosecond) where {C} - r = dataset.recordings[uuid] - r = Recording{C}(duration, r.signals, r.annotations, r.custom) - dataset.recordings[uuid] = r - return r -end - ##### ##### `load` ##### diff --git a/src/printing.jl b/src/printing.jl index b59e2e0f..af817fac 100644 --- a/src/printing.jl +++ b/src/printing.jl @@ -16,8 +16,11 @@ function Base.show(io::IO, samples::Samples) duration_in_nanoseconds = round(Int, duration_in_seconds * 1_000_000_000) println(io, "Samples (", format_duration(duration_in_nanoseconds), "):") println(io, " signal.channel_names: ", channel_names_string(samples.signal.channel_names)) + println(io, " signal.start_nanosecond: ", samples.signal.start_nanosecond, " (", format_duration(samples.signal.start_nanosecond), ")") + println(io, " signal.stop_nanosecond: ", samples.signal.stop_nanosecond, " (", format_duration(samples.signal.stop_nanosecond), ")") println(io, " signal.sample_unit: ", repr(samples.signal.sample_unit)) println(io, " signal.sample_resolution_in_unit: ", samples.signal.sample_resolution_in_unit) + println(io, " signal.sample_offset_in_unit: ", samples.signal.sample_offset_in_unit) println(io, " signal.sample_type: ", samples.signal.sample_type) println(io, " signal.sample_rate: ", samples.signal.sample_rate, " Hz") println(io, " signal.file_extension: ", repr(samples.signal.file_extension)) @@ -34,8 +37,11 @@ function Base.show(io::IO, signal::Signal) else println(io, "Signal:") println(io, " channel_names: ", channel_names_string(signal.channel_names)) + println(io, " start_nanosecond: ", signal.start_nanosecond, " (", format_duration(signal.start_nanosecond), ")") + println(io, " stop_nanosecond: ", signal.stop_nanosecond, " (", format_duration(signal.stop_nanosecond), ")") println(io, " sample_unit: ", repr(signal.sample_unit)) println(io, " sample_resolution_in_unit: ", signal.sample_resolution_in_unit) + println(io, " sample_offset_in_unit: ", 
signal.sample_offset_in_unit) println(io, " sample_type: ", signal.sample_type) println(io, " sample_rate: ", signal.sample_rate, " Hz") println(io, " file_extension: ", repr(signal.file_extension)) @@ -45,41 +51,22 @@ end function Base.show(io::IO, recording::Recording) if get(io, :compact, false) - duration_string = format_duration(recording.duration_in_nanoseconds) + duration_string = isempty(recording.signals) ? "" : format_duration(duration(recording)) print(io, "Recording(", duration_string, ')') else - duration_in_seconds = recording.duration_in_nanoseconds.value / 1_000_000_000 - duration_string = string('(', format_duration(recording.duration_in_nanoseconds), - "; ", duration_in_seconds, " seconds)") - println(io, "Recording:") - println(io, " duration_in_nanoseconds: ", recording.duration_in_nanoseconds, " ", duration_string) + if isempty(recording.signals) + duration_string = "" + else + duration_string = string(format_duration(duration(recording)), "; ", + duration(recording).value / 1_000_000_000, " seconds") + end + println(io, "Recording (", duration_string, ')') println(io, " signals:") compact_io = IOContext(io, :compact => true) for (name, signal) in recording.signals println(compact_io, " :", name, " => ", signal) end - println(io, " annotations (", length(recording.annotations), " total):") - annotation_counts = Dict() - for ann in recording.annotations - annotation_counts[ann.key] = get(annotation_counts, ann.key, 0) + 1 - end - k = 1 - annotation_counts = sort(collect(annotation_counts), by=(p -> p[2]), lt=(>)) - for (x, n) in annotation_counts - println(io, " ", n, " instance(s) of ", x) - k += 1 - if k > 5 - println(io, " ...and ", length(annotation_counts) - 5, " more.") - break - end - end - print(io, " custom:") - if recording.custom isa Nothing - print(io, " nothing") - else - println(io) - show(io, "text/plain", recording.custom) - end + print(io, " annotations: (", length(recording.annotations), " total)") end end @@ -104,7 +91,7 @@ 
function nanosecond_to_periods(ns::Integer) return (hr, m, s, ms, μs, ns) end -format_duration(t::Period) = format_duration(convert(Nanosecond, t).value) +format_duration(t::Period) = format_duration(convert(Nanosecond, t).value) function format_duration(ns::Integer) hr, m, s, ms, μs, ns = nanosecond_to_periods(ns) diff --git a/src/recordings.jl b/src/recordings.jl index 9c9a6aba..60f17e50 100644 --- a/src/recordings.jl +++ b/src/recordings.jl @@ -56,27 +56,24 @@ end A type representing an individual Onda annotation object. Instances contain the following fields, following the Onda specification for annotation objects: -- `key::String` - `value::String` - `start_nanosecond::Nanosecond` - `stop_nanosecond::Nanosecond` """ struct Annotation <: AbstractTimeSpan - key::String value::String start_nanosecond::Nanosecond stop_nanosecond::Nanosecond - function Annotation(key::AbstractString, value::AbstractString, - start::Nanosecond, stop::Nanosecond) + function Annotation(value::AbstractString, start::Nanosecond, stop::Nanosecond) _validate_timespan(start, stop) - return new(key, value, start, stop) + return new(value, start, stop) end end MsgPack.msgpack_type(::Type{Annotation}) = MsgPack.StructType() -function Annotation(key, value, span::AbstractTimeSpan) - return Annotation(key, value, first(span), last(span)) +function Annotation(value, span::AbstractTimeSpan) + return Annotation(value, first(span), last(span)) end Base.first(annotation::Annotation) = annotation.start_nanosecond @@ -94,21 +91,35 @@ A type representing an individual Onda signal object. 
Instances contain the following fields, following the Onda specification for signal objects: - `channel_names::Vector{Symbol}` +- `start_nanosecond::Nanosecond` +- `stop_nanosecond::Nanosecond` - `sample_unit::Symbol` - `sample_resolution_in_unit::Float64` +- `sample_offset_in_unit::Float64` - `sample_type::DataType` -- `sample_rate::UInt64` +- `sample_rate::Float64` - `file_extension::Symbol` - `file_options::Union{Nothing,Dict{Symbol,Any}}` """ Base.@kwdef struct Signal channel_names::Vector{Symbol} + start_nanosecond::Nanosecond + stop_nanosecond::Nanosecond sample_unit::Symbol sample_resolution_in_unit::Float64 + sample_offset_in_unit::Float64 sample_type::DataType - sample_rate::UInt64 + sample_rate::Float64 file_extension::Symbol file_options::Union{Nothing,Dict{Symbol,Any}} + function Signal(channel_names, start_nanosecond, stop_nanosecond, + sample_unit, sample_resolution_in_unit, sample_offset_in_unit, + sample_type, sample_rate, file_extension, file_options) + _validate_timespan(start_nanosecond, stop_nanosecond) + return new(channel_names, start_nanosecond, stop_nanosecond, + sample_unit, sample_resolution_in_unit, sample_offset_in_unit, + sample_type, sample_rate, file_extension, file_options) + end end function Base.:(==)(a::Signal, b::Signal) @@ -131,8 +142,11 @@ end """ signal_from_template(signal::Signal; channel_names=signal.channel_names, + start_nanosecond=signal.start_nanosecond, + stop_nanosecond=signal.stop_nanosecond, sample_unit=signal.sample_unit, sample_resolution_in_unit=signal.sample_resolution_in_unit, + sample_offset_in_unit=signal.sample_offset_in_unit, sample_type=signal.sample_type, sample_rate=signal.sample_rate, file_extension=signal.file_extension, @@ -142,19 +156,20 @@ Return a `Signal` where each field is mapped to the corresponding keyword argume """ function signal_from_template(signal::Signal; channel_names=signal.channel_names, + start_nanosecond=signal.start_nanosecond, + stop_nanosecond=signal.stop_nanosecond, 
sample_unit=signal.sample_unit, sample_resolution_in_unit=signal.sample_resolution_in_unit, + sample_offset_in_unit=signal.sample_offset_in_unit, sample_type=signal.sample_type, sample_rate=signal.sample_rate, file_extension=signal.file_extension, file_options=signal.file_options) - return Signal(channel_names, sample_unit, sample_resolution_in_unit, - sample_type, sample_rate, file_extension, - file_options) + return Signal(channel_names, start_nanosecond, stop_nanosecond, + sample_unit, sample_resolution_in_unit, sample_offset_in_unit, + sample_type, sample_rate, file_extension, file_options) end -Base.@deprecate copy_with(signal; kwargs...) signal_from_template(signal; kwargs...) - """ channel(signal::Signal, name::Symbol) @@ -176,33 +191,51 @@ Return `length(signal.channel_names)`. """ channel_count(signal::Signal) = length(signal.channel_names) +""" + span(signal::Signal) + +Return `TimeSpan(signal.start_nanosecond, signal.stop_nanosecond)`. +""" +span(signal::Signal) = TimeSpan(signal.start_nanosecond, signal.stop_nanosecond) + +""" + duration(signal::Signal) + +Return `duration(span(signal))`. +""" +duration(signal::Signal) = duration(span(signal)) + +""" + sample_count(signal::Signal) + +Return the number of multichannel samples that fit within `duration(signal)` +given `signal.sample_rate`. +""" +sample_count(signal::Signal) = index_from_time(signal.sample_rate, duration(signal)) - 1 + ##### ##### recordings ##### """ - Recording{C} + Recording A type representing an individual Onda recording object. 
Instances contain the following fields, following the Onda specification for recording objects: -- `duration_in_nanoseconds::Nanosecond` - `signals::Dict{Symbol,Signal}` - `annotations::Set{Annotation}` -- `custom::C` """ -struct Recording{C} - duration_in_nanoseconds::Nanosecond +struct Recording signals::Dict{Symbol,Signal} annotations::Set{Annotation} - custom::C end function Base.:(==)(a::Recording, b::Recording) return all(name -> getfield(a, name) == getfield(b, name), fieldnames(Recording)) end -MsgPack.msgpack_type(::Type{<:Recording}) = MsgPack.StructType() +MsgPack.msgpack_type(::Type{Recording}) = MsgPack.StructType() """ annotate!(recording::Recording, annotation::Annotation) @@ -214,9 +247,37 @@ annotate!(recording::Recording, annotation::Annotation) = push!(recording.annota """ duration(recording::Recording) -Returns `recording.duration_in_nanoseconds`. +Returns `maximum(s -> s.stop_nanosecond, values(recording.signals))`; throws an +`ArgumentError` if `recording.signals` is empty. +""" +function duration(recording::Recording) + isempty(recording.signals) && throw(ArgumentError("`duration(recording)` is not defined if `isempty(recording.signals)`")) + return maximum(s -> s.stop_nanosecond, values(recording.signals)) +end + +""" + set_span!(recording::Recording, name::Symbol, span::AbstractTimeSpan) + +Replace `recording.signals[name]` with a copy that has the `start_nanosecond` +and `stop_nanosecond` fields set to match the provided `span`. Returns the +newly constructed `Signal` instance.
+""" +function set_span!(recording::Recording, name::Symbol, span::AbstractTimeSpan) + signal = signal_from_template(recording.signals[name]; + start_nanosecond=first(span), + stop_nanosecond=last(span)) + recording.signals[name] = signal + return signal +end + """ -duration(recording::Recording) = recording.duration_in_nanoseconds + set_span!(recording::Recording, span::AbstractTimeSpan) + +Return `Dict(name => set_span!(recording, name, span) for name in keys(recording.signals))` +""" +function set_span!(recording::Recording, span::AbstractTimeSpan) + return Dict(name => set_span!(recording, name, span) for name in keys(recording.signals)) +end ##### ##### reading/writing `recordings.msgpack.zst` @@ -229,7 +290,7 @@ end MsgPack.msgpack_type(::Type{Header}) = MsgPack.StructType() -function read_recordings_file(path, ::Type{C}, additional_strict_args) where {C} +function read_recordings_file(path) file_path = joinpath(path, "recordings.msgpack.zst") bytes = zstd_decompress(read(file_path)) io = IOBuffer(bytes) @@ -239,15 +300,14 @@ function read_recordings_file(path, ::Type{C}, additional_strict_args) where {C} if !is_supported_onda_format_version(header.onda_format_version) @warn("attempting to load `Dataset` with unsupported Onda version", supported=ONDA_FORMAT_VERSION, attempting=header.onda_format_version) + @warn("consider upgrading old datasets via `Onda.upgrade_onda_format_from_v0_2_to_v0_3!`") end - R = Recording{C} - strict = header.ordered_keys ? (R,) : () - strict = (strict..., additional_strict_args...) - recordings = MsgPack.unpack(io, Dict{UUID,R}; strict=strict) + strict = header.ordered_keys ?
(Recording,) : () + recordings = MsgPack.unpack(io, Dict{UUID,Recording}; strict=strict) return header, recordings end -function write_recordings_file(path, header::Header, recordings::Dict{UUID,<:Recording}) +function write_recordings_file(path, header::Header, recordings::Dict{UUID,Recording}) file_path = joinpath(path, "recordings.msgpack.zst") backup_file_path = joinpath(path, "_recordings.msgpack.zst.backup") isfile(file_path) && mv(file_path, backup_file_path) diff --git a/src/samples.jl b/src/samples.jl index e877ea84..d1f4a56e 100644 --- a/src/samples.jl +++ b/src/samples.jl @@ -19,60 +19,8 @@ Return a `Samples` instance with the following fields: Note that `getindex` and `view` are defined on `Samples` to accept normal integer indices, but also accept channel names for row indices and [`TimeSpan`](@ref) -values for column indices: - -``` -julia> eeg -Samples (00:43:11.062500000): - signal.channel_names: [:fp1, :f3, :c3, :p3, :f7, :t3, :t5, :o1, :fz, :cz, - :pz, :fp2, :f4, :c4, :p4, :f8, :t4, :t6, :o2] - signal.sample_unit: :microvolt - signal.sample_resolution_in_unit: 0.25 - signal.sample_type: Int16 - signal.sample_rate: 128 Hz - signal.file_extension: :zst - signal.file_options: nothing - encoded: true - data: -19×331656 Array{Int16,2}: - -421 -416 51 -229 … -164 -318 -644 - -866 -860 -401 -684 -1665 -1805 -2139 - -809 -776 -320 -641 -1402 -1571 -1892 - -698 -642 -191 -522 -1391 -1585 -1891 - -801 -778 -307 -622 -1275 -1452 -1771 - -340 -297 168 -160 … -1012 -1202 -1514 - -594 -544 -86 -410 -802 -982 -1298 - -254 -180 270 -68 68 -141 -435 - -567 -547 -86 -396 -1439 -1602 -1924 - -620 -584 -129 -450 -1486 -1664 -1970 - -595 -807 -187 -341 … -1672 -1441 -2113 - -407 -386 71 -216 -993 -1146 -1466 - -490 -475 -17 -310 -861 -1011 -1332 - -540 -555 -79 -359 -1077 -973 -1508 - -437 -394 60 -263 -1051 -1232 -1541 - -721 -692 -239 -536 … -1681 -1838 -2151 - -646 -597 -145 -460 -1119 -1298 -1602 - -468 -411 32 -295 -224 -411 -695 - -855 -792 -354 -686 -1584 -1782 
-2065 - -julia> eeg[[:fp1, 2, :fz, :o1, :cz], TimeSpan(Minute(20), duration(eeg))] -Samples (00:23:11.062500000): - signal.channel_names: [:fp1, :f3, :fz, :o1, :cz] - signal.sample_unit: :microvolt - signal.sample_resolution_in_unit: 0.25 - signal.sample_type: Int16 - signal.sample_rate: 128 Hz - signal.file_extension: :zst - signal.file_options: nothing - encoded: true - data: -5×178056 Array{Int16,2}: - -326 69 154 -281 … -164 -318 -644 - -596 -204 -109 -543 -1665 -1805 -2139 - -472 -59 19 -422 -1439 -1602 -1924 - -453 -17 31 -418 68 -141 -435 - -797 -373 -306 -748 -1486 -1664 -1970 -``` +values for column indices; see `Onda/examples/tour.jl` for a comprehensive +set of indexing examples. See also: [`encode`](@ref), [`encode!`](@ref), [`decode`](@ref), [`decode!`](@ref) """ @@ -147,6 +95,12 @@ channel(samples::Samples, i::Integer) = channel(samples.signal, i) duration(samples::Samples) Returns the `Nanosecond` value for which `samples[TimeSpan(0, duration(samples))] == samples.data`. + +!!! warning + `duration(samples)` is not generally equivalent to `duration(samples.signal)`; + the former is the actual duration of `samples.data` given `samples.signal.sample_rate`, + whereas the latter is the duration of the entire original signal in the context of + its parent recording. """ duration(samples::Samples) = time_from_index(samples.signal.sample_rate, size(samples.data, 2) + 1) @@ -161,6 +115,11 @@ channel_count(samples::Samples) = channel_count(samples.signal) sample_count(samples::Samples) Return the number of multichannel samples in `samples` (i.e. `size(samples.data, 2)`) + +!!! warning + `sample_count(samples)` is not generally equivalent to `sample_count(samples.signal)`; + the former is the actual number of multichannel samples in `samples.data`, whereas the + latter is the sample count of the entire original signal in the context of its parent recording.
""" sample_count(samples::Samples) = size(samples.data, 2) @@ -170,12 +129,12 @@ sample_count(samples::Samples) = size(samples.data, 2) const VALID_SAMPLE_TYPE_UNION = Union{Int8,Int16,Int32,Int64,UInt8,UInt16,UInt32,UInt64} -function encode_sample(::Type{S}, resolution_in_unit, sample_in_units, - noise=zero(sample_in_units)) where {S<:VALID_SAMPLE_TYPE_UNION} - sample_in_units += noise - isnan(sample_in_units) && return typemax(S) - from_units = clamp(sample_in_units / resolution_in_unit, typemin(S), typemax(S)) - return round(S, from_units) +function encode_sample(::Type{S}, resolution_in_unit, offset_in_unit, sample_in_unit, + noise=zero(sample_in_unit)) where {S<:VALID_SAMPLE_TYPE_UNION} + sample_in_unit += noise + isnan(sample_in_unit) && return typemax(S) + from_unit = clamp((sample_in_unit - offset_in_unit) / resolution_in_unit, typemin(S), typemax(S)) + return round(S, from_unit) end function dither_noise!(rng::AbstractRNG, storage, step) @@ -200,13 +159,14 @@ end ##### """ - encode(sample_type::DataType, sample_resolution_in_unit, samples, dither_storage=nothing) + encode(sample_type::DataType, sample_resolution_in_unit, sample_offset_in_unit, + samples, dither_storage=nothing) -Return a copy of `samples` quantized according to `sample_type` and `sample_resolution_in_unit`. -`sample_type` must be a concrete subtype of `Onda.VALID_SAMPLE_TYPE_UNION`. Quantization of an -individual sample `s` is performed via: +Return a copy of `samples` quantized according to `sample_type`, `sample_resolution_in_unit`, +and `sample_offset_in_unit`. `sample_type` must be a concrete subtype of `Onda.VALID_SAMPLE_TYPE_UNION`. +Quantization of an individual sample `s` is performed via: - round(S, s / sample_resolution_in_unit) + round(S, (s - sample_offset_in_unit) / sample_resolution_in_unit) with additional special casing to clip values exceeding the encoding's dynamic range. 
@@ -219,28 +179,34 @@ Otherwise, `dither_storage` must be a container of similar shape and type to `samples`. This container is then used to store the random noise needed for the triangular dithering process, which is applied to the signal prior to quantization. """ -function encode(::Type{S}, sample_resolution_in_unit, samples, dither_storage=nothing) where {S} - return encode!(similar(samples, S), S, sample_resolution_in_unit, samples, dither_storage) +function encode(::Type{S}, sample_resolution_in_unit, sample_offset_in_unit, + samples, dither_storage=nothing) where {S} + return encode!(similar(samples, S), S, sample_resolution_in_unit, sample_offset_in_unit, + samples, dither_storage) end """ - encode!(result_storage, sample_type::DataType, sample_resolution_in_unit, samples, dither_storage=nothing) - encode!(result_storage, sample_resolution_in_unit, samples, dither_storage=nothing) + encode!(result_storage, sample_type::DataType, sample_resolution_in_unit, + sample_offset_in_unit, samples, dither_storage=nothing) + encode!(result_storage, sample_resolution_in_unit, sample_offset_in_unit, + samples, dither_storage=nothing) -Similar to `encode(sample_type, sample_resolution_in_unit, samples, dither_storage)`, +Similar to `encode(sample_type, sample_resolution_in_unit, sample_offset_in_unit, samples, dither_storage)`, but write encoded values to `result_storage` rather than allocating new storage. `sample_type` defaults to `eltype(result_storage)` if it is not provided. 
""" -function encode!(result_storage, sample_resolution_in_unit, samples, dither_storage=nothing) +function encode!(result_storage, sample_resolution_in_unit, sample_offset_in_unit, + samples, dither_storage=nothing) return encode!(result_storage, eltype(result_storage), sample_resolution_in_unit, - samples, dither_storage=nothing) + sample_offset_in_unit, samples, dither_storage) end -function encode!(result_storage, ::Type{S}, sample_resolution_in_unit, samples, - dither_storage=nothing) where {S} +function encode!(result_storage, ::Type{S}, sample_resolution_in_unit, sample_offset_in_unit, + samples, dither_storage=nothing) where {S} if dither_storage isa Nothing - broadcast!(encode_sample, result_storage, S, sample_resolution_in_unit, samples) + broadcast!(encode_sample, result_storage, S, sample_resolution_in_unit, + sample_offset_in_unit, samples) else if dither_storage isa Missing dither_storage = similar(samples) @@ -248,7 +214,8 @@ function encode!(result_storage, ::Type{S}, sample_resolution_in_unit, samples, throw(DimensionMismatch("dithering storage container does not match shape of samples")) end dither_noise!(dither_storage, sample_resolution_in_unit) - broadcast!(encode_sample, result_storage, S, sample_resolution_in_unit, samples, dither_storage) + broadcast!(encode_sample, result_storage, S, sample_resolution_in_unit, sample_offset_in_unit, + samples, dither_storage) end return result_storage end @@ -260,6 +227,7 @@ If `samples.encoded` is `false`, return a `Samples` instance that wraps: encode(samples.signal.sample_type, samples.signal.sample_resolution_in_unit, + samples.signal.sample_offset_in_unit, samples.data, dither_storage) If `samples.encoded` is `true`, this function is the identity. 
@@ -268,6 +236,7 @@ function encode(samples::Samples, dither_storage=nothing) samples.encoded && return samples data = encode(samples.signal.sample_type, samples.signal.sample_resolution_in_unit, + samples.signal.sample_offset_in_unit, samples.data, dither_storage) return Samples(samples.signal, true, data) end @@ -280,6 +249,7 @@ If `samples.encoded` is `false`, return a `Samples` instance that wraps: encode!(result_storage, samples.signal.sample_type, samples.signal.sample_resolution_in_unit, + samples.signal.sample_offset_in_unit, samples.data, dither_storage)`. If `samples.encoded` is `true`, return a `Samples` instance that wraps @@ -292,6 +262,7 @@ function encode!(result_storage, samples::Samples, dither_storage=nothing) end encode!(result_storage, samples.signal.sample_type, samples.signal.sample_resolution_in_unit, + samples.signal.sample_offset_in_unit, samples.data, dither_storage) return Samples(samples.signal, true, result_storage) end @@ -301,33 +272,38 @@ end ##### """ - decode(sample_resolution_in_unit, samples) + decode(sample_resolution_in_unit, sample_offset_in_unit, samples) -Return `sample_resolution_in_unit .* samples` +Return `sample_resolution_in_unit .* samples .+ sample_offset_in_unit` """ -decode(sample_resolution_in_unit, samples) = sample_resolution_in_unit .* samples +function decode(sample_resolution_in_unit, sample_offset_in_unit, samples) + return sample_resolution_in_unit .* samples .+ sample_offset_in_unit +end """ - decode!(result_storage, sample_resolution_in_unit, samples) + decode!(result_storage, sample_resolution_in_unit, sample_offset_in_unit, samples) -Similar to `decode(sample_resolution_in_unit, samples)`, but write decoded values -to `result_storage` rather than allocating new storage. +Similar to `decode(sample_resolution_in_unit, sample_offset_in_unit, samples)`, but +write decoded values to `result_storage` rather than allocating new storage. 
""" -function decode!(result_storage, sample_resolution_in_unit, samples) - return broadcast!(*, result_storage, sample_resolution_in_unit, samples) +function decode!(result_storage, sample_resolution_in_unit, sample_offset_in_unit, samples) + f = x -> sample_resolution_in_unit * x + sample_offset_in_unit + return broadcast!(f, result_storage, samples) end """ decode(samples::Samples) If `samples.encoded` is `true`, return a `Samples` instance that wraps -`decode(samples.signal.sample_resolution_in_unit, samples.data)`. +`decode(samples.signal.sample_resolution_in_unit, samples.signal.sample_offset_in_unit, samples.data)`. If `samples.encoded` is `false`, this function is the identity. """ function decode(samples::Samples) samples.encoded || return samples - data = decode(samples.signal.sample_resolution_in_unit, samples.data) + data = decode(samples.signal.sample_resolution_in_unit, + samples.signal.sample_offset_in_unit, + samples.data) return Samples(samples.signal, false, data) end @@ -335,14 +311,15 @@ end decode!(result_storage, samples::Samples) If `samples.encoded` is `true`, return a `Samples` instance that wraps -`decode!(result_storage, samples.signal.sample_resolution_in_unit, samples.data)`. +`decode!(result_storage, samples.signal.sample_resolution_in_unit, samples.signal.sample_offset_in_unit, samples.data)`. If `samples.encoded` is `false`, return a `Samples` instance that wraps `copyto!(result_storage, samples.data)`. 
""" function decode!(result_storage, samples::Samples) if samples.encoded - broadcast!(*, result_storage, samples.signal.sample_resolution_in_unit, samples.data) + decode!(result_storage, samples.signal.sample_resolution_in_unit, + samples.signal.sample_offset_in_unit, samples.data) return Samples(samples.signal, false, result_storage) end copyto!(result_storage, samples.data) diff --git a/src/timespans.jl b/src/timespans.jl index 1a6ab3a2..f30760b2 100644 --- a/src/timespans.jl +++ b/src/timespans.jl @@ -114,7 +114,7 @@ For `span::Period`, this function is the identity. duration(t::AbstractTimeSpan) = last(t) - first(t) duration(t::Period) = t -nanoseconds_per_sample(sample_rate) = floor(Int, inv(sample_rate) * 1_000_000_000) +nanoseconds_per_sample(sample_rate) = inv(sample_rate) * 1_000_000_000 """ index_from_time(sample_rate, sample_time) @@ -193,7 +193,7 @@ julia> time_from_index(100, 101) """ function time_from_index(sample_rate, index) index > 0 || throw(ArgumentError("`index` must be > 0")) - return Nanosecond((index - 1) * nanoseconds_per_sample(sample_rate)) + return Nanosecond(ceil(Int, (index - 1) * nanoseconds_per_sample(sample_rate))) end """ diff --git a/test/dataset.jl b/test/dataset.jl index 962611f5..3501c812 100644 --- a/test/dataset.jl +++ b/test/dataset.jl @@ -3,19 +3,21 @@ using Test, Onda, Dates, MsgPack @testset "round trip" begin mktempdir() do root # generate a test dataset - dataset = Dataset(joinpath(root, "test.onda"); create=true) + dataset = Dataset(joinpath(root, "test"); create=true) @test dataset isa Dataset @test isdir(dataset.path) @test isdir(joinpath(dataset.path, "samples")) duration_in_seconds = Second(10) duration_in_nanoseconds = Nanosecond(duration_in_seconds) - uuid, recording = create_recording!(dataset, duration_in_nanoseconds) + uuid, recording = create_recording!(dataset) Ts = (UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64) + sample_rate = 50.5 signals = Dict(Symbol(:x, i) => Signal(Symbol.([:a, :b, :c], 
i), - Symbol(:unit, i), 0.25, T, - 100, Symbol("lpcm.zst"), nothing) + Nanosecond(0), duration_in_nanoseconds, + Symbol(:unit, i), 0.25, i, T, sample_rate, + Symbol("lpcm.zst"), nothing) for (i, T) in enumerate(Ts)) - samples = Dict(k => Samples(v, true, rand(v.sample_type, 3, 100 * 10)) + samples = Dict(k => Samples(v, true, rand(v.sample_type, 3, sample_count(v))) for (k, v) in signals) for (name, s) in samples @test channel_count(s) == length(s.signal.channel_names) @@ -34,9 +36,9 @@ using Test, Onda, Dates, MsgPack decode!(tmp, s) @test tmp == d.data tmp = similar(d.data) - decode!(tmp, s.signal.sample_resolution_in_unit, s.data) + decode!(tmp, s.signal.sample_resolution_in_unit, s.signal.sample_offset_in_unit, s.data) @test tmp == d.data - @test d.data == (s.data .* s.signal.sample_resolution_in_unit) + @test d.data == (s.data .* s.signal.sample_resolution_in_unit .+ s.signal.sample_offset_in_unit) if sizeof(s.signal.sample_type) >= 8 # decoding from 64-bit to floating point is fairly lossy tmp = similar(s.data) @@ -69,14 +71,14 @@ using Test, Onda, Dates, MsgPack @test s[ch_inds, TimeSpan(t, t2)].data == s.data[ch_inds, i:j] @test s[ch_inds, i:j].data == s.data[ch_inds, i:j] end - @test size(s[:, TimeSpan(0, Second(1))].data, 2) == s.signal.sample_rate + @test size(s[:, TimeSpan(0, Second(1))].data, 2) == floor(s.signal.sample_rate) for i in 1:length(chs) @test channel(s, chs[i]) == i @test channel(s, i) == chs[i] @test channel(s.signal, chs[i]) == i @test channel(s.signal, i) == chs[i] end - @test duration(s) == Nanosecond((100 * 10) * (1_000_000_000) // 100) + @test duration(s) == duration_in_seconds @test s[:, TimeSpan(0, duration(s))].data == s.data store!(dataset, uuid, name, s) end @@ -84,7 +86,7 @@ using Test, Onda, Dates, MsgPack # read back in the test dataset, add some annotations old_dataset = dataset - dataset = Dataset(joinpath(root, "test.onda")) + dataset = Dataset(joinpath(root, "test")) @test length(dataset.recordings) == 1 uuid, recording = 
first(dataset.recordings) x1 = load(dataset, uuid, :x1) @@ -103,7 +105,7 @@ using Test, Onda, Dates, MsgPack @test xi[:, span].data == xs_span[name].data end for i in 1:3 - annotate!(recording, Annotation("key_$i", "value_$i", Nanosecond(i), Nanosecond(i + rand(1:1000000)))) + annotate!(recording, Annotation("value_$i", Nanosecond(i), Nanosecond(i + rand(1:1000000)))) end save_recordings_file(dataset) @@ -111,11 +113,11 @@ using Test, Onda, Dates, MsgPack old_uuid = uuid old_recording = recording old_dataset = dataset - dataset = Dataset(joinpath(root, "test.onda")) + dataset = Dataset(joinpath(root, "test")) uuid, recording = first(dataset.recordings) @test old_recording == recording delete!(dataset.recordings, uuid) - uuid, recording = create_recording!(dataset, old_recording.duration_in_nanoseconds) + uuid, recording = create_recording!(dataset) foreach(x -> annotate!(recording, x), old_recording.annotations) foreach(x -> store!(dataset, uuid, x, load(old_dataset, old_uuid, x)), keys(old_recording.signals)) merge!(dataset, old_dataset, only_recordings=true) @@ -125,19 +127,15 @@ using Test, Onda, Dates, MsgPack r2 = dataset.recordings[uuid] @test r2 == recording @test old_uuid != uuid - @test r1.duration_in_nanoseconds == r2.duration_in_nanoseconds @test r1.signals == r2.signals @test r1.annotations == r2.annotations - @test r1.custom == r2.custom - new_duration = r2.duration_in_nanoseconds + Nanosecond(1) - r3 = set_duration!(dataset, uuid, new_duration) - @test r3.signals === r2.signals - @test r3.annotations === r2.annotations - @test r3.custom === r2.custom - @test r3.duration_in_nanoseconds === new_duration - @test dataset.recordings[uuid] === r3 - set_duration!(dataset, uuid, r2.duration_in_nanoseconds) + old_duration = duration(r2) + new_duration = old_duration + Nanosecond(1) + r2signals = set_span!(r2, TimeSpan(Nanosecond(0), new_duration)) + @test keys(r2signals) == keys(r2.signals) + @test all(duration.(values(r2signals)) .== new_duration) + 
set_span!(r2, TimeSpan(Nanosecond(0), old_duration)) r = dataset.recordings[uuid] original_signals_length = length(r.signals) @@ -152,35 +150,38 @@ using Test, Onda, Dates, MsgPack store!(dataset, uuid, signal_name, signal_samples) # read back everything, but without assuming an order on the metadata - dataset = Dataset(joinpath(root, "test.onda")) + dataset = Dataset(joinpath(root, "test")) Onda.write_recordings_file(dataset.path, Onda.Header(dataset.header.onda_format_version, false), dataset.recordings) - dataset = Dataset(joinpath(root, "test.onda")) + dataset = Dataset(joinpath(root, "test")) @test Dict(old_uuid => old_recording) == dataset.recordings delete!(dataset, old_uuid) save_recordings_file(dataset) # read back the dataset that should now be empty - dataset = Dataset(joinpath(root, "test.onda")) + dataset = Dataset(joinpath(root, "test")) @test isempty(dataset.recordings) @test !isdir(joinpath(dataset.path, "samples", string(old_uuid))) + + # make sure samples directory is appropriately created if not present + no_samples_path = joinpath(root, "no_samples_dir.onda") + mkdir(no_samples_path) + cp(joinpath(dataset.path, "recordings.msgpack.zst"), joinpath(no_samples_path, "recordings.msgpack.zst")) + Dataset(no_samples_path; create=false) + @test isdir(joinpath(no_samples_path, "samples")) end end @testset "Error conditions" begin mktempdir() do root - @test_throws ArgumentError Dataset(joinpath(root, "doesnt_end_with_onda"); create=true) mkdir(joinpath(root, "i_exist.onda")) touch(joinpath(root, "i_exist.onda", "memes")) @test_throws ArgumentError Dataset(joinpath(root, "i_exist.onda"); create=true) - mkdir(joinpath(root, "no_samples_dir.onda")) - @test_throws ArgumentError Dataset(joinpath(root, "no_samples_dir.onda"); create=false) dataset = Dataset(joinpath(root, "okay.onda"); create=true) - duration = Nanosecond(Second(10)) - uuid, recording = create_recording!(dataset, duration) - signal = Signal([:a], :mv, 0.25, Int8, 100, Symbol("lpcm.zst"), 
nothing) + uuid, recording = create_recording!(dataset) + signal = Signal([:a], Nanosecond(0), Nanosecond(Second(10)), :mv, 0.25, 0.0, Int8, 100, Symbol("lpcm.zst"), nothing) @test_throws DimensionMismatch Samples(signal, true, rand(Int8, 2, 10)) @test_throws ArgumentError Samples(signal, true, rand(Float32, 1, 10)) samples = Samples(signal, true, rand(Int8, 1, 10 * 100)) @@ -188,14 +189,57 @@ end store!(dataset, uuid, :name_okay, samples) @test_throws ArgumentError store!(dataset, uuid, :name_okay, samples; overwrite=false) - @test_throws ArgumentError Annotation("hi", "there", Nanosecond(20), Nanosecond(4)) + @test_throws ArgumentError Annotation("hi", Nanosecond(20), Nanosecond(4)) mkdir(joinpath(root, "other.onda")) other = Dataset(joinpath(root, "other.onda"); create=true) # Using existing empty directory - create_recording!(other, duration, nothing, uuid) - @test_throws ArgumentError create_recording!(other, duration, nothing, uuid) + create_recording!(other, uuid) + @test_throws ArgumentError create_recording!(other, uuid) store!(other, uuid, :cool_stuff, samples) @test_throws ErrorException merge!(dataset, other; only_recordings=false) @test_throws ArgumentError merge!(dataset, other; only_recordings=true) end end + +@testset "upgrade_onda_format_from_v0_2_to_v0_3!" 
begin + mktempdir() do new_path + old_path = joinpath(@__DIR__, "old_test_v0_2.onda") + cp(old_path, new_path; force=true) + dataset = Onda.upgrade_onda_format_from_v0_2_to_v0_3!(new_path, (k, v) -> string(k, '.', v)) + @test dataset.path == new_path + @test dataset.header.onda_format_version == v"0.3.0" + @test dataset.header.ordered_keys + old_recordings = MsgPack.unpack(Onda.zstd_decompress(read(joinpath(old_path, "recordings.msgpack.zst"))))[2] + new_customs = MsgPack.unpack(Onda.zstd_decompress(read(joinpath(new_path, "recordings_custom.msgpack.zst")))) + @test length(dataset.recordings) == 1 + @test length(new_customs) == 1 + uuid = first(keys(dataset.recordings)) + recording = first(values(dataset.recordings)) + old_recording = first(values(old_recordings)) + @test string(uuid) == first(keys(new_customs)) == first(keys(old_recordings)) + @test first(values(new_customs)) == old_recording["custom"] + sorted_annotations = sort(collect(recording.annotations); by=first) + sorted_old_annotations = sort(old_recording["annotations"]; by=(x -> x["start_nanosecond"])) + @test length(sorted_annotations) == length(sorted_old_annotations) + for (ann, old_ann) in zip(sorted_annotations, sorted_old_annotations) + @test ann.value == string(old_ann["key"], '.', old_ann["value"]) + @test ann.start_nanosecond.value == old_ann["start_nanosecond"] + @test ann.stop_nanosecond.value == old_ann["stop_nanosecond"] + end + old_signals = old_recording["signals"] + @test keys(recording.signals) == Set(Symbol.(keys(old_signals))) + for (signal_name, signal) in recording.signals + old_signal = old_signals[string(signal_name)] + @test signal.channel_names == Symbol.(old_signal["channel_names"]) + @test signal.start_nanosecond == Nanosecond(0) + @test signal.stop_nanosecond == Nanosecond(old_recording["duration_in_nanoseconds"]) + @test signal.sample_unit == Symbol(old_signal["sample_unit"]) + @test signal.sample_resolution_in_unit == old_signal["sample_resolution_in_unit"] + @test 
signal.sample_offset_in_unit == 0.0 + @test signal.sample_type == Onda.julia_type_from_onda_sample_type(old_signal["sample_type"]) + @test signal.sample_rate == old_signal["sample_rate"] + @test signal.file_extension == Symbol(old_signal["file_extension"]) + @test signal.file_options == old_signal["file_options"] + end + end +end diff --git a/test/old_test_v0_2.onda/recordings.msgpack.zst b/test/old_test_v0_2.onda/recordings.msgpack.zst new file mode 100644 index 00000000..70caa251 Binary files /dev/null and b/test/old_test_v0_2.onda/recordings.msgpack.zst differ diff --git a/test/printing.jl b/test/printing.jl index 49d06582..005bd0e2 100644 --- a/test/printing.jl +++ b/test/printing.jl @@ -3,27 +3,33 @@ using Test, Onda, Dates, Random, UUIDs @testset "pretty printing" begin @test repr(TimeSpan(6149872364198, 123412345678910)) == "TimeSpan(01:42:29.872364198, 34:16:52.345678910)" - signal = Signal([:a, :b, Symbol("c-d")], :unit, 0.25, Int16, 50, Symbol("lpcm.zst"), nothing) + signal = Signal([:a, :b, Symbol("c-d")], Nanosecond(3), Nanosecond(Second(12345)), :unit, 0.25, -0.5, Int16, 50.2, Symbol("lpcm.zst"), nothing) @test sprint(show, signal, context=(:compact => true)) == "Signal([:a, :b, Symbol(\"c-d\")])" @test sprint(show, signal) == """ Signal: channel_names: [:a, :b, Symbol(\"c-d\")] + start_nanosecond: 3 nanoseconds (00:00:00.000000003) + stop_nanosecond: 12345000000000 nanoseconds (03:25:45.000000000) sample_unit: :unit sample_resolution_in_unit: 0.25 + sample_offset_in_unit: -0.5 sample_type: Int16 - sample_rate: 50 Hz + sample_rate: 50.2 Hz file_extension: Symbol(\"lpcm.zst\") file_options: nothing""" samples = Samples(signal, true, rand(Random.MersenneTwister(0), signal.sample_type, 3, 5)) @test sprint(show, samples, context=(:compact => true)) == "Samples(3×5 Array{Int16,2})" @test sprint(show, samples) == """ - Samples (00:00:00.100000000): + Samples (00:00:00.099601594): signal.channel_names: [:a, :b, Symbol(\"c-d\")] + signal.start_nanosecond: 3 
nanoseconds (00:00:00.000000003) + signal.stop_nanosecond: 12345000000000 nanoseconds (03:25:45.000000000) signal.sample_unit: :unit signal.sample_resolution_in_unit: 0.25 + signal.sample_offset_in_unit: -0.5 signal.sample_type: Int16 - signal.sample_rate: 50 Hz + signal.sample_rate: 50.2 Hz signal.file_extension: Symbol(\"lpcm.zst\") signal.file_options: nothing encoded: true @@ -32,35 +38,13 @@ using Test, Onda, Dates, Random, UUIDs 20032 4760 27427 -20758 24287 14240 5037 5598 -5888 21784 16885 600 20880 -32493 -19305""" - annotations = Set(Annotation("key$i", "val", TimeSpan(0, 1)) for i in 1:10) - recording = Recording(Nanosecond(100_000_000), Dict(:test => signal), annotations, nothing) - recording_string = sprint(show, recording) - @test startswith(recording_string, """ - Recording: - duration_in_nanoseconds: 100000000 nanoseconds (00:00:00.100000000; 0.1 seconds) - signals: - :test => Signal([:a, :b, Symbol(\"c-d\")]) - annotations (10 total):""") - @test endswith(recording_string, "...and 5 more.\n custom: nothing") - annotations = Set(reduce(vcat, [[Annotation("key$i", string(rand()), TimeSpan(0, 1)) for _ in 1:i] for i in 1:10])) - recording = Recording(Nanosecond(100_000_000), Dict(:test => signal), annotations, Dict(:a => 1, :b => 2, :c => 2)) + annotations = Set(Annotation("val", TimeSpan(i, i + 1)) for i in 1:10) + recording = Recording(Dict(:test => signal), annotations) @test sprint(show, recording) == """ - Recording: - duration_in_nanoseconds: 100000000 nanoseconds (00:00:00.100000000; 0.1 seconds) + Recording (03:25:45.000000000; 12345.0 seconds) signals: :test => Signal([:a, :b, Symbol(\"c-d\")]) - annotations (55 total): - 10 instance(s) of key10 - 9 instance(s) of key9 - 8 instance(s) of key8 - 7 instance(s) of key7 - 6 instance(s) of key6 - ...and 5 more. 
- custom: - Dict{Symbol,Int64} with 3 entries: - :a => 1 - :b => 2 - :c => 2""" + annotations: (10 total)""" mktempdir() do root dataset = Dataset(joinpath(root, "test.onda"); create=true) @test sprint(show, dataset) == "Dataset($(dataset.path), 0 recordings)" diff --git a/test/serialization.jl b/test/serialization.jl index 5c0e8c9b..5eba71b7 100644 --- a/test/serialization.jl +++ b/test/serialization.jl @@ -1,9 +1,9 @@ -using Test, Onda, Random +using Test, Onda, Random, Dates @testset "$(repr(name)) serializer" for (name, options) in [(:lpcm, nothing), (Symbol("lpcm.zst"), Dict(:level => 2))] - signal = Signal([:a, :b, :c], :unit, 0.25, Int16, 50, name, options) - samples = encode(Samples(signal, false, rand(MersenneTwister(1), 3, 50 * 10))).data + signal = Signal([:a, :b, :c], Nanosecond(0), Nanosecond(0), :unit, 0.25, -0.5, Int16, 50.5, name, options) + samples = encode(Samples(signal, false, rand(MersenneTwister(1), 3, Int(50.5 * 10)))).data s = serializer(signal) bytes = serialize_lpcm(samples, s) name == :lpcm && @test bytes == reinterpret(UInt8, vec(samples)) diff --git a/test/timespans.jl b/test/timespans.jl index 7a308f61..0fcb12aa 100644 --- a/test/timespans.jl +++ b/test/timespans.jl @@ -57,4 +57,12 @@ end @test index_from_time(100, Nanosecond(0)) == 1 @test index_from_time(100, TimeSpan(Second(3), Second(6))) == 301:600 @test index_from_time(100, TimeSpan(Second(1), Second(1))) == 101:101 + # test non-integer sample rates + rate = 100.66 + ns_per_sample = Onda.nanoseconds_per_sample(rate) + for i in 1:1000 + t = Nanosecond(ceil(Int, (i - 1) * ns_per_sample)) + @test index_from_time(rate, t) == i + @test time_from_index(rate, i) == t + end end