Skip to content

Commit

Permalink
update Onda for OndaFormat v0.3.0 (#28)
Browse files Browse the repository at this point in the history
Co-Authored-By: Alex Arslan <ararslan@comcast.net>
  • Loading branch information
jrevels and ararslan authored Mar 4, 2020
1 parent 9e61956 commit 53d7655
Show file tree
Hide file tree
Showing 15 changed files with 401 additions and 298 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Onda"
uuid = "e853f5be-6863-11e9-128d-476edb89bfb5"
authors = ["Beacon Biosignals, Inc."]
version = "0.7.6"
version = "0.8.0"

[deps]
CodecZstd = "6b39b394-51ab-5f42-8807-6242bab2b4c2"
Expand Down
9 changes: 8 additions & 1 deletion docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ Note that Onda.jl's API follows a specific philosophy with respect to property a
Dataset
samples_path
create_recording!
set_duration!
load
store!
delete!
Expand All @@ -26,8 +25,10 @@ save_recordings_file
```@docs
Signal
signal_from_template
span
Annotation
Recording
set_span!
annotate!
```

Expand Down Expand Up @@ -68,3 +69,9 @@ serialize_lpcm
LPCM
LPCMZst
```

## Upgrading Older Datasets to Newer Datasets

```@docs
Onda.upgrade_onda_format_from_v0_2_to_v0_3!
```
4 changes: 2 additions & 2 deletions examples/flac.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# that it's a naive implementation - it just shells out and assumes you have
# the `flac` command line utility installed and available on your system.

using Onda, Test, Random
using Onda, Test, Random, Dates

#####
##### FLAC
Expand Down Expand Up @@ -69,7 +69,7 @@ end

if VERSION >= v"1.1.0"
@testset "FLAC example" begin
signal = Signal([:a, :b, :c], :unit, 0.25, Int16, 50, :flac, Dict(:level => 2))
signal = Signal([:a, :b, :c], Nanosecond(0), Nanosecond(0), :unit, 0.25, 0.0, Int16, 50.0, :flac, Dict(:level => 2))
samples = encode(Samples(signal, false, rand(MersenneTwister(1), 3, 50 * 10))).data
s = serializer(signal)
bytes = serialize_lpcm(samples, s)
Expand Down
61 changes: 36 additions & 25 deletions examples/tour.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,21 +22,27 @@ eeg_signal = Signal(channel_names=[:fp1, :f3, :c3, :p3,
:fz, :cz, :pz,
:fp2, :f4, :c4, :p4,
:f8, :t4, :t6, :o2],
start_nanosecond=Nanosecond(0),
stop_nanosecond=Nanosecond(Second(20)),
sample_unit=:microvolts,
sample_resolution_in_unit=0.25,
sample_offset_in_unit=0.0,
sample_type=Int16,
sample_rate=256, # Hz
sample_rate=256.0, # Hz
file_extension=:lpcm,
file_options=nothing)

ecg_signal = signal_from_template(eeg_signal; channel_names=[:avl, :avr],
file_extension=Symbol("lpcm.zst"))

spo2_signal = Signal(channel_names=[:spo2],
start_nanosecond=Nanosecond(Second(3)),
stop_nanosecond=Nanosecond(Second(17)),
sample_unit=:percentage,
sample_resolution_in_unit=(100 / typemax(UInt8)),
sample_offset_in_unit=0.0,
sample_type=UInt8,
sample_rate=20, # Hz
sample_rate=20.5, # Hz
file_extension=:lpcm,
file_options=nothing)

Expand All @@ -52,35 +58,45 @@ spo2_signal = Signal(channel_names=[:spo2],
# an aside: The hypothetical person from whom these hypothetical signals were
# hypothetically recorded must be experiencing some pretty crazy pathologies if
# their EEG/ECG are just saw waves...
saws(signal, seconds) = [(j + i) % 100 * signal.sample_resolution_in_unit for
i in 1:channel_count(signal),
j in 1:(signal.sample_rate * seconds)]
# Build a (channel x sample) matrix of sawtooth values for `signal`: the entry for
# channel `i` and sample `j` is `(j + i) % 100` scaled by the signal's
# `sample_resolution_in_unit`.
function saws(signal)
    return [rem(j + i, 100) * signal.sample_resolution_in_unit
            for i in 1:channel_count(signal), j in 1:sample_count(signal)]
end

# The second argument in the `Samples` constructor is a `Bool` that specifies if
# the data is in its encoded representation. Here, we construct our signals as
# "decoded" (i.e. in actual units, though for this toy example it doesn't really
# matter) and then "encode" them according to the specified signal:
eeg = encode(Samples(eeg_signal, false, saws(eeg_signal, 20)))
ecg = encode(Samples(ecg_signal, false, saws(ecg_signal, 20)))
spo2 = encode(Samples(spo2_signal, false, saws(spo2_signal, 20)))
eeg = encode(Samples(eeg_signal, false, saws(eeg_signal)))
ecg = encode(Samples(ecg_signal, false, saws(ecg_signal)))
spo2 = encode(Samples(spo2_signal, false, saws(spo2_signal)))

# Here are some basic functions for examining `Samples` instances:
@test sample_count(eeg) == eeg.signal.sample_rate * 20
@test sample_count(eeg) == sample_count(eeg_signal) == 20 * eeg_signal.sample_rate
@test channel_count(eeg) == channel_count(eeg_signal) == 19
@test channel(eeg, :f3) == channel(eeg_signal, :f3) == 2
@test channel(eeg, 2) == channel(eeg_signal, 2) == :f3
@test duration(eeg) == duration(ecg) == duration(spo2) == Second(20)
@test duration(eeg) == duration(span(eeg_signal)) == Second(20)

# Here are some basic indexing examples using `getindex` and `view` wherein
# channel names and sample-rate-agnostic `TimeSpan`s are employed as indices:
span = TimeSpan(Second(3), Second(9))
span_range = index_from_time(eeg.signal.sample_rate, span)
@test eeg[:, span].data == view(eeg, :, span_range).data
slice_span = TimeSpan(Second(3), Second(9))
span_range = index_from_time(eeg.signal.sample_rate, slice_span)
@test eeg[:, slice_span].data == view(eeg, :, span_range).data
@test eeg[:f3, :].data == view(eeg, 2, :).data
@test eeg[:f3, 1:10].data == view(eeg, 2, 1:10).data
@test eeg[:f3, span].data == view(eeg, 2, span_range).data
@test eeg[:f3, slice_span].data == view(eeg, 2, span_range).data
@test eeg[[:f3, :c3, :p3], 1:10].data == view(eeg, 2:4, 1:10).data
@test eeg[[:c3, 4, :f3], span].data == view(eeg, [3, 4, 2], span_range).data
@test eeg[[:c3, 4, :f3], slice_span].data == view(eeg, [3, 4, 2], span_range).data

# NOTE: Keep in mind that `duration(samples.signal)` is not generally equivalent
# to `duration(samples)`; the former is the duration of the original signal in
# the context of its parent recording, whereas the latter is the actual duration
# of `samples.data` given `signal.sample_rate`. This is similarly true for the
# `sample_count` function for the same reason!
eeg_slice = eeg[:, slice_span]
@test duration(eeg_slice) == duration(slice_span)
@test duration(eeg_slice) != duration(eeg_signal)
@test sample_count(eeg_slice) == length(span_range)
@test sample_count(eeg_slice) != sample_count(eeg_signal)

# NOTE: `Samples` is not an `AbstractArray` subtype; this special indexing
# behavior is only defined for convenient data manipulation. It is thus fine
Expand All @@ -106,12 +122,7 @@ dataset = Dataset(joinpath(root, "example.onda"); create=true)
# `dataset.recordings` dictionary before returning the pair, such that the
# `recording` variable we assign here references the same `Recording` instance
# stored within `dataset`.
uuid, recording = create_recording!(dataset, duration(eeg), Dict())

# The last argument to `create_recording!` above provided a dictionary for the
# `custom` field of the returned `Recording` instance. Let's populate it with
# some custom metadata:
recording.custom["some_custom_information"] = "hey, I think this recording might be fake"
uuid, recording = create_recording!(dataset)

# Store our signals/samples for the recording in our `dataset`. This both serializes
# sample data to disk and adds the signal metadata to the recording stored in
Expand All @@ -125,7 +136,7 @@ store!(dataset, uuid, :spo2, spo2)
# like in there. For example, Beacon Biosignals stores JSON snippets in
# annotations. Here, let's just go the simple route and pretend we found an
# epileptiform spike in our EEG/ECG/SpO2 recording:
spike_annotation = Annotation("epileptiform", "spike", TimeSpan(Millisecond(1500), Second(2)))
spike_annotation = Annotation("epileptiform_spike", TimeSpan(Millisecond(1500), Second(2)))
annotate!(recording, spike_annotation)

# You can add as many annotations as you'd like to a recording. Just keep in mind
Expand All @@ -138,7 +149,7 @@ annotate!(recording, spike_annotation)
# short 2 second epochs across the entire recording:
for (i, t) in enumerate(2:2:Second(duration(recording)).value)
stage = rand(["awake", "nrem1", "nrem2", "nrem3", "rem"])
ann = Annotation("sleep_stage", "$stage", TimeSpan(Second(t - 2), Second(t)))
ann = Annotation(stage, TimeSpan(Second(t - 2), Second(t)))
annotate!(recording, ann)
end

Expand All @@ -159,7 +170,7 @@ dataset = Dataset(joinpath(root, "example.onda"))
uuid, recording = first(dataset.recordings)

# Grab the first spike annotation we see...
spike_annotation = first(ann for ann in recording.annotations if ann.value == "spike")
spike_annotation = first(ann for ann in recording.annotations if ann.value == "epileptiform_spike")

# ...and load that segment of the EEG from disk as a `Samples` instance!
spike_segment = load(dataset, uuid, :eeg, spike_annotation)
Expand All @@ -180,7 +191,7 @@ spike_segment = load(dataset, uuid, :eeg, spike_annotation)
# Welp, looks like a spike to me! Let's leave an annotation to confirm we
# checked it. Remember - `spike_annotation isa AbstractTimeSpan`, so we can
# generally pass it wherever we'd pass a `TimeSpan` object:
annotate!(recording, Annotation("confirmation", "spike", spike_annotation))
annotate!(recording, Annotation("confirmed_spike_by_me", spike_annotation))

# ...and, finally, of course, let's save our annotation!
save_recordings_file(dataset)
67 changes: 64 additions & 3 deletions src/Onda.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ using MsgPack
using TranscodingStreams
using CodecZstd

const ONDA_FORMAT_VERSION = v"0.2"
const ONDA_FORMAT_VERSION = v"0.3"

#####
##### utilities
Expand Down Expand Up @@ -56,7 +56,7 @@ export AbstractTimeSpan, TimeSpan, contains, overlaps, shortest_timespan_contain
index_from_time, time_from_index, duration

include("recordings.jl")
export Recording, Signal, signal_from_template, Annotation, annotate!
export Recording, Signal, signal_from_template, Annotation, annotate!, span

include("serialization.jl")
export AbstractLPCMSerializer, serializer, deserialize_lpcm, serialize_lpcm,
Expand All @@ -66,9 +66,70 @@ include("samples.jl")
export Samples, encode, encode!, decode, decode!, channel, channel_count, sample_count

include("dataset.jl")
export Dataset, samples_path, create_recording!, set_duration!, load, store!, delete!,
export Dataset, samples_path, create_recording!, set_span!, load, store!, delete!,
save_recordings_file

include("printing.jl")

#####
##### upgrades/deprecations
#####

# Backward-compatibility shim: `set_duration!(dataset, uuid, duration)` is
# deprecated in favor of `set_span!`; calls are forwarded to the body below.
@deprecate set_duration!(dataset, uuid, duration) begin
# Look up the recording stored under `uuid` in `dataset.recordings`.
r = dataset.recordings[uuid]
# Express the old duration as a span starting at 0 ns and stopping at `duration`.
set_span!(r, TimeSpan(Nanosecond(0), duration))
# The block evaluates to the recording itself.
r
end

"""
    Onda.upgrade_onda_format_from_v0_2_to_v0_3!(path, combine_annotation_key_value)

Upgrade the Onda v0.2 dataset at `path` to an Onda v0.3 dataset, returning the
upgraded `Dataset`. This upgrade process overwrites `path/recordings.msgpack.zst`
with a v0.3-compliant version of this file; for safety's sake, the old v0.2 file
is preserved at `path/old.recordings.msgpack.zst.backup`.

The existing file is fully read, validated, and converted in memory before anything
on disk is touched, so if the upgrade fails during validation or conversion (corrupt
file, unsupported format version, a throwing callback, ...) the original
`path/recordings.msgpack.zst` is left in place.

A couple of the Onda v0.2 -> v0.3 changes require some special handling:

- The `custom` field was removed from recording objects. This function thus writes out
  a file at `path/recordings_custom.msgpack.zst` that contains a map of UUIDs to
  corresponding recordings' `custom` values before deleting the `custom` field. This
  file can be deserialized via
  `MsgPack.unpack(Onda.zstd_decompress(read("recordings_custom.msgpack.zst")))`.

- Annotations no longer have a `key` field. Thus, each annotation's existing `key` and
  `value` fields are combined into the single new `value` field via the provided callback
  `combine_annotation_key_value(annotation_key, annotation_value)`.

# Arguments
- `path`: directory of the dataset; must contain `recordings.msgpack.zst`.
- `combine_annotation_key_value`: callable invoked as
  `combine_annotation_key_value(annotation_key, annotation_value)` whose result
  becomes the annotation's new `value`.

# Throws
- An `ErrorException` (via `error`) if the first byte of the decompressed file is not
  `0x92`, or if the file's `onda_format_version` is not in `[v"0.2", v"0.3")`.
- Whatever `mv` throws if `path/old.recordings.msgpack.zst.backup` already exists
  (the move is performed without `force`).
"""
function upgrade_onda_format_from_v0_2_to_v0_3!(path, combine_annotation_key_value)
    file_path = joinpath(path, "recordings.msgpack.zst")
    backup_path = joinpath(path, "old.recordings.msgpack.zst.backup")
    customs_path = joinpath(path, "recordings_custom.msgpack.zst")

    # Read and validate everything up front; nothing on disk is modified until the
    # whole conversion below has succeeded.
    io = IOBuffer(zstd_decompress(read(file_path)))
    read(io, UInt8) == 0x92 || error("corrupt recordings.msgpack.zst")
    header = MsgPack.unpack(io, Header)
    if !(v"0.2" <= header.onda_format_version < v"0.3")
        error("unsupported original onda_format_version: $(header.onda_format_version)")
    end
    recordings = MsgPack.unpack(io, Dict{UUID,Any})

    # Serialize the `custom` values now, before the field is deleted from each recording.
    customs = Dict{UUID,Any}(uuid => recording["custom"] for (uuid, recording) in recordings)
    customs_bytes = zstd_compress(MsgPack.pack(customs))

    for recording in values(recordings)
        # Each signal's stop is set to the recording's old duration, with start at 0.
        signal_stop_nanosecond = recording["duration_in_nanoseconds"]
        for signal in values(recording["signals"])
            signal["start_nanosecond"] = 0
            signal["stop_nanosecond"] = signal_stop_nanosecond
            signal["sample_offset_in_unit"] = 0.0
            signal["sample_rate"] = float(signal["sample_rate"])
        end
        for annotation in recording["annotations"]
            annotation["value"] = combine_annotation_key_value(annotation["key"],
                                                               annotation["value"])
            delete!(annotation, "key")
        end
        delete!(recording, "duration_in_nanoseconds")
        delete!(recording, "custom")
    end

    # Round-trip through MsgPack to turn the patched dictionaries into `Recording`s.
    fixed_recordings = MsgPack.unpack(MsgPack.pack(recordings), Dict{UUID,Recording})

    # Conversion succeeded: only now back up the old file and write the new ones.
    mv(file_path, backup_path)
    write(customs_path, customs_bytes)
    dataset = Dataset(path, Header(v"0.3.0", true), fixed_recordings)
    save_recordings_file(dataset)
    return dataset
end

end # module
Loading

2 comments on commit 53d7655

@jrevels
Copy link
Member Author

@jrevels jrevels commented on 53d7655 Mar 4, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/10498

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if Julia TagBot is installed, or can be done manually through the github interface, or via:

git tag -a v0.8.0 -m "<description of version>" 53d7655aee23db3f6fa9aba7526ef37053562fae
git push origin v0.8.0

Please sign in to comment.