diff --git a/Project.toml b/Project.toml
index 3f334496..3930e992 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "Onda"
 uuid = "e853f5be-6863-11e9-128d-476edb89bfb5"
 authors = ["Beacon Biosignals, Inc."]
-version = "0.7.6"
+version = "0.8.0"
 
 [deps]
 CodecZstd = "6b39b394-51ab-5f42-8807-6242bab2b4c2"
diff --git a/docs/src/index.md b/docs/src/index.md
index f3286c7d..403c7acc 100644
--- a/docs/src/index.md
+++ b/docs/src/index.md
@@ -14,7 +14,6 @@ Note that Onda.jl's API follows a specific philosophy with respect to property a
 Dataset
 samples_path
 create_recording!
-set_duration!
 load
 store!
 delete!
@@ -26,8 +25,10 @@ save_recordings_file
 ```@docs
 Signal
 signal_from_template
+span
 Annotation
 Recording
+set_span!
 annotate!
 ```
 
@@ -68,3 +69,9 @@ serialize_lpcm
 LPCM
 LPCMZst
 ```
+
+## Upgrading Older Datasets to Newer Datasets
+
+```@docs
+Onda.upgrade_onda_format_from_v0_2_to_v0_3!
+```
diff --git a/examples/flac.jl b/examples/flac.jl
index 086d600f..d9f2f742 100644
--- a/examples/flac.jl
+++ b/examples/flac.jl
@@ -2,7 +2,7 @@
 # that it's a naive implementation - it just shells out and assumes you have
 # the `flac` command line utility installed and available on your system.
 
-using Onda, Test, Random
+using Onda, Test, Random, Dates
 
 #####
 ##### FLAC
@@ -69,7 +69,7 @@ end
 
 if VERSION >= v"1.1.0"
     @testset "FLAC example" begin
-        signal = Signal([:a, :b, :c], :unit, 0.25, Int16, 50, :flac, Dict(:level => 2))
+        signal = Signal([:a, :b, :c], Nanosecond(0), Nanosecond(0), :unit, 0.25, 0.0, Int16, 50.0, :flac, Dict(:level => 2))
         samples = encode(Samples(signal, false, rand(MersenneTwister(1), 3, 50 * 10))).data
         s = serializer(signal)
         bytes = serialize_lpcm(samples, s)
diff --git a/examples/tour.jl b/examples/tour.jl
index 56479989..fc05f074 100644
--- a/examples/tour.jl
+++ b/examples/tour.jl
@@ -22,10 +22,13 @@ eeg_signal = Signal(channel_names=[:fp1, :f3, :c3, :p3,
                                    :fz, :cz, :pz,
                                    :fp2, :f4, :c4, :p4,
                                    :f8, :t4, :t6, :o2],
+                    start_nanosecond=Nanosecond(0),
+                    stop_nanosecond=Nanosecond(Second(20)),
                     sample_unit=:microvolts,
                     sample_resolution_in_unit=0.25,
+                    sample_offset_in_unit=0.0,
                     sample_type=Int16,
-                    sample_rate=256, # Hz
+                    sample_rate=256.0, # Hz
                     file_extension=:lpcm,
                     file_options=nothing)
 
@@ -33,10 +36,13 @@ ecg_signal = signal_from_template(eeg_signal; channel_names=[:avl, :avr],
                                   file_extension=Symbol("lpcm.zst"))
 
 spo2_signal = Signal(channel_names=[:spo2],
+                     start_nanosecond=Nanosecond(Second(3)),
+                     stop_nanosecond=Nanosecond(Second(17)),
                      sample_unit=:percentage,
                      sample_resolution_in_unit=(100 / typemax(UInt8)),
+                     sample_offset_in_unit=0.0,
                      sample_type=UInt8,
-                     sample_rate=20, # Hz
+                     sample_rate=20.5, # Hz
                      file_extension=:lpcm,
                      file_options=nothing)
 
@@ -52,35 +58,45 @@ spo2_signal = Signal(channel_names=[:spo2],
 # an aside: The hypothetical person from which these hypothetical signals were
 # hypothetically recorded must be experiencing some pretty crazy pathologies if
 # their EEG/ECG are just saw waves...
-saws(signal, seconds) = [(j + i) % 100 * signal.sample_resolution_in_unit for
-                         i in 1:channel_count(signal),
-                         j in 1:(signal.sample_rate * seconds)]
+saws(signal) = [(j + i) % 100 * signal.sample_resolution_in_unit for
+                i in 1:channel_count(signal), j in 1:sample_count(signal)]
 
 # The second argument in the `Samples` constructor is a `Bool` that specifies if
 # the data is in its encoded representation. Here, we construct our signals as
 # "decoded" (i.e. in actual units, though for this toy example it doesn't really
 # matter) and then "encode" them according to the specified:
-eeg = encode(Samples(eeg_signal, false, saws(eeg_signal, 20)))
-ecg = encode(Samples(ecg_signal, false, saws(ecg_signal, 20)))
-spo2 = encode(Samples(spo2_signal, false, saws(spo2_signal, 20)))
+eeg = encode(Samples(eeg_signal, false, saws(eeg_signal)))
+ecg = encode(Samples(ecg_signal, false, saws(ecg_signal)))
+spo2 = encode(Samples(spo2_signal, false, saws(spo2_signal)))
 
 # Here are some basic functions for examining `Samples` instances:
-@test sample_count(eeg) == eeg.signal.sample_rate * 20
+@test sample_count(eeg) == sample_count(eeg_signal) == 20 * eeg_signal.sample_rate
 @test channel_count(eeg) == channel_count(eeg_signal) == 19
 @test channel(eeg, :f3) == channel(eeg_signal, :f3) == 2
 @test channel(eeg, 2) == channel(eeg_signal, 2) == :f3
-@test duration(eeg) == duration(ecg) == duration(spo2) == Second(20)
+@test duration(eeg) == duration(span(eeg_signal)) == Second(20)
 
 # Here are some basic indexing examples using `getindex` and `view` wherein
 # channel names and sample-rate-agnostic `TimeSpan`s are employed as indices:
-span = TimeSpan(Second(3), Second(9))
-span_range = index_from_time(eeg.signal.sample_rate, span)
-@test eeg[:, span].data == view(eeg, :, span_range).data
+slice_span = TimeSpan(Second(3), Second(9))
+span_range = index_from_time(eeg.signal.sample_rate, slice_span)
+@test eeg[:, slice_span].data == view(eeg, :, span_range).data
 @test eeg[:f3, :].data == view(eeg, 2, :).data
 @test eeg[:f3, 1:10].data == view(eeg, 2, 1:10).data
-@test eeg[:f3, span].data == view(eeg, 2, span_range).data
+@test eeg[:f3, slice_span].data == view(eeg, 2, span_range).data
 @test eeg[[:f3, :c3, :p3], 1:10].data == view(eeg, 2:4, 1:10).data
-@test eeg[[:c3, 4, :f3], span].data == view(eeg, [3, 4, 2], span_range).data
+@test eeg[[:c3, 4, :f3], slice_span].data == view(eeg, [3, 4, 2], span_range).data
+
+# NOTE: Keep in mind that `duration(samples.signal)` is not generally equivalent
+# to `duration(samples)`; the former is the duration of the original signal in
+# the context of its parent recording, whereas the latter is the actual duration
+# of `samples.data` given `signal.sample_rate`. This is similarly true for the
+# `sample_count` function for the same reason!
+eeg_slice = eeg[:, slice_span]
+@test duration(eeg_slice) == duration(slice_span)
+@test duration(eeg_slice) != duration(eeg_signal)
+@test sample_count(eeg_slice) == length(span_range)
+@test sample_count(eeg_slice) != sample_count(eeg_signal)
 
 # NOTE: `Samples` is not an `AbstractArray` subtype; this special indexing
 # behavior is only defined for convenient data manipulation. It is thus fine
@@ -106,12 +122,7 @@ dataset = Dataset(joinpath(root, "example.onda"); create=true)
 # `dataset.recordings` dictionary before returning the pair, such that the
 # `recording` variable we assign here references the same `Recording` instance
 # stored within `dataset`.
-uuid, recording = create_recording!(dataset, duration(eeg), Dict())
-
-# The last argument to `create_recording!` above provided a dictionary for the
-# `custom` field of the returned `Recording` instance. Let's populate it with
-# some custom metadata:
-recording.custom["some_custom_information"] = "hey, I think this recording might be fake"
+uuid, recording = create_recording!(dataset)
 
 # Store our signals/samples for the recording in our `dataset`. This both serializes
 # sample data to disk and adds the signal metadata to the recording stored in
@@ -125,7 +136,7 @@ store!(dataset, uuid, :spo2, spo2)
 # like in there. For example, Beacon Biosignals stores JSON snippets in
 # annotations. Here, let's just go the simple route and pretend we found an
 # epileptiform spike in our EEG/ECG/SpO2 recording:
-spike_annotation = Annotation("epileptiform", "spike", TimeSpan(Millisecond(1500), Second(2)))
+spike_annotation = Annotation("epileptiform_spike", TimeSpan(Millisecond(1500), Second(2)))
 annotate!(recording, spike_annotation)
 
 # You can add as many annotations as you'd like to a recording. Just keep in mind
@@ -138,7 +149,7 @@ annotate!(recording, spike_annotation)
 # short 2 second epochs across the entire recording:
 for (i, t) in enumerate(2:2:Second(duration(recording)).value)
     stage = rand(["awake", "nrem1", "nrem2", "nrem3", "rem"])
-    ann = Annotation("sleep_stage", "$stage", TimeSpan(Second(t - 2), Second(t)))
+    ann = Annotation(stage, TimeSpan(Second(t - 2), Second(t)))
     annotate!(recording, ann)
 end
 
@@ -159,7 +170,7 @@ dataset = Dataset(joinpath(root, "example.onda"))
 uuid, recording = first(dataset.recordings)
 
 # Grab the first spike annotation we see...
-spike_annotation = first(ann for ann in recording.annotations if ann.value == "spike")
+spike_annotation = first(ann for ann in recording.annotations if ann.value == "epileptiform_spike")
 
 # ...and load that segment of the EEG from disk as a `Samples` instance!
 spike_segment = load(dataset, uuid, :eeg, spike_annotation)
@@ -180,7 +191,7 @@ spike_segment = load(dataset, uuid, :eeg, spike_annotation)
 # Welp, looks like a spike to me! Let's leave an annotation to confirm we
 # checked it. Remember - `spike_annotation isa AbstractTimeSpan`, so we can
 # generally pass it wherever we'd pass a `TimeSpan` object:
-annotate!(recording, Annotation("confirmation", "spike", spike_annotation))
+annotate!(recording, Annotation("confirmed_spike_by_me", spike_annotation))
 
 # ...and, finally, of course, let's save our annotation!
 save_recordings_file(dataset)
diff --git a/src/Onda.jl b/src/Onda.jl
index e7d6d6a2..f07af73e 100644
--- a/src/Onda.jl
+++ b/src/Onda.jl
@@ -5,7 +5,7 @@ using MsgPack
 using TranscodingStreams
 using CodecZstd
 
-const ONDA_FORMAT_VERSION = v"0.2"
+const ONDA_FORMAT_VERSION = v"0.3"
 
 #####
 ##### utilities
@@ -56,7 +56,7 @@ export AbstractTimeSpan, TimeSpan, contains, overlaps, shortest_timespan_contain
        index_from_time, time_from_index, duration
 
 include("recordings.jl")
-export Recording, Signal, signal_from_template, Annotation, annotate!
+export Recording, Signal, signal_from_template, Annotation, annotate!, span
 
 include("serialization.jl")
 export AbstractLPCMSerializer, serializer, deserialize_lpcm, serialize_lpcm,
@@ -66,9 +66,70 @@ include("samples.jl")
 export Samples, encode, encode!, decode, decode!, channel, channel_count, sample_count
 
 include("dataset.jl")
-export Dataset, samples_path, create_recording!, set_duration!, load, store!, delete!,
+export Dataset, samples_path, create_recording!, set_span!, load, store!, delete!,
        save_recordings_file
 
 include("printing.jl")
 
+#####
+##### upgrades/deprecations
+#####
+
+@deprecate set_duration!(dataset, uuid, duration) begin
+    r = dataset.recordings[uuid]
+    set_span!(r, TimeSpan(Nanosecond(0), duration))
+    r
+end
+
+"""
+    Onda.upgrade_onda_format_from_v0_2_to_v0_3!(path, combine_annotation_key_value)
+
+Upgrade the Onda v0.2 dataset at `path` to an Onda v0.3 dataset, returning the
+upgraded `Dataset`. This upgrade process overwrites `path/recordings.msgpack.zst`
+with a v0.3-compliant version of this file; for safety's sake, the old v0.2 file
+is preserved at `path/old.recordings.msgpack.zst.backup` (it is only moved there once the upgraded recordings have been constructed).
+
+A couple of the Onda v0.2 -> v0.3 changes require some special handling:
+
+- The `custom` field was removed from recording objects. This function thus writes out
+  a file at `path/recordings_custom.msgpack.zst` that contains a map of UUIDs to
+  corresponding recordings' `custom` values before deleting the `custom` field. This
+  file can be deserialized via `MsgPack.unpack(Onda.zstd_decompress(read("recordings_custom.msgpack.zst")))`.
+
+- Annotations no longer have a `key` field. Thus, each annotation's existing `key` and `value`
+  fields are combined into the single new `value` field via the provided callback
+  `combine_annotation_key_value(annotation_key, annotation_value)`.
+"""
+function upgrade_onda_format_from_v0_2_to_v0_3!(path, combine_annotation_key_value)
+    file_path = joinpath(path, "recordings.msgpack.zst")
+    bytes = zstd_decompress(read(file_path))
+    io = IOBuffer(bytes)
+    read(io, UInt8) == 0x92 || error("corrupt recordings.msgpack.zst")
+    header = MsgPack.unpack(io, Header)
+    v"0.2" <= header.onda_format_version < v"0.3" || error("unsupported original onda_format_version: $(header.onda_format_version)")
+    recordings = MsgPack.unpack(io, Dict{UUID,Any})
+    customs = Dict{UUID,Any}(uuid => recording["custom"] for (uuid, recording) in recordings)
+    write(joinpath(path, "recordings_custom.msgpack.zst"), zstd_compress(MsgPack.pack(customs)))
+    for (uuid, recording) in recordings
+        signal_stop_nanosecond = recording["duration_in_nanoseconds"]
+        for signal in values(recording["signals"])
+            signal["start_nanosecond"] = 0
+            signal["stop_nanosecond"] = signal_stop_nanosecond
+            signal["sample_offset_in_unit"] = 0.0
+            signal["sample_rate"] = float(signal["sample_rate"])
+        end
+        for annotation in recording["annotations"]
+            annotation["value"] = combine_annotation_key_value(annotation["key"], annotation["value"])
+            delete!(annotation, "key")
+        end
+        delete!(recording, "duration_in_nanoseconds")
+        delete!(recording, "custom")
+    end
+    fixed_recordings = MsgPack.unpack(MsgPack.pack(recordings), Dict{UUID,Recording})
+    mv(file_path, joinpath(path, "old.recordings.msgpack.zst.backup")) # moved only now, so a failed upgrade leaves the v0.2 file in place
+    dataset = Dataset(path, Header(v"0.3.0", true), fixed_recordings)
+    save_recordings_file(dataset)
+    return dataset
+end
+
 end # module
diff --git a/src/dataset.jl b/src/dataset.jl
index ee05bf14..f0074708 100644
--- a/src/dataset.jl
+++ b/src/dataset.jl
@@ -2,48 +2,39 @@
 ##### `Dataset`
 #####
 
-struct Dataset{C}
+struct Dataset
     path::String
     header::Header
-    recordings::Dict{UUID,Recording{C}}
+    recordings::Dict{UUID,Recording}
 end
 
 """
-    Dataset(path, custom_type=Any; create=false, strict=())
+    Dataset(path; create=false)
 
 Return a `Dataset` instance that contains all metadata necessary to read and
 write to the Onda dataset stored at `path`. Note that this constuctor loads all
 the `Recording` objects contained in `path/recordings.msgpack.zst`.
 
-`custom_type` is the `typeof` of the `custom` value found in each `Recording`
-object in the dataset.
-
 If `create` is `true`, then an empty Onda dataset will be created at `path`.
-
-The `strict` keyword argument is forwarded to `MsgPack.unpack` when that
-function is called while parsing `path/recordings.msgpack.zst` (see the
-MsgPack documentation for details regarding `strict`).
 """
-function Dataset(path, custom_type::Type{C}=Any;
-                 create::Bool=false, strict=()) where {C}
+function Dataset(path; create::Bool=false)
     path = rstrip(abspath(path), '/')
     samples_path = joinpath(path, "samples")
     if create
-        endswith(path, ".onda") || throw(ArgumentError("cannot create dataset at $path: path does not end with .onda"))
         if isdir(path)
             isempty(readdir(path)) || throw(ArgumentError("cannot create dataset at $path: directory exists and is nonempty"))
         else
             mkdir(path)
         end
-        mkdir(samples_path)
         initial_header = Header(ONDA_FORMAT_VERSION, true)
-        initial_recordings = Dict{UUID,Recording{C}}()
+        initial_recordings = Dict{UUID,Recording}()
         write_recordings_file(path, initial_header, initial_recordings)
-    elseif !(isdir(path) && isdir(samples_path))
+    elseif !isdir(path)
         throw(ArgumentError("$path is not a valid Onda dataset"))
     end
-    header, recordings = read_recordings_file(path, C, strict)
-    return Dataset{C}(path, header, recordings)
+    !isdir(samples_path) && mkdir(samples_path)
+    header, recordings = read_recordings_file(path)
+    return Dataset(path, header, recordings)
 end
 
 """
@@ -56,8 +47,6 @@ function save_recordings_file(dataset::Dataset)
     return write_recordings_file(dataset.path, dataset.header, dataset.recordings)
 end
 
-Base.@deprecate overwrite_recordings(dataset) save_recordings_file(dataset)
-
 #####
 ##### `merge!`
 #####
@@ -119,46 +108,21 @@ end
 #####
 
 """
-    create_recording!(dataset::Dataset{C}, duration::Nanosecond,
-                      custom=nothing, uuid::UUID=uuid4())
+    create_recording!(dataset::Dataset, uuid::UUID=uuid4())
 
-Create `uuid::UUID => recording::Recording` where `recording` is constructed
-via the provided `duration` and `custom` arguments, `uuid` is the provided UUID
-(which is computed if not provided), add the pair to `dataset.recordings`, and
-return the pair.
-
-The `custom` argument is passed along to the `Recording{C}` constructor, such
-that `custom isa C` must hold true.
+Create `uuid::UUID => recording::Recording`, add the pair to `dataset.recordings`,
+and return the pair.
 """
-function create_recording!(dataset::Dataset{C}, duration::Nanosecond,
-                           custom=nothing, uuid::UUID=uuid4()) where {C}
+function create_recording!(dataset::Dataset, uuid::UUID=uuid4())
     if haskey(dataset.recordings, uuid)
         throw(ArgumentError("recording with UUID $uuid already exists in dataset"))
     end
-    recording = Recording{C}(duration, Dict{Symbol,Signal}(), Set{Annotation}(), custom)
+    recording = Recording(Dict{Symbol,Signal}(), Set{Annotation}())
     dataset.recordings[uuid] = recording
     mkpath(samples_path(dataset, uuid))
     return uuid => recording
 end
 
-#####
-##### `set_duration!`
-#####
-
-"""
-    set_duration!(dataset::Dataset{C}, uuid::UUID, duration::Nanosecond) where {C}
-
-Replace `dataset.recordings[uuid]` with a `Recording` instance that is the exact
-same as the existing recording, but with the `duration_in_nanoseconds` field set
-to the provided `duration`. Returns the newly constructed `Recording` instance.
-"""
-function set_duration!(dataset::Dataset{C}, uuid::UUID, duration::Nanosecond) where {C}
-    r = dataset.recordings[uuid]
-    r = Recording{C}(duration, r.signals, r.annotations, r.custom)
-    dataset.recordings[uuid] = r
-    return r
-end
-
 #####
 ##### `load`
 #####
diff --git a/src/printing.jl b/src/printing.jl
index b59e2e0f..af817fac 100644
--- a/src/printing.jl
+++ b/src/printing.jl
@@ -16,8 +16,11 @@ function Base.show(io::IO, samples::Samples)
         duration_in_nanoseconds = round(Int, duration_in_seconds * 1_000_000_000)
         println(io, "Samples (", format_duration(duration_in_nanoseconds), "):")
         println(io, "  signal.channel_names: ", channel_names_string(samples.signal.channel_names))
+        println(io, "  signal.start_nanosecond: ", samples.signal.start_nanosecond, " (", format_duration(samples.signal.start_nanosecond), ")")
+        println(io, "  signal.stop_nanosecond: ", samples.signal.stop_nanosecond, " (", format_duration(samples.signal.stop_nanosecond), ")")
         println(io, "  signal.sample_unit: ", repr(samples.signal.sample_unit))
         println(io, "  signal.sample_resolution_in_unit: ", samples.signal.sample_resolution_in_unit)
+        println(io, "  signal.sample_offset_in_unit: ", samples.signal.sample_offset_in_unit)
         println(io, "  signal.sample_type: ", samples.signal.sample_type)
         println(io, "  signal.sample_rate: ", samples.signal.sample_rate, " Hz")
         println(io, "  signal.file_extension: ", repr(samples.signal.file_extension))
@@ -34,8 +37,11 @@ function Base.show(io::IO, signal::Signal)
     else
         println(io, "Signal:")
         println(io, "  channel_names: ", channel_names_string(signal.channel_names))
+        println(io, "  start_nanosecond: ", signal.start_nanosecond, " (", format_duration(signal.start_nanosecond), ")")
+        println(io, "  stop_nanosecond: ", signal.stop_nanosecond, " (", format_duration(signal.stop_nanosecond), ")")
         println(io, "  sample_unit: ", repr(signal.sample_unit))
         println(io, "  sample_resolution_in_unit: ", signal.sample_resolution_in_unit)
+        println(io, "  sample_offset_in_unit: ", signal.sample_offset_in_unit)
         println(io, "  sample_type: ", signal.sample_type)
         println(io, "  sample_rate: ", signal.sample_rate, " Hz")
         println(io, "  file_extension: ", repr(signal.file_extension))
@@ -45,41 +51,22 @@ end
 
 function Base.show(io::IO, recording::Recording)
     if get(io, :compact, false)
-        duration_string = format_duration(recording.duration_in_nanoseconds)
+        duration_string = isempty(recording.signals) ? "<no signals>" : format_duration(duration(recording))
         print(io, "Recording(", duration_string, ')')
     else
-        duration_in_seconds = recording.duration_in_nanoseconds.value / 1_000_000_000
-        duration_string = string('(', format_duration(recording.duration_in_nanoseconds),
-                                 "; ", duration_in_seconds, " seconds)")
-        println(io, "Recording:")
-        println(io, "  duration_in_nanoseconds: ", recording.duration_in_nanoseconds, " ", duration_string)
+        if isempty(recording.signals)
+            duration_string = "<no signals>"
+        else
+            duration_string = string(format_duration(duration(recording)), "; ",
+                                     duration(recording).value / 1_000_000_000, " seconds")
+        end
+        println(io, "Recording (", duration_string, ')')
         println(io, "  signals:")
         compact_io = IOContext(io, :compact => true)
         for (name, signal) in recording.signals
             println(compact_io, "    :", name, " => ", signal)
         end
-        println(io, "  annotations (", length(recording.annotations), " total):")
-        annotation_counts = Dict()
-        for ann in recording.annotations
-            annotation_counts[ann.key] = get(annotation_counts, ann.key, 0) + 1
-        end
-        k = 1
-        annotation_counts = sort(collect(annotation_counts), by=(p -> p[2]), lt=(>))
-        for (x, n) in annotation_counts
-            println(io, "    ", n, " instance(s) of ", x)
-            k += 1
-            if k > 5
-                println(io, "    ...and ", length(annotation_counts) - 5, " more.")
-                break
-            end
-        end
-        print(io, "  custom:")
-        if recording.custom isa Nothing
-            print(io, " nothing")
-        else
-            println(io)
-            show(io, "text/plain", recording.custom)
-        end
+        print(io, "  annotations: (", length(recording.annotations), " total)")
     end
 end
 
@@ -104,7 +91,7 @@ function nanosecond_to_periods(ns::Integer)
     return (hr, m, s, ms, μs, ns)
 end
 
-format_duration(t::Period) =  format_duration(convert(Nanosecond, t).value)
+format_duration(t::Period) = format_duration(convert(Nanosecond, t).value)
 
 function format_duration(ns::Integer)
     hr, m, s, ms, μs, ns = nanosecond_to_periods(ns)
diff --git a/src/recordings.jl b/src/recordings.jl
index 9c9a6aba..60f17e50 100644
--- a/src/recordings.jl
+++ b/src/recordings.jl
@@ -56,27 +56,24 @@ end
 A type representing an individual Onda annotation object. Instances contain
 the following fields, following the Onda specification for annotation objects:
 
-- `key::String`
 - `value::String`
 - `start_nanosecond::Nanosecond`
 - `stop_nanosecond::Nanosecond`
 """
 struct Annotation <: AbstractTimeSpan
-    key::String
     value::String
     start_nanosecond::Nanosecond
     stop_nanosecond::Nanosecond
-    function Annotation(key::AbstractString, value::AbstractString,
-                        start::Nanosecond, stop::Nanosecond)
+    function Annotation(value::AbstractString, start::Nanosecond, stop::Nanosecond)
         _validate_timespan(start, stop)
-        return new(key, value, start, stop)
+        return new(value, start, stop)
     end
 end
 
 MsgPack.msgpack_type(::Type{Annotation}) = MsgPack.StructType()
 
-function Annotation(key, value, span::AbstractTimeSpan)
-    return Annotation(key, value, first(span), last(span))
+function Annotation(value, span::AbstractTimeSpan)
+    return Annotation(value, first(span), last(span))
 end
 
 Base.first(annotation::Annotation) = annotation.start_nanosecond
@@ -94,21 +91,35 @@ A type representing an individual Onda signal object. Instances contain
 the following fields, following the Onda specification for signal objects:
 
 - `channel_names::Vector{Symbol}`
+- `start_nanosecond::Nanosecond`
+- `stop_nanosecond::Nanosecond`
 - `sample_unit::Symbol`
 - `sample_resolution_in_unit::Float64`
+- `sample_offset_in_unit::Float64`
 - `sample_type::DataType`
-- `sample_rate::UInt64`
+- `sample_rate::Float64`
 - `file_extension::Symbol`
 - `file_options::Union{Nothing,Dict{Symbol,Any}}`
 """
 Base.@kwdef struct Signal
     channel_names::Vector{Symbol}
+    start_nanosecond::Nanosecond
+    stop_nanosecond::Nanosecond
     sample_unit::Symbol
     sample_resolution_in_unit::Float64
+    sample_offset_in_unit::Float64
     sample_type::DataType
-    sample_rate::UInt64
+    sample_rate::Float64
     file_extension::Symbol
     file_options::Union{Nothing,Dict{Symbol,Any}}
+    function Signal(channel_names, start_nanosecond, stop_nanosecond,
+                    sample_unit, sample_resolution_in_unit, sample_offset_in_unit,
+                    sample_type, sample_rate, file_extension, file_options)
+        _validate_timespan(start_nanosecond, stop_nanosecond)
+        return new(channel_names, start_nanosecond, stop_nanosecond,
+                   sample_unit, sample_resolution_in_unit, sample_offset_in_unit,
+                   sample_type, sample_rate, file_extension, file_options)
+    end
 end
 
 function Base.:(==)(a::Signal, b::Signal)
@@ -131,8 +142,11 @@ end
 """
     signal_from_template(signal::Signal;
                          channel_names=signal.channel_names,
+                         start_nanosecond=signal.start_nanosecond,
+                         stop_nanosecond=signal.stop_nanosecond,
                          sample_unit=signal.sample_unit,
                          sample_resolution_in_unit=signal.sample_resolution_in_unit,
+                         sample_offset_in_unit=signal.sample_offset_in_unit,
                          sample_type=signal.sample_type,
                          sample_rate=signal.sample_rate,
                          file_extension=signal.file_extension,
@@ -142,19 +156,20 @@ Return a `Signal` where each field is mapped to the corresponding keyword argume
 """
 function signal_from_template(signal::Signal;
                               channel_names=signal.channel_names,
+                              start_nanosecond=signal.start_nanosecond,
+                              stop_nanosecond=signal.stop_nanosecond,
                               sample_unit=signal.sample_unit,
                               sample_resolution_in_unit=signal.sample_resolution_in_unit,
+                              sample_offset_in_unit=signal.sample_offset_in_unit,
                               sample_type=signal.sample_type,
                               sample_rate=signal.sample_rate,
                               file_extension=signal.file_extension,
                               file_options=signal.file_options)
-    return Signal(channel_names, sample_unit, sample_resolution_in_unit,
-                  sample_type, sample_rate, file_extension,
-                  file_options)
+    return Signal(channel_names, start_nanosecond, stop_nanosecond,
+                  sample_unit, sample_resolution_in_unit, sample_offset_in_unit,
+                  sample_type, sample_rate, file_extension, file_options)
 end
 
-Base.@deprecate copy_with(signal; kwargs...) signal_from_template(signal; kwargs...)
-
 """
     channel(signal::Signal, name::Symbol)
 
@@ -176,33 +191,51 @@ Return `length(signal.channel_names)`.
 """
 channel_count(signal::Signal) = length(signal.channel_names)
 
+"""
+    span(signal::Signal)
+
+Return `TimeSpan(signal.start_nanosecond, signal.stop_nanosecond)`.
+"""
+span(signal::Signal) = TimeSpan(signal.start_nanosecond, signal.stop_nanosecond)
+
+"""
+    duration(signal::Signal)
+
+Return `duration(span(signal))`.
+"""
+duration(signal::Signal) = duration(span(signal))
+
+"""
+    sample_count(signal::Signal)
+
+Return the number of multichannel samples that fit within `duration(signal)`
+given `signal.sample_rate`.
+"""
+sample_count(signal::Signal) = index_from_time(signal.sample_rate, duration(signal)) - 1
+
 #####
 ##### recordings
 #####
 
 """
-    Recording{C}
+    Recording
 
 A type representing an individual Onda recording object. Instances contain
 the following fields, following the Onda specification for recording objects:
 
-- `duration_in_nanoseconds::Nanosecond`
 - `signals::Dict{Symbol,Signal}`
 - `annotations::Set{Annotation}`
-- `custom::C`
 """
-struct Recording{C}
-    duration_in_nanoseconds::Nanosecond
+struct Recording
     signals::Dict{Symbol,Signal}
     annotations::Set{Annotation}
-    custom::C
 end
 
 function Base.:(==)(a::Recording, b::Recording)
     return all(name -> getfield(a, name) == getfield(b, name), fieldnames(Recording))
 end
 
-MsgPack.msgpack_type(::Type{<:Recording}) = MsgPack.StructType()
+MsgPack.msgpack_type(::Type{Recording}) = MsgPack.StructType()
 
 """
     annotate!(recording::Recording, annotation::Annotation)
@@ -214,9 +247,37 @@ annotate!(recording::Recording, annotation::Annotation) = push!(recording.annota
 """
     duration(recording::Recording)
 
-Returns `recording.duration_in_nanoseconds`.
+Returns `maximum(s -> s.stop_nanosecond, values(recording.signals))`; throws an
+`ArgumentError` if `recording.signals` is empty.
+"""
+function duration(recording::Recording)
+    isempty(recording.signals) && throw(ArgumentError("`duration(recording)` is not defined if `isempty(recording.signals)`"))
+    return maximum(s -> s.stop_nanosecond, values(recording.signals))
+end
+
+"""
+    set_span!(recording::Recording, name::Symbol, span::AbstractTimeSpan)
+
+Replace `recording.signals[name]` with a copy that has the `start_nanosecond`
+and `stop_nanosecond` fields set to match the provided `span`. Returns the
+newly constructed `Signal` instance.
+"""
+function set_span!(recording::Recording, name::Symbol, span::AbstractTimeSpan)
+    signal = signal_from_template(recording.signals[name];
+                                  start_nanosecond=first(span),
+                                  stop_nanosecond=last(span))
+    recording.signals[name] = signal
+    return signal
+end
+
 """
-duration(recording::Recording) = recording.duration_in_nanoseconds
+    set_span!(recording::Recording, span::AbstractTimeSpan)
+
+Return `Dict(name => set_span!(recording, name, span) for name in keys(recording.signals))`
+"""
+function set_span!(recording::Recording, span::AbstractTimeSpan)
+    return Dict(name => set_span!(recording, name, span) for name in keys(recording.signals))
+end
 
 #####
 ##### reading/writing `recordings.msgpack.zst`
@@ -229,7 +290,7 @@ end
 
 MsgPack.msgpack_type(::Type{Header}) = MsgPack.StructType()
 
-function read_recordings_file(path, ::Type{C}, additional_strict_args) where {C}
+function read_recordings_file(path)
     file_path = joinpath(path, "recordings.msgpack.zst")
     bytes = zstd_decompress(read(file_path))
     io = IOBuffer(bytes)
@@ -239,15 +300,14 @@ function read_recordings_file(path, ::Type{C}, additional_strict_args) where {C}
     if !is_supported_onda_format_version(header.onda_format_version)
         @warn("attempting to load `Dataset` with unsupported Onda version",
               supported=ONDA_FORMAT_VERSION, attempting=header.onda_format_version)
+        @warn("consider upgrading old datasets via `Onda.upgrade_onda_format_from_v0_2_to_v0_3!`")
     end
-    R = Recording{C}
-    strict = header.ordered_keys ? (R,) : ()
-    strict = (strict..., additional_strict_args...)
-    recordings = MsgPack.unpack(io, Dict{UUID,R}; strict=strict)
+    strict = header.ordered_keys ? (Recording,) : ()
+    recordings = MsgPack.unpack(io, Dict{UUID,Recording}; strict=strict)
     return header, recordings
 end
 
-function write_recordings_file(path, header::Header, recordings::Dict{UUID,<:Recording})
+function write_recordings_file(path, header::Header, recordings::Dict{UUID,Recording})
     file_path = joinpath(path, "recordings.msgpack.zst")
     backup_file_path = joinpath(path, "_recordings.msgpack.zst.backup")
     isfile(file_path) && mv(file_path, backup_file_path)
diff --git a/src/samples.jl b/src/samples.jl
index e877ea84..d1f4a56e 100644
--- a/src/samples.jl
+++ b/src/samples.jl
@@ -19,60 +19,8 @@ Return a `Samples` instance with the following fields:
 
 Note that `getindex` and `view` are defined on `Samples` to accept normal integer
 indices, but also accept channel names for row indices and [`TimeSpan`](@ref)
-values for column indices:
-
-```
-julia> eeg
-Samples (00:43:11.062500000):
-  signal.channel_names: [:fp1, :f3, :c3, :p3, :f7, :t3, :t5, :o1, :fz, :cz,
-                         :pz, :fp2, :f4, :c4, :p4, :f8, :t4, :t6, :o2]
-  signal.sample_unit: :microvolt
-  signal.sample_resolution_in_unit: 0.25
-  signal.sample_type: Int16
-  signal.sample_rate: 128 Hz
-  signal.file_extension: :zst
-  signal.file_options: nothing
-  encoded: true
-  data:
-19×331656 Array{Int16,2}:
- -421  -416    51  -229  …   -164   -318   -644
- -866  -860  -401  -684     -1665  -1805  -2139
- -809  -776  -320  -641     -1402  -1571  -1892
- -698  -642  -191  -522     -1391  -1585  -1891
- -801  -778  -307  -622     -1275  -1452  -1771
- -340  -297   168  -160  …  -1012  -1202  -1514
- -594  -544   -86  -410      -802   -982  -1298
- -254  -180   270   -68        68   -141   -435
- -567  -547   -86  -396     -1439  -1602  -1924
- -620  -584  -129  -450     -1486  -1664  -1970
- -595  -807  -187  -341  …  -1672  -1441  -2113
- -407  -386    71  -216      -993  -1146  -1466
- -490  -475   -17  -310      -861  -1011  -1332
- -540  -555   -79  -359     -1077   -973  -1508
- -437  -394    60  -263     -1051  -1232  -1541
- -721  -692  -239  -536  …  -1681  -1838  -2151
- -646  -597  -145  -460     -1119  -1298  -1602
- -468  -411    32  -295      -224   -411   -695
- -855  -792  -354  -686     -1584  -1782  -2065
-
-julia> eeg[[:fp1, 2, :fz, :o1, :cz], TimeSpan(Minute(20), duration(eeg))]
-Samples (00:23:11.062500000):
-  signal.channel_names: [:fp1, :f3, :fz, :o1, :cz]
-  signal.sample_unit: :microvolt
-  signal.sample_resolution_in_unit: 0.25
-  signal.sample_type: Int16
-  signal.sample_rate: 128 Hz
-  signal.file_extension: :zst
-  signal.file_options: nothing
-  encoded: true
-  data:
-5×178056 Array{Int16,2}:
- -326    69   154  -281   …   -164   -318   -644
- -596  -204  -109  -543      -1665  -1805  -2139
- -472   -59    19  -422      -1439  -1602  -1924
- -453   -17    31  -418         68   -141   -435
- -797  -373  -306  -748      -1486  -1664  -1970
-```
+values for column indices; see `Onda/examples/tour.jl` for a comprehensive
+set of indexing examples.
 
 See also: [`encode`](@ref), [`encode!`](@ref), [`decode`](@ref), [`decode!`](@ref)
 """
@@ -147,6 +95,12 @@ channel(samples::Samples, i::Integer) = channel(samples.signal, i)
     duration(samples::Samples)
 
 Returns the `Nanosecond` value for which `samples[TimeSpan(0, duration(samples))] == samples.data`.
+
+!!! warning
+    `duration(samples)` is not generally equivalent to `duration(samples.signal)`;
+    the former is the actual duration of `samples.data` given
+    `samples.signal.sample_rate`, whereas the latter is the duration of the entire
+    original signal in the context of its parent recording.
 """
 duration(samples::Samples) = time_from_index(samples.signal.sample_rate, size(samples.data, 2) + 1)
 
@@ -161,6 +115,11 @@ channel_count(samples::Samples) = channel_count(samples.signal)
     sample_count(samples::Samples)
 
 Return the number of multichannel samples in `samples` (i.e. `size(samples.data, 2)`)
+
+!!! warning
+    `sample_count(samples)` is not generally equivalent to `sample_count(samples.signal)`;
+    the former is the actual number of multichannel samples in `samples.data`, whereas the
+    latter is the sample count of the entire original signal in the context of its parent recording.
 """
 sample_count(samples::Samples) = size(samples.data, 2)
 
@@ -170,12 +129,12 @@ sample_count(samples::Samples) = size(samples.data, 2)
 
 const VALID_SAMPLE_TYPE_UNION = Union{Int8,Int16,Int32,Int64,UInt8,UInt16,UInt32,UInt64}
 
-function encode_sample(::Type{S}, resolution_in_unit, sample_in_units,
-                       noise=zero(sample_in_units)) where {S<:VALID_SAMPLE_TYPE_UNION}
-    sample_in_units += noise
-    isnan(sample_in_units) && return typemax(S)
-    from_units = clamp(sample_in_units / resolution_in_unit, typemin(S), typemax(S))
-    return round(S, from_units)
+function encode_sample(::Type{S}, resolution_in_unit, offset_in_unit, sample_in_unit,
+                       noise=zero(sample_in_unit)) where {S<:VALID_SAMPLE_TYPE_UNION}
+    sample_in_unit += noise
+    isnan(sample_in_unit) && return typemax(S)
+    from_unit = clamp((sample_in_unit - offset_in_unit) / resolution_in_unit, typemin(S), typemax(S))
+    return round(S, from_unit)
 end
 
 function dither_noise!(rng::AbstractRNG, storage, step)
@@ -200,13 +159,14 @@ end
 #####
 
 """
-    encode(sample_type::DataType, sample_resolution_in_unit, samples, dither_storage=nothing)
+    encode(sample_type::DataType, sample_resolution_in_unit, sample_offset_in_unit,
+           samples, dither_storage=nothing)
 
-Return a copy of `samples` quantized according to `sample_type` and `sample_resolution_in_unit`.
-`sample_type` must be a concrete subtype of `Onda.VALID_SAMPLE_TYPE_UNION`. Quantization of an
-individual sample `s` is performed via:
+Return a copy of `samples` quantized according to `sample_type`, `sample_resolution_in_unit`,
+and `sample_offset_in_unit`. `sample_type` must be a concrete subtype of `Onda.VALID_SAMPLE_TYPE_UNION`.
+Quantization of an individual sample `s` is performed via:
 
-    round(S, s / sample_resolution_in_unit)
+    round(S, (s - sample_offset_in_unit) / sample_resolution_in_unit)
 
 with additional special casing to clip values exceeding the encoding's dynamic range.
 
@@ -219,28 +179,34 @@ Otherwise, `dither_storage` must be a container of similar shape and type to
 `samples`. This container is then used to store the random noise needed for the
 triangular dithering process, which is applied to the signal prior to quantization.
 """
-function encode(::Type{S}, sample_resolution_in_unit, samples, dither_storage=nothing) where {S}
-    return encode!(similar(samples, S), S, sample_resolution_in_unit, samples, dither_storage)
+function encode(::Type{S}, sample_resolution_in_unit, sample_offset_in_unit,
+                samples, dither_storage=nothing) where {S}
+    return encode!(similar(samples, S), S, sample_resolution_in_unit, sample_offset_in_unit,
+                   samples, dither_storage)
 end
 
 """
-    encode!(result_storage, sample_type::DataType, sample_resolution_in_unit, samples, dither_storage=nothing)
-    encode!(result_storage, sample_resolution_in_unit, samples, dither_storage=nothing)
+    encode!(result_storage, sample_type::DataType, sample_resolution_in_unit,
+            sample_offset_in_unit, samples, dither_storage=nothing)
+    encode!(result_storage, sample_resolution_in_unit, sample_offset_in_unit,
+            samples, dither_storage=nothing)
 
-Similar to `encode(sample_type, sample_resolution_in_unit, samples, dither_storage)`,
+Similar to `encode(sample_type, sample_resolution_in_unit, sample_offset_in_unit, samples, dither_storage)`,
 but write encoded values to `result_storage` rather than allocating new storage.
 
 `sample_type` defaults to `eltype(result_storage)` if it is not provided.
 """
-function encode!(result_storage, sample_resolution_in_unit, samples, dither_storage=nothing)
+function encode!(result_storage, sample_resolution_in_unit, sample_offset_in_unit,
+                 samples, dither_storage=nothing)
     return encode!(result_storage, eltype(result_storage), sample_resolution_in_unit,
-                   samples, dither_storage=nothing)
+                   sample_offset_in_unit, samples, dither_storage) # positional, not a kwarg
 end
 
-function encode!(result_storage, ::Type{S}, sample_resolution_in_unit, samples,
-                 dither_storage=nothing) where {S}
+function encode!(result_storage, ::Type{S}, sample_resolution_in_unit, sample_offset_in_unit,
+                 samples, dither_storage=nothing) where {S}
     if dither_storage isa Nothing
-        broadcast!(encode_sample, result_storage, S, sample_resolution_in_unit, samples)
+        broadcast!(encode_sample, result_storage, S, sample_resolution_in_unit,
+                   sample_offset_in_unit, samples)
     else
         if dither_storage isa Missing
             dither_storage = similar(samples)
@@ -248,7 +214,8 @@ function encode!(result_storage, ::Type{S}, sample_resolution_in_unit, samples,
             throw(DimensionMismatch("dithering storage container does not match shape of samples"))
         end
         dither_noise!(dither_storage, sample_resolution_in_unit)
-        broadcast!(encode_sample, result_storage, S, sample_resolution_in_unit, samples, dither_storage)
+        broadcast!(encode_sample, result_storage, S, sample_resolution_in_unit, sample_offset_in_unit,
+                   samples, dither_storage)
     end
     return result_storage
 end
@@ -260,6 +227,7 @@ If `samples.encoded` is `false`, return a `Samples` instance that wraps:
 
     encode(samples.signal.sample_type,
            samples.signal.sample_resolution_in_unit,
+           samples.signal.sample_offset_in_unit,
            samples.data, dither_storage)
 
 If `samples.encoded` is `true`, this function is the identity.
@@ -268,6 +236,7 @@ function encode(samples::Samples, dither_storage=nothing)
     samples.encoded && return samples
     data = encode(samples.signal.sample_type,
                   samples.signal.sample_resolution_in_unit,
+                  samples.signal.sample_offset_in_unit,
                   samples.data, dither_storage)
     return Samples(samples.signal, true, data)
 end
@@ -280,6 +249,7 @@ If `samples.encoded` is `false`, return a `Samples` instance that wraps:
     encode!(result_storage,
             samples.signal.sample_type,
             samples.signal.sample_resolution_in_unit,
+            samples.signal.sample_offset_in_unit,
             samples.data, dither_storage)`.
 
 If `samples.encoded` is `true`, return a `Samples` instance that wraps
@@ -292,6 +262,7 @@ function encode!(result_storage, samples::Samples, dither_storage=nothing)
     end
     encode!(result_storage, samples.signal.sample_type,
             samples.signal.sample_resolution_in_unit,
+            samples.signal.sample_offset_in_unit,
             samples.data, dither_storage)
     return Samples(samples.signal, true, result_storage)
 end
@@ -301,33 +272,38 @@ end
 #####
 
 """
-    decode(sample_resolution_in_unit, samples)
+    decode(sample_resolution_in_unit, sample_offset_in_unit, samples)
 
-Return `sample_resolution_in_unit .* samples`
+Return `sample_resolution_in_unit .* samples .+ sample_offset_in_unit`
 """
-decode(sample_resolution_in_unit, samples) = sample_resolution_in_unit .* samples
+function decode(sample_resolution_in_unit, sample_offset_in_unit, samples)
+    return sample_resolution_in_unit .* samples .+ sample_offset_in_unit
+end
 
 """
-    decode!(result_storage, sample_resolution_in_unit, samples)
+    decode!(result_storage, sample_resolution_in_unit, sample_offset_in_unit, samples)
 
-Similar to `decode(sample_resolution_in_unit, samples)`, but write decoded values
-to `result_storage` rather than allocating new storage.
+Similar to `decode(sample_resolution_in_unit, sample_offset_in_unit, samples)`, but
+write decoded values to `result_storage` rather than allocating new storage.
 """
-function decode!(result_storage, sample_resolution_in_unit, samples)
-    return broadcast!(*, result_storage, sample_resolution_in_unit, samples)
+function decode!(result_storage, sample_resolution_in_unit, sample_offset_in_unit, samples)
+    f = x -> sample_resolution_in_unit * x + sample_offset_in_unit
+    return broadcast!(f, result_storage, samples)
 end
 
 """
     decode(samples::Samples)
 
 If `samples.encoded` is `true`, return a `Samples` instance that wraps
-`decode(samples.signal.sample_resolution_in_unit, samples.data)`.
+`decode(samples.signal.sample_resolution_in_unit, samples.signal.sample_offset_in_unit, samples.data)`.
 
 If `samples.encoded` is `false`, this function is the identity.
 """
 function decode(samples::Samples)
     samples.encoded || return samples
-    data = decode(samples.signal.sample_resolution_in_unit, samples.data)
+    data = decode(samples.signal.sample_resolution_in_unit,
+                  samples.signal.sample_offset_in_unit,
+                  samples.data)
     return Samples(samples.signal, false, data)
 end
 
@@ -335,14 +311,15 @@ end
     decode!(result_storage, samples::Samples)
 
 If `samples.encoded` is `true`, return a `Samples` instance that wraps
-`decode!(result_storage, samples.signal.sample_resolution_in_unit, samples.data)`.
+`decode!(result_storage, samples.signal.sample_resolution_in_unit, samples.signal.sample_offset_in_unit, samples.data)`.
 
 If `samples.encoded` is `false`, return a `Samples` instance that wraps
 `copyto!(result_storage, samples.data)`.
 """
 function decode!(result_storage, samples::Samples)
     if samples.encoded
-        broadcast!(*, result_storage, samples.signal.sample_resolution_in_unit, samples.data)
+        decode!(result_storage, samples.signal.sample_resolution_in_unit,
+                samples.signal.sample_offset_in_unit, samples.data)
         return Samples(samples.signal, false, result_storage)
     end
     copyto!(result_storage, samples.data)
diff --git a/src/timespans.jl b/src/timespans.jl
index 1a6ab3a2..f30760b2 100644
--- a/src/timespans.jl
+++ b/src/timespans.jl
@@ -114,7 +114,7 @@ For `span::Period`, this function is the identity.
 duration(t::AbstractTimeSpan) = last(t) - first(t)
 duration(t::Period) = t
 
-nanoseconds_per_sample(sample_rate) = floor(Int, inv(sample_rate) * 1_000_000_000)
+nanoseconds_per_sample(sample_rate) = inv(sample_rate) * 1_000_000_000
 
 """
     index_from_time(sample_rate, sample_time)
@@ -193,7 +193,7 @@ julia> time_from_index(100, 101)
 """
 function time_from_index(sample_rate, index)
     index > 0 || throw(ArgumentError("`index` must be > 0"))
-    return Nanosecond((index - 1) * nanoseconds_per_sample(sample_rate))
+    return Nanosecond(ceil(Int, (index - 1) * nanoseconds_per_sample(sample_rate)))
 end
 
 """
diff --git a/test/dataset.jl b/test/dataset.jl
index 962611f5..3501c812 100644
--- a/test/dataset.jl
+++ b/test/dataset.jl
@@ -3,19 +3,21 @@ using Test, Onda, Dates, MsgPack
 @testset "round trip" begin
     mktempdir() do root
         # generate a test dataset
-        dataset = Dataset(joinpath(root, "test.onda"); create=true)
+        dataset = Dataset(joinpath(root, "test"); create=true)
         @test dataset isa Dataset
         @test isdir(dataset.path)
         @test isdir(joinpath(dataset.path, "samples"))
         duration_in_seconds = Second(10)
         duration_in_nanoseconds = Nanosecond(duration_in_seconds)
-        uuid, recording = create_recording!(dataset, duration_in_nanoseconds)
+        uuid, recording = create_recording!(dataset)
         Ts = (UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64)
+        sample_rate = 50.5
         signals = Dict(Symbol(:x, i) => Signal(Symbol.([:a, :b, :c], i),
-                                               Symbol(:unit, i), 0.25, T,
-                                               100, Symbol("lpcm.zst"), nothing)
+                                               Nanosecond(0), duration_in_nanoseconds,
+                                               Symbol(:unit, i), 0.25, i, T, sample_rate,
+                                               Symbol("lpcm.zst"), nothing)
                        for (i, T) in enumerate(Ts))
-        samples = Dict(k => Samples(v, true, rand(v.sample_type, 3, 100 * 10))
+        samples = Dict(k => Samples(v, true, rand(v.sample_type, 3, sample_count(v)))
                        for (k, v) in signals)
         for (name, s) in samples
             @test channel_count(s) == length(s.signal.channel_names)
@@ -34,9 +36,9 @@ using Test, Onda, Dates, MsgPack
             decode!(tmp, s)
             @test tmp == d.data
             tmp = similar(d.data)
-            decode!(tmp, s.signal.sample_resolution_in_unit, s.data)
+            decode!(tmp, s.signal.sample_resolution_in_unit, s.signal.sample_offset_in_unit, s.data)
             @test tmp == d.data
-            @test d.data == (s.data .* s.signal.sample_resolution_in_unit)
+            @test d.data == (s.data .* s.signal.sample_resolution_in_unit .+ s.signal.sample_offset_in_unit)
             if sizeof(s.signal.sample_type) >= 8
                 # decoding from 64-bit to floating point is fairly lossy
                 tmp = similar(s.data)
@@ -69,14 +71,14 @@ using Test, Onda, Dates, MsgPack
                 @test s[ch_inds, TimeSpan(t, t2)].data == s.data[ch_inds, i:j]
                 @test s[ch_inds, i:j].data == s.data[ch_inds, i:j]
             end
-            @test size(s[:, TimeSpan(0, Second(1))].data, 2) == s.signal.sample_rate
+            @test size(s[:, TimeSpan(0, Second(1))].data, 2) == floor(s.signal.sample_rate)
             for i in 1:length(chs)
                 @test channel(s, chs[i]) == i
                 @test channel(s, i) == chs[i]
                 @test channel(s.signal, chs[i]) == i
                 @test channel(s.signal, i) == chs[i]
             end
-            @test duration(s) == Nanosecond((100 * 10) * (1_000_000_000) // 100)
+            @test duration(s) == duration_in_seconds
             @test s[:, TimeSpan(0, duration(s))].data == s.data
             store!(dataset, uuid, name, s)
         end
@@ -84,7 +86,7 @@ using Test, Onda, Dates, MsgPack
 
         # read back in the test dataset, add some annotations
         old_dataset = dataset
-        dataset = Dataset(joinpath(root, "test.onda"))
+        dataset = Dataset(joinpath(root, "test"))
         @test length(dataset.recordings) == 1
         uuid, recording = first(dataset.recordings)
         x1 = load(dataset, uuid, :x1)
@@ -103,7 +105,7 @@ using Test, Onda, Dates, MsgPack
             @test xi[:, span].data == xs_span[name].data
         end
         for i in 1:3
-            annotate!(recording, Annotation("key_$i", "value_$i", Nanosecond(i), Nanosecond(i + rand(1:1000000))))
+            annotate!(recording, Annotation("value_$i", Nanosecond(i), Nanosecond(i + rand(1:1000000))))
         end
         save_recordings_file(dataset)
 
@@ -111,11 +113,11 @@ using Test, Onda, Dates, MsgPack
         old_uuid = uuid
         old_recording = recording
         old_dataset = dataset
-        dataset = Dataset(joinpath(root, "test.onda"))
+        dataset = Dataset(joinpath(root, "test"))
         uuid, recording = first(dataset.recordings)
         @test old_recording == recording
         delete!(dataset.recordings, uuid)
-        uuid, recording = create_recording!(dataset, old_recording.duration_in_nanoseconds)
+        uuid, recording = create_recording!(dataset)
         foreach(x -> annotate!(recording, x), old_recording.annotations)
         foreach(x -> store!(dataset, uuid, x, load(old_dataset, old_uuid, x)), keys(old_recording.signals))
         merge!(dataset, old_dataset, only_recordings=true)
@@ -125,19 +127,15 @@ using Test, Onda, Dates, MsgPack
         r2 = dataset.recordings[uuid]
         @test r2 == recording
         @test old_uuid != uuid
-        @test r1.duration_in_nanoseconds == r2.duration_in_nanoseconds
         @test r1.signals == r2.signals
         @test r1.annotations == r2.annotations
-        @test r1.custom == r2.custom
 
-        new_duration = r2.duration_in_nanoseconds + Nanosecond(1)
-        r3 = set_duration!(dataset, uuid, new_duration)
-        @test r3.signals === r2.signals
-        @test r3.annotations === r2.annotations
-        @test r3.custom === r2.custom
-        @test r3.duration_in_nanoseconds === new_duration
-        @test dataset.recordings[uuid] === r3
-        set_duration!(dataset, uuid, r2.duration_in_nanoseconds)
+        old_duration = duration(r2)
+        new_duration = old_duration + Nanosecond(1)
+        r2signals = set_span!(r2, TimeSpan(Nanosecond(0), new_duration))
+        @test keys(r2signals) == keys(r2.signals)
+        @test all(duration.(values(r2signals)) .== new_duration)
+        set_span!(r2, TimeSpan(Nanosecond(0), old_duration))
 
         r = dataset.recordings[uuid]
         original_signals_length = length(r.signals)
@@ -152,35 +150,38 @@ using Test, Onda, Dates, MsgPack
         store!(dataset, uuid, signal_name, signal_samples)
 
         # read back everything, but without assuming an order on the metadata
-        dataset = Dataset(joinpath(root, "test.onda"))
+        dataset = Dataset(joinpath(root, "test"))
         Onda.write_recordings_file(dataset.path,
                                    Onda.Header(dataset.header.onda_format_version, false),
                                    dataset.recordings)
-        dataset = Dataset(joinpath(root, "test.onda"))
+        dataset = Dataset(joinpath(root, "test"))
         @test Dict(old_uuid => old_recording) == dataset.recordings
         delete!(dataset, old_uuid)
         save_recordings_file(dataset)
 
         # read back the dataset that should now be empty
-        dataset = Dataset(joinpath(root, "test.onda"))
+        dataset = Dataset(joinpath(root, "test"))
         @test isempty(dataset.recordings)
         @test !isdir(joinpath(dataset.path, "samples", string(old_uuid)))
+
+        # make sure samples directory is appropriately created if not present
+        no_samples_path = joinpath(root, "no_samples_dir.onda")
+        mkdir(no_samples_path)
+        cp(joinpath(dataset.path, "recordings.msgpack.zst"), joinpath(no_samples_path, "recordings.msgpack.zst"))
+        Dataset(no_samples_path; create=false)
+        @test isdir(joinpath(no_samples_path, "samples"))
     end
 end
 
 @testset "Error conditions" begin
     mktempdir() do root
-        @test_throws ArgumentError Dataset(joinpath(root, "doesnt_end_with_onda"); create=true)
         mkdir(joinpath(root, "i_exist.onda"))
         touch(joinpath(root, "i_exist.onda", "memes"))
         @test_throws ArgumentError Dataset(joinpath(root, "i_exist.onda"); create=true)
-        mkdir(joinpath(root, "no_samples_dir.onda"))
-        @test_throws ArgumentError Dataset(joinpath(root, "no_samples_dir.onda"); create=false)
 
         dataset = Dataset(joinpath(root, "okay.onda"); create=true)
-        duration = Nanosecond(Second(10))
-        uuid, recording = create_recording!(dataset, duration)
-        signal = Signal([:a], :mv, 0.25, Int8, 100, Symbol("lpcm.zst"), nothing)
+        uuid, recording = create_recording!(dataset)
+        signal = Signal([:a], Nanosecond(0), Nanosecond(Second(10)), :mv, 0.25, 0.0, Int8, 100, Symbol("lpcm.zst"), nothing)
         @test_throws DimensionMismatch Samples(signal, true, rand(Int8, 2, 10))
         @test_throws ArgumentError Samples(signal, true, rand(Float32, 1, 10))
         samples = Samples(signal, true, rand(Int8, 1, 10 * 100))
@@ -188,14 +189,57 @@ end
         store!(dataset, uuid, :name_okay, samples)
         @test_throws ArgumentError store!(dataset, uuid, :name_okay, samples; overwrite=false)
 
-        @test_throws ArgumentError Annotation("hi", "there", Nanosecond(20), Nanosecond(4))
+        @test_throws ArgumentError Annotation("hi", Nanosecond(20), Nanosecond(4))
 
         mkdir(joinpath(root, "other.onda"))
         other = Dataset(joinpath(root, "other.onda"); create=true)  # Using existing empty directory
-        create_recording!(other, duration, nothing, uuid)
-        @test_throws ArgumentError create_recording!(other, duration, nothing, uuid)
+        create_recording!(other, uuid)
+        @test_throws ArgumentError create_recording!(other, uuid)
         store!(other, uuid, :cool_stuff, samples)
         @test_throws ErrorException merge!(dataset, other; only_recordings=false)
         @test_throws ArgumentError merge!(dataset, other; only_recordings=true)
     end
 end
+
+@testset "upgrade_onda_format_from_v0_2_to_v0_3!" begin
+    mktempdir() do new_path
+        old_path = joinpath(@__DIR__, "old_test_v0_2.onda")
+        cp(old_path, new_path; force=true)
+        dataset = Onda.upgrade_onda_format_from_v0_2_to_v0_3!(new_path, (k, v) -> string(k, '.', v))
+        @test dataset.path == new_path
+        @test dataset.header.onda_format_version == v"0.3.0"
+        @test dataset.header.ordered_keys
+        old_recordings = MsgPack.unpack(Onda.zstd_decompress(read(joinpath(old_path, "recordings.msgpack.zst"))))[2]
+        new_customs = MsgPack.unpack(Onda.zstd_decompress(read(joinpath(new_path, "recordings_custom.msgpack.zst"))))
+        @test length(dataset.recordings) == 1
+        @test length(new_customs) == 1
+        uuid = first(keys(dataset.recordings))
+        recording = first(values(dataset.recordings))
+        old_recording = first(values(old_recordings))
+        @test string(uuid) == first(keys(new_customs)) == first(keys(old_recordings))
+        @test first(values(new_customs)) == old_recording["custom"]
+        sorted_annotations = sort(collect(recording.annotations); by=first)
+        sorted_old_annotations = sort(old_recording["annotations"]; by=(x -> x["start_nanosecond"]))
+        @test length(sorted_annotations) == length(sorted_old_annotations)
+        for (ann, old_ann) in zip(sorted_annotations, sorted_old_annotations)
+            @test ann.value == string(old_ann["key"], '.', old_ann["value"])
+            @test ann.start_nanosecond.value == old_ann["start_nanosecond"]
+            @test ann.stop_nanosecond.value == old_ann["stop_nanosecond"]
+        end
+        old_signals = old_recording["signals"]
+        @test keys(recording.signals) == Set(Symbol.(keys(old_signals)))
+        for (signal_name, signal) in recording.signals
+            old_signal = old_signals[string(signal_name)]
+            @test signal.channel_names == Symbol.(old_signal["channel_names"])
+            @test signal.start_nanosecond == Nanosecond(0)
+            @test signal.stop_nanosecond == Nanosecond(old_recording["duration_in_nanoseconds"])
+            @test signal.sample_unit == Symbol(old_signal["sample_unit"])
+            @test signal.sample_resolution_in_unit == old_signal["sample_resolution_in_unit"]
+            @test signal.sample_offset_in_unit == 0.0
+            @test signal.sample_type == Onda.julia_type_from_onda_sample_type(old_signal["sample_type"])
+            @test signal.sample_rate == old_signal["sample_rate"]
+            @test signal.file_extension == Symbol(old_signal["file_extension"])
+            @test signal.file_options == old_signal["file_options"]
+        end
+    end
+end
diff --git a/test/old_test_v0_2.onda/recordings.msgpack.zst b/test/old_test_v0_2.onda/recordings.msgpack.zst
new file mode 100644
index 00000000..70caa251
Binary files /dev/null and b/test/old_test_v0_2.onda/recordings.msgpack.zst differ
diff --git a/test/printing.jl b/test/printing.jl
index 49d06582..005bd0e2 100644
--- a/test/printing.jl
+++ b/test/printing.jl
@@ -3,27 +3,33 @@ using Test, Onda, Dates, Random, UUIDs
 @testset "pretty printing" begin
     @test repr(TimeSpan(6149872364198, 123412345678910)) == "TimeSpan(01:42:29.872364198, 34:16:52.345678910)"
 
-    signal = Signal([:a, :b, Symbol("c-d")], :unit, 0.25, Int16, 50, Symbol("lpcm.zst"), nothing)
+    signal = Signal([:a, :b, Symbol("c-d")], Nanosecond(3), Nanosecond(Second(12345)), :unit, 0.25, -0.5, Int16, 50.2, Symbol("lpcm.zst"), nothing)
     @test sprint(show, signal, context=(:compact => true)) == "Signal([:a, :b, Symbol(\"c-d\")])"
     @test sprint(show, signal) == """
                                   Signal:
                                     channel_names: [:a, :b, Symbol(\"c-d\")]
+                                    start_nanosecond: 3 nanoseconds (00:00:00.000000003)
+                                    stop_nanosecond: 12345000000000 nanoseconds (03:25:45.000000000)
                                     sample_unit: :unit
                                     sample_resolution_in_unit: 0.25
+                                    sample_offset_in_unit: -0.5
                                     sample_type: Int16
-                                    sample_rate: 50 Hz
+                                    sample_rate: 50.2 Hz
                                     file_extension: Symbol(\"lpcm.zst\")
                                     file_options: nothing"""
 
     samples = Samples(signal, true, rand(Random.MersenneTwister(0), signal.sample_type, 3, 5))
     @test sprint(show, samples, context=(:compact => true)) == "Samples(3×5 Array{Int16,2})"
     @test sprint(show, samples) == """
-                                   Samples (00:00:00.100000000):
+                                   Samples (00:00:00.099601594):
                                      signal.channel_names: [:a, :b, Symbol(\"c-d\")]
+                                     signal.start_nanosecond: 3 nanoseconds (00:00:00.000000003)
+                                     signal.stop_nanosecond: 12345000000000 nanoseconds (03:25:45.000000000)
                                      signal.sample_unit: :unit
                                      signal.sample_resolution_in_unit: 0.25
+                                     signal.sample_offset_in_unit: -0.5
                                      signal.sample_type: Int16
-                                     signal.sample_rate: 50 Hz
+                                     signal.sample_rate: 50.2 Hz
                                      signal.file_extension: Symbol(\"lpcm.zst\")
                                      signal.file_options: nothing
                                      encoded: true
@@ -32,35 +38,13 @@ using Test, Onda, Dates, Random, UUIDs
                                     20032  4760  27427  -20758   24287
                                     14240  5037   5598   -5888   21784
                                     16885   600  20880  -32493  -19305"""
-    annotations = Set(Annotation("key$i", "val", TimeSpan(0, 1)) for i in 1:10)
-    recording = Recording(Nanosecond(100_000_000), Dict(:test => signal), annotations, nothing)
-    recording_string = sprint(show, recording)
-    @test startswith(recording_string, """
-                                       Recording:
-                                         duration_in_nanoseconds: 100000000 nanoseconds (00:00:00.100000000; 0.1 seconds)
-                                         signals:
-                                           :test => Signal([:a, :b, Symbol(\"c-d\")])
-                                         annotations (10 total):""")
-    @test endswith(recording_string, "...and 5 more.\n  custom: nothing")
-    annotations = Set(reduce(vcat, [[Annotation("key$i", string(rand()), TimeSpan(0, 1)) for _ in 1:i] for i in 1:10]))
-    recording = Recording(Nanosecond(100_000_000), Dict(:test => signal), annotations, Dict(:a => 1, :b => 2, :c => 2))
+    annotations = Set(Annotation("val", TimeSpan(i, i + 1)) for i in 1:10)
+    recording = Recording(Dict(:test => signal), annotations)
     @test sprint(show, recording) == """
-                                     Recording:
-                                       duration_in_nanoseconds: 100000000 nanoseconds (00:00:00.100000000; 0.1 seconds)
+                                     Recording (03:25:45.000000000; 12345.0 seconds)
                                        signals:
                                          :test => Signal([:a, :b, Symbol(\"c-d\")])
-                                       annotations (55 total):
-                                         10 instance(s) of key10
-                                         9 instance(s) of key9
-                                         8 instance(s) of key8
-                                         7 instance(s) of key7
-                                         6 instance(s) of key6
-                                         ...and 5 more.
-                                       custom:
-                                     Dict{Symbol,Int64} with 3 entries:
-                                       :a => 1
-                                       :b => 2
-                                       :c => 2"""
+                                       annotations: (10 total)"""
     mktempdir() do root
         dataset = Dataset(joinpath(root, "test.onda"); create=true)
         @test sprint(show, dataset) == "Dataset($(dataset.path), 0 recordings)"
diff --git a/test/serialization.jl b/test/serialization.jl
index 5c0e8c9b..5eba71b7 100644
--- a/test/serialization.jl
+++ b/test/serialization.jl
@@ -1,9 +1,9 @@
-using Test, Onda, Random
+using Test, Onda, Random, Dates
 
 @testset "$(repr(name)) serializer" for (name, options) in [(:lpcm, nothing),
                                                         (Symbol("lpcm.zst"), Dict(:level => 2))]
-    signal = Signal([:a, :b, :c], :unit, 0.25, Int16, 50, name, options)
-    samples = encode(Samples(signal, false, rand(MersenneTwister(1), 3, 50 * 10))).data
+    signal = Signal([:a, :b, :c], Nanosecond(0), Nanosecond(0), :unit, 0.25, -0.5, Int16, 50.5, name, options)
+    samples = encode(Samples(signal, false, rand(MersenneTwister(1), 3, Int(50.5 * 10)))).data
     s = serializer(signal)
     bytes = serialize_lpcm(samples, s)
     name == :lpcm && @test bytes == reinterpret(UInt8, vec(samples))
diff --git a/test/timespans.jl b/test/timespans.jl
index 7a308f61..0fcb12aa 100644
--- a/test/timespans.jl
+++ b/test/timespans.jl
@@ -57,4 +57,12 @@ end
     @test index_from_time(100, Nanosecond(0)) == 1
     @test index_from_time(100, TimeSpan(Second(3), Second(6))) == 301:600
     @test index_from_time(100, TimeSpan(Second(1), Second(1))) == 101:101
+    # test non-integer sample rates: index <-> time conversions must round-trip exactly
+    rate = 100.66
+    ns_per_sample = Onda.nanoseconds_per_sample(rate)
+    for i in 1:1000
+        t = Nanosecond(ceil(Int, (i - 1) * ns_per_sample))
+        @test index_from_time(rate, t) == i
+        @test time_from_index(rate, i) == t
+    end
 end