beacon-biosignals · ericphanson · Oct 13, 2023 · Oct 13, 2023 · Oct 13, 2023 · Oct 13, 2023
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -12,7 +12,7 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.4'  # Earliest supported version
+          - '1.7'  # Earliest supported version
           - '1'    # Latest release
           - 'nightly'
         os:

diff --git a/Project.toml b/Project.toml
@@ -11,7 +11,7 @@ Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 [compat]
 BitIntegers = "0.2"
 FilePathsBase = "0.9.13"
-julia = "1.4"
+julia = "1.7"
 
 [extras]
 FilePathsBase = "48062228-2e41-5def-b9a4-89aafe57970f"

diff --git a/src/types.jl b/src/types.jl
@@ -8,15 +8,17 @@ const SUPPORTED_SAMPLE_TYPES = Union{EDF_SAMPLE_TYPE,BDF_SAMPLE_TYPE}
 ##### `EDF.Signal`
 #####
 
-const SIGNAL_HEADER_FIELDS = [(:label, 16),
-                              (:transducer_type, 80),
-                              (:physical_dimension, 8),
-                              (:physical_minimum, 8),
-                              (:physical_maximum, 8),
-                              (:digital_minimum, 8),
-                              (:digital_maximum, 8),
-                              (:prefilter, 80),
-                              (:samples_per_record, 8)]
+# List of triples of `(fieldname, byte_limit, allow_scientific_notation)`
+# describing how these header fields should be written
+const SIGNAL_HEADER_FIELDS = [(:label, 16, false),
+                              (:transducer_type, 80, false),
+                              (:physical_dimension, 8, false),
+                              (:physical_minimum, 8, true),
+                              (:physical_maximum, 8, true),
+                              (:digital_minimum, 8, true),
+                              (:digital_maximum, 8, true),
+                              (:prefilter, 80, false),
+                              (:samples_per_record, 8, false)]
 
 """
     EDF.SignalHeader

diff --git a/src/write.jl b/src/write.jl
@@ -2,52 +2,50 @@
 ##### utilities
 #####
 
-_edf_repr(value::Union{String,Char}) = value
-_edf_repr(date::Date) = uppercase(Dates.format(date, dateformat"dd-u-yyyy"))
-_edf_repr(date::DateTime) = Dates.format(date, dateformat"dd\.mm\.yyHH\.MM\.SS")
-
-# XXX this is really really hacky and doesn't support use of scientific notation
-# where appropriate; keep in mind if you do improve this to support scientific
-# notation, that scientific is NOT allowed in EDF annotation onset/duration fields
-function _edf_repr(x::Real)
-    result = missing
-    if isinteger(x)
-        str = string(trunc(Int, x))
-        if length(str) <= 8
-            result = str
-        end
-    else
-        fpart, ipart = modf(x)
-        ipart_str = string('-'^signbit(x), Int(abs(ipart))) # handles `-0.0` case
-        fpart_str = @sprintf "%.7f" abs(fpart)
-        fpart_str = fpart_str[3:end] # remove leading `0.`
-        if length(ipart_str) < 7
-            result = ipart_str * '.' * fpart_str[1:(7 - length(ipart_str))]
-        elseif length(ipart_str) <= 8
-            result = ipart_str
+const FORMATS = (; G=[Printf.Format("%.$(i)G") for i in 8:-1:1],
+                 F=[Printf.Format("%.$(i)F") for i in 8:-1:1])
+
+function _edf_repr(x::Real, allow_scientific::Bool=false)
+    fmts = allow_scientific ? FORMATS.G : FORMATS.F
+    for fmt in fmts
+        str = Printf.format(fmt, x)
+        # Remove extra zero in scientific notation, e.g. `1E+01` becomes `1E+1`
+        # Also drop decimal if all digits are zero: `123.0000` to `123`
+        str = replace(str, r"(E[+-])0" => s"\1", r"\.0+$" => "")
+        # Removing trailing 0's after decimal place
+        if contains(str, '.') && !allow_scientific
+            str = rstrip(str, '0')
         end
-    end
-    if !ismissing(result)
-        if all(c -> c in ('0', '.', '-'), result)
-            x == 0 && return result
-        else
-            return result
+        str = strip(str)
+        if length(str) <= 8
+            if str == "0" && x != 0
+                @warn "Underflow to zero when writing number `$x` to 8-character ASCII" maxlog=10
+            end
+            return String(str)
         end
     end
-    error("failed to fit number into EDF's 8 ASCII character limit: $x")
+    throw(ArgumentError("cannot represent $x in 8 ASCII characters"))
 end
 
+_edf_repr(value, ::Any) = _edf_repr(value)
+
+_edf_repr(value::Union{String,Char}) = value
+_edf_repr(date::Date) = uppercase(Dates.format(date, dateformat"dd-u-yyyy"))
+_edf_repr(date::DateTime) = Dates.format(date, dateformat"dd\.mm\.yyHH\.MM\.SS")
+
 _edf_metadata_repr(::Missing) = 'X'
 _edf_metadata_repr(x) = _edf_repr(x)
 
-function _edf_repr(metadata::T) where T<:Union{PatientID,RecordingID}
+function _edf_repr(metadata::T) where {T<:Union{PatientID,RecordingID}}
     header = T <: RecordingID ? String["Startdate"] : String[]
+    # None of the fields of `PatientID` or `RecordingID` are floating point, so we don't need
+    # to worry about passing `allow_scientific=true`.
     return join([header; [_edf_metadata_repr(getfield(metadata, name)) for name in fieldnames(T)]], ' ')
 end
 
-function edf_write(io::IO, value, byte_limit::Integer)
-    edf_value = _edf_repr(value)
-    sizeof(edf_value) > byte_limit && error("EDF value exceeded byte limit (of $byte_limit bytes) while writing: $value")
+function edf_write(io::IO, value, byte_limit::Integer; allow_scientific=false)
+    edf_value = _edf_repr(value, allow_scientific)
+    sizeof(edf_value) > byte_limit && error("EDF value exceeded byte limit (of $byte_limit bytes) while writing: `$value`. Representation: `$edf_value`")
     bytes_written = Base.write(io, edf_value)
     while bytes_written < byte_limit
         bytes_written += Base.write(io, UInt8(' '))
@@ -86,13 +84,13 @@ function write_header(io::IO, file::File)
     bytes_written += edf_write(io, expected_bytes_written, 8)
     bytes_written += edf_write(io, file.header.is_contiguous ? "EDF+C" : "EDF+D", 44)
     bytes_written += edf_write(io, file.header.record_count, 8)
-    bytes_written += edf_write(io, file.header.seconds_per_record, 8)
+    bytes_written += edf_write(io, file.header.seconds_per_record, 8; allow_scientific=true)
     bytes_written += edf_write(io, length(file.signals), 4)
     signal_headers = SignalHeader.(file.signals)
-    for (field_name, byte_limit) in SIGNAL_HEADER_FIELDS
+    for (field_name, byte_limit, allow_scientific) in SIGNAL_HEADER_FIELDS
         for signal_header in signal_headers
             field = getfield(signal_header, field_name)
-            bytes_written += edf_write(io, field, byte_limit)
+            bytes_written += edf_write(io, field, byte_limit; allow_scientific)
         end
     end
     bytes_written += edf_write(io, ' ', 32 * length(file.signals))
@@ -138,10 +136,11 @@ function write_tal(io::IO, tal::TimestampedAnnotationList)
     if !signbit(tal.onset_in_seconds) # otherwise, the `-` will already be in number string
         bytes_written += Base.write(io, '+')
     end
+    # We do not pass `allow_scientific=true`, since that is not allowed for onset or durations
     bytes_written += Base.write(io, _edf_repr(tal.onset_in_seconds))
     if tal.duration_in_seconds !== nothing
         bytes_written += Base.write(io, 0x15)
-        bytes_written += Base.write(io, _edf_repr(tal.duration_in_seconds))
+        bytes_written += Base.write(io, _edf_repr(tal.duration_in_seconds)) # again, no `allow_scientific=true`
     end
     if isempty(tal.annotations)
         bytes_written += Base.write(io, 0x14)

diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1,6 +1,6 @@
 using EDF
 using EDF: TimestampedAnnotationList, PatientID, RecordingID, SignalHeader,
-           Signal, AnnotationsSignal
+           Signal, AnnotationsSignal, _edf_repr
 using Dates
 using FilePathsBase
 using Test
@@ -40,6 +40,43 @@ deep_equal(a::T, b::S) where {T,S} = false
 const DATADIR = joinpath(@__DIR__, "data")
 
 @testset "Just Do It" begin
+
+    @testset "Additional _edf_repr(x::Real, allow_scientific::Bool) tests" begin
+
+        # Moderate sized numbers - should be the same either way
+        for allow_scientific in (true, false)
+            @test _edf_repr(0.123, allow_scientific) == "0.123"
+            @test _edf_repr(1.123, allow_scientific) == "1.123"
+            @test _edf_repr(10.123, allow_scientific) == "10.123"
+
+            @test _edf_repr(123, allow_scientific) == "123"
+            @test _edf_repr(123 + eps(Float64), allow_scientific) == "123"
+
+            # Moderate numbers, many digits
+            @test _edf_repr(0.8945620050698592, false) == "0.894562"
+        end
+
+        # Large numbers / few digits
+        @test _edf_repr(0.123e10, true) == "1.23E+9"
+        # decimal version cannot handle it:
+        err = ArgumentError("cannot represent 1.23e9 in 8 ASCII characters")
+        @test_throws err _edf_repr(0.123e10, false)
+
+        # Large numbers / many digits
+        @test _edf_repr(0.8945620050698592e10, true) == "8.946E+9"
+        err = ArgumentError("cannot represent 8.945620050698591e9 in 8 ASCII characters")
+        @test_throws err _edf_repr(0.8945620050698592e10, false)
+
+        # Small numbers / few digits
+        @test _edf_repr(0.123e-10, true) == "1.23E-11"
+        # decimal version underflows:
+        @test (@test_logs (:warn, r"Underflow to zero") _edf_repr(0.123e-10, false)) == "0"
+
+        # Small numbers / many digits
+        @test _edf_repr(0.8945620050698592e-10, true) == "8.95E-11"
+        @test (@test_logs (:warn, r"Underflow to zero") _edf_repr(0.8945620050698592e-10, false)) == "0"
+    end
+
     # test EDF.read(::AbstractString)
     edf = EDF.read(joinpath(DATADIR, "test.edf"))
     @test sprint(show, edf) == "EDF.File with 140 16-bit-encoded signals"
@@ -68,7 +105,7 @@ const DATADIR = joinpath(@__DIR__, "data")
                         [TimestampedAnnotationList(4.0, nothing, String[""]), TimestampedAnnotationList(2.5, 2.5, ["type A"])],
                         [TimestampedAnnotationList(5.0, nothing, String[""])]]
             @test all(signal.records .== expected)
-            @test AnnotationsSignal(signal.records).samples_per_record == 16
+            @test AnnotationsSignal(signal.records).samples_per_record == 14
         end
     end
 
@@ -113,19 +150,35 @@ const DATADIR = joinpath(@__DIR__, "data")
     end
 
     @test EDF._edf_repr(EDF._nearest_representable_edf_time_value(-0.0023405432)) == "-0.00234"
-    @test EDF._edf_repr(EDF._nearest_representable_edf_time_value(0.0023405432)) == "0.002340"
+    @test EDF._edf_repr(EDF._nearest_representable_edf_time_value(0.0023405432)) == "0.002341"
     @test EDF._edf_repr(EDF._nearest_representable_edf_time_value(1.002343)) == "1.002343"
     @test EDF._edf_repr(EDF._nearest_representable_edf_time_value(1011.05432)) == "1011.054"
     @test EDF._edf_repr(EDF._nearest_representable_edf_time_value(-1011.05432)) == "-1011.05"
     @test EDF._edf_repr(EDF._nearest_representable_edf_time_value(-1013441.5)) == "-1013442"
     @test EDF._edf_repr(EDF._nearest_representable_edf_time_value(-1013441.3)) == "-1013441"
     @test EDF._edf_repr(34577777) == "34577777"
-    @test EDF._edf_repr(0.0345) == "0.034500"
-    @test EDF._edf_repr(-0.02) == "-0.02000"
+    @test EDF._edf_repr(0.0345) == "0.0345"
+    @test EDF._edf_repr(-0.02) == "-0.02"
     @test EDF._edf_repr(-187.74445) == "-187.744"
-    @test_throws ErrorException EDF._edf_repr(123456789)
-    @test_throws ErrorException EDF._edf_repr(-12345678)
-    @test_throws ErrorException EDF._edf_repr(0.00000000024)
+    @test_throws ArgumentError EDF._edf_repr(123456789)
+    @test_throws ArgumentError EDF._edf_repr(-12345678)
+
+    @test (@test_logs (:warn, r"Underflow to zero") EDF._edf_repr(4.180821e-7)) == "0"
+    @test EDF._edf_repr(4.180821e-7, true) == "4.181E-7"
+
+    @test (@test_logs (:warn, r"Underflow to zero") EDF._edf_repr(floatmin(Float64))) == "0"
+    @test EDF._edf_repr(floatmin(Float64), true) == "2.2E-308"
+
+    @test_throws ArgumentError EDF._edf_repr(floatmax(Float64))
+    @test EDF._edf_repr(floatmax(Float64), true) == "1.8E+308"
+
+    # We still get errors if we too "big" (in the exponent)
+    @test_throws ArgumentError EDF._edf_repr(big"1e-999999", true)
+    @test_throws ArgumentError EDF._edf_repr(big"1e999999", true)
+
+    # if we don't allow scientific notation, we allow rounding down here
+    @test (@test_logs (:warn, r"Underflow to zero") EDF._edf_repr(0.00000000024)) == "0"
+    @test EDF._edf_repr(0.00000000024, true) == "2.4E-10"
     @test_throws ErrorException EDF.edf_write(IOBuffer(), "hahahahaha", 4)
 
     uneven = EDF.read(joinpath(DATADIR, "test_uneven_samp.edf"))
@@ -246,10 +299,10 @@ end
 
 @testset "BDF+ Files" begin
     # This is a `BDF+` file containing only trigger information.
-    # It is similiar to a `EDF Annotations` file except that 
+    # It is similiar to a `EDF Annotations` file except that
     # The `ANNOTATIONS_SIGNAL_LABEL` is `BDF Annotations`.
-    # The test data has 1081 trigger events, and 
-    # has 180 trials in total, and 
+    # The test data has 1081 trigger events, and
+    # has 180 trials in total, and
     # The annotation `255` signifies the offset of a trial.
     # More information, contact: zhanlikan@hotmail.com
     evt = EDF.read(joinpath(DATADIR, "evt.bdf"))