respond to review feedback

beacon-biosignals · Oct 16, 2023 · 769fcda · 769fcda
1 parent ff6fc63
commit 769fcda
Show file tree

Hide file tree

Showing 5 changed files with 62 additions and 126 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -12,7 +12,7 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1.6'  # Earliest supported version
+          - '1.7'  # Earliest supported version
           - '1'    # Latest release
           - 'nightly'
         os:

diff --git a/Project.toml b/Project.toml
@@ -11,7 +11,7 @@ Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 [compat]
 BitIntegers = "0.2"
 FilePathsBase = "0.9.13"
-julia = "1.6"
+julia = "1.7"
 
 [extras]
 FilePathsBase = "48062228-2e41-5def-b9a4-89aafe57970f"

diff --git a/src/types.jl b/src/types.jl
@@ -8,6 +8,8 @@ const SUPPORTED_SAMPLE_TYPES = Union{EDF_SAMPLE_TYPE,BDF_SAMPLE_TYPE}
 ##### `EDF.Signal`
 #####
 
+# List of triples of `(fieldname, byte_limit, allow_scientific_notation)`
+# describing how these header fields should be written
 const SIGNAL_HEADER_FIELDS = [(:label, 16, false),
                               (:transducer_type, 80, false),
                               (:physical_dimension, 8, false),

diff --git a/src/write.jl b/src/write.jl
@@ -2,115 +2,49 @@
 ##### utilities
 #####
 
-# `allow_scientific` is only meaningful for `value::Number`. We allow passing it though,
-# so `edf_write` can be more generic.
-_edf_repr(value::Union{String,Char}; allow_scientific=nothing) = value
-_edf_repr(date::Date; allow_scientific=nothing) = uppercase(Dates.format(date, dateformat"dd-u-yyyy"))
-_edf_repr(date::DateTime; allow_scientific=nothing) = Dates.format(date, dateformat"dd\.mm\.yyHH\.MM\.SS")
-
-"""
-    sprintf_G_under_8(x) -> String
-
-Return a string of length at most 8, written in either scientific notation (using capital-'E'),
-or in decimal format, using as much precision as possible.
-"""
-function sprintf_G_under_8(x)
-    shorten = str -> begin
-        if contains(str, 'E')
-            # Remove extraneous 0's in exponential notation
-            str = replace(str, "E-0" => "E-")
-            str = replace(str, "E+0" => "E+")
-            # Remove any leading/trailing whitespace
-            return strip(str)
-        else
-            # Decimal format. Call out to `trim_F`
-            return trim_F(str)
+const FORMATS = (; G=[Printf.Format("%.$(i)G") for i in 8:-1:1],
+                 F=[Printf.Format("%.$(i)F") for i in 8:-1:1])
+
+function _edf_repr(x::Real, allow_scientific::Bool=false)
+    fmts = allow_scientific ? FORMATS.G : FORMATS.F
+    for fmt in fmts
+        str = Printf.format(fmt, x)
+        # Remove extra zero in scientific notation, e.g. `1E+01` becomes `1E+1`
+        # Also drop decimal if all digits are zero: `123.0000` to `123`
+        str = replace(str, r"(E[+-])0" => s"\1", r"\.0+$" => "")
+        # Removing trailing 0's after decimal place
+        if contains(str, '.') && !allow_scientific
+            str = rstrip(str, '0')
+        end
+        str = strip(str)
+        if length(str) <= 8
+            if str == "0" && x != 0
+                @warn "Underflow to zero when writing number `$x` to 8-character ASCII" maxlog=10
+            end
+            return String(str)
         end
     end
-    # Strategy:
-    # `@printf("%0.NG", x)` means:
-    # - `G`: Use the shortest representation: %E or %F. That is, scientific notation (with capital E) or decimals, whichever is shorted
-    # - `.N`: (for literal `N`, like `5`): the maximum number of significant digits to be printed
-    # However, `@printf("%0.NG", x)` may have more than `N` characters (e.g. presence of E, and the values for the exponent, the decimal place, etc)
-    # So we start from 8 (most precision), and stop as soon as we get under 8 characters
-    sig_8 = shorten(@sprintf("%.8G", x))
-    length(sig_8) <= 8 && return sig_8
-    sig_7 = shorten(@sprintf("%.7G", x))
-    length(sig_7) <= 8 && return sig_7
-    sig_6 = shorten(@sprintf("%.6G", x))
-    length(sig_6) <= 8 && return sig_6
-    sig_5 = shorten(@sprintf("%.5G", x))
-    length(sig_5) <= 8 && return sig_5
-    sig_4 = shorten(@sprintf("%.4G", x))
-    length(sig_4) <= 8 && return sig_4
-    sig_3 = shorten(@sprintf("%.3G", x))
-    length(sig_3) <= 8 && return sig_3
-    sig_2 = shorten(@sprintf("%.2G", x))
-    length(sig_2) <= 8 && return sig_2
-    sig_1 = shorten(@sprintf("%.1G", x))
-    length(sig_1) <= 8 && return sig_1
-    error("failed to fit number into EDF's 8 ASCII character limit: $x")
-end
-
-function trim_F(str)
-    if contains(str, '.')
-        # Remove trailing 0's after the decimal point
-        str = rstrip(str, '0')
-        # If the `.` is at the end now, strip it
-        str = rstrip(str, '.')
-    end
-    # Removing leading or trailing whitespace
-    str = strip(str)
-    return str
+    throw(ArgumentError("cannot represent $x in 8 ASCII characters"))
 end
 
-function sprintf_F_under_8(x)
-    shorten = trim_F
-    # Strategy:
-    # `@printf("%0.NF", x)` means:
-    # - `F`: print with decimals
-    # - `.N`: (for literal `N`, like `5`): the maximum number of digits to print after the decimal
-    # However, `@printf("%0.NF", x)` may have more than `N` characters (e.g. digits to the left of the decimal point)
-    # So we start from 8 (most precision), and stop as soon as we get under 8 characters
-    sig_8 = shorten(@sprintf("%.8F", x))
-    length(sig_8) <= 8 && return sig_8
-    sig_7 = shorten(@sprintf("%.7F", x))
-    length(sig_7) <= 8 && return sig_7
-    sig_6 = shorten(@sprintf("%.6F", x))
-    length(sig_6) <= 8 && return sig_6
-    sig_5 = shorten(@sprintf("%.5F", x))
-    length(sig_5) <= 8 && return sig_5
-    sig_4 = shorten(@sprintf("%.4F", x))
-    length(sig_4) <= 8 && return sig_4
-    sig_3 = shorten(@sprintf("%.3F", x))
-    length(sig_3) <= 8 && return sig_3
-    sig_2 = shorten(@sprintf("%.2F", x))
-    length(sig_2) <= 8 && return sig_2
-    sig_1 = shorten(@sprintf("%.1F", x))
-    length(sig_1) <= 8 && return sig_1
-    error("failed to fit number into EDF's 8 ASCII character limit: $x")
-end
+_edf_repr(value, ::Any) = _edf_repr(value)
 
-function _edf_repr(x::Real; allow_scientific=false)
-    if allow_scientific
-        return sprintf_G_under_8(x)
-    else
-        return sprintf_F_under_8(x)
-    end
-end
+_edf_repr(value::Union{String,Char}) = value
+_edf_repr(date::Date) = uppercase(Dates.format(date, dateformat"dd-u-yyyy"))
+_edf_repr(date::DateTime) = Dates.format(date, dateformat"dd\.mm\.yyHH\.MM\.SS")
 
 _edf_metadata_repr(::Missing) = 'X'
 _edf_metadata_repr(x) = _edf_repr(x)
 
-function _edf_repr(metadata::T; allow_scientific=nothing) where {T<:Union{PatientID,RecordingID}}
+function _edf_repr(metadata::T) where {T<:Union{PatientID,RecordingID}}
     header = T <: RecordingID ? String["Startdate"] : String[]
     # None of the fields of `PatientID` or `RecordingID` are floating point, so we don't need
     # to worry about passing `allow_scientific=true`.
     return join([header; [_edf_metadata_repr(getfield(metadata, name)) for name in fieldnames(T)]], ' ')
 end
 
 function edf_write(io::IO, value, byte_limit::Integer; allow_scientific=false)
-    edf_value = _edf_repr(value; allow_scientific)
+    edf_value = _edf_repr(value, allow_scientific)
     sizeof(edf_value) > byte_limit && error("EDF value exceeded byte limit (of $byte_limit bytes) while writing: `$value`. Representation: `$edf_value`")
     bytes_written = Base.write(io, edf_value)
     while bytes_written < byte_limit

diff --git a/test/runtests.jl b/test/runtests.jl
@@ -1,6 +1,6 @@
 using EDF
 using EDF: TimestampedAnnotationList, PatientID, RecordingID, SignalHeader,
-           Signal, AnnotationsSignal, sprintf_G_under_8, sprintf_F_under_8
+           Signal, AnnotationsSignal, _edf_repr
 using Dates
 using FilePathsBase
 using Test
@@ -41,40 +41,40 @@ const DATADIR = joinpath(@__DIR__, "data")
 
 @testset "Just Do It" begin
 
-    @testset "sprintf functionality" begin
-        # Moderate sized numbers
-        @test sprintf_G_under_8(0.123) == "0.123"
-        @test sprintf_F_under_8(0.123) == "0.123"
+    @testset "_edf_repr(x::Real, allow_scientific::Bool)" begin
 
-        @test sprintf_G_under_8(1.123) == "1.123"
-        @test sprintf_F_under_8(1.123) == "1.123"
+        # Moderate sized numbers - should be the same either way
+        for allow_scientific in (true, false)
+            @test _edf_repr(0.123, allow_scientific) == "0.123"
+            @test _edf_repr(1.123, allow_scientific) == "1.123"
+            @test _edf_repr(10.123, allow_scientific) == "10.123"
 
-        @test sprintf_G_under_8(10.123) == "10.123"
-        @test sprintf_F_under_8(10.123) == "10.123"
+            @test _edf_repr(123, allow_scientific) == "123"
+            @test _edf_repr(123 + eps(Float64), allow_scientific) == "123"
 
-        # Moderate numbers, many digits
-        @test sprintf_G_under_8(0.8945620050698592) == "0.894562"
-        @test sprintf_F_under_8(0.8945620050698592) == "0.894562"
+            # Moderate numbers, many digits
+            @test _edf_repr(0.8945620050698592, false) == "0.894562"
+        end
 
         # Large numbers / few digits
-        @test sprintf_G_under_8(0.123e10) == "1.23E+9"
+        @test _edf_repr(0.123e10, true) == "1.23E+9"
         # decimal version cannot handle it:
-        err = ErrorException("failed to fit number into EDF's 8 ASCII character limit: 1.23e9")
-        @test_throws err sprintf_F_under_8(0.123e10)
+        err = ArgumentError("cannot represent 1.23e9 in 8 ASCII characters")
+        @test_throws err _edf_repr(0.123e10, false)
 
         # Large numbers / many digits
-        @test sprintf_G_under_8(0.8945620050698592e10) == "8.946E+9"
-        err = ErrorException("failed to fit number into EDF's 8 ASCII character limit: 8.945620050698591e9")
-        @test_throws err sprintf_F_under_8(0.8945620050698592e10)
+        @test _edf_repr(0.8945620050698592e10, true) == "8.946E+9"
+        err = ArgumentError("cannot represent 8.945620050698591e9 in 8 ASCII characters")
+        @test_throws err _edf_repr(0.8945620050698592e10, false)
 
         # Small numbers / few digits
-        @test sprintf_G_under_8(0.123e-10) == "1.23E-11"
+        @test _edf_repr(0.123e-10, true) == "1.23E-11"
         # decimal version underflows:
-        @test sprintf_F_under_8(0.123e-10) == "0"
+        @test _edf_repr(0.123e-10, false) == "0"
 
         # Small numbers / many digits
-        @test sprintf_G_under_8(0.8945620050698592e-10) == "8.95E-11"
-        @test sprintf_F_under_8(0.8945620050698592e-10) == "0"
+        @test _edf_repr(0.8945620050698592e-10, true) == "8.95E-11"
+        @test _edf_repr(0.8945620050698592e-10, false) == "0"
     end
 
     # test EDF.read(::AbstractString)
@@ -160,25 +160,25 @@ const DATADIR = joinpath(@__DIR__, "data")
     @test EDF._edf_repr(0.0345) == "0.0345"
     @test EDF._edf_repr(-0.02) == "-0.02"
     @test EDF._edf_repr(-187.74445) == "-187.744"
-    @test_throws ErrorException EDF._edf_repr(123456789)
-    @test_throws ErrorException EDF._edf_repr(-12345678)
+    @test_throws ArgumentError EDF._edf_repr(123456789)
+    @test_throws ArgumentError EDF._edf_repr(-12345678)
 
     @test EDF._edf_repr(4.180821e-7) == "0"
-    @test EDF._edf_repr(4.180821e-7; allow_scientific=true) == "4.181E-7"
+    @test EDF._edf_repr(4.180821e-7, true) == "4.181E-7"
 
     @test EDF._edf_repr(floatmin(Float64)) == "0"
-    @test EDF._edf_repr(floatmin(Float64); allow_scientific=true) == "2.2E-308"
+    @test EDF._edf_repr(floatmin(Float64), true) == "2.2E-308"
 
-    @test_throws ErrorException EDF._edf_repr(floatmax(Float64)) == "0"
-    @test EDF._edf_repr(floatmax(Float64); allow_scientific=true) == "1.8E+308"
+    @test_throws ArgumentError EDF._edf_repr(floatmax(Float64))
+    @test EDF._edf_repr(floatmax(Float64), true) == "1.8E+308"
 
     # We still get errors if we too "big" (in the exponent)
-    @test_throws ErrorException EDF._edf_repr(big"1e-999999"; allow_scientific=true)
-    @test_throws ErrorException EDF._edf_repr(big"1e999999"; allow_scientific=true)
+    @test_throws ArgumentError EDF._edf_repr(big"1e-999999", true)
+    @test_throws ArgumentError EDF._edf_repr(big"1e999999", true)
 
     # if we don't allow scientific notation, we allow rounding down here
     @test EDF._edf_repr(0.00000000024) == "0"
-    @test EDF._edf_repr(0.00000000024; allow_scientific=true) == "2.4E-10"
+    @test EDF._edf_repr(0.00000000024, true) == "2.4E-10"
     @test_throws ErrorException EDF.edf_write(IOBuffer(), "hahahahaha", 4)
 
     uneven = EDF.read(joinpath(DATADIR, "test_uneven_samp.edf"))