diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8a8b63c..ba27fc7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,7 +12,7 @@ jobs: fail-fast: false matrix: version: - - '1.4' # Earliest supported version + - '1.7' # Earliest supported version - '1' # Latest release - 'nightly' os: diff --git a/Project.toml b/Project.toml index 9deffef..aebe0b8 100644 --- a/Project.toml +++ b/Project.toml @@ -11,7 +11,7 @@ Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" [compat] BitIntegers = "0.2" FilePathsBase = "0.9.13" -julia = "1.4" +julia = "1.7" [extras] FilePathsBase = "48062228-2e41-5def-b9a4-89aafe57970f" diff --git a/src/types.jl b/src/types.jl index 37eeaa1..0555c2d 100644 --- a/src/types.jl +++ b/src/types.jl @@ -8,15 +8,17 @@ const SUPPORTED_SAMPLE_TYPES = Union{EDF_SAMPLE_TYPE,BDF_SAMPLE_TYPE} ##### `EDF.Signal` ##### -const SIGNAL_HEADER_FIELDS = [(:label, 16), - (:transducer_type, 80), - (:physical_dimension, 8), - (:physical_minimum, 8), - (:physical_maximum, 8), - (:digital_minimum, 8), - (:digital_maximum, 8), - (:prefilter, 80), - (:samples_per_record, 8)] +# List of triples of `(fieldname, byte_limit, allow_scientific_notation)` +# describing how these header fields should be written +const SIGNAL_HEADER_FIELDS = [(:label, 16, false), + (:transducer_type, 80, false), + (:physical_dimension, 8, false), + (:physical_minimum, 8, true), + (:physical_maximum, 8, true), + (:digital_minimum, 8, true), + (:digital_maximum, 8, true), + (:prefilter, 80, false), + (:samples_per_record, 8, false)] """ EDF.SignalHeader diff --git a/src/write.jl b/src/write.jl index e1e327f..0c0741b 100644 --- a/src/write.jl +++ b/src/write.jl @@ -2,52 +2,50 @@ ##### utilities ##### -_edf_repr(value::Union{String,Char}) = value -_edf_repr(date::Date) = uppercase(Dates.format(date, dateformat"dd-u-yyyy")) -_edf_repr(date::DateTime) = Dates.format(date, dateformat"dd\.mm\.yyHH\.MM\.SS") - -# XXX this is really really hacky and doesn't support use of scientific notation -# where appropriate; keep in mind if you do improve this to support scientific -# notation, that scientific is NOT allowed in EDF annotation onset/duration fields -function _edf_repr(x::Real) - result = missing - if isinteger(x) - str = string(trunc(Int, x)) - if length(str) <= 8 - result = str - end - else - fpart, ipart = modf(x) - ipart_str = string('-'^signbit(x), Int(abs(ipart))) # handles `-0.0` case - fpart_str = @sprintf "%.7f" abs(fpart) - fpart_str = fpart_str[3:end] # remove leading `0.` - if length(ipart_str) < 7 - result = ipart_str * '.' * fpart_str[1:(7 - length(ipart_str))] - elseif length(ipart_str) <= 8 - result = ipart_str +const FORMATS = (; G=[Printf.Format("%.$(i)G") for i in 8:-1:1], + F=[Printf.Format("%.$(i)F") for i in 8:-1:1]) + +function _edf_repr(x::Real, allow_scientific::Bool=false) + fmts = allow_scientific ? FORMATS.G : FORMATS.F + for fmt in fmts + str = Printf.format(fmt, x) + # Remove extra zero in scientific notation, e.g. `1E+01` becomes `1E+1` + # Also drop decimal if all digits are zero: `123.0000` to `123` + str = replace(str, r"(E[+-])0" => s"\1", r"\.0+$" => "") + # Removing trailing 0's after decimal place + if contains(str, '.') && !allow_scientific + str = rstrip(str, '0') end - end - if !ismissing(result) - if all(c -> c in ('0', '.', '-'), result) - x == 0 && return result - else - return result + str = strip(str) + if length(str) <= 8 + if str == "0" && x != 0 + @warn "Underflow to zero when writing number `$x` to 8-character ASCII" maxlog=10 + end + return String(str) end end - error("failed to fit number into EDF's 8 ASCII character limit: $x") + throw(ArgumentError("cannot represent $x in 8 ASCII characters")) end +_edf_repr(value, ::Any) = _edf_repr(value) + +_edf_repr(value::Union{String,Char}) = value +_edf_repr(date::Date) = uppercase(Dates.format(date, dateformat"dd-u-yyyy")) +_edf_repr(date::DateTime) = Dates.format(date, dateformat"dd\.mm\.yyHH\.MM\.SS") + _edf_metadata_repr(::Missing) = 'X' _edf_metadata_repr(x) = _edf_repr(x) -function _edf_repr(metadata::T) where T<:Union{PatientID,RecordingID} +function _edf_repr(metadata::T) where {T<:Union{PatientID,RecordingID}} header = T <: RecordingID ? String["Startdate"] : String[] + # None of the fields of `PatientID` or `RecordingID` are floating point, so we don't need + # to worry about passing `allow_scientific=true`. return join([header; [_edf_metadata_repr(getfield(metadata, name)) for name in fieldnames(T)]], ' ') end -function edf_write(io::IO, value, byte_limit::Integer) - edf_value = _edf_repr(value) - sizeof(edf_value) > byte_limit && error("EDF value exceeded byte limit (of $byte_limit bytes) while writing: $value") +function edf_write(io::IO, value, byte_limit::Integer; allow_scientific=false) + edf_value = _edf_repr(value, allow_scientific) + sizeof(edf_value) > byte_limit && error("EDF value exceeded byte limit (of $byte_limit bytes) while writing: `$value`. Representation: `$edf_value`") bytes_written = Base.write(io, edf_value) while bytes_written < byte_limit bytes_written += Base.write(io, UInt8(' ')) @@ -86,13 +84,13 @@ function write_header(io::IO, file::File) bytes_written += edf_write(io, expected_bytes_written, 8) bytes_written += edf_write(io, file.header.is_contiguous ? "EDF+C" : "EDF+D", 44) bytes_written += edf_write(io, file.header.record_count, 8) - bytes_written += edf_write(io, file.header.seconds_per_record, 8) + bytes_written += edf_write(io, file.header.seconds_per_record, 8; allow_scientific=true) bytes_written += edf_write(io, length(file.signals), 4) signal_headers = SignalHeader.(file.signals) - for (field_name, byte_limit) in SIGNAL_HEADER_FIELDS + for (field_name, byte_limit, allow_scientific) in SIGNAL_HEADER_FIELDS for signal_header in signal_headers field = getfield(signal_header, field_name) - bytes_written += edf_write(io, field, byte_limit) + bytes_written += edf_write(io, field, byte_limit; allow_scientific) end end bytes_written += edf_write(io, ' ', 32 * length(file.signals)) @@ -138,10 +136,11 @@ function write_tal(io::IO, tal::TimestampedAnnotationList) if !signbit(tal.onset_in_seconds) # otherwise, the `-` will already be in number string bytes_written += Base.write(io, '+') end + # We do not pass `allow_scientific=true`, since that is not allowed for onset or durations bytes_written += Base.write(io, _edf_repr(tal.onset_in_seconds)) if tal.duration_in_seconds !== nothing bytes_written += Base.write(io, 0x15) - bytes_written += Base.write(io, _edf_repr(tal.duration_in_seconds)) + bytes_written += Base.write(io, _edf_repr(tal.duration_in_seconds)) # again, no `allow_scientific=true` end if isempty(tal.annotations) bytes_written += Base.write(io, 0x14) diff --git a/test/runtests.jl b/test/runtests.jl index 3a6e18a..09f13d8 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,6 @@ using EDF using EDF: TimestampedAnnotationList, PatientID, RecordingID, SignalHeader, - Signal, AnnotationsSignal + Signal, AnnotationsSignal, _edf_repr using Dates using FilePathsBase using Test @@ -40,6 +40,43 @@ deep_equal(a::T, b::S) where {T,S} = false const DATADIR = joinpath(@__DIR__, "data") @testset "Just Do It" begin + + @testset "Additional _edf_repr(x::Real, allow_scientific::Bool) tests" begin + + # Moderate sized numbers - should be the same either way + for allow_scientific in (true, false) + @test _edf_repr(0.123, allow_scientific) == "0.123" + @test _edf_repr(1.123, allow_scientific) == "1.123" + @test _edf_repr(10.123, allow_scientific) == "10.123" + + @test _edf_repr(123, allow_scientific) == "123" + @test _edf_repr(123 + eps(Float64), allow_scientific) == "123" + + # Moderate numbers, many digits + @test _edf_repr(0.8945620050698592, false) == "0.894562" + end + + # Large numbers / few digits + @test _edf_repr(0.123e10, true) == "1.23E+9" + # decimal version cannot handle it: + err = ArgumentError("cannot represent 1.23e9 in 8 ASCII characters") + @test_throws err _edf_repr(0.123e10, false) + + # Large numbers / many digits + @test _edf_repr(0.8945620050698592e10, true) == "8.946E+9" + err = ArgumentError("cannot represent 8.945620050698591e9 in 8 ASCII characters") + @test_throws err _edf_repr(0.8945620050698592e10, false) + + # Small numbers / few digits + @test _edf_repr(0.123e-10, true) == "1.23E-11" + # decimal version underflows: + @test (@test_logs (:warn, r"Underflow to zero") _edf_repr(0.123e-10, false)) == "0" + + # Small numbers / many digits + @test _edf_repr(0.8945620050698592e-10, true) == "8.95E-11" + @test (@test_logs (:warn, r"Underflow to zero") _edf_repr(0.8945620050698592e-10, false)) == "0" + end + # test EDF.read(::AbstractString) edf = EDF.read(joinpath(DATADIR, "test.edf")) @test sprint(show, edf) == "EDF.File with 140 16-bit-encoded signals" @@ -68,7 +105,7 @@ const DATADIR = joinpath(@__DIR__, "data") [TimestampedAnnotationList(4.0, nothing, String[""]), TimestampedAnnotationList(2.5, 2.5, ["type A"])], [TimestampedAnnotationList(5.0, nothing, String[""])]] @test all(signal.records .== expected) - @test AnnotationsSignal(signal.records).samples_per_record == 16 + @test AnnotationsSignal(signal.records).samples_per_record == 14 end end @@ -113,19 +150,35 @@ const DATADIR = joinpath(@__DIR__, "data") end @test EDF._edf_repr(EDF._nearest_representable_edf_time_value(-0.0023405432)) == "-0.00234" - @test EDF._edf_repr(EDF._nearest_representable_edf_time_value(0.0023405432)) == "0.002340" + @test EDF._edf_repr(EDF._nearest_representable_edf_time_value(0.0023405432)) == "0.002341" @test EDF._edf_repr(EDF._nearest_representable_edf_time_value(1.002343)) == "1.002343" @test EDF._edf_repr(EDF._nearest_representable_edf_time_value(1011.05432)) == "1011.054" @test EDF._edf_repr(EDF._nearest_representable_edf_time_value(-1011.05432)) == "-1011.05" @test EDF._edf_repr(EDF._nearest_representable_edf_time_value(-1013441.5)) == "-1013442" @test EDF._edf_repr(EDF._nearest_representable_edf_time_value(-1013441.3)) == "-1013441" @test EDF._edf_repr(34577777) == "34577777" - @test EDF._edf_repr(0.0345) == "0.034500" - @test EDF._edf_repr(-0.02) == "-0.02000" + @test EDF._edf_repr(0.0345) == "0.0345" + @test EDF._edf_repr(-0.02) == "-0.02" @test EDF._edf_repr(-187.74445) == "-187.744" - @test_throws ErrorException EDF._edf_repr(123456789) - @test_throws ErrorException EDF._edf_repr(-12345678) - @test_throws ErrorException EDF._edf_repr(0.00000000024) + @test_throws ArgumentError EDF._edf_repr(123456789) + @test_throws ArgumentError EDF._edf_repr(-12345678) + + @test (@test_logs (:warn, r"Underflow to zero") EDF._edf_repr(4.180821e-7)) == "0" + @test EDF._edf_repr(4.180821e-7, true) == "4.181E-7" + + @test (@test_logs (:warn, r"Underflow to zero") EDF._edf_repr(floatmin(Float64))) == "0" + @test EDF._edf_repr(floatmin(Float64), true) == "2.2E-308" + + @test_throws ArgumentError EDF._edf_repr(floatmax(Float64)) + @test EDF._edf_repr(floatmax(Float64), true) == "1.8E+308" + + # We still get errors if we too "big" (in the exponent) + @test_throws ArgumentError EDF._edf_repr(big"1e-999999", true) + @test_throws ArgumentError EDF._edf_repr(big"1e999999", true) + + # if we don't allow scientific notation, we allow rounding down here + @test (@test_logs (:warn, r"Underflow to zero") EDF._edf_repr(0.00000000024)) == "0" + @test EDF._edf_repr(0.00000000024, true) == "2.4E-10" @test_throws ErrorException EDF.edf_write(IOBuffer(), "hahahahaha", 4) uneven = EDF.read(joinpath(DATADIR, "test_uneven_samp.edf")) @@ -246,10 +299,10 @@ end @testset "BDF+ Files" begin # This is a `BDF+` file containing only trigger information. - # It is similiar to a `EDF Annotations` file except that + # It is similiar to a `EDF Annotations` file except that # The `ANNOTATIONS_SIGNAL_LABEL` is `BDF Annotations`. - # The test data has 1081 trigger events, and - # has 180 trials in total, and + # The test data has 1081 trigger events, and + # has 180 trials in total, and # The annotation `255` signifies the offset of a trial. # More information, contact: zhanlikan@hotmail.com evt = EDF.read(joinpath(DATADIR, "evt.bdf"))