Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow scientific notation in written numbers #70

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
fail-fast: false
matrix:
version:
- '1.4' # Earliest supported version
- '1.7' # Earliest supported version
- '1' # Latest release
- 'nightly'
os:
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
[compat]
BitIntegers = "0.2"
FilePathsBase = "0.9.13"
julia = "1.4"
julia = "1.7"

[extras]
FilePathsBase = "48062228-2e41-5def-b9a4-89aafe57970f"
Expand Down
20 changes: 11 additions & 9 deletions src/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,17 @@ const SUPPORTED_SAMPLE_TYPES = Union{EDF_SAMPLE_TYPE,BDF_SAMPLE_TYPE}
##### `EDF.Signal`
#####

const SIGNAL_HEADER_FIELDS = [(:label, 16),
(:transducer_type, 80),
(:physical_dimension, 8),
(:physical_minimum, 8),
(:physical_maximum, 8),
(:digital_minimum, 8),
(:digital_maximum, 8),
(:prefilter, 80),
(:samples_per_record, 8)]
# List of triples of `(fieldname, byte_limit, allow_scientific_notation)`
# describing how these header fields should be written
const SIGNAL_HEADER_FIELDS = [(:label, 16, false),
ericphanson marked this conversation as resolved.
Show resolved Hide resolved
(:transducer_type, 80, false),
(:physical_dimension, 8, false),
(:physical_minimum, 8, true),
(:physical_maximum, 8, true),
(:digital_minimum, 8, true),
(:digital_maximum, 8, true),
(:prefilter, 80, false),
(:samples_per_record, 8, false)]

"""
EDF.SignalHeader
Expand Down
75 changes: 37 additions & 38 deletions src/write.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,52 +2,50 @@
##### utilities
#####

_edf_repr(value::Union{String,Char}) = value
_edf_repr(date::Date) = uppercase(Dates.format(date, dateformat"dd-u-yyyy"))
_edf_repr(date::DateTime) = Dates.format(date, dateformat"dd\.mm\.yyHH\.MM\.SS")

# XXX this is really really hacky and doesn't support use of scientific notation
# where appropriate; keep in mind if you do improve this to support scientific
# notation, that scientific is NOT allowed in EDF annotation onset/duration fields
function _edf_repr(x::Real)
result = missing
if isinteger(x)
str = string(trunc(Int, x))
if length(str) <= 8
result = str
end
else
fpart, ipart = modf(x)
ipart_str = string('-'^signbit(x), Int(abs(ipart))) # handles `-0.0` case
fpart_str = @sprintf "%.7f" abs(fpart)
fpart_str = fpart_str[3:end] # remove leading `0.`
if length(ipart_str) < 7
result = ipart_str * '.' * fpart_str[1:(7 - length(ipart_str))]
elseif length(ipart_str) <= 8
result = ipart_str
const FORMATS = (; G=[Printf.Format("%.$(i)G") for i in 8:-1:1],
F=[Printf.Format("%.$(i)F") for i in 8:-1:1])

function _edf_repr(x::Real, allow_scientific::Bool=false)
fmts = allow_scientific ? FORMATS.G : FORMATS.F
for fmt in fmts
str = Printf.format(fmt, x)
# Remove extra zero in scientific notation, e.g. `1E+01` becomes `1E+1`
# Also drop decimal if all digits are zero: `123.0000` to `123`
str = replace(str, r"(E[+-])0" => s"\1", r"\.0+$" => "")
# Removing trailing 0's after decimal place
if contains(str, '.') && !allow_scientific
str = rstrip(str, '0')
end
end
if !ismissing(result)
if all(c -> c in ('0', '.', '-'), result)
x == 0 && return result
else
return result
str = strip(str)
if length(str) <= 8
if str == "0" && x != 0
@warn "Underflow to zero when writing number `$x` to 8-character ASCII" maxlog=10
end
return String(str)
end
end
error("failed to fit number into EDF's 8 ASCII character limit: $x")
throw(ArgumentError("cannot represent $x in 8 ASCII characters"))
end

_edf_repr(value, ::Any) = _edf_repr(value)

_edf_repr(value::Union{String,Char}) = value
_edf_repr(date::Date) = uppercase(Dates.format(date, dateformat"dd-u-yyyy"))
_edf_repr(date::DateTime) = Dates.format(date, dateformat"dd\.mm\.yyHH\.MM\.SS")

_edf_metadata_repr(::Missing) = 'X'
_edf_metadata_repr(x) = _edf_repr(x)

function _edf_repr(metadata::T) where T<:Union{PatientID,RecordingID}
function _edf_repr(metadata::T) where {T<:Union{PatientID,RecordingID}}
header = T <: RecordingID ? String["Startdate"] : String[]
# None of the fields of `PatientID` or `RecordingID` are floating point, so we don't need
# to worry about passing `allow_scientific=true`.
return join([header; [_edf_metadata_repr(getfield(metadata, name)) for name in fieldnames(T)]], ' ')
end

function edf_write(io::IO, value, byte_limit::Integer)
edf_value = _edf_repr(value)
sizeof(edf_value) > byte_limit && error("EDF value exceeded byte limit (of $byte_limit bytes) while writing: $value")
function edf_write(io::IO, value, byte_limit::Integer; allow_scientific=false)
edf_value = _edf_repr(value, allow_scientific)
sizeof(edf_value) > byte_limit && error("EDF value exceeded byte limit (of $byte_limit bytes) while writing: `$value`. Representation: `$edf_value`")
bytes_written = Base.write(io, edf_value)
while bytes_written < byte_limit
bytes_written += Base.write(io, UInt8(' '))
Expand Down Expand Up @@ -86,13 +84,13 @@ function write_header(io::IO, file::File)
bytes_written += edf_write(io, expected_bytes_written, 8)
bytes_written += edf_write(io, file.header.is_contiguous ? "EDF+C" : "EDF+D", 44)
bytes_written += edf_write(io, file.header.record_count, 8)
bytes_written += edf_write(io, file.header.seconds_per_record, 8)
bytes_written += edf_write(io, file.header.seconds_per_record, 8; allow_scientific=true)
bytes_written += edf_write(io, length(file.signals), 4)
signal_headers = SignalHeader.(file.signals)
for (field_name, byte_limit) in SIGNAL_HEADER_FIELDS
for (field_name, byte_limit, allow_scientific) in SIGNAL_HEADER_FIELDS
for signal_header in signal_headers
field = getfield(signal_header, field_name)
bytes_written += edf_write(io, field, byte_limit)
bytes_written += edf_write(io, field, byte_limit; allow_scientific)
end
end
bytes_written += edf_write(io, ' ', 32 * length(file.signals))
Expand Down Expand Up @@ -138,10 +136,11 @@ function write_tal(io::IO, tal::TimestampedAnnotationList)
if !signbit(tal.onset_in_seconds) # otherwise, the `-` will already be in number string
bytes_written += Base.write(io, '+')
end
# We do not pass `allow_scientific=true`, since that is not allowed for onset or durations
bytes_written += Base.write(io, _edf_repr(tal.onset_in_seconds))
if tal.duration_in_seconds !== nothing
bytes_written += Base.write(io, 0x15)
bytes_written += Base.write(io, _edf_repr(tal.duration_in_seconds))
bytes_written += Base.write(io, _edf_repr(tal.duration_in_seconds)) # again, no `allow_scientific=true`
end
if isempty(tal.annotations)
bytes_written += Base.write(io, 0x14)
Expand Down
75 changes: 64 additions & 11 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
using EDF
using EDF: TimestampedAnnotationList, PatientID, RecordingID, SignalHeader,
Signal, AnnotationsSignal
Signal, AnnotationsSignal, _edf_repr
using Dates
using FilePathsBase
using Test
Expand Down Expand Up @@ -40,6 +40,43 @@ deep_equal(a::T, b::S) where {T,S} = false
const DATADIR = joinpath(@__DIR__, "data")

@testset "Just Do It" begin

@testset "Additional _edf_repr(x::Real, allow_scientific::Bool) tests" begin

# Moderate sized numbers - should be the same either way
for allow_scientific in (true, false)
@test _edf_repr(0.123, allow_scientific) == "0.123"
@test _edf_repr(1.123, allow_scientific) == "1.123"
@test _edf_repr(10.123, allow_scientific) == "10.123"

@test _edf_repr(123, allow_scientific) == "123"
@test _edf_repr(123 + eps(Float64), allow_scientific) == "123"

# Moderate numbers, many digits
@test _edf_repr(0.8945620050698592, false) == "0.894562"
end

# Large numbers / few digits
@test _edf_repr(0.123e10, true) == "1.23E+9"
# decimal version cannot handle it:
err = ArgumentError("cannot represent 1.23e9 in 8 ASCII characters")
@test_throws err _edf_repr(0.123e10, false)

# Large numbers / many digits
@test _edf_repr(0.8945620050698592e10, true) == "8.946E+9"
err = ArgumentError("cannot represent 8.945620050698591e9 in 8 ASCII characters")
@test_throws err _edf_repr(0.8945620050698592e10, false)

# Small numbers / few digits
@test _edf_repr(0.123e-10, true) == "1.23E-11"
# decimal version underflows:
@test (@test_logs (:warn, r"Underflow to zero") _edf_repr(0.123e-10, false)) == "0"

# Small numbers / many digits
@test _edf_repr(0.8945620050698592e-10, true) == "8.95E-11"
@test (@test_logs (:warn, r"Underflow to zero") _edf_repr(0.8945620050698592e-10, false)) == "0"
end

# test EDF.read(::AbstractString)
edf = EDF.read(joinpath(DATADIR, "test.edf"))
@test sprint(show, edf) == "EDF.File with 140 16-bit-encoded signals"
Expand Down Expand Up @@ -68,7 +105,7 @@ const DATADIR = joinpath(@__DIR__, "data")
[TimestampedAnnotationList(4.0, nothing, String[""]), TimestampedAnnotationList(2.5, 2.5, ["type A"])],
[TimestampedAnnotationList(5.0, nothing, String[""])]]
@test all(signal.records .== expected)
@test AnnotationsSignal(signal.records).samples_per_record == 16
@test AnnotationsSignal(signal.records).samples_per_record == 14
kleinschmidt marked this conversation as resolved.
Show resolved Hide resolved
end
end

Expand Down Expand Up @@ -113,19 +150,35 @@ const DATADIR = joinpath(@__DIR__, "data")
end

@test EDF._edf_repr(EDF._nearest_representable_edf_time_value(-0.0023405432)) == "-0.00234"
@test EDF._edf_repr(EDF._nearest_representable_edf_time_value(0.0023405432)) == "0.002340"
@test EDF._edf_repr(EDF._nearest_representable_edf_time_value(0.0023405432)) == "0.002341"
ericphanson marked this conversation as resolved.
Show resolved Hide resolved
@test EDF._edf_repr(EDF._nearest_representable_edf_time_value(1.002343)) == "1.002343"
@test EDF._edf_repr(EDF._nearest_representable_edf_time_value(1011.05432)) == "1011.054"
@test EDF._edf_repr(EDF._nearest_representable_edf_time_value(-1011.05432)) == "-1011.05"
@test EDF._edf_repr(EDF._nearest_representable_edf_time_value(-1013441.5)) == "-1013442"
@test EDF._edf_repr(EDF._nearest_representable_edf_time_value(-1013441.3)) == "-1013441"
@test EDF._edf_repr(34577777) == "34577777"
@test EDF._edf_repr(0.0345) == "0.034500"
@test EDF._edf_repr(-0.02) == "-0.02000"
@test EDF._edf_repr(0.0345) == "0.0345"
@test EDF._edf_repr(-0.02) == "-0.02"
@test EDF._edf_repr(-187.74445) == "-187.744"
@test_throws ErrorException EDF._edf_repr(123456789)
@test_throws ErrorException EDF._edf_repr(-12345678)
@test_throws ErrorException EDF._edf_repr(0.00000000024)
@test_throws ArgumentError EDF._edf_repr(123456789)
@test_throws ArgumentError EDF._edf_repr(-12345678)

@test (@test_logs (:warn, r"Underflow to zero") EDF._edf_repr(4.180821e-7)) == "0"
@test EDF._edf_repr(4.180821e-7, true) == "4.181E-7"

@test (@test_logs (:warn, r"Underflow to zero") EDF._edf_repr(floatmin(Float64))) == "0"
@test EDF._edf_repr(floatmin(Float64), true) == "2.2E-308"

@test_throws ArgumentError EDF._edf_repr(floatmax(Float64))
@test EDF._edf_repr(floatmax(Float64), true) == "1.8E+308"

# We still get errors if we too "big" (in the exponent)
@test_throws ArgumentError EDF._edf_repr(big"1e-999999", true)
@test_throws ArgumentError EDF._edf_repr(big"1e999999", true)

# if we don't allow scientific notation, we allow rounding down here
@test (@test_logs (:warn, r"Underflow to zero") EDF._edf_repr(0.00000000024)) == "0"
@test EDF._edf_repr(0.00000000024, true) == "2.4E-10"
@test_throws ErrorException EDF.edf_write(IOBuffer(), "hahahahaha", 4)

uneven = EDF.read(joinpath(DATADIR, "test_uneven_samp.edf"))
Expand Down Expand Up @@ -246,10 +299,10 @@ end

@testset "BDF+ Files" begin
# This is a `BDF+` file containing only trigger information.
# It is similiar to a `EDF Annotations` file except that
# It is similiar to a `EDF Annotations` file except that
# The `ANNOTATIONS_SIGNAL_LABEL` is `BDF Annotations`.
# The test data has 1081 trigger events, and
# has 180 trials in total, and
# The test data has 1081 trigger events, and
# has 180 trials in total, and
Comment on lines -249 to +305
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

revert these whitesapce changes to keep blame clean

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've configured vscode to remove trailing whitespace, so I will need to do this at the end (otherwise if I touch the file again it will re-introduce it). So your comment is noted but I won't address it until the PR is ready to merge.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is there no autoformatting set up on this repo? it's possible given that it's kind of ancient...

# The annotation `255` signifies the offset of a trial.
# More information, contact: zhanlikan@hotmail.com
evt = EDF.read(joinpath(DATADIR, "evt.bdf"))
Expand Down
Loading