Skip to content

Commit

Permalink
respond to review feedback
Browse files Browse the repository at this point in the history
  • Loading branch information
ericphanson committed Oct 16, 2023
1 parent ff6fc63 commit 769fcda
Show file tree
Hide file tree
Showing 5 changed files with 62 additions and 126 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
fail-fast: false
matrix:
version:
- '1.6' # Earliest supported version
- '1.7' # Earliest supported version
- '1' # Latest release
- 'nightly'
os:
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
[compat]
BitIntegers = "0.2"
FilePathsBase = "0.9.13"
julia = "1.6"
julia = "1.7"

[extras]
FilePathsBase = "48062228-2e41-5def-b9a4-89aafe57970f"
Expand Down
2 changes: 2 additions & 0 deletions src/types.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ const SUPPORTED_SAMPLE_TYPES = Union{EDF_SAMPLE_TYPE,BDF_SAMPLE_TYPE}
##### `EDF.Signal`
#####

# List of triples of `(fieldname, byte_limit, allow_scientific_notation)`
# describing how these header fields should be written
const SIGNAL_HEADER_FIELDS = [(:label, 16, false),
(:transducer_type, 80, false),
(:physical_dimension, 8, false),
Expand Down
120 changes: 27 additions & 93 deletions src/write.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2,115 +2,49 @@
##### utilities
#####

# `allow_scientific` is only meaningful for `value::Number`. We allow passing it though,
# so `edf_write` can be more generic.
_edf_repr(value::Union{String,Char}; allow_scientific=nothing) = value
_edf_repr(date::Date; allow_scientific=nothing) = uppercase(Dates.format(date, dateformat"dd-u-yyyy"))
_edf_repr(date::DateTime; allow_scientific=nothing) = Dates.format(date, dateformat"dd\.mm\.yyHH\.MM\.SS")

"""
sprintf_G_under_8(x) -> String
Return a string of length at most 8, written in either scientific notation (using capital-'E'),
or in decimal format, using as much precision as possible.
"""
function sprintf_G_under_8(x)
shorten = str -> begin
if contains(str, 'E')
# Remove extraneous 0's in exponential notation
str = replace(str, "E-0" => "E-")
str = replace(str, "E+0" => "E+")
# Remove any leading/trailing whitespace
return strip(str)
else
# Decimal format. Call out to `trim_F`
return trim_F(str)
const FORMATS = (; G=[Printf.Format("%.$(i)G") for i in 8:-1:1],
F=[Printf.Format("%.$(i)F") for i in 8:-1:1])

function _edf_repr(x::Real, allow_scientific::Bool=false)
fmts = allow_scientific ? FORMATS.G : FORMATS.F
for fmt in fmts
str = Printf.format(fmt, x)
# Remove extra zero in scientific notation, e.g. `1E+01` becomes `1E+1`
# Also drop decimal if all digits are zero: `123.0000` to `123`
str = replace(str, r"(E[+-])0" => s"\1", r"\.0+$" => "")
# Removing trailing 0's after decimal place
if contains(str, '.') && !allow_scientific
str = rstrip(str, '0')
end
str = strip(str)
if length(str) <= 8
if str == "0" && x != 0
@warn "Underflow to zero when writing number `$x` to 8-character ASCII" maxlog=10
end
return String(str)
end
end
# Strategy:
# `@printf("%0.NG", x)` means:
# - `G`: Use the shortest representation: %E or %F. That is, scientific notation (with capital E) or decimals, whichever is shorted
# - `.N`: (for literal `N`, like `5`): the maximum number of significant digits to be printed
# However, `@printf("%0.NG", x)` may have more than `N` characters (e.g. presence of E, and the values for the exponent, the decimal place, etc)
# So we start from 8 (most precision), and stop as soon as we get under 8 characters
sig_8 = shorten(@sprintf("%.8G", x))
length(sig_8) <= 8 && return sig_8
sig_7 = shorten(@sprintf("%.7G", x))
length(sig_7) <= 8 && return sig_7
sig_6 = shorten(@sprintf("%.6G", x))
length(sig_6) <= 8 && return sig_6
sig_5 = shorten(@sprintf("%.5G", x))
length(sig_5) <= 8 && return sig_5
sig_4 = shorten(@sprintf("%.4G", x))
length(sig_4) <= 8 && return sig_4
sig_3 = shorten(@sprintf("%.3G", x))
length(sig_3) <= 8 && return sig_3
sig_2 = shorten(@sprintf("%.2G", x))
length(sig_2) <= 8 && return sig_2
sig_1 = shorten(@sprintf("%.1G", x))
length(sig_1) <= 8 && return sig_1
error("failed to fit number into EDF's 8 ASCII character limit: $x")
end

function trim_F(str)
if contains(str, '.')
# Remove trailing 0's after the decimal point
str = rstrip(str, '0')
# If the `.` is at the end now, strip it
str = rstrip(str, '.')
end
# Removing leading or trailing whitespace
str = strip(str)
return str
throw(ArgumentError("cannot represent $x in 8 ASCII characters"))
end

function sprintf_F_under_8(x)
shorten = trim_F
# Strategy:
# `@printf("%0.NF", x)` means:
# - `F`: print with decimals
# - `.N`: (for literal `N`, like `5`): the maximum number of digits to print after the decimal
# However, `@printf("%0.NF", x)` may have more than `N` characters (e.g. digits to the left of the decimal point)
# So we start from 8 (most precision), and stop as soon as we get under 8 characters
sig_8 = shorten(@sprintf("%.8F", x))
length(sig_8) <= 8 && return sig_8
sig_7 = shorten(@sprintf("%.7F", x))
length(sig_7) <= 8 && return sig_7
sig_6 = shorten(@sprintf("%.6F", x))
length(sig_6) <= 8 && return sig_6
sig_5 = shorten(@sprintf("%.5F", x))
length(sig_5) <= 8 && return sig_5
sig_4 = shorten(@sprintf("%.4F", x))
length(sig_4) <= 8 && return sig_4
sig_3 = shorten(@sprintf("%.3F", x))
length(sig_3) <= 8 && return sig_3
sig_2 = shorten(@sprintf("%.2F", x))
length(sig_2) <= 8 && return sig_2
sig_1 = shorten(@sprintf("%.1F", x))
length(sig_1) <= 8 && return sig_1
error("failed to fit number into EDF's 8 ASCII character limit: $x")
end
_edf_repr(value, ::Any) = _edf_repr(value)

function _edf_repr(x::Real; allow_scientific=false)
if allow_scientific
return sprintf_G_under_8(x)
else
return sprintf_F_under_8(x)
end
end
_edf_repr(value::Union{String,Char}) = value
_edf_repr(date::Date) = uppercase(Dates.format(date, dateformat"dd-u-yyyy"))
_edf_repr(date::DateTime) = Dates.format(date, dateformat"dd\.mm\.yyHH\.MM\.SS")

_edf_metadata_repr(::Missing) = 'X'
_edf_metadata_repr(x) = _edf_repr(x)

function _edf_repr(metadata::T; allow_scientific=nothing) where {T<:Union{PatientID,RecordingID}}
function _edf_repr(metadata::T) where {T<:Union{PatientID,RecordingID}}
header = T <: RecordingID ? String["Startdate"] : String[]
# None of the fields of `PatientID` or `RecordingID` are floating point, so we don't need
# to worry about passing `allow_scientific=true`.
return join([header; [_edf_metadata_repr(getfield(metadata, name)) for name in fieldnames(T)]], ' ')
end

function edf_write(io::IO, value, byte_limit::Integer; allow_scientific=false)
edf_value = _edf_repr(value; allow_scientific)
edf_value = _edf_repr(value, allow_scientific)
sizeof(edf_value) > byte_limit && error("EDF value exceeded byte limit (of $byte_limit bytes) while writing: `$value`. Representation: `$edf_value`")
bytes_written = Base.write(io, edf_value)
while bytes_written < byte_limit
Expand Down
62 changes: 31 additions & 31 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
using EDF
using EDF: TimestampedAnnotationList, PatientID, RecordingID, SignalHeader,
Signal, AnnotationsSignal, sprintf_G_under_8, sprintf_F_under_8
Signal, AnnotationsSignal, _edf_repr
using Dates
using FilePathsBase
using Test
Expand Down Expand Up @@ -41,40 +41,40 @@ const DATADIR = joinpath(@__DIR__, "data")

@testset "Just Do It" begin

@testset "sprintf functionality" begin
# Moderate sized numbers
@test sprintf_G_under_8(0.123) == "0.123"
@test sprintf_F_under_8(0.123) == "0.123"
@testset "_edf_repr(x::Real, allow_scientific::Bool)" begin

@test sprintf_G_under_8(1.123) == "1.123"
@test sprintf_F_under_8(1.123) == "1.123"
# Moderate sized numbers - should be the same either way
for allow_scientific in (true, false)
@test _edf_repr(0.123, allow_scientific) == "0.123"
@test _edf_repr(1.123, allow_scientific) == "1.123"
@test _edf_repr(10.123, allow_scientific) == "10.123"

@test sprintf_G_under_8(10.123) == "10.123"
@test sprintf_F_under_8(10.123) == "10.123"
@test _edf_repr(123, allow_scientific) == "123"
@test _edf_repr(123 + eps(Float64), allow_scientific) == "123"

# Moderate numbers, many digits
@test sprintf_G_under_8(0.8945620050698592) == "0.894562"
@test sprintf_F_under_8(0.8945620050698592) == "0.894562"
# Moderate numbers, many digits
@test _edf_repr(0.8945620050698592, false) == "0.894562"
end

# Large numbers / few digits
@test sprintf_G_under_8(0.123e10) == "1.23E+9"
@test _edf_repr(0.123e10, true) == "1.23E+9"
# decimal version cannot handle it:
err = ErrorException("failed to fit number into EDF's 8 ASCII character limit: 1.23e9")
@test_throws err sprintf_F_under_8(0.123e10)
err = ArgumentError("cannot represent 1.23e9 in 8 ASCII characters")
@test_throws err _edf_repr(0.123e10, false)

# Large numbers / many digits
@test sprintf_G_under_8(0.8945620050698592e10) == "8.946E+9"
err = ErrorException("failed to fit number into EDF's 8 ASCII character limit: 8.945620050698591e9")
@test_throws err sprintf_F_under_8(0.8945620050698592e10)
@test _edf_repr(0.8945620050698592e10, true) == "8.946E+9"
err = ArgumentError("cannot represent 8.945620050698591e9 in 8 ASCII characters")
@test_throws err _edf_repr(0.8945620050698592e10, false)

# Small numbers / few digits
@test sprintf_G_under_8(0.123e-10) == "1.23E-11"
@test _edf_repr(0.123e-10, true) == "1.23E-11"
# decimal version underflows:
@test sprintf_F_under_8(0.123e-10) == "0"
@test _edf_repr(0.123e-10, false) == "0"

# Small numbers / many digits
@test sprintf_G_under_8(0.8945620050698592e-10) == "8.95E-11"
@test sprintf_F_under_8(0.8945620050698592e-10) == "0"
@test _edf_repr(0.8945620050698592e-10, true) == "8.95E-11"
@test _edf_repr(0.8945620050698592e-10, false) == "0"
end

# test EDF.read(::AbstractString)
Expand Down Expand Up @@ -160,25 +160,25 @@ const DATADIR = joinpath(@__DIR__, "data")
@test EDF._edf_repr(0.0345) == "0.0345"
@test EDF._edf_repr(-0.02) == "-0.02"
@test EDF._edf_repr(-187.74445) == "-187.744"
@test_throws ErrorException EDF._edf_repr(123456789)
@test_throws ErrorException EDF._edf_repr(-12345678)
@test_throws ArgumentError EDF._edf_repr(123456789)
@test_throws ArgumentError EDF._edf_repr(-12345678)

@test EDF._edf_repr(4.180821e-7) == "0"
@test EDF._edf_repr(4.180821e-7; allow_scientific=true) == "4.181E-7"
@test EDF._edf_repr(4.180821e-7, true) == "4.181E-7"

@test EDF._edf_repr(floatmin(Float64)) == "0"
@test EDF._edf_repr(floatmin(Float64); allow_scientific=true) == "2.2E-308"
@test EDF._edf_repr(floatmin(Float64), true) == "2.2E-308"

@test_throws ErrorException EDF._edf_repr(floatmax(Float64)) == "0"
@test EDF._edf_repr(floatmax(Float64); allow_scientific=true) == "1.8E+308"
@test_throws ArgumentError EDF._edf_repr(floatmax(Float64))
@test EDF._edf_repr(floatmax(Float64), true) == "1.8E+308"

# We still get errors if we too "big" (in the exponent)
@test_throws ErrorException EDF._edf_repr(big"1e-999999"; allow_scientific=true)
@test_throws ErrorException EDF._edf_repr(big"1e999999"; allow_scientific=true)
@test_throws ArgumentError EDF._edf_repr(big"1e-999999", true)
@test_throws ArgumentError EDF._edf_repr(big"1e999999", true)

# if we don't allow scientific notation, we allow rounding down here
@test EDF._edf_repr(0.00000000024) == "0"
@test EDF._edf_repr(0.00000000024; allow_scientific=true) == "2.4E-10"
@test EDF._edf_repr(0.00000000024, true) == "2.4E-10"
@test_throws ErrorException EDF.edf_write(IOBuffer(), "hahahahaha", 4)

uneven = EDF.read(joinpath(DATADIR, "test_uneven_samp.edf"))
Expand Down

0 comments on commit 769fcda

Please sign in to comment.