From 903c1d39a795431faffd2f0eea77def32f56aceb Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Thu, 16 Nov 2023 16:03:05 +0100 Subject: [PATCH 1/7] add AWSS3 as weakdep --- Project.toml | 13 ++++++++++--- ext/OndaAWSS3Ext.jl | 24 +++++++++++++++++++++++ test/awss3.jl | 47 +++++++++++++++++++++++++++++++++++++++++++++ test/runtests.jl | 2 ++ 4 files changed, 83 insertions(+), 3 deletions(-) create mode 100644 ext/OndaAWSS3Ext.jl create mode 100644 test/awss3.jl diff --git a/Project.toml b/Project.toml index b836c490..4e9df7c1 100644 --- a/Project.toml +++ b/Project.toml @@ -1,8 +1,7 @@ name = "Onda" uuid = "e853f5be-6863-11e9-128d-476edb89bfb5" authors = ["Beacon Biosignals, Inc."] -version = "0.15.2" - +version = "0.15.3" [deps] Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45" @@ -17,6 +16,12 @@ TimeSpans = "bb34ddd2-327f-4c4a-bfb0-c98fc494ece1" TranscodingStreams = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" +[weakdeps] +AWSS3 = "1c724243-ef5b-51ab-93f4-b0a88ac62a95" + +[extensions] +OndaAWSS3Ext = "AWSS3" + [compat] Arrow = "1.6.2, 2" CodecZstd = "0.6, 0.7" @@ -30,9 +35,11 @@ TranscodingStreams = "0.9" julia = "1.6" [extras] +AWSS3 = "1c724243-ef5b-51ab-93f4-b0a88ac62a95" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" FLAC_jll = "1d38b3a6-207b-531b-80e8-c83f48dafa73" +Minio = "4281f0d9-7ae0-406e-9172-b7277c1efa20" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["FLAC_jll", "DataFrames", "Test"] +test = ["AWSS3", "FLAC_jll", "DataFrames", "Minio", "Test"] diff --git a/ext/OndaAWSS3Ext.jl b/ext/OndaAWSS3Ext.jl new file mode 100644 index 00000000..6c2c0bce --- /dev/null +++ b/ext/OndaAWSS3Ext.jl @@ -0,0 +1,24 @@ +module OndaAWSS3Ext + +using AWSS3: S3Path +using Onda: Onda + +""" + Onda.read_byte_range(path::S3Path, byte_offset, byte_count) + +Implement method needed for Onda to read a byte range from an S3 path. Uses +`AWSS3.s3_get` under the hood. + +""" +function Onda.read_byte_range(path::S3Path, byte_offset, byte_count) + # s3_get byte_range is 1-indexed, so we need to add one + byte_range = range(byte_offset + 1; length=byte_count) + return read(path; byte_range) +end + +# avoid method ambiguity +function Onda.read_byte_range(path::S3Path, ::Missing, ::Missing) + return read(path) +end + +end # module diff --git a/test/awss3.jl b/test/awss3.jl new file mode 100644 index 00000000..f040c616 --- /dev/null +++ b/test/awss3.jl @@ -0,0 +1,47 @@ +function minio_server(body, dirs=[mktempdir()]; address="localhost:9005") + server = Minio.Server(dirs; address) + + try + run(server; wait=false) + sleep(0.5) # give the server just a bit of time, though it is amazingly fast to start + + config = MinioConfig( + "http://$address"; username="minioadmin", password="minioadmin" + ) + body(config) + finally + # Make sure we kill the server even if a test failed. + kill(server) + end +end + +@testset "AWSS3 usage" begin + minio_server() do config + s3_create_bucket(config, "test-bucket") + + file_format = "lpcm.zst" + file_path = S3Path("s3://test-bucket/prefix/samples.$(file_format)"; config) + recording_uuid = uuid4() + start = Second(0) + + info = SamplesInfoV2(sensor_type="eeg", + channels=["a", "b"], + sample_unit="unit", + sample_resolution_in_unit=1.0, + sample_offset_in_unit=0.0, + sample_type=Int16, + sample_rate=100.0) + samples = Samples(zeros(sample_type(info), 2, 300), info, true) + + signal = Onda.store(file_path, file_format, samples, recording_uuid, start) + @test signal.file_path isa S3Path + + loaded_samples = Onda.load(signal; encoded=true) + @test samples == loaded_samples + + # Load subspan to exercise method + span = TimeSpan(0, Second(1)) + loaded_span = Onda.load(signal, span; encoded=true) + @test loaded_samples[:, span] == loaded_span + end +end diff --git a/test/runtests.jl b/test/runtests.jl index 5490b070..07c0a5d8 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,7 @@ using Compat: @compat using Test, UUIDs, Dates, Onda, Legolas, Arrow, Tables, TimeSpans, DataFrames, Random using Tables: rowmerge +using AWSS3, Minio # for testing AWSS3 package extension function has_rows(a, b) for name in propertynames(b) @@ -17,5 +18,6 @@ include("signals.jl") include("serialization.jl") include("samples.jl") include("deprecations.jl") +include("awss3.jl") include(joinpath(dirname(@__DIR__), "examples", "flac.jl")) include(joinpath(dirname(@__DIR__), "examples", "tour.jl")) From c9a251a2ca983942497421074b15bf43faab6e18 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Thu, 16 Nov 2023 16:07:08 +0100 Subject: [PATCH 2/7] add compat for awss3 --- Project.toml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Project.toml b/Project.toml index 4e9df7c1..db62ec83 100644 --- a/Project.toml +++ b/Project.toml @@ -24,11 +24,13 @@ OndaAWSS3Ext = "AWSS3" [compat] Arrow = "1.6.2, 2" +AWSS3 = "0.9, 0.10, 0.11" CodecZstd = "0.6, 0.7" Compat = "3.32, 4" DataFrames = "1.2" FLAC_jll = "1.3.3" Legolas = "0.5" +Minio = "0.2" Tables = "1.4" TimeSpans = "0.3.4" TranscodingStreams = "0.9" From 30de50e98d5012375e206b15bcd7c7bbf7e1367b Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Thu, 16 Nov 2023 16:48:49 +0100 Subject: [PATCH 3/7] Update test/awss3.jl Co-authored-by: Dave Kleinschmidt --- test/awss3.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/awss3.jl b/test/awss3.jl index f040c616..47ea6ef5 100644 --- a/test/awss3.jl +++ b/test/awss3.jl @@ -31,7 +31,7 @@ end sample_offset_in_unit=0.0, sample_type=Int16, sample_rate=100.0) - samples = Samples(zeros(sample_type(info), 2, 300), info, true) + samples = Samples(rand(sample_type(info), 2, 300), info, true) signal = Onda.store(file_path, file_format, samples, recording_uuid, start) @test signal.file_path isa S3Path From 57465367a54654f771a7749beb5f712fa0bc8d52 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Thu, 16 Nov 2023 16:57:27 +0100 Subject: [PATCH 4/7] don't use compressed lpcm so we can do byte range requests. Add test --- test/awss3.jl | 15 ++++++++++++++- test/runtests.jl | 1 + 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/test/awss3.jl b/test/awss3.jl index 47ea6ef5..da43f26b 100644 --- a/test/awss3.jl +++ b/test/awss3.jl @@ -15,11 +15,16 @@ function minio_server(body, dirs=[mktempdir()]; address="localhost:9005") end end +# Test we are loading the `OndaAWSS3Ext` extension in the tests here +if VERSION >= v"1.9" + @test Base.get_extension(Onda, :OndaAWSS3Ext) isa Module +end + @testset "AWSS3 usage" begin minio_server() do config s3_create_bucket(config, "test-bucket") - file_format = "lpcm.zst" + file_format = "lpcm" file_path = S3Path("s3://test-bucket/prefix/samples.$(file_format)"; config) recording_uuid = uuid4() start = Second(0) @@ -43,5 +48,13 @@ end span = TimeSpan(0, Second(1)) loaded_span = Onda.load(signal, span; encoded=true) @test loaded_samples[:, span] == loaded_span + + bad_span = TimeSpan(stop(signal.span) + Nanosecond(Second(1)), + stop(signal.span) + Nanosecond(Second(2))) + # this throws a BoundsError without our extension (since Onda falls back to + # loading EVERYTHING and then indexing. with our utils, it passes the + # byte range to AWS which says it's invalid + @test_throws AWSException Onda.load(signal, bad_span) + end end diff --git a/test/runtests.jl b/test/runtests.jl index 07c0a5d8..1ca2034b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -2,6 +2,7 @@ using Compat: @compat using Test, UUIDs, Dates, Onda, Legolas, Arrow, Tables, TimeSpans, DataFrames, Random using Tables: rowmerge using AWSS3, Minio # for testing AWSS3 package extension +using AWSS3.AWS: AWSException function has_rows(a, b) for name in propertynames(b) From 8e4c56470b08462c18713b495a892906232cfb95 Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Thu, 16 Nov 2023 18:22:11 +0100 Subject: [PATCH 5/7] version-gate test --- test/awss3.jl | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/test/awss3.jl b/test/awss3.jl index da43f26b..82dae086 100644 --- a/test/awss3.jl +++ b/test/awss3.jl @@ -49,12 +49,14 @@ end loaded_span = Onda.load(signal, span; encoded=true) @test loaded_samples[:, span] == loaded_span - bad_span = TimeSpan(stop(signal.span) + Nanosecond(Second(1)), - stop(signal.span) + Nanosecond(Second(2))) - # this throws a BoundsError without our extension (since Onda falls back to - # loading EVERYTHING and then indexing. with our utils, it passes the - # byte range to AWS which says it's invalid - @test_throws AWSException Onda.load(signal, bad_span) + if VERSION >= v"1.9" # This test requires the package extension to work correctly + bad_span = TimeSpan(stop(signal.span) + Nanosecond(Second(1)), + stop(signal.span) + Nanosecond(Second(2))) + # this throws a BoundsError without our extension (since Onda falls back to + # loading EVERYTHING and then indexing. with our utils, it passes the + # byte range to AWS which says it's invalid + @test_throws AWSException Onda.load(signal, bad_span) + end end end From f4f57e71eeb4d23e35b6ad537b9ecb092893dd8b Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Thu, 16 Nov 2023 18:52:00 +0100 Subject: [PATCH 6/7] try compressed also --- test/awss3.jl | 67 ++++++++++++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 33 deletions(-) diff --git a/test/awss3.jl b/test/awss3.jl index 82dae086..48a6893f 100644 --- a/test/awss3.jl +++ b/test/awss3.jl @@ -24,39 +24,40 @@ end minio_server() do config s3_create_bucket(config, "test-bucket") - file_format = "lpcm" - file_path = S3Path("s3://test-bucket/prefix/samples.$(file_format)"; config) - recording_uuid = uuid4() - start = Second(0) - - info = SamplesInfoV2(sensor_type="eeg", - channels=["a", "b"], - sample_unit="unit", - sample_resolution_in_unit=1.0, - sample_offset_in_unit=0.0, - sample_type=Int16, - sample_rate=100.0) - samples = Samples(rand(sample_type(info), 2, 300), info, true) - - signal = Onda.store(file_path, file_format, samples, recording_uuid, start) - @test signal.file_path isa S3Path - - loaded_samples = Onda.load(signal; encoded=true) - @test samples == loaded_samples - - # Load subspan to exercise method - span = TimeSpan(0, Second(1)) - loaded_span = Onda.load(signal, span; encoded=true) - @test loaded_samples[:, span] == loaded_span - - if VERSION >= v"1.9" # This test requires the package extension to work correctly - bad_span = TimeSpan(stop(signal.span) + Nanosecond(Second(1)), - stop(signal.span) + Nanosecond(Second(2))) - # this throws a BoundsError without our extension (since Onda falls back to - # loading EVERYTHING and then indexing. with our utils, it passes the - # byte range to AWS which says it's invalid - @test_throws AWSException Onda.load(signal, bad_span) + for (file_format, exc) in (("lpcm", AWSException), ("lpcm.zst", InexactError)) + file_path = S3Path("s3://test-bucket/prefix/samples.$(file_format)"; config) + recording_uuid = uuid4() + start = Second(0) + + info = SamplesInfoV2(sensor_type="eeg", + channels=["a", "b"], + sample_unit="unit", + sample_resolution_in_unit=1.0, + sample_offset_in_unit=0.0, + sample_type=Int16, + sample_rate=100.0) + samples = Samples(rand(sample_type(info), 2, 300), info, true) + + signal = Onda.store(file_path, file_format, samples, recording_uuid, start) + @test signal.file_path isa S3Path + + loaded_samples = Onda.load(signal; encoded=true) + @test samples == loaded_samples + + # Load subspan to exercise method + span = TimeSpan(0, Second(1)) + loaded_span = Onda.load(signal, span; encoded=true) + @test loaded_samples[:, span] == loaded_span + + if VERSION >= v"1.9" # This test requires the package extension to work correctly + bad_span = TimeSpan(stop(signal.span) + Nanosecond(Second(1)), + stop(signal.span) + Nanosecond(Second(2))) + # this throws a BoundsError without our extension (since Onda falls back to + # loading EVERYTHING and then indexing. with our utils, it passes the + # byte range to AWS which says it's invalid. + # For compressed data, Onda does byte range requests. + @test_throws exc Onda.load(signal, bad_span) + end end - end end From 9fceea025110fb58a3c4d5a8fdb7dc9025da59dd Mon Sep 17 00:00:00 2001 From: Eric Hanson <5846501+ericphanson@users.noreply.github.com> Date: Thu, 16 Nov 2023 19:08:38 +0100 Subject: [PATCH 7/7] Apply suggestions from code review Co-authored-by: Dave Kleinschmidt --- test/awss3.jl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/test/awss3.jl b/test/awss3.jl index 48a6893f..9de3ba87 100644 --- a/test/awss3.jl +++ b/test/awss3.jl @@ -30,12 +30,12 @@ end start = Second(0) info = SamplesInfoV2(sensor_type="eeg", - channels=["a", "b"], - sample_unit="unit", - sample_resolution_in_unit=1.0, - sample_offset_in_unit=0.0, - sample_type=Int16, - sample_rate=100.0) + channels=["a", "b"], + sample_unit="unit", + sample_resolution_in_unit=1.0, + sample_offset_in_unit=0.0, + sample_type=Int16, + sample_rate=100.0) samples = Samples(rand(sample_type(info), 2, 300), info, true) signal = Onda.store(file_path, file_format, samples, recording_uuid, start) @@ -51,7 +51,7 @@ end if VERSION >= v"1.9" # This test requires the package extension to work correctly bad_span = TimeSpan(stop(signal.span) + Nanosecond(Second(1)), - stop(signal.span) + Nanosecond(Second(2))) + stop(signal.span) + Nanosecond(Second(2))) # this throws a BoundsError without our extension (since Onda falls back to # loading EVERYTHING and then indexing. with our utils, it passes the # byte range to AWS which says it's invalid.