From 1dedb46d5eda0b2433ef58ac5d6759c76c5bdb28 Mon Sep 17 00:00:00 2001 From: Nathan Zimmerberg <39104088+nhz2@users.noreply.github.com> Date: Tue, 8 Oct 2024 19:00:22 -0400 Subject: [PATCH] Port new go test and change EOCD search to match yauzl (#83) * add new go test file * Use yauzl eocd search method --- src/reader.jl | 36 +++++++----------- .../testdata/comment-truncated.zip | Bin 0 -> 216 bytes test/test_ported-go-tests.jl | 1 + test/test_reader.jl | 2 +- 4 files changed, 16 insertions(+), 23 deletions(-) create mode 100644 test/examples from go/testdata/comment-truncated.zip diff --git a/src/reader.jl b/src/reader.jl index eefc537..7ac82c2 100644 --- a/src/reader.jl +++ b/src/reader.jl @@ -345,40 +345,32 @@ end # If this fails, io isn't a zip file, io isn't seekable, # or the end of the zip file was corrupted +# Using yauzl method https://github.com/thejoshwolfe/yauzl/blob/51010ce4e8c7e6345efe195e1b4150518f37b393/index.js#L111-L113 function find_end_of_central_directory_record(io::IO)::Int64 seekend(io) fsize = position(io) - # First assume comment is length zero fsize ≥ 22 || throw(ArgumentError("io isn't a zip file. Too small")) - seek(io, fsize-22) - b = read!(io, zeros(UInt8, 22)) - check_comment_len_valid(b, comment_len) = ( - EOCDSig == @view(b[end-21-comment_len:end-18-comment_len]) && - comment_len%UInt8 == b[end-1-comment_len] && - UInt8(comment_len>>8) == b[end-comment_len] - ) - if check_comment_len_valid(b, 0) - # No Zip comment fast path - fsize-22 - else - # There maybe is a Zip comment slow path - fsize > 22 || throw(ArgumentError("io isn't a zip file.")) - max_comment_len::Int = min(0xFFFF, fsize-22) - seek(io, fsize - (max_comment_len+22)) - b = read!(io, zeros(UInt8, (max_comment_len+22))) - comment_len = 1 - while comment_len < max_comment_len && !check_comment_len_valid(b, comment_len) - comment_len += 1 + for comment_len in 0:Int(min(0xFFFF, fsize-22)) + seek(io, fsize-22-comment_len) + if readle(io, UInt32) != 0x06054b50 + continue end - if !check_comment_len_valid(b, comment_len) + skip(io, 16) + if readle(io, UInt16) == comment_len + return fsize-22-comment_len + else throw(ArgumentError(""" io isn't a zip file. It may be a zip file with a corrupted ending. """ )) end - fsize-22-comment_len end + throw(ArgumentError(""" + io isn't a zip file. + It may be a zip file with a corrupted ending. + """ + )) end function check_EOCD64_used(io::IO, eocd_offset)::Bool diff --git a/test/examples from go/testdata/comment-truncated.zip b/test/examples from go/testdata/comment-truncated.zip new file mode 100644 index 0000000000000000000000000000000000000000..1bc19a85575964f378a8a30f198ed6ba5360aa7d GIT binary patch literal 216 zcmWIWW@cf4gUWrGI~jpI5C#dmdHT2ppr}ax{CO=%28Pozb5c_hOA-UT8JU2>aDc83 pE&*nMbOm^`vVk~^KxhP{)xa|7=AgR>tO!m(+=pt;1fVP<0{|c)7RUeq literal 0 HcmV?d00001 diff --git a/test/test_ported-go-tests.jl b/test/test_ported-go-tests.jl index e41db7c..edd9507 100644 --- a/test/test_ported-go-tests.jl +++ b/test/test_ported-go-tests.jl @@ -30,6 +30,7 @@ end "readme.notzip", "test-baddirsz.zip", "test-badbase.zip", + "comment-truncated.zip", ] for filename in same_content_files diff --git a/test/test_reader.jl b/test/test_reader.jl index 2e4d678..9e40321 100644 --- a/test/test_reader.jl +++ b/test/test_reader.jl @@ -66,7 +66,7 @@ using SHA: sha256 @test_throws ArgumentError find_eocd(io) io = IOBuffer("PK\x05\x06"^30000*"ab") - @test find_eocd(io) == 100700 + @test_throws ArgumentError find_eocd(io) io = IOBuffer("PK\x05\x06"*"\0"^16*"\xff\xff"*"a"^(2^16-1)) @test find_eocd(io) == 0