From 57ab3fae7ea5edab53486fbacbd30a1b768e6301 Mon Sep 17 00:00:00 2001 From: Petr Pucil Date: Sat, 27 Jul 2024 00:48:02 +0200 Subject: [PATCH] Add bytes_terminate_multi() and read_bytes_term_multi() See https://github.com/kaitai-io/kaitai_struct/issues/187 bytes_terminate_multi() is essentially identical to the existing implementation in Java: https://github.com/kaitai-io/kaitai_struct_java_runtime/blob/deb426e24ff1b75d537b7d903f5a971cae540987/src/main/java/io/kaitai/struct/KaitaiStream.java#L353-L365 read_bytes_term_multi() is similar to the existing implementation in Python: https://github.com/kaitai-io/kaitai_struct_python_runtime/blob/07aea9c6cdb1cc5be8677004680382602d7323f3/kaitaistruct.py#L434-L457 --- kaitai/kaitaistream.cpp | 61 ++++++++++++++++++++++++++++++++++++++++- kaitai/kaitaistream.h | 2 ++ 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/kaitai/kaitaistream.cpp b/kaitai/kaitaistream.cpp index 9332ae5..b3c3dfc 100644 --- a/kaitai/kaitaistream.cpp +++ b/kaitai/kaitaistream.cpp @@ -55,6 +55,7 @@ #include // std::memcpy #include // std::streamsize #include // std::istream // IWYU pragma: keep +#include // std::numeric_limits #include // std::stringstream, std::ostringstream // IWYU pragma: keep #include // std::runtime_error, std::invalid_argument, std::out_of_range #include // std::string, std::getline @@ -518,6 +519,46 @@ std::string kaitai::kstream::read_bytes_term(char term, bool include, bool consu return result; } +std::string kaitai::kstream::read_bytes_term_multi(std::string term, bool include, bool consume, bool eos_error) { + std::size_t term_len = term.length(); + if (term_len > static_cast(std::numeric_limits::max())) { + throw std::runtime_error("read_bytes_term_multi: terminator too long"); + } + std::streamsize unit_size = static_cast(term_len); + + std::string result; + std::string c(term_len, ' '); + m_io->exceptions(std::istream::badbit); + while (true) { + // Note: this requires std::string to be backed with a + // contiguous buffer. Officially, it's only a requirement since + // C++11 (C++98 and C++03 didn't have this requirement), but all + // major implementations had contiguous buffers anyway. + m_io->read(&c[0], unit_size); + if (m_io->eof()) { + m_io->clear(); + exceptions_enable(); + if (eos_error) { + throw std::runtime_error("read_bytes_term_multi: encountered EOF"); + } + result.append(c, 0, static_cast(m_io->gcount())); + return result; + } + + if (c == term) { + exceptions_enable(); + if (include) + result += c; + if (!consume) + m_io->seekg(-unit_size, std::istream::cur); + + return result; + } + + result += c; + } +} + std::string kaitai::kstream::ensure_fixed_contents(std::string expected) { std::string actual = read_bytes(expected.length()); @@ -553,6 +594,25 @@ std::string kaitai::kstream::bytes_terminate(std::string src, char term, bool in return src.substr(0, new_len); } +std::string kaitai::kstream::bytes_terminate_multi(std::string src, std::string term, bool include) { + std::size_t len = src.length(); + std::size_t unit_size = term.length(); + std::size_t last_unit_start = len > unit_size ? len - unit_size : 0; + for (std::size_t i = 0; i <= last_unit_start; i += unit_size) { + bool match = true; + for (std::size_t j = 0; j < unit_size; j++) { + if (src[i + j] != term[j]) { + match = false; + break; + } + } + if (match) { + return src.substr(0, i + (include ? unit_size : 0)); + } + } + return src; +} + // ======================================================================== // Byte array processing // ======================================================================== @@ -805,7 +865,6 @@ std::string kaitai::kstream::bytes_to_str(const std::string src, const char *src } #elif defined(KS_STR_ENCODING_WIN32API) #include -#include // Unbreak std::numeric_limits::max, as otherwise MSVC substitutes "useful" max() macro. #undef max diff --git a/kaitai/kaitaistream.h b/kaitai/kaitaistream.h index 58b4dee..5f3fa98 100644 --- a/kaitai/kaitaistream.h +++ b/kaitai/kaitaistream.h @@ -171,10 +171,12 @@ class kstream { std::string read_bytes(std::streamsize len); std::string read_bytes_full(); std::string read_bytes_term(char term, bool include, bool consume, bool eos_error); + std::string read_bytes_term_multi(std::string term, bool include, bool consume, bool eos_error); std::string ensure_fixed_contents(std::string expected); static std::string bytes_strip_right(std::string src, char pad_byte); static std::string bytes_terminate(std::string src, char term, bool include); + static std::string bytes_terminate_multi(std::string src, std::string term, bool include); static std::string bytes_to_str(const std::string src, const char *src_enc); //@}