From 316d7da0a8bc86e642e28116c3338b3f3fb96350 Mon Sep 17 00:00:00 2001 From: George Mossessian Date: Wed, 2 Dec 2020 19:55:25 -0600 Subject: [PATCH 1/2] batch reading on load to avoid memory issue --- lib/dawgdic/dictionary.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/lib/dawgdic/dictionary.h b/lib/dawgdic/dictionary.h index 118f2ef..e57f562 100644 --- a/lib/dawgdic/dictionary.h +++ b/lib/dawgdic/dictionary.h @@ -49,10 +49,17 @@ class Dictionary { } SizeType size = static_cast(base_size); - std::vector units_buf(size); - if (!input->read(reinterpret_cast(&units_buf[0]), - sizeof(DictionaryUnit) * size)) { - return false; + std::vector units_buf; + SizeType bufsize = 1000; + SizeType cur_idx = 0; + while (cur_idx < size) { + SizeType size_to_read = std::min(size, bufsize); + units_buf.resize(units_buf.size() + size_to_read); + if (!input->read(reinterpret_cast(&units_buf[cur_idx]), + sizeof(DictionaryUnit) * size_to_read)) { + return false; + } + cur_idx += size_to_read; } SwapUnitsBuf(&units_buf); From a44973c15c65a185aa165411aa3f546f8aaa4791 Mon Sep 17 00:00:00 2001 From: George Mossessian Date: Wed, 2 Dec 2020 21:15:15 -0600 Subject: [PATCH 2/2] fix integer overflow issue and cleanup code --- lib/dawgdic/dictionary.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/lib/dawgdic/dictionary.h b/lib/dawgdic/dictionary.h index e57f562..999b62e 100644 --- a/lib/dawgdic/dictionary.h +++ b/lib/dawgdic/dictionary.h @@ -50,16 +50,21 @@ class Dictionary { SizeType size = static_cast(base_size); std::vector units_buf; - SizeType bufsize = 1000; - SizeType cur_idx = 0; - while (cur_idx < size) { - SizeType size_to_read = std::min(size, bufsize); - units_buf.resize(units_buf.size() + size_to_read); - if (!input->read(reinterpret_cast(&units_buf[cur_idx]), + + // read the file in batches to prevent a corrupted file from requesting the allocation of + // a very large amount of memory + SizeType batch_size = 1000; 
+ while( size > 0 ) { + SizeType size_to_read = std::min(size, batch_size); + SizeType cur_size = units_buf.size(); + units_buf.resize(cur_size + size_to_read); + if (!input->read(reinterpret_cast(&units_buf[cur_size]), sizeof(DictionaryUnit) * size_to_read)) { return false; } - cur_idx += size_to_read; + // subtract size_to_read (not batch_size) + // so that size reaches exactly zero instead of wrapping around below it + size -= size_to_read; } SwapUnitsBuf(&units_buf);