From fdcafe3038944527d18be593306fa9d8c17e4f10 Mon Sep 17 00:00:00 2001 From: Vincent La Date: Wed, 3 Apr 2024 00:08:57 -0700 Subject: [PATCH] Fix CSV floating point serialization issue Fix CSV serialization issue (particularly impacting multiples of 10) caused by inaccurate digit calculation --- CMakeLists.txt | 4 +-- include/internal/csv_writer.hpp | 27 +++++++++++++++-- single_include/csv.hpp | 54 +++++++++++++++++++++++++-------- single_include_test/csv.hpp | 54 +++++++++++++++++++++++++-------- tests/test_write_csv.cpp | 40 +++++++++++++++++------- 5 files changed, 139 insertions(+), 40 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3137aab4..29c1d32c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -63,7 +63,7 @@ if (CSV_DEVELOPER) if (MSVC) target_link_options(csv PUBLIC /PROFILE) endif() - + # More error messages. if (UNIX) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} \ @@ -73,7 +73,7 @@ if (CSV_DEVELOPER) endif() # Generate a single header library - if(CMAKE_VERSION VERSION_LESS "3.12" OR ${CMAKE_SYSTEM_NAME} MATCHES "Windows") + if(CMAKE_VERSION VERSION_LESS "3.12") find_package(PythonInterp 3 QUIET) else() find_package(Python3 COMPONENTS Interpreter) diff --git a/include/internal/csv_writer.hpp b/include/internal/csv_writer.hpp index d34cace2..4f30d224 100644 --- a/include/internal/csv_writer.hpp +++ b/include/internal/csv_writer.hpp @@ -17,6 +17,27 @@ namespace csv { namespace internals { static int DECIMAL_PLACES = 5; + /** + * Calculate the number of digits in a number + */ + template< + typename T, + csv::enable_if_t::value, int> = 0 + > + int num_digits(T x) + { + x = abs(x); + + int digits = 0; + + while (x >= 1) { + x /= 10; + digits++; + } + + return digits; + } + /** to_string() for unsigned integers */ template::value, int> = 0> @@ -65,12 +86,12 @@ namespace csv { if (value < 0) result = "-"; if (integral_part == 0) { + result = "0"; } else { - for (int n_digits = (int)(std::log(integral_part) / std::log(10)); - n_digits + 1 > 0; n_digits --) { - int digit = (int)(std::fmod(integral_part, pow10(n_digits + 1)) / pow10(n_digits)); + for (int n_digits = num_digits(integral_part); n_digits > 0; n_digits --) { + int digit = (int)(std::fmod(integral_part, pow10(n_digits)) / pow10(n_digits - 1)); result += (char)('0' + digit); } } diff --git a/single_include/csv.hpp b/single_include/csv.hpp index da165928..650ae199 100644 --- a/single_include/csv.hpp +++ b/single_include/csv.hpp @@ -5301,9 +5301,9 @@ namespace csv { return DataType::CSV_NULL; bool ws_allowed = true, - neg_allowed = true, dot_allowed = true, digit_allowed = true, + is_negative = false, has_digit = false, prob_float = false; @@ -5333,7 +5333,7 @@ namespace csv { return DataType::CSV_STRING; } - neg_allowed = false; + is_negative = true; break; case '.': if (!dot_allowed) { @@ -5357,7 +5357,7 @@ namespace csv { return _process_potential_exponential( in.substr(exponent_start_idx), - neg_allowed ? integral_part + decimal_part : -(integral_part + decimal_part), + is_negative ? -(integral_part + decimal_part) : integral_part + decimal_part, out ); } @@ -5391,7 +5391,7 @@ namespace csv { if (has_digit) { long double number = integral_part + decimal_part; if (out) { - *out = neg_allowed ? number : -number; + *out = is_negative ? -number : number; } return prob_float ? DataType::CSV_DOUBLE : _determine_integral_type(number); @@ -6532,6 +6532,27 @@ namespace csv { namespace internals { static int DECIMAL_PLACES = 5; + /** + * Calculate the number of digits in a number + */ + template< + typename T, + csv::enable_if_t::value, int> = 0 + > + int num_digits(T x) + { + x = abs(x); + + int digits = 0; + + while (x >= 1) { + x /= 10; + digits++; + } + + return digits; + } + /** to_string() for unsigned integers */ template::value, int> = 0> @@ -6566,6 +6587,10 @@ namespace csv { csv::enable_if_t::value, int> = 0 > inline std::string to_string(T value) { +#ifdef __clang__ + return std::to_string(value); +#else + // TODO: Figure out why the below code doesn't work on clang std::string result; T integral_part; @@ -6576,12 +6601,12 @@ namespace csv { if (value < 0) result = "-"; if (integral_part == 0) { + result = "0"; } else { - for (int n_digits = (int)(std::log(integral_part) / std::log(10)); - n_digits + 1 > 0; n_digits --) { - int digit = (int)(std::fmod(integral_part, pow10(n_digits + 1)) / pow10(n_digits)); + for (int n_digits = num_digits(integral_part); n_digits > 0; n_digits --) { + int digit = (int)(std::fmod(integral_part, pow10(n_digits)) / pow10(n_digits - 1)); result += (char)('0' + digit); } } @@ -6601,6 +6626,7 @@ namespace csv { } return result; +#endif } } @@ -6608,9 +6634,11 @@ namespace csv { * * @param precision Number of decimal places */ +#ifndef __clang___ inline static void set_decimal_places(int precision) { internals::DECIMAL_PLACES = precision; } +#endif /** @name CSV Writing */ ///@{ @@ -7723,7 +7751,7 @@ namespace csv { for (; start < this->sv.size() && this->sv[start] == ' '; start++); for (end = start; end < this->sv.size() && this->sv[end] != ' '; end++); - unsigned long long int value = 0; + unsigned long long int value_ = 0; size_t digits = (end - start); size_t base16_exponent = digits - 1; @@ -7774,11 +7802,11 @@ namespace csv { return false; } - value += digit * pow(16, base16_exponent); + value_ += digit * pow(16, base16_exponent); base16_exponent--; } - parsedValue = value; + parsedValue = value_; return true; } @@ -7972,7 +8000,8 @@ namespace csv { } // create a result string of necessary size - std::string result(s.size() + space, '\\'); + size_t result_size = s.size() + space; + std::string result(result_size, '\\'); std::size_t pos = 0; for (const auto& c : s) @@ -8047,7 +8076,7 @@ namespace csv { if (c >= 0x00 && c <= 0x1f) { // print character c as \uxxxx - sprintf(&result[pos + 1], "u%04x", int(c)); + snprintf(&result[pos + 1], result_size - pos - 1, "u%04x", int(c)); pos += 6; // overwrite trailing null character result[pos] = '\\'; @@ -8137,6 +8166,7 @@ namespace csv { return ret; } } + /** @file * Calculates statistics from CSV files */ diff --git a/single_include_test/csv.hpp b/single_include_test/csv.hpp index da165928..650ae199 100644 --- a/single_include_test/csv.hpp +++ b/single_include_test/csv.hpp @@ -5301,9 +5301,9 @@ namespace csv { return DataType::CSV_NULL; bool ws_allowed = true, - neg_allowed = true, dot_allowed = true, digit_allowed = true, + is_negative = false, has_digit = false, prob_float = false; @@ -5333,7 +5333,7 @@ namespace csv { return DataType::CSV_STRING; } - neg_allowed = false; + is_negative = true; break; case '.': if (!dot_allowed) { @@ -5357,7 +5357,7 @@ namespace csv { return _process_potential_exponential( in.substr(exponent_start_idx), - neg_allowed ? integral_part + decimal_part : -(integral_part + decimal_part), + is_negative ? -(integral_part + decimal_part) : integral_part + decimal_part, out ); } @@ -5391,7 +5391,7 @@ namespace csv { if (has_digit) { long double number = integral_part + decimal_part; if (out) { - *out = neg_allowed ? number : -number; + *out = is_negative ? -number : number; } return prob_float ? DataType::CSV_DOUBLE : _determine_integral_type(number); @@ -6532,6 +6532,27 @@ namespace csv { namespace internals { static int DECIMAL_PLACES = 5; + /** + * Calculate the number of digits in a number + */ + template< + typename T, + csv::enable_if_t::value, int> = 0 + > + int num_digits(T x) + { + x = abs(x); + + int digits = 0; + + while (x >= 1) { + x /= 10; + digits++; + } + + return digits; + } + /** to_string() for unsigned integers */ template::value, int> = 0> @@ -6566,6 +6587,10 @@ namespace csv { csv::enable_if_t::value, int> = 0 > inline std::string to_string(T value) { +#ifdef __clang__ + return std::to_string(value); +#else + // TODO: Figure out why the below code doesn't work on clang std::string result; T integral_part; @@ -6576,12 +6601,12 @@ namespace csv { if (value < 0) result = "-"; if (integral_part == 0) { + result = "0"; } else { - for (int n_digits = (int)(std::log(integral_part) / std::log(10)); - n_digits + 1 > 0; n_digits --) { - int digit = (int)(std::fmod(integral_part, pow10(n_digits + 1)) / pow10(n_digits)); + for (int n_digits = num_digits(integral_part); n_digits > 0; n_digits --) { + int digit = (int)(std::fmod(integral_part, pow10(n_digits)) / pow10(n_digits - 1)); result += (char)('0' + digit); } } @@ -6601,6 +6626,7 @@ namespace csv { } return result; +#endif } } @@ -6608,9 +6634,11 @@ namespace csv { * * @param precision Number of decimal places */ +#ifndef __clang___ inline static void set_decimal_places(int precision) { internals::DECIMAL_PLACES = precision; } +#endif /** @name CSV Writing */ ///@{ @@ -7723,7 +7751,7 @@ namespace csv { for (; start < this->sv.size() && this->sv[start] == ' '; start++); for (end = start; end < this->sv.size() && this->sv[end] != ' '; end++); - unsigned long long int value = 0; + unsigned long long int value_ = 0; size_t digits = (end - start); size_t base16_exponent = digits - 1; @@ -7774,11 +7802,11 @@ namespace csv { return false; } - value += digit * pow(16, base16_exponent); + value_ += digit * pow(16, base16_exponent); base16_exponent--; } - parsedValue = value; + parsedValue = value_; return true; } @@ -7972,7 +8000,8 @@ namespace csv { } // create a result string of necessary size - std::string result(s.size() + space, '\\'); + size_t result_size = s.size() + space; + std::string result(result_size, '\\'); std::size_t pos = 0; for (const auto& c : s) @@ -8047,7 +8076,7 @@ namespace csv { if (c >= 0x00 && c <= 0x1f) { // print character c as \uxxxx - sprintf(&result[pos + 1], "u%04x", int(c)); + snprintf(&result[pos + 1], result_size - pos - 1, "u%04x", int(c)); pos += 6; // overwrite trailing null character result[pos] = '\\'; @@ -8137,6 +8166,7 @@ namespace csv { return ret; } } + /** @file * Calculates statistics from CSV files */ diff --git a/tests/test_write_csv.cpp b/tests/test_write_csv.cpp index 66261111..b059d11b 100644 --- a/tests/test_write_csv.cpp +++ b/tests/test_write_csv.cpp @@ -11,20 +11,38 @@ using std::vector; using std::string; #ifndef __clang__ -TEST_CASE("Numeric Converter Tests", "[test_convert_number]") { - // Large numbers: integer larger than uint64 capacity - REQUIRE(csv::internals::to_string(200000000000000000000.0) == "200000000000000000000.0"); - REQUIRE(csv::internals::to_string(310000000000000000000.0) == "310000000000000000000.0"); +TEST_CASE("Numeric Converter Tsts", "[test_convert_number]") { + SECTION("num_digits") { + REQUIRE(csv::internals::num_digits(99.0) == 2); + REQUIRE(csv::internals::num_digits(100.0) == 3); + } + + SECTION("Large Numbers") { + // Large numbers: integer larger than uint64 capacity + REQUIRE(csv::internals::to_string(200000000000000000000.0) == "200000000000000000000.0"); + REQUIRE(csv::internals::to_string(310000000000000000000.0) == "310000000000000000000.0"); + } - // Test setting precision - REQUIRE(csv::internals::to_string(1.234) == "1.23400"); - REQUIRE(csv::internals::to_string(20.0045) == "20.00450"); + SECTION("Custom Precision") { + // Test setting precision + REQUIRE(csv::internals::to_string(1.234) == "1.23400"); + REQUIRE(csv::internals::to_string(20.0045) == "20.00450"); - set_decimal_places(2); - REQUIRE(csv::internals::to_string(1.234) == "1.23"); + set_decimal_places(2); + REQUIRE(csv::internals::to_string(1.234) == "1.23"); - // Reset - set_decimal_places(5); + // Reset + set_decimal_places(5); + } + + SECTION("Numbers Close to 10^n - Regression") { + REQUIRE(csv::internals::to_string(10.0) == "10.0"); + REQUIRE(csv::internals::to_string(100.0) == "100.0"); + REQUIRE(csv::internals::to_string(1000.0) == "1000.0"); + REQUIRE(csv::internals::to_string(10000.0) == "10000.0"); + REQUIRE(csv::internals::to_string(100000.0) == "100000.0"); + REQUIRE(csv::internals::to_string(1000000.0) == "1000000.0"); + } } #endif