From d3f73b8d1caf640feb43799c8ee715dfef563ced Mon Sep 17 00:00:00 2001 From: Vincent La Date: Wed, 21 Aug 2019 01:05:14 -0700 Subject: [PATCH] Add support for unsigned integer conversions (#47) * Added support for unsigned integer conversions * Updated single header * Fix uninitialized variable warning in test --- README.md | 6 +++-- include/csv.hpp | 2 +- include/internal/csv_row.hpp | 47 +++++++++++++++++++--------------- single_include/csv.hpp | 49 ++++++++++++++++++++---------------- tests/test_csv_field.cpp | 25 ++++++++++++++++++ tests/test_data_type.cpp | 2 +- 6 files changed, 86 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index 0c2ba914..5a553cef 100644 --- a/README.md +++ b/README.md @@ -144,7 +144,8 @@ for (auto& row: reader) { If your CSV has lots of numeric values, you can also have this parser (lazily) convert them to the proper data type. - * Type checking is performed on conversions to prevent undefined behavior and integer overflow. + * Type checking is performed on conversions to prevent undefined behavior and integer overflow + * Negative numbers cannot be blindly converted to unsigned integer types * `get<float>()`, `get<double>()`, and `get<long double>()` are capable of parsing numbers written in scientific notation. * **Note:** Conversions to floating point types are not currently checked for loss of precision. @@ -159,7 +160,8 @@ CSVReader reader("very_big_file.csv"); for (auto& row: reader) { if (row["timestamp"].is_int()) { - // Can use get<>() with any signed integer type + // Can use get<>() with any integer type, but negative + // numbers cannot be converted to unsigned types row["timestamp"].get<int>(); // ..
diff --git a/include/csv.hpp b/include/csv.hpp index 6ed5b615..b3e0bf4a 100644 --- a/include/csv.hpp +++ b/include/csv.hpp @@ -1,5 +1,5 @@ /* -CSV for C++, version 1.2.0 +CSV for C++, version 1.2.1 https://github.com/vincentlaucsb/csv-parser MIT License diff --git a/include/internal/csv_row.hpp b/include/internal/csv_row.hpp index a8f2bc05..2f9bf8e2 100644 --- a/include/internal/csv_row.hpp +++ b/include/internal/csv_row.hpp @@ -22,11 +22,12 @@ namespace csv { static const std::string ERROR_OVERFLOW = "Overflow error."; static const std::string ERROR_FLOAT_TO_INT = "Attempted to convert a floating point value to an integral type."; + static const std::string ERROR_NEG_TO_UNSIGNED = "Negative numbers cannot be converted to unsigned types."; } /** * @class CSVField - * @brief Data type representing individual CSV values. + * @brief Data type representing individual CSV values. * CSVFields can be obtained by using CSVRow::operator[] */ class CSVField { @@ -66,19 +67,25 @@ namespace csv { * numeric value. * */ - template T get() { - static_assert(!std::is_unsigned::value, "Conversions to unsigned types are not supported."); - - IF_CONSTEXPR (std::is_arithmetic::value) { + template T get() { + IF_CONSTEXPR(std::is_arithmetic::value) { + // Note: this->type() also converts the CSV value to float if (this->type() <= CSV_STRING) { throw std::runtime_error(internals::ERROR_NAN); } } IF_CONSTEXPR(std::is_integral::value) { + // Note: this->is_float() also converts the CSV value to float if (this->is_float()) { throw std::runtime_error(internals::ERROR_FLOAT_TO_INT); } + + IF_CONSTEXPR(std::is_unsigned::value) { + if (this->value < 0) { + throw std::runtime_error(internals::ERROR_NEG_TO_UNSIGNED); + } + } } // Allow fallthrough from previous if branch @@ -90,7 +97,7 @@ namespace csv { return static_cast(this->value); } - + /** Compares the contents of this field to a numeric value. If this * field does not contain a numeric value, then all comparisons return * false. 
@@ -125,7 +132,7 @@ namespace csv { return internals::is_equal(out, static_cast(other), 0.000001L); } - + /** Returns true if field is an empty string or string of whitespace characters */ CONSTEXPR bool is_null() { return type() == CSV_NULL; } @@ -179,7 +186,7 @@ namespace csv { }; /** Constructor for testing */ - CSVRow(const std::string& str, const std::vector& splits, + CSVRow(const std::string& str, const std::vector& splits, const std::shared_ptr& col_names) : CSVRow(internals::BufferPtr(new internals::RawRowBuffer(str, splits, col_names))) {}; @@ -207,21 +214,21 @@ namespace csv { */ class iterator { public: - #ifndef DOXYGEN_SHOULD_SKIP_THIS +#ifndef DOXYGEN_SHOULD_SKIP_THIS using value_type = CSVField; using difference_type = int; // Using CSVField * as pointer type causes segfaults in MSVC debug builds // but using shared_ptr as pointer type won't compile in g++ - #ifdef _MSC_BUILD - using pointer = std::shared_ptr ; - #else +#ifdef _MSC_BUILD + using pointer = std::shared_ptr; +#else using pointer = CSVField * ; - #endif +#endif using reference = CSVField & ; using iterator_category = std::random_access_iterator_tag; - #endif +#endif iterator(const CSVRow*, int i); @@ -238,9 +245,9 @@ namespace csv { bool operator==(const iterator&) const; bool operator!=(const iterator& other) const { return !operator==(other); } - #ifndef NDEBUG +#ifndef NDEBUG friend CSVRow; - #endif +#endif private: const CSVRow * daddy = nullptr; // Pointer to parent @@ -254,7 +261,7 @@ namespace csv { /** @name Iterators * @brief Each iterator points to a CSVField object. */ - ///@{ + ///@{ iterator begin() const; iterator end() const; reverse_iterator rbegin() const; @@ -265,8 +272,8 @@ namespace csv { /** Get the index in CSVRow's text buffer where the n-th field begins */ unsigned short split_at(size_t n) const; - internals::BufferPtr buffer = nullptr; /**< Memory buffer containing data for this row. 
*/ - csv::string_view row_str = ""; /**< Text data for this row */ + internals::BufferPtr buffer = nullptr; /**< Memory buffer containing data for this row. */ + csv::string_view row_str = ""; /**< Text data for this row */ size_t start; /**< Where in split buffer this row begins */ unsigned short n_cols; /**< Numbers of columns this row has */ }; @@ -280,7 +287,7 @@ namespace csv { /** Retrieve a view over this field's string * - * @warning This string_view is only guaranteed to be valid as long as this + * @warning This string_view is only guaranteed to be valid as long as this * CSVRow is still alive. */ template<> diff --git a/single_include/csv.hpp b/single_include/csv.hpp index 50ea5144..b5187601 100644 --- a/single_include/csv.hpp +++ b/single_include/csv.hpp @@ -1,6 +1,6 @@ #pragma once /* -CSV for C++, version 1.2.0 +CSV for C++, version 1.2.1 https://github.com/vincentlaucsb/csv-parser MIT License @@ -3717,11 +3717,12 @@ namespace csv { static const std::string ERROR_OVERFLOW = "Overflow error."; static const std::string ERROR_FLOAT_TO_INT = "Attempted to convert a floating point value to an integral type."; + static const std::string ERROR_NEG_TO_UNSIGNED = "Negative numbers cannot be converted to unsigned types."; } /** * @class CSVField - * @brief Data type representing individual CSV values. + * @brief Data type representing individual CSV values. * CSVFields can be obtained by using CSVRow::operator[] */ class CSVField { @@ -3761,19 +3762,25 @@ namespace csv { * numeric value. 
* */ - template T get() { - static_assert(!std::is_unsigned::value, "Conversions to unsigned types are not supported."); - - IF_CONSTEXPR (std::is_arithmetic::value) { + template T get() { + IF_CONSTEXPR(std::is_arithmetic::value) { + // Note: this->type() also converts the CSV value to float if (this->type() <= CSV_STRING) { throw std::runtime_error(internals::ERROR_NAN); } } IF_CONSTEXPR(std::is_integral::value) { + // Note: this->is_float() also converts the CSV value to float if (this->is_float()) { throw std::runtime_error(internals::ERROR_FLOAT_TO_INT); } + + IF_CONSTEXPR(std::is_unsigned::value) { + if (this->value < 0) { + throw std::runtime_error(internals::ERROR_NEG_TO_UNSIGNED); + } + } } // Allow fallthrough from previous if branch @@ -3785,7 +3792,7 @@ namespace csv { return static_cast(this->value); } - + /** Compares the contents of this field to a numeric value. If this * field does not contain a numeric value, then all comparisons return * false. @@ -3820,7 +3827,7 @@ namespace csv { return internals::is_equal(out, static_cast(other), 0.000001L); } - + /** Returns true if field is an empty string or string of whitespace characters */ CONSTEXPR bool is_null() { return type() == CSV_NULL; } @@ -3874,7 +3881,7 @@ namespace csv { }; /** Constructor for testing */ - CSVRow(const std::string& str, const std::vector& splits, + CSVRow(const std::string& str, const std::vector& splits, const std::shared_ptr& col_names) : CSVRow(internals::BufferPtr(new internals::RawRowBuffer(str, splits, col_names))) {}; @@ -3902,21 +3909,21 @@ namespace csv { */ class iterator { public: - #ifndef DOXYGEN_SHOULD_SKIP_THIS +#ifndef DOXYGEN_SHOULD_SKIP_THIS using value_type = CSVField; using difference_type = int; // Using CSVField * as pointer type causes segfaults in MSVC debug builds // but using shared_ptr as pointer type won't compile in g++ - #ifdef _MSC_BUILD - using pointer = std::shared_ptr ; - #else +#ifdef _MSC_BUILD + using pointer = std::shared_ptr; +#else using 
pointer = CSVField * ; - #endif +#endif using reference = CSVField & ; using iterator_category = std::random_access_iterator_tag; - #endif +#endif iterator(const CSVRow*, int i); @@ -3933,9 +3940,9 @@ namespace csv { bool operator==(const iterator&) const; bool operator!=(const iterator& other) const { return !operator==(other); } - #ifndef NDEBUG +#ifndef NDEBUG friend CSVRow; - #endif +#endif private: const CSVRow * daddy = nullptr; // Pointer to parent @@ -3949,7 +3956,7 @@ namespace csv { /** @name Iterators * @brief Each iterator points to a CSVField object. */ - ///@{ + ///@{ iterator begin() const; iterator end() const; reverse_iterator rbegin() const; @@ -3960,8 +3967,8 @@ namespace csv { /** Get the index in CSVRow's text buffer where the n-th field begins */ unsigned short split_at(size_t n) const; - internals::BufferPtr buffer = nullptr; /**< Memory buffer containing data for this row. */ - csv::string_view row_str = ""; /**< Text data for this row */ + internals::BufferPtr buffer = nullptr; /**< Memory buffer containing data for this row. */ + csv::string_view row_str = ""; /**< Text data for this row */ size_t start; /**< Where in split buffer this row begins */ unsigned short n_cols; /**< Numbers of columns this row has */ }; @@ -3975,7 +3982,7 @@ namespace csv { /** Retrieve a view over this field's string * - * @warning This string_view is only guaranteed to be valid as long as this + * @warning This string_view is only guaranteed to be valid as long as this * CSVRow is still alive. 
*/ template<> diff --git a/tests/test_csv_field.cpp b/tests/test_csv_field.cpp index bd634a3e..6d995aed 100644 --- a/tests/test_csv_field.cpp +++ b/tests/test_csv_field.cpp @@ -59,6 +59,14 @@ TEST_CASE("CSVField get<>() - Integral Value", "[test_csv_field_get_int]") { REQUIRE(ex_caught); } +// Test converting a small integer to unsigned and signed integer types +TEMPLATE_TEST_CASE("CSVField get<>() - Integral Value to Int", "[test_csv_field_get_int]", + unsigned char, unsigned short, unsigned int, unsigned long long, + char, short, int, long long int) { + CSVField savage("21"); + REQUIRE(savage.get() == 21); +} + TEST_CASE("CSVField get<>() - Floating Point Value", "[test_csv_field_get_float]") { CSVField euler("2.718"); REQUIRE(euler.get<>() == "2.718"); @@ -69,6 +77,7 @@ TEST_CASE("CSVField get<>() - Floating Point Value", "[test_csv_field_get_float] } TEMPLATE_TEST_CASE("CSVField get<>() - Disallow Float to Int", "[test_csv_field_get_float_as_int]", + unsigned char, unsigned short, unsigned int, unsigned long long int, signed char, short, int, long long int) { CSVField euler("2.718"); bool ex_caught = false; @@ -84,6 +93,22 @@ TEMPLATE_TEST_CASE("CSVField get<>() - Disallow Float to Int", "[test_csv_field_ REQUIRE(ex_caught); } +TEMPLATE_TEST_CASE("CSVField get<>() - Disallow Negative to Unsigned", "[test_csv_field_no_unsigned_neg]", + unsigned char, unsigned short, unsigned int, unsigned long long int) { + CSVField neg("-1337"); + bool ex_caught = false; + + try { + neg.get(); + } + catch (std::runtime_error& err) { + REQUIRE(err.what() == csv::internals::ERROR_NEG_TO_UNSIGNED); + ex_caught = true; + } + + REQUIRE(ex_caught); +} + TEST_CASE("CSVField Equality Operator", "[test_csv_field_operator==]") { CSVField field("3.14"); REQUIRE(field == "3.14"); diff --git a/tests/test_data_type.cpp b/tests/test_data_type.cpp index bfeb3393..20c08f10 100644 --- a/tests/test_data_type.cpp +++ b/tests/test_data_type.cpp @@ -76,7 +76,7 @@ TEST_CASE( "Recognize Sub-Unit 
Double Values", "[regression_double]" ) { TEST_CASE( "Recognize Double Values", "[regression_double2]" ) { // Test converting double values back and forth - long double out; + long double out = -1.0; std::string s; for (long double i = 0; i <= 2.0; i += 0.01) {