Skip to content

Commit

Permalink
Add support for unsigned integer conversions (#47)
Browse files Browse the repository at this point in the history
* Added support for unsigned integer conversions

* Updated single header

* Fix uninitialized variable warning in test
  • Loading branch information
vincentlaucsb committed Aug 21, 2019
1 parent 7db184d commit d3f73b8
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 45 deletions.
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,8 @@ for (auto& row: reader) {
If your CSV has lots of numeric values, you can also have this parser (lazily)
convert them to the proper data type.
* Type checking is performed on conversions to prevent undefined behavior and integer overflow.
* Type checking is performed on conversions to prevent undefined behavior and integer overflow.
* Negative numbers cannot be converted to unsigned integer types; attempting to do so throws a `std::runtime_error`.
* `get<float>()`, `get<double>()`, and `get<long double>()` are capable of parsing numbers written in scientific notation.
* **Note:** Conversions to floating point types are not currently checked for loss of precision.
Expand All @@ -159,7 +160,8 @@ CSVReader reader("very_big_file.csv");
for (auto& row: reader) {
if (row["timestamp"].is_int()) {
// Can use get<>() with any signed integer type
// Can use get<>() with any integer type, but negative
// numbers cannot be converted to unsigned types
row["timestamp"].get<int>();
// ..
Expand Down
2 changes: 1 addition & 1 deletion include/csv.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
CSV for C++, version 1.2.0
CSV for C++, version 1.2.1
https://github.com/vincentlaucsb/csv-parser
MIT License
Expand Down
47 changes: 27 additions & 20 deletions include/internal/csv_row.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,12 @@ namespace csv {
static const std::string ERROR_OVERFLOW = "Overflow error.";
static const std::string ERROR_FLOAT_TO_INT =
"Attempted to convert a floating point value to an integral type.";
static const std::string ERROR_NEG_TO_UNSIGNED = "Negative numbers cannot be converted to unsigned types.";
}

/**
* @class CSVField
* @brief Data type representing individual CSV values.
* @brief Data type representing individual CSV values.
* CSVFields can be obtained by using CSVRow::operator[]
*/
class CSVField {
Expand Down Expand Up @@ -66,19 +67,25 @@ namespace csv {
* numeric value.
*
*/
template<typename T=std::string> T get() {
static_assert(!std::is_unsigned<T>::value, "Conversions to unsigned types are not supported.");

IF_CONSTEXPR (std::is_arithmetic<T>::value) {
template<typename T = std::string> T get() {
IF_CONSTEXPR(std::is_arithmetic<T>::value) {
// Note: this->type() also converts the CSV value to float
if (this->type() <= CSV_STRING) {
throw std::runtime_error(internals::ERROR_NAN);
}
}

IF_CONSTEXPR(std::is_integral<T>::value) {
// Note: this->is_float() also converts the CSV value to float
if (this->is_float()) {
throw std::runtime_error(internals::ERROR_FLOAT_TO_INT);
}

IF_CONSTEXPR(std::is_unsigned<T>::value) {
if (this->value < 0) {
throw std::runtime_error(internals::ERROR_NEG_TO_UNSIGNED);
}
}
}

// Allow fallthrough from previous if branch
Expand All @@ -90,7 +97,7 @@ namespace csv {

return static_cast<T>(this->value);
}

/** Compares the contents of this field to a numeric value. If this
* field does not contain a numeric value, then all comparisons return
* false.
Expand Down Expand Up @@ -125,7 +132,7 @@ namespace csv {

return internals::is_equal(out, static_cast<long double>(other), 0.000001L);
}

/** Returns true if field is an empty string or string of whitespace characters */
CONSTEXPR bool is_null() { return type() == CSV_NULL; }

Expand Down Expand Up @@ -179,7 +186,7 @@ namespace csv {
};

/** Constructor for testing */
CSVRow(const std::string& str, const std::vector<unsigned short>& splits,
CSVRow(const std::string& str, const std::vector<unsigned short>& splits,
const std::shared_ptr<internals::ColNames>& col_names)
: CSVRow(internals::BufferPtr(new internals::RawRowBuffer(str, splits, col_names))) {};

Expand Down Expand Up @@ -207,21 +214,21 @@ namespace csv {
*/
class iterator {
public:
#ifndef DOXYGEN_SHOULD_SKIP_THIS
#ifndef DOXYGEN_SHOULD_SKIP_THIS
using value_type = CSVField;
using difference_type = int;

// Using CSVField * as pointer type causes segfaults in MSVC debug builds
// but using shared_ptr as pointer type won't compile in g++
#ifdef _MSC_BUILD
using pointer = std::shared_ptr<CSVField> ;
#else
#ifdef _MSC_BUILD
using pointer = std::shared_ptr<CSVField>;
#else
using pointer = CSVField * ;
#endif
#endif

using reference = CSVField & ;
using iterator_category = std::random_access_iterator_tag;
#endif
#endif

iterator(const CSVRow*, int i);

Expand All @@ -238,9 +245,9 @@ namespace csv {
bool operator==(const iterator&) const;
bool operator!=(const iterator& other) const { return !operator==(other); }

#ifndef NDEBUG
#ifndef NDEBUG
friend CSVRow;
#endif
#endif

private:
const CSVRow * daddy = nullptr; // Pointer to parent
Expand All @@ -254,7 +261,7 @@ namespace csv {
/** @name Iterators
* @brief Each iterator points to a CSVField object.
*/
///@{
///@{
iterator begin() const;
iterator end() const;
reverse_iterator rbegin() const;
Expand All @@ -265,8 +272,8 @@ namespace csv {
/** Get the index in CSVRow's text buffer where the n-th field begins */
unsigned short split_at(size_t n) const;

internals::BufferPtr buffer = nullptr; /**< Memory buffer containing data for this row. */
csv::string_view row_str = ""; /**< Text data for this row */
internals::BufferPtr buffer = nullptr; /**< Memory buffer containing data for this row. */
csv::string_view row_str = ""; /**< Text data for this row */
size_t start; /**< Where in split buffer this row begins */
unsigned short n_cols; /**< Number of columns this row has */
};
Expand All @@ -280,7 +287,7 @@ namespace csv {

/** Retrieve a view over this field's string
*
* @warning This string_view is only guaranteed to be valid as long as this
* @warning This string_view is only guaranteed to be valid as long as this
* CSVRow is still alive.
*/
template<>
Expand Down
49 changes: 28 additions & 21 deletions single_include/csv.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#pragma once
/*
CSV for C++, version 1.2.0
CSV for C++, version 1.2.1
https://github.com/vincentlaucsb/csv-parser
MIT License
Expand Down Expand Up @@ -3717,11 +3717,12 @@ namespace csv {
static const std::string ERROR_OVERFLOW = "Overflow error.";
static const std::string ERROR_FLOAT_TO_INT =
"Attempted to convert a floating point value to an integral type.";
static const std::string ERROR_NEG_TO_UNSIGNED = "Negative numbers cannot be converted to unsigned types.";
}

/**
* @class CSVField
* @brief Data type representing individual CSV values.
* @brief Data type representing individual CSV values.
* CSVFields can be obtained by using CSVRow::operator[]
*/
class CSVField {
Expand Down Expand Up @@ -3761,19 +3762,25 @@ namespace csv {
* numeric value.
*
*/
template<typename T=std::string> T get() {
static_assert(!std::is_unsigned<T>::value, "Conversions to unsigned types are not supported.");

IF_CONSTEXPR (std::is_arithmetic<T>::value) {
template<typename T = std::string> T get() {
IF_CONSTEXPR(std::is_arithmetic<T>::value) {
// Note: this->type() also converts the CSV value to float
if (this->type() <= CSV_STRING) {
throw std::runtime_error(internals::ERROR_NAN);
}
}

IF_CONSTEXPR(std::is_integral<T>::value) {
// Note: this->is_float() also converts the CSV value to float
if (this->is_float()) {
throw std::runtime_error(internals::ERROR_FLOAT_TO_INT);
}

IF_CONSTEXPR(std::is_unsigned<T>::value) {
if (this->value < 0) {
throw std::runtime_error(internals::ERROR_NEG_TO_UNSIGNED);
}
}
}

// Allow fallthrough from previous if branch
Expand All @@ -3785,7 +3792,7 @@ namespace csv {

return static_cast<T>(this->value);
}

/** Compares the contents of this field to a numeric value. If this
* field does not contain a numeric value, then all comparisons return
* false.
Expand Down Expand Up @@ -3820,7 +3827,7 @@ namespace csv {

return internals::is_equal(out, static_cast<long double>(other), 0.000001L);
}

/** Returns true if field is an empty string or string of whitespace characters */
CONSTEXPR bool is_null() { return type() == CSV_NULL; }

Expand Down Expand Up @@ -3874,7 +3881,7 @@ namespace csv {
};

/** Constructor for testing */
CSVRow(const std::string& str, const std::vector<unsigned short>& splits,
CSVRow(const std::string& str, const std::vector<unsigned short>& splits,
const std::shared_ptr<internals::ColNames>& col_names)
: CSVRow(internals::BufferPtr(new internals::RawRowBuffer(str, splits, col_names))) {};

Expand Down Expand Up @@ -3902,21 +3909,21 @@ namespace csv {
*/
class iterator {
public:
#ifndef DOXYGEN_SHOULD_SKIP_THIS
#ifndef DOXYGEN_SHOULD_SKIP_THIS
using value_type = CSVField;
using difference_type = int;

// Using CSVField * as pointer type causes segfaults in MSVC debug builds
// but using shared_ptr as pointer type won't compile in g++
#ifdef _MSC_BUILD
using pointer = std::shared_ptr<CSVField> ;
#else
#ifdef _MSC_BUILD
using pointer = std::shared_ptr<CSVField>;
#else
using pointer = CSVField * ;
#endif
#endif

using reference = CSVField & ;
using iterator_category = std::random_access_iterator_tag;
#endif
#endif

iterator(const CSVRow*, int i);

Expand All @@ -3933,9 +3940,9 @@ namespace csv {
bool operator==(const iterator&) const;
bool operator!=(const iterator& other) const { return !operator==(other); }

#ifndef NDEBUG
#ifndef NDEBUG
friend CSVRow;
#endif
#endif

private:
const CSVRow * daddy = nullptr; // Pointer to parent
Expand All @@ -3949,7 +3956,7 @@ namespace csv {
/** @name Iterators
* @brief Each iterator points to a CSVField object.
*/
///@{
///@{
iterator begin() const;
iterator end() const;
reverse_iterator rbegin() const;
Expand All @@ -3960,8 +3967,8 @@ namespace csv {
/** Get the index in CSVRow's text buffer where the n-th field begins */
unsigned short split_at(size_t n) const;

internals::BufferPtr buffer = nullptr; /**< Memory buffer containing data for this row. */
csv::string_view row_str = ""; /**< Text data for this row */
internals::BufferPtr buffer = nullptr; /**< Memory buffer containing data for this row. */
csv::string_view row_str = ""; /**< Text data for this row */
size_t start; /**< Where in split buffer this row begins */
unsigned short n_cols; /**< Number of columns this row has */
};
Expand All @@ -3975,7 +3982,7 @@ namespace csv {

/** Retrieve a view over this field's string
*
* @warning This string_view is only guaranteed to be valid as long as this
* @warning This string_view is only guaranteed to be valid as long as this
* CSVRow is still alive.
*/
template<>
Expand Down
25 changes: 25 additions & 0 deletions tests/test_csv_field.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,14 @@ TEST_CASE("CSVField get<>() - Integral Value", "[test_csv_field_get_int]") {
REQUIRE(ex_caught);
}

// A small positive integer field should convert cleanly to every listed
// integral type, unsigned and signed alike.
TEMPLATE_TEST_CASE("CSVField get<>() - Integral Value to Int", "[test_csv_field_get_int]",
    unsigned char, unsigned short, unsigned int, unsigned long long,
    char, short, int, long long int) {
    CSVField field("21");

    // Convert once, then compare the typed result against the literal
    const TestType converted = field.get<TestType>();
    REQUIRE(converted == 21);
}

TEST_CASE("CSVField get<>() - Floating Point Value", "[test_csv_field_get_float]") {
CSVField euler("2.718");
REQUIRE(euler.get<>() == "2.718");
Expand All @@ -69,6 +77,7 @@ TEST_CASE("CSVField get<>() - Floating Point Value", "[test_csv_field_get_float]
}

TEMPLATE_TEST_CASE("CSVField get<>() - Disallow Float to Int", "[test_csv_field_get_float_as_int]",
unsigned char, unsigned short, unsigned int, unsigned long long int,
signed char, short, int, long long int) {
CSVField euler("2.718");
bool ex_caught = false;
Expand All @@ -84,6 +93,22 @@ TEMPLATE_TEST_CASE("CSVField get<>() - Disallow Float to Int", "[test_csv_field_
REQUIRE(ex_caught);
}

// Requesting an unsigned type from a field holding a negative integer must
// throw a std::runtime_error carrying ERROR_NEG_TO_UNSIGNED.
TEMPLATE_TEST_CASE("CSVField get<>() - Disallow Negative to Unsigned", "[test_csv_field_no_unsigned_neg]",
    unsigned char, unsigned short, unsigned int, unsigned long long int) {
    CSVField negative("-1337");
    bool threw = false;

    try {
        negative.get<TestType>();
    }
    catch (const std::runtime_error& error) {
        // Checking the message distinguishes this failure from the other
        // conversion errors get<>() can raise
        REQUIRE(error.what() == csv::internals::ERROR_NEG_TO_UNSIGNED);
        threw = true;
    }

    // Fails if get<>() returned normally instead of throwing
    REQUIRE(threw);
}

TEST_CASE("CSVField Equality Operator", "[test_csv_field_operator==]") {
CSVField field("3.14");
REQUIRE(field == "3.14");
Expand Down
2 changes: 1 addition & 1 deletion tests/test_data_type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ TEST_CASE( "Recognize Sub-Unit Double Values", "[regression_double]" ) {

TEST_CASE( "Recognize Double Values", "[regression_double2]" ) {
// Test converting double values back and forth
long double out;
long double out = -1.0;
std::string s;

for (long double i = 0; i <= 2.0; i += 0.01) {
Expand Down

0 comments on commit d3f73b8

Please sign in to comment.