diff --git a/README.md b/README.md index 5a553cef..a593e26d 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ * [Specifying the CSV Format](#specifying-the-csv-format) * [Trimming Whitespace](#trimming-whitespace) * [Setting Column Names](#setting-column-names) + * [Converting to JSON](#converting-to-json) * [Parsing an In-Memory String](#parsing-an-in-memory-string) * [Writing CSV Files](#writing-csv-files) * [Contributing](#contributing) @@ -160,7 +161,7 @@ CSVReader reader("very_big_file.csv"); for (auto& row: reader) { if (row["timestamp"].is_int()) { - // Can use get<>() with any integer type, but negative + // Can use get<>() with any integer type, but negative // numbers cannot be converted to unsigned types row["timestamp"].get(); @@ -170,6 +171,35 @@ for (auto& row: reader) { ``` +### Converting to JSON +You can serialize individual rows as JSON objects, where the keys are column names, or as +JSON arrays (which don't contain column names). The resulting JSON contains properly escaped +strings with minimal whitespace and no quoting for numeric values. How these JSON fragments are +assembled into a larger JSON document is an exercise left for the user. + +```cpp +# include <sstream> +# include "csv.hpp" + +using namespace csv; + +... + +CSVReader reader("very_big_file.csv"); +std::stringstream my_json; + +for (auto& row: reader) { + my_json << row.to_json() << std::endl; + my_json << row.to_json_array() << std::endl; + + // You can pass in a vector of column names to + // slice or rearrange the resulting JSON + my_json << row.to_json({ "A", "B", "C" }) << std::endl; + my_json << row.to_json_array({ "C", "B", "A" }) << std::endl; +} + +``` + ### Specifying the CSV Format Although the CSV parser has a decent guessing mechanism, in some cases it is preferrable to specify the exact parameters of a file.
@@ -214,7 +244,6 @@ CSVFormat format; format.set_column_names(col_names); ``` - ### Parsing an In-Memory String ```cpp diff --git a/docs/source/Doxy.md b/docs/source/Doxy.md index 4547bd45..3b291414 100644 --- a/docs/source/Doxy.md +++ b/docs/source/Doxy.md @@ -30,6 +30,8 @@ For quick examples, go to this project's [GitHub page](https://github.com/vincen * csv::CSVRow::iterator * csv::CSVRow::begin() * csv::CSVRow::end() + * csv::CSVRow::to_json() + * csv::CSVRow::to_json_array() * csv::CSVField * csv::CSVField::get(): \copybrief csv::CSVField::get() * csv::CSVField::operator==() diff --git a/include/csv.hpp b/include/csv.hpp index b3e0bf4a..dcfcab1e 100644 --- a/include/csv.hpp +++ b/include/csv.hpp @@ -1,5 +1,5 @@ /* -CSV for C++, version 1.2.1 +CSV for C++, version 1.2.2 https://github.com/vincentlaucsb/csv-parser MIT License diff --git a/include/internal/CMakeLists.txt b/include/internal/CMakeLists.txt index 99fae94a..9a37fb17 100644 --- a/include/internal/CMakeLists.txt +++ b/include/internal/CMakeLists.txt @@ -11,6 +11,7 @@ target_sources(csv csv_reader_iterator.cpp csv_row.hpp csv_row.cpp + csv_row_json.cpp csv_stat.cpp csv_stat.hpp csv_utility.cpp diff --git a/include/internal/csv_format.cpp b/include/internal/csv_format.cpp index 6911e2e4..9f67fc32 100644 --- a/include/internal/csv_format.cpp +++ b/include/internal/csv_format.cpp @@ -2,6 +2,9 @@ * Defines an object used to store CSV format settings */ +#include +#include + #include "csv_format.hpp" namespace csv { @@ -31,21 +34,25 @@ namespace csv { CSVFormat& CSVFormat::delimiter(char delim) { this->possible_delimiters = { delim }; + this->assert_no_char_overlap(); return *this; } CSVFormat& CSVFormat::delimiter(const std::vector & delim) { this->possible_delimiters = delim; + this->assert_no_char_overlap(); return *this; } CSVFormat& CSVFormat::quote(char quote) { this->quote_char = quote; + this->assert_no_char_overlap(); return *this; } CSVFormat& CSVFormat::trim(const std::vector & chars) { 
this->trim_chars = chars; + this->assert_no_char_overlap(); return *this; } @@ -70,4 +77,47 @@ namespace csv { this->unicode_detect = detect; return *this; } + + void CSVFormat::assert_no_char_overlap() + { + auto delims = std::set( + this->possible_delimiters.begin(), this->possible_delimiters.end()), + trims = std::set( + this->trim_chars.begin(), this->trim_chars.end()); + + // Stores intersection of possible delimiters and trim characters + std::vector intersection = {}; + + // Find which characters overlap, if any + std::set_intersection( + delims.begin(), delims.end(), + trims.begin(), trims.end(), + std::back_inserter(intersection)); + + // Make sure quote character is not contained in possible delimiters + // or whitespace characters + if (delims.find(this->quote_char) != delims.end() || + trims.find(this->quote_char) != trims.end()) { + intersection.push_back(this->quote_char); + } + + if (!intersection.empty()) { + std::string err_msg = "There should be no overlap between the quote character, " + "the set of possible delimiters " + "and the set of whitespace characters. 
Offending characters: "; + + // Create a pretty error message with the list of overlapping + // characters + for (size_t i = 0; i < intersection.size(); i++) { + err_msg += "'"; + err_msg += intersection[i]; + err_msg += "'"; + + if (i + 1 < intersection.size()) + err_msg += ", "; + } + + throw std::runtime_error(err_msg + '.'); + } + } } \ No newline at end of file diff --git a/include/internal/csv_format.hpp b/include/internal/csv_format.hpp index 431e0c6f..c99b77b2 100644 --- a/include/internal/csv_format.hpp +++ b/include/internal/csv_format.hpp @@ -24,22 +24,30 @@ namespace csv { /** Settings for parsing a RFC 4180 CSV file */ CSVFormat() = default; - /** Sets the delimiter of the CSV file */ + /** Sets the delimiter of the CSV file + * + * @throws `std::runtime_error` thrown if trim, quote, or possible delimiting characters overlap + */ CSVFormat& delimiter(char delim); /** Sets a list of potential delimiters * + * @throws `std::runtime_error` thrown if trim, quote, or possible delimiting characters overlap * @param[in] delim An array of possible delimiters to try parsing the CSV with */ CSVFormat& delimiter(const std::vector & delim); /** Sets the whitespace characters to be trimmed * + * @throws `std::runtime_error` thrown if trim, quote, or possible delimiting characters overlap * @param[in] ws An array of whitespace characters that should be trimmed */ CSVFormat& trim(const std::vector & ws); - /** Sets the quote character */ + /** Sets the quote character + * + * @throws `std::runtime_error` thrown if trim, quote, or possible delimiting characters overlap + */ CSVFormat& quote(char quote); /** Sets the column names. 
@@ -89,6 +97,9 @@ namespace csv { return this->possible_delimiters.size() > 1; } + /** Throws std::runtime_error if the quote character, the possible delimiters and the trim characters overlap */ + void assert_no_char_overlap(); + /**< Set of possible delimiters */ std::vector possible_delimiters = { ',' }; diff --git a/include/internal/csv_row.hpp b/include/internal/csv_row.hpp index 2f9bf8e2..c80d25f2 100644 --- a/include/internal/csv_row.hpp +++ b/include/internal/csv_row.hpp @@ -23,6 +23,8 @@ namespace csv { static const std::string ERROR_FLOAT_TO_INT = "Attempted to convert a floating point value to an integral type."; static const std::string ERROR_NEG_TO_UNSIGNED = "Negative numbers cannot be converted to unsigned types."; + + std::string json_escape_string(csv::string_view s) noexcept; } /** @@ -201,6 +203,8 @@ namespace csv { CSVField operator[](size_t n) const; CSVField operator[](const std::string&) const; csv::string_view get_string_view(size_t n) const; + std::string to_json(const std::vector& subset = {}) const; + std::string to_json_array(const std::vector& subset = {}) const; /** Convert this CSVRow into a vector of strings. * **Note**: This is a less efficient method of diff --git a/include/internal/csv_row_json.cpp b/include/internal/csv_row_json.cpp new file mode 100644 index 00000000..aabd548e --- /dev/null +++ b/include/internal/csv_row_json.cpp @@ -0,0 +1,257 @@ +#include "csv_row.hpp" + +namespace csv { + /* + The implementations for json_extra_space() and json_escape_string() + were modified from source code for JSON for Modern C++. + + The respective license is below: + + The code is licensed under the [MIT + License](http://opensource.org/licenses/MIT): + + Copyright © 2013-2015 Niels Lohmann.
+ + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation files + (the "Software"), to deal in the Software without restriction, + including without limitation the rights to use, copy, modify, merge, + publish, distribute, sublicense, and/or sell copies of the Software, + and to permit persons to whom the Software is furnished to do so, + subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + */ + + namespace internals { + /*! + @brief calculates the extra space to escape a JSON string + + @param[in] s the string to escape + @return the number of characters required to escape string @a s + + @complexity Linear in the length of string @a s. 
+ */ + static std::size_t json_extra_space(csv::string_view s) noexcept + { + std::size_t result = 0; + + + for (const auto& c : s) + { + switch (c) + { + case '"': + case '\\': + case '\b': + case '\f': + case '\n': + case '\r': + case '\t': + { + // from c (1 byte) to \x (2 bytes) + result += 1; + break; + } + + + default: + { + if (c >= 0x00 && c <= 0x1f) + { + // from c (1 byte) to \uxxxx (6 bytes) + result += 5; + } + break; + } + } + } + + + return result; + } + + std::string json_escape_string(csv::string_view s) noexcept + { + const auto space = json_extra_space(s); + if (space == 0) + { + return std::string(s); + } + + // create a result string of necessary size, pre-filled with backslashes so that each escape only needs its remaining characters written + std::string result(s.size() + space, '\\'); + std::size_t pos = 0; + + for (const auto& c : s) + { + switch (c) + { + // quotation mark (0x22) + case '"': + { + result[pos + 1] = '"'; + pos += 2; + break; + } + + + // reverse solidus (0x5c) + case '\\': + { + // nothing to change: both bytes are already backslashes + pos += 2; + break; + } + + + // backspace (0x08) + case '\b': + { + result[pos + 1] = 'b'; + pos += 2; + break; + } + + + // formfeed (0x0c) + case '\f': + { + result[pos + 1] = 'f'; + pos += 2; + break; + } + + + // newline (0x0a) + case '\n': + { + result[pos + 1] = 'n'; + pos += 2; + break; + } + + + // carriage return (0x0d) + case '\r': + { + result[pos + 1] = 'r'; + pos += 2; + break; + } + + + // horizontal tab (0x09) + case '\t': + { + result[pos + 1] = 't'; + pos += 2; + break; + } + + + default: + { + if (c >= 0x00 && c <= 0x1f) + { + // print character c as \u00xx; the digits are written directly because + // sprintf() would also store a NUL, past the end of result when c is the last character + static const char hex_digits[] = "0123456789abcdef"; + result[pos + 1] = 'u'; result[pos + 2] = '0'; result[pos + 3] = '0'; + result[pos + 4] = hex_digits[(c >> 4) & 0x0f]; result[pos + 5] = hex_digits[c & 0x0f]; pos += 6; + } + else + { + // all other characters are added as-is + result[pos++] = c; + } + break; + } + } + } + + return result; + } + } + + /** Convert a CSV row to a JSON object, i.e. + * `{"col1":"value1","col2":"value2"}` + * + * @note All strings are properly escaped. Numeric values are not quoted.
+ * @param[in] subset A subset of columns to contain in the JSON. + * Leave empty for original columns. + */ + std::string CSVRow::to_json(const std::vector& subset) const { + std::vector col_names = subset; + if (subset.empty()) { + col_names = this->buffer->col_names->get_col_names(); + } + + const size_t n_cols = col_names.size(); + std::string ret = "{"; + + for (size_t i = 0; i < n_cols; i++) { + auto& col = col_names[i]; + auto field = this->operator[](col); + + // TODO: Possible performance enhancements by caching escaped column names + ret += '"' + internals::json_escape_string(col) + "\":"; + + // Add quotes around strings but not numbers + if (field.is_num()) + ret += internals::json_escape_string(field.get()); + else + ret += '"' + internals::json_escape_string(field.get()) + '"'; + + // Do not add comma after last string + if (i + 1 < n_cols) + ret += ','; + } + + ret += '}'; + return ret; + } + + /** Convert a CSV row to a JSON array, i.e. + * `["value1","value2",...]` + * + * @note All strings are properly escaped. Numeric values are not quoted. + * @param[in] subset A subset of columns to contain in the JSON. + * Leave empty for all columns. 
+ */ + std::string CSVRow::to_json_array(const std::vector& subset) const { + std::vector col_names = subset; + if (subset.empty()) + col_names = this->buffer->col_names->get_col_names(); + + const size_t n_cols = col_names.size(); + std::string ret = "["; + + for (size_t i = 0; i < n_cols; i++) { + auto field = this->operator[](col_names[i]); + + // Add quotes around strings but not numbers + if (field.is_num()) + ret += internals::json_escape_string(field.get()); + else + ret += '"' + internals::json_escape_string(field.get()) + '"'; + + // Do not add comma after last string + if (i + 1 < n_cols) + ret += ','; + } + + ret += ']'; + return ret; + } +} \ No newline at end of file diff --git a/single_include/csv.hpp b/single_include/csv.hpp index b5187601..dc86999c 100644 --- a/single_include/csv.hpp +++ b/single_include/csv.hpp @@ -1,6 +1,6 @@ #pragma once /* -CSV for C++, version 1.2.1 +CSV for C++, version 1.2.2 https://github.com/vincentlaucsb/csv-parser MIT License @@ -2950,22 +2950,30 @@ namespace csv { /** Settings for parsing a RFC 4180 CSV file */ CSVFormat() = default; - /** Sets the delimiter of the CSV file */ + /** Sets the delimiter of the CSV file + * + * @throws `std::runtime_error` thrown if trim, quote, or possible delimiting characters overlap + */ CSVFormat& delimiter(char delim); /** Sets a list of potential delimiters * + * @throws `std::runtime_error` thrown if trim, quote, or possible delimiting characters overlap * @param[in] delim An array of possible delimiters to try parsing the CSV with */ CSVFormat& delimiter(const std::vector & delim); /** Sets the whitespace characters to be trimmed * + * @throws `std::runtime_error` thrown if trim, quote, or possible delimiting characters overlap * @param[in] ws An array of whitespace characters that should be trimmed */ CSVFormat& trim(const std::vector & ws); - /** Sets the quote character */ + /** Sets the quote character + * + * @throws `std::runtime_error` thrown if trim, quote, or possible 
delimiting characters overlap + */ CSVFormat& quote(char quote); /** Sets the column names. @@ -3015,6 +3023,9 @@ namespace csv { return this->possible_delimiters.size() > 1; } + /** Throws std::runtime_error if the quote character, the possible delimiters and the trim characters overlap */ + void assert_no_char_overlap(); + /**< Set of possible delimiters */ std::vector possible_delimiters = { ',' }; @@ -3718,6 +3729,8 @@ namespace csv { static const std::string ERROR_FLOAT_TO_INT = "Attempted to convert a floating point value to an integral type."; static const std::string ERROR_NEG_TO_UNSIGNED = "Negative numbers cannot be converted to unsigned types."; + + std::string json_escape_string(csv::string_view s) noexcept; } /** @@ -3896,6 +3909,8 @@ namespace csv { CSVField operator[](size_t n) const; CSVField operator[](const std::string&) const; csv::string_view get_string_view(size_t n) const; + std::string to_json(const std::vector& subset = {}) const; + std::string to_json_array(const std::vector& subset = {}) const; /** Convert this CSVRow into a vector of strings.
* **Note**: This is a less efficient method of @@ -4354,6 +4369,9 @@ namespace csv { * Defines an object used to store CSV format settings */ +#include +#include + namespace csv { CSVFormat create_default_csv_strict() { @@ -4382,21 +4400,25 @@ namespace csv { CSVFormat& CSVFormat::delimiter(char delim) { this->possible_delimiters = { delim }; + this->assert_no_char_overlap(); return *this; } CSVFormat& CSVFormat::delimiter(const std::vector & delim) { this->possible_delimiters = delim; + this->assert_no_char_overlap(); return *this; } CSVFormat& CSVFormat::quote(char quote) { this->quote_char = quote; + this->assert_no_char_overlap(); return *this; } CSVFormat& CSVFormat::trim(const std::vector & chars) { this->trim_chars = chars; + this->assert_no_char_overlap(); return *this; } @@ -4421,6 +4443,49 @@ namespace csv { this->unicode_detect = detect; return *this; } + + void CSVFormat::assert_no_char_overlap() + { + auto delims = std::set( + this->possible_delimiters.begin(), this->possible_delimiters.end()), + trims = std::set( + this->trim_chars.begin(), this->trim_chars.end()); + + // Stores intersection of possible delimiters and trim characters + std::vector intersection = {}; + + // Find which characters overlap, if any + std::set_intersection( + delims.begin(), delims.end(), + trims.begin(), trims.end(), + std::back_inserter(intersection)); + + // Make sure quote character is not contained in possible delimiters + // or whitespace characters + if (delims.find(this->quote_char) != delims.end() || + trims.find(this->quote_char) != trims.end()) { + intersection.push_back(this->quote_char); + } + + if (!intersection.empty()) { + std::string err_msg = "There should be no overlap between the quote character, " + "the set of possible delimiters " + "and the set of whitespace characters. 
Offending characters: "; + + // Create a pretty error message with the list of overlapping + // characters + for (size_t i = 0; i < intersection.size(); i++) { + err_msg += "'"; + err_msg += intersection[i]; + err_msg += "'"; + + if (i + 1 < intersection.size()) + err_msg += ", "; + } + + throw std::runtime_error(err_msg + '.'); + } + } } /** @file * @brief Defines functionality needed for basic CSV parsing @@ -5279,6 +5344,262 @@ namespace csv { } #pragma endregion CSVRow Iterator } + +namespace csv { + /* + The implementations for json_extra_space() and json_escape_string() + were modified from source code for JSON for Modern C++. + + The respective license is below: + + The code is licensed under the [MIT + License](http://opensource.org/licenses/MIT): + + Copyright © 2013-2015 Niels Lohmann. + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation files + (the "Software"), to deal in the Software without restriction, + including without limitation the rights to use, copy, modify, merge, + publish, distribute, sublicense, and/or sell copies of the Software, + and to permit persons to whom the Software is furnished to do so, + subject to the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + */ + + namespace internals { + /*! 
+ @brief calculates the extra space to escape a JSON string + + @param[in] s the string to escape + @return the number of extra characters needed to escape string @a s + + @complexity Linear in the length of string @a s. + */ + static std::size_t json_extra_space(csv::string_view s) noexcept + { + std::size_t result = 0; + + + for (const auto& c : s) + { + switch (c) + { + case '"': + case '\\': + case '\b': + case '\f': + case '\n': + case '\r': + case '\t': + { + // from c (1 byte) to \x (2 bytes) + result += 1; + break; + } + + + default: + { + if (c >= 0x00 && c <= 0x1f) + { + // from c (1 byte) to \uxxxx (6 bytes) + result += 5; + } + break; + } + } + } + + + return result; + } + + std::string json_escape_string(csv::string_view s) noexcept + { + const auto space = json_extra_space(s); + if (space == 0) + { + return std::string(s); + } + + // create a result string of necessary size, pre-filled with backslashes so that each escape only needs its remaining characters written + std::string result(s.size() + space, '\\'); + std::size_t pos = 0; + + for (const auto& c : s) + { + switch (c) + { + // quotation mark (0x22) + case '"': + { + result[pos + 1] = '"'; + pos += 2; + break; + } + + + // reverse solidus (0x5c) + case '\\': + { + // nothing to change: both bytes are already backslashes + pos += 2; + break; + } + + + // backspace (0x08) + case '\b': + { + result[pos + 1] = 'b'; + pos += 2; + break; + } + + + // formfeed (0x0c) + case '\f': + { + result[pos + 1] = 'f'; + pos += 2; + break; + } + + + // newline (0x0a) + case '\n': + { + result[pos + 1] = 'n'; + pos += 2; + break; + } + + + // carriage return (0x0d) + case '\r': + { + result[pos + 1] = 'r'; + pos += 2; + break; + } + + + // horizontal tab (0x09) + case '\t': + { + result[pos + 1] = 't'; + pos += 2; + break; + } + + + default: + { + if (c >= 0x00 && c <= 0x1f) + { + // print character c as \u00xx; the digits are written directly because + // sprintf() would also store a NUL, past the end of result when c is the last character + static const char hex_digits[] = "0123456789abcdef"; + result[pos + 1] = 'u'; result[pos + 2] = '0'; result[pos + 3] = '0'; + result[pos + 4] = hex_digits[(c >> 4) & 0x0f]; result[pos + 5] = hex_digits[c & 0x0f]; pos += 6; + } + else + { + // all other characters are added as-is + result[pos++] = c; + } + break; + } + } + } + + return
result; + } + } + + /** Convert a CSV row to a JSON object, i.e. + * `{"col1":"value1","col2":"value2"}` + * + * @note All strings are properly escaped. Numeric values are not quoted. + * @param[in] subset A subset of columns to contain in the JSON. + * Leave empty for original columns. + */ + std::string CSVRow::to_json(const std::vector& subset) const { + std::vector col_names = subset; + if (subset.empty()) { + col_names = this->buffer->col_names->get_col_names(); + } + + const size_t n_cols = col_names.size(); + std::string ret = "{"; + + for (size_t i = 0; i < n_cols; i++) { + auto& col = col_names[i]; + auto field = this->operator[](col); + + // TODO: Possible performance enhancements by caching escaped column names + ret += '"' + internals::json_escape_string(col) + "\":"; + + // Add quotes around strings but not numbers + if (field.is_num()) + ret += internals::json_escape_string(field.get()); + else + ret += '"' + internals::json_escape_string(field.get()) + '"'; + + // Do not add comma after last string + if (i + 1 < n_cols) + ret += ','; + } + + ret += '}'; + return ret; + } + + /** Convert a CSV row to a JSON array, i.e. + * `["value1","value2",...]` + * + * @note All strings are properly escaped. Numeric values are not quoted. + * @param[in] subset A subset of columns to contain in the JSON. + * Leave empty for all columns. 
+ */ + std::string CSVRow::to_json_array(const std::vector& subset) const { + std::vector col_names = subset; + if (subset.empty()) + col_names = this->buffer->col_names->get_col_names(); + + const size_t n_cols = col_names.size(); + std::string ret = "["; + + for (size_t i = 0; i < n_cols; i++) { + auto field = this->operator[](col_names[i]); + + // Add quotes around strings but not numbers + if (field.is_num()) + ret += internals::json_escape_string(field.get()); + else + ret += '"' + internals::json_escape_string(field.get()) + '"'; + + // Do not add comma after last string + if (i + 1 < n_cols) + ret += ','; + } + + ret += ']'; + return ret; + } +} /** @file * Calculates statistics from CSV files */ diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 5cd8189a..8516fd60 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,31 +1,33 @@ add_executable(csv_test "") target_sources(csv_test - PRIVATE - ${CSV_INCLUDE_DIR}/csv.hpp - catch.hpp - main.cpp - test_csv_buffer.cpp - test_csv_field.cpp - test_csv_iterator.cpp - test_csv_row.cpp - test_csv_stat.cpp - test_read_csv.cpp - test_write_csv.cpp - test_data_type.cpp + PRIVATE + ${CSV_INCLUDE_DIR}/csv.hpp + catch.hpp + main.cpp + test_csv_buffer.cpp + test_csv_field.cpp + test_csv_format.cpp + test_csv_iterator.cpp + test_csv_row.cpp + test_csv_row_json.cpp + test_csv_stat.cpp + test_read_csv.cpp + test_write_csv.cpp + test_data_type.cpp ) target_link_libraries(csv_test csv) if(MSVC) - # Workaround to enable debugging unit tests in Visual Studio - add_custom_command( - TARGET csv_test POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_directory - ${CSV_TEST_DIR}/data $/tests/data - ) + # Workaround to enable debugging unit tests in Visual Studio + add_custom_command( + TARGET csv_test POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_directory + ${CSV_TEST_DIR}/data $/tests/data + ) endif() add_test( - NAME test - COMMAND csv_test - WORKING_DIRECTORY ${CSV_ROOT_DIR} + NAME test + COMMAND csv_test + 
WORKING_DIRECTORY ${CSV_ROOT_DIR} ) \ No newline at end of file diff --git a/tests/test_csv_format.cpp b/tests/test_csv_format.cpp new file mode 100644 index 00000000..5be90857 --- /dev/null +++ b/tests/test_csv_format.cpp @@ -0,0 +1,61 @@ +#include "catch.hpp" +#include "csv.hpp" +using namespace csv; + +static const std::string err_preamble = "There should be no overlap between " + "the quote character, the set of possible " + "delimiters and the set of whitespace characters."; + +// Assert that an error is thrown if whitespace, delimiter, and quote characters overlap +TEST_CASE("CSVFormat - Overlapping Characters", "[csv_format_overlap]") { + CSVFormat format; + bool err_caught = false; + + SECTION("Tab") { + try { + format.delimiter('\t').quote('"').trim({ '\t' }); + } + catch (const std::runtime_error& err) { + err_caught = true; + REQUIRE(err.what() == std::string(err_preamble + " Offending characters: '\t'.")); + } + + REQUIRE(err_caught); + } + + SECTION("Tab with multiple other characters") { + try { + format.delimiter({ ',', '\t' }).quote('"').trim({ ' ', '\t' }); + } + catch (const std::runtime_error& err) { + err_caught = true; + REQUIRE(err.what() == std::string(err_preamble + " Offending characters: '\t'.")); + } + + REQUIRE(err_caught); + } + + SECTION("Repeated quote") { + try { + format.delimiter({ ',', '"' }).quote('"').trim({ ' ', '\t' }); + } + catch (const std::runtime_error& err) { + err_caught = true; + REQUIRE(err.what() == std::string(err_preamble + " Offending characters: '\"'.")); + } + + REQUIRE(err_caught); + } + + SECTION("Multiple offenders") { + try { + format.delimiter({ ',', '\t', ' ' }).quote('"').trim({ ' ', '\t' }); + } + catch (const std::runtime_error& err) { + err_caught = true; + REQUIRE(err.what() == std::string(err_preamble + " Offending characters: '\t', ' '.")); + } + + REQUIRE(err_caught); + } +} \ No newline at end of file diff --git a/tests/test_csv_row_json.cpp b/tests/test_csv_row_json.cpp new file mode 100644 index 00000000..f94c4bf3 --- /dev/null +++
b/tests/test_csv_row_json.cpp @@ -0,0 +1,90 @@ +#include "catch.hpp" +#include "csv.hpp" +using namespace csv; + +/** Construct a CSVRow object for testing given column names and CSV fields */ +CSVRow make_csv_row(std::vector data, std::vector col_names) { + // Concatenate vector of strings into one large string + std::string concat; + std::vector splits = {}; + + for (auto& field : data) { + concat += field; + splits.push_back(concat.size()); + } + + return CSVRow(concat, splits, std::make_shared(col_names)); +} + +TEST_CASE("json_escape_string() Test", "[json_escape_string]") { + using internals::json_escape_string; + + // Assert that special characters are escaped properly + REQUIRE(json_escape_string("Quote\"Quote") == "Quote\\\"Quote"); + REQUIRE(json_escape_string("RSolidus\\RSolidus") + == "RSolidus\\\\RSolidus"); + REQUIRE(json_escape_string("Backspace\bBackspace") + == "Backspace\\bBackspace"); + REQUIRE(json_escape_string("Formfeed\fFormfeed") + == "Formfeed\\fFormfeed"); + REQUIRE(json_escape_string("Newline\nNewline") + == "Newline\\nNewline"); + REQUIRE(json_escape_string("CarriageReturn\rCarriageReturn") + == "CarriageReturn\\rCarriageReturn"); + REQUIRE(json_escape_string("Tab\tTab") + == "Tab\\tTab"); + + // Assert that control characters are escaped properly (explicit length keeps the embedded NUL in the input) + REQUIRE(json_escape_string(csv::string_view("Null\0Null", 9)) + == "Null\\u0000Null"); +} + +TEST_CASE("CSVRow to_json() Test", "[csv_row_to_json]") { + CSVRow row = make_csv_row( + { "Col 1", "Col 2" }, // Fields + { "A", "B" } // Column names + ); + + REQUIRE(row.to_json() == "{\"A\":\"Col 1\",\"B\":\"Col 2\"}"); +} + +TEST_CASE("CSVRow to_json() Test with Numbers", "[csv_numeric_row_to_json]") { + CSVRow row = make_csv_row( + { "1234.3", "234" }, // Fields + { "A", "B"} // Column names + ); + + REQUIRE(row.to_json() == "{\"A\":1234.3,\"B\":234}"); +} + +TEST_CASE("CSVRow to_json() Test - Mixed", "[csv_mixed_row_to_json]") { + CSVRow row = make_csv_row( + { "1234.3", "234", "ABCD", "AB1", "1337" }, // Fields + {
"A", "B", "C", "D", "E" } // Column names + ); + + SECTION("Full Row") { + REQUIRE(row.to_json() == "{\"A\":1234.3,\"B\":234,\"C\":\"ABCD\",\"D\":\"AB1\",\"E\":1337}"); + } + + SECTION("Subset") { + REQUIRE(row.to_json({ "B", "C" }) == "{\"B\":234,\"C\":\"ABCD\"}"); + REQUIRE(row.to_json({ "B", "A" }) == "{\"B\":234,\"A\":1234.3}"); + } +} + +TEST_CASE("CSVRow to_json_array() Test() - Mixed", "[csv_mixed_row_to_json_array]") { + CSVRow row = make_csv_row( + { "1234.3", "234", "ABCD", "AB1", "1337" }, // Fields + { "A", "B", "C", "D", "E" } // Column names + ); + + SECTION("Full Row") { + REQUIRE(row.to_json_array() == "[1234.3,234,\"ABCD\",\"AB1\",1337]"); + } + + SECTION("Subset") { + REQUIRE(row.to_json_array({ "B", "C" }) == "[234,\"ABCD\"]"); + REQUIRE(row.to_json_array({ "B", "A" }) == "[234,1234.3]"); + } +} \ No newline at end of file