Skip to content

Commit

Permalink
Fix merge conflict
Browse files Browse the repository at this point in the history
  • Loading branch information
vincentlaucsb committed Sep 27, 2020
1 parent 15b70e1 commit 645de37
Show file tree
Hide file tree
Showing 4 changed files with 2 additions and 373 deletions.
4 changes: 0 additions & 4 deletions include/internal/col_names.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,7 @@ namespace csv {
return CSV_NOT_FOUND;
}

<<<<<<< HEAD
CSV_INLINE size_t ColNames::size() const noexcept {
=======
CSV_INLINE size_t ColNames::size() const {
>>>>>>> master
return this->col_names.size();
}

Expand Down
155 changes: 0 additions & 155 deletions include/internal/raw_csv_data.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#include "raw_csv_data.hpp"

namespace csv {
<<<<<<< HEAD
namespace internals {
CSV_INLINE void BasicCSVParser::parse(csv::string_view in) {
this->set_data_ptr(std::make_shared<RawCSVData>());
Expand Down Expand Up @@ -128,160 +127,6 @@ namespace csv {

break;
}
=======
/** Parse a chunk of CSV text, handing completed rows to `records`
 *
 *  A fresh RawCSVData buffer is allocated for every call. If the previous call
 *  left an unfinished row behind, the text of that row is copied to the front
 *  of the new buffer and the row is re-parsed from its beginning together with
 *  the new input.
 *
 *  @param[in]  in       The next chunk of raw CSV text
 *  @param[out] records  Container whose address is stored in `_records`;
 *                       parse_loop() pushes finished rows into it
 */
CSV_INLINE void BasicCSVParser::parse(csv::string_view in, std::deque<CSVRow>& records) {
    this->set_data_ptr(std::make_shared<RawCSVData>());
    this->data_ptr->col_names = this->col_names;
    this->_records = &records;

    // Check for previous fragments.
    // The null check has to guard BOTH alternatives: `a && b || c` groups as
    // `(a && b) || c`, which would let a pending field_length alone send us into
    // the branch below, where current_row.data is dereferenced.
    const bool has_fragment = this->current_row.data
        && (this->current_row.size() > 0 || this->field_length > 0);

    if (has_fragment) {
        // Make a separate data buffer for the fragment row
        auto temp_str = this->current_row.data->data.substr(this->current_row.data_start);

        // Rebase the unfinished row onto the new buffer and restart it from scratch
        this->current_row.data = this->data_ptr;
        this->current_row.data_start = 0;
        this->current_row.row_length = 0;
        this->current_row.field_bounds_index = 0;

        this->field_start = -1;
        this->field_length = 0;

        // New buffer contents = leftover text followed by the new chunk
        auto& fragment_data = this->current_row.data;
        fragment_data->data.reserve(temp_str.size() + in.size());
        fragment_data->data = temp_str;
        fragment_data->data += in;

        // From here on, parse the combined text rather than just the new chunk
        in = csv::string_view(fragment_data->data);
    }
    else {
        this->data_ptr->data.assign(in.data(), in.size());
        this->current_row = CSVRow(this->data_ptr);
    }

    this->parse_loop(in);
}

/** Record the field currently being parsed and reset the per-field state
 *
 *  Appends a (start, length) entry to the active field array, bumps the
 *  current row's length, and, if an escaped quote was seen in the field,
 *  remembers the index of the new entry in `has_double_quotes`.
 */
CSV_INLINE void BasicCSVParser::push_field()
{
    // A start that is unset (negative) or zero is stored as zero
    const unsigned int start_offset =
        this->field_start > 0 ? (unsigned int)this->field_start : 0;

    this->fields->push_back({ start_offset, this->field_length });
    this->current_row.row_length += 1;

    if (this->field_has_double_quote) {
        // Index of the entry that was just appended
        const auto pushed_index = this->data_ptr->fields.size() - 1;
        this->current_row.data->has_double_quotes.insert(pushed_index);
        this->field_has_double_quote = false;
    }

    // Ready for the next field
    this->field_length = 0;
    this->field_start = -1;
}

/** Consume a run of field characters starting at `in[i]`
 *
 *  Skips leading whitespace, records the field start (relative to the row
 *  start) if one is not already set, advances `i` past the run, and updates
 *  `field_length` with trailing whitespace excluded.
 *
 *  @param[in]     in                 Text being parsed
 *  @param[in,out] i                  Current position in `in`; left at the first
 *                                    character that ends the run (or `in.size()`)
 *  @param[in]     current_row_start  Offset in `in` at which the current row begins
 *  @param[in]     quote_escape       If true, the run only ends at a quote character;
 *                                    otherwise it ends at any special character
 */
CONSTEXPR void BasicCSVParser::parse_field(csv::string_view in, size_t& i, const size_t& current_row_start, bool quote_escape) {
    using internals::ParseFlags;

    // Trim off leading whitespace
    while (i < in.size() && ws_flag(in[i])) i++;

    if (this->field_start < 0) {
        this->field_start = (int)(i - current_row_start);
    }

    // Optimization: Since NOT_SPECIAL characters tend to occur in contiguous
    // sequences, use the loop below to avoid having to go through the outer
    // switch statement as much as possible
    if (quote_escape) {
        while (i < in.size() && parse_flag(in[i]) != ParseFlags::QUOTE) i++;
    }
    else {
        while (i < in.size() && parse_flag(in[i]) == ParseFlags::NOT_SPECIAL) i++;
    }

    this->field_length = i - (this->field_start + current_row_start);

    // Trim off trailing whitespace. The field_length check comes first so that
    // in[j] is only read while j still lies inside the field: with an empty
    // (or fully trimmed) field, j may sit before the field's first character
    // or have wrapped around, and must not be used as an index.
    for (size_t j = i - 1; this->field_length > 0 && ws_flag(in[j]); j--) this->field_length--;
}

CSV_INLINE void BasicCSVParser::parse_loop(csv::string_view in)
{
using internals::ParseFlags;

// Parser state
size_t current_row_start = 0;
bool quote_escape = false;

size_t in_size = in.size();
for (size_t i = 0; i < in_size; ) {
if (quote_escape) {
// TODO: Clean up these conditions
if (parse_flag(in[i]) == ParseFlags::QUOTE) {
if (i + 1 == in.size() || (i + 1 < in.size() && parse_flag(in[i + 1]) >= ParseFlags::DELIMITER)) {
quote_escape = false;
i++;
continue;
}

// Case: Escaped quote
this->field_length++;
i++;

if (i < in.size() && parse_flag(in[i]) == ParseFlags::QUOTE) {
i++;
this->field_length++;
this->field_has_double_quote = true;
}

continue;
}

this->parse_field(in, i, current_row_start, quote_escape);
}
else {
switch (parse_flag(in[i])) {
case ParseFlags::DELIMITER:
this->push_field();
i++;
break;

case ParseFlags::NEWLINE:
i++;

// Catches CRLF (or LFLF)
if (i < in.size() && parse_flag(in[i]) == ParseFlags::NEWLINE) i++;

// End of record -> Write record
this->push_field();
this->push_row(*this->_records);
this->current_row = CSVRow(this->data_ptr);
this->current_row.data_start = i;
this->current_row.field_bounds_index = this->data_ptr->fields.size();
current_row_start = i;
break;

case ParseFlags::NOT_SPECIAL:
this->parse_field(in, i, current_row_start, quote_escape);
break;
default: // Quote
if (this->field_length == 0) {
quote_escape = true;
i++;
break;
}

// Unescaped quote
this->field_length++;
i++;

break;
>>>>>>> master
}
}
}
Expand Down
103 changes: 0 additions & 103 deletions include/internal/raw_csv_data.hpp
Original file line number Diff line number Diff line change
@@ -1,29 +1,20 @@
#pragma once
#include <array>
<<<<<<< HEAD
#include <condition_variable>
#include <deque>
#include <memory>
#include <mutex>
=======
#include <deque>
#include <memory>
>>>>>>> master
#include <unordered_map>
#include <unordered_set>
#include <vector>

<<<<<<< HEAD
#include "../external/mio.hpp"
=======
>>>>>>> master
#include "col_names.hpp"
#include "compatibility.hpp"
#include "csv_row.hpp"

namespace csv {
namespace internals {
<<<<<<< HEAD
/** A std::deque wrapper which allows multiple read and write threads to concurrently
* access it along with providing read threads the ability to wait for the deque
* to become populated
Expand Down Expand Up @@ -233,98 +224,4 @@ namespace csv {
}
};
}
=======
/** @enum ParseFlags
* An enum used for describing the significance of each character
* with respect to CSV parsing
*
* The declaration order is significant: BasicCSVParser::parse_loop() tests
* `parse_flag(...) >= ParseFlags::DELIMITER` to decide whether a quote closes
* a quoted field, so DELIMITER and NEWLINE must remain the two highest-valued
* enumerators.
*/
enum class ParseFlags {
NOT_SPECIAL, /**< Characters with no special meaning */
QUOTE, /**< Characters which may signify a quote escape */
DELIMITER, /**< Characters which may signify a new field */
NEWLINE /**< Characters which may signify a new row */
};

/** Lookup table with one ParseFlags entry per possible char value (256 slots) */
using ParseFlagMap = std::array<ParseFlags, 256>;

/** Lookup table with one bool per possible char value (256 slots); true marks a character to trim */
using WhitespaceMap = std::array<bool, 256>;
}

/** A class for parsing raw CSV data
 *
 *  Text is fed in chunks through parse(). A row that is still unfinished when
 *  a chunk ends is kept in `current_row` and picked up again by the next
 *  parse() call; end_feed() flushes whatever is pending once input is over.
 *
 *  Character classification is table driven: see parse_flag() and ws_flag().
 */
class BasicCSVParser {
public:
    /** Construct a parser whose flag tables classify every character as
     *  NOT_SPECIAL / non-whitespace until set_parse_flags() and
     *  set_ws_flags() are called
     */
    BasicCSVParser() = default;

    /** Construct a parser which attaches the given column names to the data it parses */
    BasicCSVParser(internals::ColNamesPtr _col_names) : col_names(_col_names) {}

    /** Construct a parser with explicit character classification tables */
    BasicCSVParser(internals::ParseFlagMap parse_flags, internals::WhitespaceMap ws_flags) :
        _parse_flags(parse_flags), _ws_flags(ws_flags) {}

    /** Parse a chunk of CSV text, pushing finished rows onto `records` */
    void parse(csv::string_view in, std::deque<CSVRow>& records);

    /** Signal that no more input is coming
     *
     *  Pushes the pending field (if it has content, or if the buffered data
     *  ends in a delimiter, meaning an empty last field) and then the pending
     *  row (if it has any fields) onto `records`.
     */
    void end_feed(std::deque<CSVRow>& records) {
        using internals::ParseFlags;

        // A trailing delimiter implies one more, empty, field
        bool empty_last_field = this->current_row.data
            && !this->current_row.data->data.empty()
            && parse_flag(this->current_row.data->data.back()) == ParseFlags::DELIMITER;

        if (this->field_length > 0 || empty_last_field) {
            this->push_field();
        }

        if (this->current_row.size() > 0) {
            this->push_row(records);
        }
    }

    /** Replace the table used by parse_flag() */
    void set_parse_flags(internals::ParseFlagMap parse_flags) {
        _parse_flags = parse_flags;
    }

    /** Replace the table used by ws_flag() */
    void set_ws_flags(internals::WhitespaceMap ws_flags) {
        _ws_flags = ws_flags;
    }

private:
    /** Look up the ParseFlags of a character
     *
     *  The table slot is `ch + 128`. The sum is wrapped into 0..255 so the
     *  lookup stays in bounds where plain `char` is unsigned; where `char` is
     *  signed the wrap is a no-op and the slot is exactly `ch + 128`.
     */
    CONSTEXPR internals::ParseFlags parse_flag(const char ch) const {
        return _parse_flags.data()[static_cast<unsigned char>(ch + 128)];
    }

    /** Look up whether a character should be trimmed; indexed like parse_flag() */
    CONSTEXPR bool ws_flag(const char ch) const {
        return _ws_flags.data()[static_cast<unsigned char>(ch + 128)];
    }

    /** Append the field being parsed to `fields` and reset the per-field state */
    void push_field();

    /** Consume a run of field characters starting at `in[i]`, updating `i`,
     *  `field_start` and `field_length`
     */
    CONSTEXPR void parse_field(csv::string_view in, size_t& i, const size_t& current_row_start, bool quote_escape = false);

    /** Walk over `in`, pushing fields and rows as delimiters and newlines are met */
    void parse_loop(csv::string_view in);

    /** Finalize `current_row` and move it onto `records`
     *
     *  The row length is recomputed from the number of fields stored since
     *  the row's `field_bounds_index`. `current_row` is moved from here.
     */
    void push_row(std::deque<CSVRow>& records) {
        current_row.row_length = current_row.data->fields.size() - current_row.field_bounds_index;
        records.push_back(std::move(current_row));
    }

    /** Make `ptr` the active data buffer and point `fields` at its field array */
    void set_data_ptr(RawCSVDataPtr ptr) {
        this->data_ptr = ptr;
        this->fields = &(ptr->fields);
    }

    /** An array where the (i + 128)th slot gives the ParseFlags for ASCII character i.
     *  Value-initialized so that a parser built without explicit tables never
     *  reads indeterminate entries.
     */
    internals::ParseFlagMap _parse_flags{};

    /** An array where the (i + 128)th slot determines whether ASCII character i should
     *  be trimmed. Value-initialized (all false) for the same reason as `_parse_flags`.
     */
    internals::WhitespaceMap _ws_flags{};

    /** Column names copied into every data buffer created by parse() */
    internals::ColNamesPtr col_names = nullptr;

    /** The row currently being assembled */
    CSVRow current_row;

    /** Start of the current field relative to the row start; negative while unset */
    int field_start = -1;

    /** Length of the current field so far */
    size_t field_length = 0;

    /** Whether an escaped (doubled) quote was seen in the current field */
    bool field_has_double_quote = false;

    /** The data buffer rows are currently being parsed into */
    RawCSVDataPtr data_ptr = nullptr;

    /** Field array of `data_ptr`; set by set_data_ptr() */
    internals::CSVFieldArray* fields = nullptr;

    /** Destination of finished rows for the parse() call in progress */
    std::deque<CSVRow>* _records = nullptr;
};
>>>>>>> master
}
Loading

0 comments on commit 645de37

Please sign in to comment.