Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

S3select: replacing a "naked loop" with std::copy_n #42

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions example/generate_rand_csv.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,11 @@ int main(int argc, char** argv)
{
if (argc<3)
{
printf("%s <num-of-rows> <num-of-columns> \n", argv[0]);
printf("%s <num-of-rows> <num-of-columns>\n", argv[0]);
return -1;
}

srand(1234);
int line_no=0;
for(int i=0; i<atoi(argv[1]); i++)
{
printf("%d,", i);
Expand Down
80 changes: 37 additions & 43 deletions include/s3select.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <iostream>
#include <string>
#include <list>
#include <vector>
#include "s3select_oper.h"
#include "s3select_functions.h"
#include "s3select_csv_parser.h"
Expand All @@ -30,17 +31,17 @@ class s3select_projections
std::vector<base_statement*> m_projections;

public:
bool is_aggregate()
bool is_aggregate() const
{
//TODO iterate on projections , and search for aggregate
//for(auto p : m_projections){}

return false;
}

bool semantic()
bool semantic() const
{
//TODO check aggragtion function are not nested
//TODO check aggragation function are not nested
return false;
}

Expand All @@ -56,8 +57,9 @@ static s3select_reserved_word g_s3select_reserve_word;//read-only
struct actionQ
{
// upon parser is accepting a token (lets say some number),
// it push it into dedicated queue, later those tokens are poped out to build some "higher" contruct (lets say 1 + 2)
// those containers are used only for parsing phase and not for runtime.
// it pushes it into a dedicated queue. These tokens are later popped out to
// build some "higher" construct (1 + 2 for example).
// The containers below are only used during the parsing phase, not for runtime.

std::vector<mulldiv_operation::muldiv_t> muldivQ;
std::vector<addsub_operation::addsub_op_t> addsubQ;
Expand All @@ -71,7 +73,6 @@ struct actionQ
std::vector<std::string> trimTypeQ;
projection_alias alias_map;
std::string from_clause;
std::vector<std::string> schema_columns;
s3select_projections projections;

uint64_t in_set_count;
Expand Down Expand Up @@ -385,11 +386,11 @@ struct s3select : public bsc::grammar<s3select>
{
for (const auto& e : get_projections_list())
{
base_statement* aggr;
const base_statement* aggr;

if ((aggr = e->get_aggregate()) != nullptr)
{
if (aggr->is_nested_aggregate(aggr))
if (aggr->is_nested_aggregate())
{
error_description = "nested aggregation function is illegal i.e. sum(...sum ...)";
throw base_s3select_exception(error_description, base_s3select_exception::s3select_exp_en_t::FATAL);
Expand All @@ -399,7 +400,7 @@ struct s3select : public bsc::grammar<s3select>
}
}

if (aggr_flow == true)
if (aggr_flow)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

== true/false is style issue

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

'== true' is a bit over the line dividing 'style' and 'irregular and strange'...

And, BTW - the 'if' statement semantics is 'if the expression in parens evaluates to 'true' then do ...'.
No new semantic meaning is conveyed by the '== true'. It just adds to the reader's burden.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

indeed.
there is nothing logical or practical about /x == true/
i probably wanted to remind myself its a boolean and not some number
will change that.

this style appears in our code-base.
[ find ./src -name '.cc' -o -name '.h' | xargs egrep '== false|== true' | grep -v s3select | wc ] ;

for (const auto& e : get_projections_list())
{
auto skip_expr = e->get_aggregate();
Expand All @@ -416,7 +417,7 @@ struct s3select : public bsc::grammar<s3select>

int parse_query(const char* input_query)
{
if(get_projections_list().empty() == false)
if(!get_projections_list().empty())
{
return 0; //already parsed
}
Expand Down Expand Up @@ -762,13 +763,11 @@ void push_mulop::builder(s3select* self, const char* a, const char* b) const
}
}

void push_addsub_binop::builder(s3select* self, [[maybe_unused]] const char* a,[[maybe_unused]] const char* b) const
void push_addsub_binop::builder(s3select* self, [[maybe_unused]] const char* a, [[maybe_unused]] const char* b) const
{
base_statement* l = nullptr, *r = nullptr;

r = self->getAction()->exprQ.back();
base_statement* r = self->getAction()->exprQ.back();
self->getAction()->exprQ.pop_back();
l = self->getAction()->exprQ.back();
base_statement* l = self->getAction()->exprQ.back();
self->getAction()->exprQ.pop_back();
addsub_operation::addsub_op_t o = self->getAction()->addsubQ.back();
self->getAction()->addsubQ.pop_back();
Expand All @@ -778,11 +777,9 @@ void push_addsub_binop::builder(s3select* self, [[maybe_unused]] const char* a,[

void push_mulldiv_binop::builder(s3select* self, [[maybe_unused]] const char* a, [[maybe_unused]] const char* b) const
{
base_statement* vl = nullptr, *vr = nullptr;

vr = self->getAction()->exprQ.back();
base_statement* vr = self->getAction()->exprQ.back();
self->getAction()->exprQ.pop_back();
vl = self->getAction()->exprQ.back();
base_statement* vl = self->getAction()->exprQ.back();
self->getAction()->exprQ.pop_back();
mulldiv_operation::muldiv_t o = self->getAction()->muldivQ.back();
self->getAction()->muldivQ.pop_back();
Expand Down Expand Up @@ -832,7 +829,7 @@ void push_function_expr::builder(s3select* self, const char* a, const char* b) c
void push_compare_operator::builder(s3select* self, const char* a, const char* b) const
{
std::string token(a, b);
arithmetic_operand::cmp_t c = arithmetic_operand::cmp_t::NA;
arithmetic_operand::cmp_t c;

if (token == "==")
{
Expand Down Expand Up @@ -865,7 +862,7 @@ void push_compare_operator::builder(s3select* self, const char* a, const char* b
void push_logical_operator::builder(s3select* self, const char* a, const char* b) const
{
std::string token(a, b);
logical_operand::oplog_t l = logical_operand::oplog_t::NA;
logical_operand::oplog_t l;

if (token == "and")
{
Expand Down Expand Up @@ -899,17 +896,18 @@ void push_arithmetic_predicate::builder(s3select* self, const char* a, const cha
void push_logical_predicate::builder(s3select* self, const char* a, const char* b) const
{
std::string token(a, b);
base_statement* tl = nullptr;
base_statement* tr = nullptr;

base_statement* tl = nullptr, *tr = nullptr;
logical_operand::oplog_t oplog = self->getAction()->logical_compareQ.back();
self->getAction()->logical_compareQ.pop_back();

if (self->getAction()->condQ.empty() == false)
if (!self->getAction()->condQ.empty())
{
tr = self->getAction()->condQ.back();
self->getAction()->condQ.pop_back();
}
if (self->getAction()->condQ.empty() == false)
if (!self->getAction()->condQ.empty())
{
tl = self->getAction()->condQ.back();
self->getAction()->condQ.pop_back();
Expand All @@ -923,9 +921,9 @@ void push_logical_predicate::builder(s3select* self, const char* a, const char*
void push_negation::builder(s3select* self, const char* a, const char* b) const
{
std::string token(a, b);
base_statement* pred = nullptr;
base_statement* pred{nullptr};

if (self->getAction()->condQ.empty() == false)
if (!self->getAction()->condQ.empty())
{
pred = self->getAction()->condQ.back();
self->getAction()->condQ.pop_back();
Expand Down Expand Up @@ -985,7 +983,7 @@ void push_alias_projection::builder(s3select* self, const char* a, const char* b

//mapping alias name to base-statement
bool res = self->getAction()->alias_map.insert_new_entry(alias_name, bs);
if (res == false)
if (!res)
{
throw base_s3select_exception(std::string("alias <") + alias_name + std::string("> is already been used in query"), base_s3select_exception::s3select_exp_en_t::FATAL);
}
Expand Down Expand Up @@ -1181,16 +1179,16 @@ void push_data_type::builder(s3select* self, const char* a, const char* b) const

if(cast_operator("int"))
{
self->getAction()->dataTypeQ.push_back("int");
self->getAction()->dataTypeQ.emplace_back("int");

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok, it performs better.
should note, those functions (AST builder) used for building the AST and are not intensive.

}else if(cast_operator("float"))
{
self->getAction()->dataTypeQ.push_back("float");
self->getAction()->dataTypeQ.emplace_back("float");
}else if(cast_operator("string"))
{
self->getAction()->dataTypeQ.push_back("string");
self->getAction()->dataTypeQ.emplace_back("string");
}else if(cast_operator("timestamp"))
{
self->getAction()->dataTypeQ.push_back("timestamp");
self->getAction()->dataTypeQ.emplace_back("timestamp");
}
}

Expand Down Expand Up @@ -1311,7 +1309,6 @@ class base_s3object

protected:
scratch_area* m_sa;
std::string m_obj_name;

public:
explicit base_s3object(scratch_area* m) : m_sa(m){}
Expand Down Expand Up @@ -1468,10 +1465,10 @@ class csv_object : public base_s3object

int getMatchRow( std::string& result) //TODO virtual ? getResult
{
int number_of_tokens = 0;
int number_of_tokens;


if (m_aggr_flow == true)
if (m_aggr_flow)
{
do
{
Expand Down Expand Up @@ -1503,7 +1500,7 @@ class csv_object : public base_s3object
}

if (!m_where_clause || m_where_clause->eval().i64() == true)
for (auto i : m_projections)
for (auto& i : m_projections)
{
i->eval();
}
Expand Down Expand Up @@ -1546,15 +1543,15 @@ class csv_object : public base_s3object
int extract_csv_header_info()
{

if (m_csv_defintion.ignore_header_info == true)
if (m_csv_defintion.ignore_header_info)
{
while(*m_stream && (*m_stream != m_csv_defintion.row_delimiter ))
{
m_stream++;
}
m_stream++;
}
else if(m_csv_defintion.use_header_info == true)
else if(m_csv_defintion.use_header_info)
{
size_t num_of_tokens = getNextRow();//TODO validate number of tokens

Expand Down Expand Up @@ -1632,11 +1629,10 @@ class csv_object : public base_s3object
p_obj_chunk--; //scan until end-of previous line in chunk
}

u_int32_t skip_last_bytes = (&(csv_stream[stream_length - 1]) - p_obj_chunk);
int32_t skip_last_bytes = (&(csv_stream[stream_length - 1]) - p_obj_chunk);
m_last_line.assign(p_obj_chunk + 1, p_obj_chunk + 1 + skip_last_bytes); //save it for next chunk

m_previous_line = true;//it means to skip last line

}

return run_s3select_on_object(result, csv_stream, stream_length, m_skip_first_line, m_previous_line, (m_processed_bytes >= obj_size));
Expand All @@ -1646,14 +1642,12 @@ class csv_object : public base_s3object
public:
int run_s3select_on_object(std::string& result, const char* csv_stream, size_t stream_length, bool skip_first_line, bool skip_last_line, bool do_aggregate)
{


m_stream = (char*)csv_stream;
m_end_stream = (char*)csv_stream + stream_length;
m_is_to_aggregate = do_aggregate;
m_skip_last_line = skip_last_line;

if(m_extract_csv_header_info == false)
if(!m_extract_csv_header_info)
{
extract_csv_header_info();
}
Expand Down Expand Up @@ -1698,6 +1692,6 @@ class csv_object : public base_s3object
}
};

};//namespace
} //namespace

#endif
Loading