diff --git a/test/queries_generator/aws_queries.txt b/test/queries_generator/aws_queries.txt new file mode 100644 index 00000000..c3249158 --- /dev/null +++ b/test/queries_generator/aws_queries.txt @@ -0,0 +1,10 @@ +select lower(lower(' %%AbCdEfGhIjKlMnOpQrStUvWxYz## ')) from s3object; +select to_timestamp('1999-11-04T20:27:03.479340Z') from s3object; +select date_add(day, 8, to_timestamp('1982-02-03T00:20:12.42831Z')) from s3object; +select substring(upper(' %%AbCdEfGhIjKlMnOpQrStUvWxYz## '), cast((avg(cast(_3 as int)-cast(_2 as int)) - 3) as int), min(cast(_1 as int)-cast(_1 as int)) + 7) from s3object; +select 10-cast(_2 as int) from s3object; +select character_length(to_string(to_timestamp('1963-06-22T05:35:39.954350Z'), 'SSSSS HH ')) from s3object; +select cast(_2 as int)-2 from s3object; +select char_length(lower(' %%AbCdEfGhIjKlMnOpQrStUvWxYz## ')) from s3object; +select to_string(date_add(second, 5, to_timestamp('1938-09-24T10:57:42.142042Z')), 'hh ') from s3object; +select to_timestamp('1931-12-23T02:54:25.451925Z') from s3object; diff --git a/test/queries_generator/aws_results/output1.csv b/test/queries_generator/aws_results/output1.csv new file mode 100644 index 00000000..2da8a1fc --- /dev/null +++ b/test/queries_generator/aws_results/output1.csv @@ -0,0 +1,10 @@ + %%abcdefghijklmnopqrstuvwxyz## + %%abcdefghijklmnopqrstuvwxyz## + %%abcdefghijklmnopqrstuvwxyz## + %%abcdefghijklmnopqrstuvwxyz## + %%abcdefghijklmnopqrstuvwxyz## + %%abcdefghijklmnopqrstuvwxyz## + %%abcdefghijklmnopqrstuvwxyz## + %%abcdefghijklmnopqrstuvwxyz## + %%abcdefghijklmnopqrstuvwxyz## + %%abcdefghijklmnopqrstuvwxyz## diff --git a/test/queries_generator/aws_results/output10.csv b/test/queries_generator/aws_results/output10.csv new file mode 100644 index 00000000..b0a6a8a9 --- /dev/null +++ b/test/queries_generator/aws_results/output10.csv @@ -0,0 +1,10 @@ +1931-12-23T02:54:25.451925Z +1931-12-23T02:54:25.451925Z +1931-12-23T02:54:25.451925Z +1931-12-23T02:54:25.451925Z +1931-12-23T02:54:25.451925Z +1931-12-23T02:54:25.451925Z +1931-12-23T02:54:25.451925Z +1931-12-23T02:54:25.451925Z +1931-12-23T02:54:25.451925Z +1931-12-23T02:54:25.451925Z diff --git a/test/queries_generator/aws_results/output2.csv b/test/queries_generator/aws_results/output2.csv new file mode 100644 index 00000000..ceb48596 --- /dev/null +++ b/test/queries_generator/aws_results/output2.csv @@ -0,0 +1,10 @@ +1999-11-04T20:27:03.479340Z +1999-11-04T20:27:03.479340Z +1999-11-04T20:27:03.479340Z +1999-11-04T20:27:03.479340Z +1999-11-04T20:27:03.479340Z +1999-11-04T20:27:03.479340Z +1999-11-04T20:27:03.479340Z +1999-11-04T20:27:03.479340Z +1999-11-04T20:27:03.479340Z +1999-11-04T20:27:03.479340Z diff --git a/test/queries_generator/aws_results/output3.csv b/test/queries_generator/aws_results/output3.csv new file mode 100644 index 00000000..ce1a7527 --- /dev/null +++ b/test/queries_generator/aws_results/output3.csv @@ -0,0 +1,10 @@ +1982-02-11T00:20:12.42831Z +1982-02-11T00:20:12.42831Z +1982-02-11T00:20:12.42831Z +1982-02-11T00:20:12.42831Z +1982-02-11T00:20:12.42831Z +1982-02-11T00:20:12.42831Z +1982-02-11T00:20:12.42831Z +1982-02-11T00:20:12.42831Z +1982-02-11T00:20:12.42831Z +1982-02-11T00:20:12.42831Z diff --git a/test/queries_generator/aws_results/output4.csv b/test/queries_generator/aws_results/output4.csv new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/test/queries_generator/aws_results/output4.csv @@ -0,0 +1 @@ + diff --git a/test/queries_generator/aws_results/output5.csv b/test/queries_generator/aws_results/output5.csv new file mode 100644 index 00000000..72c01e00 --- /dev/null +++ b/test/queries_generator/aws_results/output5.csv @@ -0,0 +1,10 @@ +-42916 +-21159 +-35571 +-38378 +-42792 +-45572 +-8538 +-22623 +-38429 +-6601 diff --git a/test/queries_generator/aws_results/output6.csv b/test/queries_generator/aws_results/output6.csv new file mode 100644 index 00000000..f9e7242e --- /dev/null +++ b/test/queries_generator/aws_results/output6.csv @@ -0,0 +1,10 @@ +9 +9 +9 +9 +9 +9 +9 +9 +9 +9 diff --git a/test/queries_generator/aws_results/output7.csv b/test/queries_generator/aws_results/output7.csv new file mode 100644 index 00000000..738f6b83 --- /dev/null +++ b/test/queries_generator/aws_results/output7.csv @@ -0,0 +1,10 @@ +42924 +21167 +35579 +38386 +42800 +45580 +8546 +22631 +38437 +6609 diff --git a/test/queries_generator/aws_results/output8.csv b/test/queries_generator/aws_results/output8.csv new file mode 100644 index 00000000..6d3ca06f --- /dev/null +++ b/test/queries_generator/aws_results/output8.csv @@ -0,0 +1,10 @@ +34 +34 +34 +34 +34 +34 +34 +34 +34 +34 diff --git a/test/queries_generator/aws_results/output9.csv b/test/queries_generator/aws_results/output9.csv new file mode 100644 index 00000000..786a90cc --- /dev/null +++ b/test/queries_generator/aws_results/output9.csv @@ -0,0 +1,10 @@ +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 diff --git a/test/queries_generator/generate_aws_cmds.cpp b/test/queries_generator/generate_aws_cmds.cpp new file mode 100644 index 00000000..b2cb69d0 --- /dev/null +++ b/test/queries_generator/generate_aws_cmds.cpp @@ -0,0 +1,26 @@ +#include +#include + +using namespace std; + +int main() +{ + fstream query_file, cmd_file; + query_file.open("aws_queries.txt", ios::in); + cmd_file.open("aws_cmds.sh", ios::out); + cmd_file << "#!/bin/sh\nset -x\nset -e\n\n"; + cmd_file << "mkdir -p aws_results\n"; + string bucket, csv_file, query, aws_cmd; + cout << "Enter bucket name: "; + cin >> bucket; + cout << "Enter file name: "; + cin >> csv_file; + for(int i = 1; getline(query_file, query); i++) + { + aws_cmd = "aws s3api select-object-content --bucket " + bucket + " --key " + csv_file + " --expression-type \'SQL\' --input-serialization \'{\"CSV\": {}, \"CompressionType\": \"NONE\"}\' --output-serialization \'{\"CSV\": {}}\' --profile openshift-dev --expression \"" + query + "\" \"aws_results/output" + to_string(i) + ".csv\""; + cmd_file << aws_cmd << endl; + } + cmd_file.close(); + query_file.close(); + return 0; +} diff --git a/test/queries_generator/queries.txt b/test/queries_generator/queries.txt new file mode 100644 index 00000000..123b124a --- /dev/null +++ b/test/queries_generator/queries.txt @@ -0,0 +1,10 @@ +select lower(lower(' %%AbCdEfGhIjKlMnOpQrStUvWxYz## ')) from stdin; +select to_timestamp('1999-11-04T20:27:03.479340Z') from stdin; +select date_add(day, int(8), to_timestamp('1982-02-03T00:20:12.42831Z')) from stdin; +select substring(upper(' %%AbCdEfGhIjKlMnOpQrStUvWxYz## '), int(avg(int(_3)-int(_2)) - int(3)), min(int(_1)-int(_1)) + int(7)) from stdin; +select int(10)-int(_2) from stdin; +select character_length(to_string(to_timestamp('1963-06-22T05:35:39.954350Z'), 'SSSSS HH ')) from stdin; +select int(_2)-int(2) from stdin; +select char_length(lower(' %%AbCdEfGhIjKlMnOpQrStUvWxYz## ')) from stdin; +select to_string(date_add(second, int(5), to_timestamp('1938-09-24T10:57:42.142042Z')), 'hh ') from stdin; +select to_timestamp('1931-12-23T02:54:25.451925Z') from stdin; diff --git a/test/queries_generator/queries_generator.cpp b/test/queries_generator/queries_generator.cpp new file mode 100644 index 00000000..0e57ff24 --- /dev/null +++ b/test/queries_generator/queries_generator.cpp @@ -0,0 +1,325 @@ +#include +#include +#include +#include +#define NUM_COLUMN 3 + +using namespace std; + +enum Return_type { INTEGER = 0, + STRING = 1, + TIMESTAMP = 2, + MIX_COL_NUM = 3, + COLUMN = 4, + NUMBER = 5}; + +auto random_arth_op = [](){std::string op="+-*/";return op[rand()%op.size()];}; + +auto random_compare_op = []() +{vector op={">", "<", ">=", "<=", "==", "!="}; + return op[ rand() % op.size() ]; +}; + +auto random_date_part = []() +{vector op={"year", "month", "day", "hour", "minute", "second"}; + return op[ rand() % op.size() ]; +}; + +/*auto random_date_part_extract = []() +{vector op={"year", "month", "day", "hour", "minute", "second", + "timezone_hour", "timezone_minute"}; + return op[ rand() % op.size() ]; +};*/ + +string random_timestamp_string(string& aws_expr) +{ + auto year = [](){return rand()%100 + 1900;}; + auto month = [](){return 1 + rand()%12;}; + auto day = [](){return 1 + rand()%28;}; + auto hours = [](){return rand()%24;}; + auto minutes = [](){return rand()%60;}; + auto seconds = [](){return rand()%60;}; + auto fraction_sec = [](){return rand()%1000000;}; + stringstream timestamp_str; + + timestamp_str << year() << "-" << std::setw(2) << std::setfill('0') << month() << "-" << std::setw(2) << std::setfill('0') << day() << "T" < op={"yyyyy ", "yyyy ", "yyy ", "yy ", "y ", "MMMMM ", "MMMM ", "MMM ", "MM ", "M ", "dd ", "d ", "a ", "hh ", "h ", "HH ", "H ", "mm ", "m ", "ss ", "s ", "SSSSSSSSS ", "SSSSSS ", "SSSSS ", "SSS ", "SS ", "S ", "n ", ": ", "- ", " "}; + return op[ rand() % op.size() ]; + }; + int loop = rand() % 10; + string frmt; + while(loop) + { + frmt += random_format(); + loop--; + } + return frmt; +} + +string random_col(string& aws_expr) +{ + int num = 1 + (rand() % NUM_COLUMN); + aws_expr = "cast(_" + to_string(num) + " as int)"; + return "int(_" + to_string(num) + ")"; +} + +string random_number(string& aws_expr) +{ + int num = rand() % 10 + 1; + aws_expr = to_string(num); + return "int(" + to_string(num) + ")"; +} + +string random_num_expr(int depth, string& aws_expr) +{ + string aws_expr1, aws_expr2, ceph_expr, op; + if (depth == 0) + { + ceph_expr = random_number(aws_expr1); + aws_expr = aws_expr1; + return ceph_expr; + } + op = random_arth_op(); + ceph_expr = random_num_expr(depth-1, aws_expr1) + op + + random_num_expr(depth-1, aws_expr2); + aws_expr = aws_expr1 + op + aws_expr2; + return ceph_expr; +} + +string random_num_col_expr(int depth, string& aws_expr) +{ + string aws_expr1, aws_expr2, ceph_expr, op; + if (depth == 0) + { + if ((rand() % 2) == 0) + { + ceph_expr = random_col(aws_expr1); + aws_expr = aws_expr1; + return ceph_expr; + } + else + { + ceph_expr = random_number(aws_expr1); + aws_expr = aws_expr1; + return ceph_expr; + } + } + op = random_arth_op(); + ceph_expr = random_num_col_expr(depth-1, aws_expr1) + op + + random_num_col_expr(depth-1, aws_expr2); + aws_expr = aws_expr1 + op + aws_expr2; + return ceph_expr; +} + +string random_query_expr(int depth, string& input_str, int type, string& aws_expr) +{ + string ceph_expr; + if (depth == 0) + { + switch (type) + { + case INTEGER: + ceph_expr = random_number(aws_expr); + break; + case STRING: + ceph_expr = "\'" + input_str + "\'"; + aws_expr = "\'" + input_str + "\'"; + break; + case MIX_COL_NUM: + ceph_expr = random_num_col_expr(depth, aws_expr); + break; + case TIMESTAMP: + ceph_expr = "to_timestamp(\'" + random_timestamp_string(aws_expr) + "\')"; + aws_expr = "to_timestamp(\'" + aws_expr + "\')"; + break; + } + return ceph_expr; + } + + int option; + if (type == INTEGER) //return type is int + { + string ceph_col, aws_col, aws_expr1, aws_expr2, op1, op2; + switch (option = rand() % 9) + { + case 0: + ceph_col = random_col(aws_col); + op1 = random_arth_op(); + op2 = random_arth_op(); + ceph_expr = "int(avg(" + ceph_col + op1 + random_num_col_expr(depth-1, aws_expr1) + + ") " + op2 + " " + random_num_expr(depth-1, aws_expr2) + ")"; + aws_expr = "cast((avg(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2 + + ") as int)"; + break; + case 1: + ceph_col = random_col(aws_col); + op1 = random_arth_op(); + op2 = random_arth_op(); + ceph_expr = "count(" + ceph_col + op1 + random_num_col_expr(depth-1, aws_expr1) + + ") " + op2 + " " + random_num_expr(depth-1, aws_expr2); + aws_expr = "count(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2; + break; + case 2: + ceph_col = random_col(aws_col); + op1 = random_arth_op(); + op2 = random_arth_op(); + ceph_expr = "max(" + ceph_col + op1 + random_num_col_expr(depth-1,aws_expr1) + ") " + + op2 + " " + random_num_expr(depth-1, aws_expr2); + aws_expr = "max(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2; + break; + case 3: + ceph_col = random_col(aws_col); + op1 = random_arth_op(); + op2 = random_arth_op(); + ceph_expr = "min(" + ceph_col + op1 + random_num_col_expr(depth-1, aws_expr1) + ") " + + op2 + " " + random_num_expr(depth-1, aws_expr2); + aws_expr = "min(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2; + break; + case 4: + ceph_col = random_col(aws_col); + op1 = random_arth_op(); + op2 = random_arth_op(); + ceph_expr = "sum(" + ceph_col + op1 + random_num_col_expr(depth-1, aws_expr1) + + ") " + op2 + " " + random_num_expr(depth-1, aws_expr2); + aws_expr = "sum(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2; + break; + case 5: + ceph_expr = "char_length(" + random_query_expr(depth-1, input_str, STRING, + aws_expr1) + ")"; + aws_expr = "char_length(" + aws_expr1 + ")"; + break; + case 6: + ceph_expr = "character_length(" + random_query_expr(depth-1, input_str, STRING, + aws_expr1) + ")"; + aws_expr = "character_length(" + aws_expr1 + ")"; + break; + case 7: + op1 = random_date_part(); + ceph_expr = "extract(" + op1 + " from " + random_query_expr(depth-1, input_str, + TIMESTAMP, aws_expr1) + ")"; + aws_expr = "extract(" + op1 + " from " + aws_expr1 + ")"; + break; + case 8: + op1 = random_date_part(); + ceph_expr = "date_diff(" + op1 + ", " + random_query_expr(depth-1, input_str, + TIMESTAMP, aws_expr1) + ", " + random_query_expr(depth-1, input_str, + TIMESTAMP, aws_expr2) + ")"; + aws_expr = "date_diff(" + op1 + ", " + aws_expr1 + ", " + aws_expr2 + ")"; + break; + } + } + else if (type == STRING) // return type is string + { + string aws_expr1, aws_expr2, aws_expr3; + switch (option = rand() % 4) + { + case 0: + ceph_expr = "lower(" + random_query_expr(depth-1, input_str, STRING, aws_expr1) + + ")"; + aws_expr = "lower(" + aws_expr1 + ")"; + break; + case 1: + ceph_expr = "upper(" + random_query_expr(depth-1, input_str, STRING, aws_expr1) + + ")"; + aws_expr = "upper(" + aws_expr1 + ")"; + break; + case 2: + ceph_expr = "substring(" + random_query_expr(depth-1, input_str, STRING, aws_expr1) + + ", " + random_query_expr(depth-1, input_str, INTEGER, aws_expr2) + ", " + + random_query_expr(depth-1, input_str, INTEGER, aws_expr3) + ")"; + aws_expr = "substring(" + aws_expr1 + ", " + aws_expr2 + ", " + aws_expr3 + ")"; + break; + case 3: + aws_expr2 = random_tm_format_string(); + ceph_expr = "to_string(" + random_query_expr(depth-1, input_str, TIMESTAMP, aws_expr1) + + ", \'" + aws_expr2 + "\')"; + aws_expr = "to_string(" + aws_expr1 + ", \'" + aws_expr2 + "\')"; + break; + } + } + else if (type == TIMESTAMP) // return type is TIMESTAMP + { + string aws_expr1, aws_expr2, date_part; + switch (option = rand() % 2) + { + case 0: + date_part = random_date_part(); + ceph_expr = "date_add(" + date_part + ", " + random_number(aws_expr1) + ", " + + random_query_expr(depth-1, input_str, TIMESTAMP, aws_expr2) + ")"; + aws_expr = "date_add(" + date_part + ", " + aws_expr1 + ", " + aws_expr2 + ")"; + break; + case 1: + ceph_expr = "to_timestamp(\'" + random_timestamp_string(aws_expr1) + "\')"; + aws_expr = "to_timestamp(\'" + aws_expr1 + "\')"; + break; + } + } + else if (type == MIX_COL_NUM) + { + ceph_expr = random_num_col_expr(depth-1, aws_expr); + } + else if (type == COLUMN) // return type integer column number + { + ceph_expr = random_col(aws_expr); + } + else if (type == NUMBER) // return type randon number + { + ceph_expr = random_number(aws_expr); + } + else + { + aws_expr = "error"; + ceph_expr = "error"; + } + return ceph_expr; +} + +int main() +{ + srand(time(0)); + int reps, depth; + fstream query_file, aws_query_file; + query_file.open("queries.txt", ios::out); + aws_query_file.open("aws_queries.txt", ios::out); + string input_str = " %%AbCdEfGhIjKlMnOpQrStUvWxYz## "; + cout << "Enter number of quries to be generated: "; + cin >> reps; + cout << "Enter depth of queries to be generated: "; + cin >> depth; + if(query_file.is_open() && aws_query_file.is_open()) //checking whether the file is open + { + while (reps) + { + string aws_expr; + int type; + string ceph_query = "select "; + string aws_query = "select "; + /*int projection = rand() % 4; + while (projection > 1) + { + type = rand() % 4; + ceph_query = ceph_query + random_query_expr(depth, input_str, + type, aws_expr) + ", "; + aws_query = aws_query + aws_expr + ", "; + projection--; + }*/ + type = rand() % 4; + ceph_query = ceph_query + random_query_expr(depth, input_str, type, + aws_expr)+ " from stdin;"; + aws_query = aws_query + aws_expr + " from s3object;"; + query_file << ceph_query << endl; + aws_query_file << aws_query <