Skip to content

Commit

Permalink
Add option to normalize only utility statements
Browse files Browse the repository at this point in the history
  • Loading branch information
seanlinsley committed Aug 29, 2024
1 parent d49cc8e commit 43bad3c
Show file tree
Hide file tree
Showing 7 changed files with 148 additions and 9 deletions.
7 changes: 6 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ examples/normalize_error: examples/normalize_error.c $(ARLIB)
examples/simple_plpgsql: examples/simple_plpgsql.c $(ARLIB)
$(CC) $(TEST_CFLAGS) -o $@ -g examples/simple_plpgsql.c $(ARLIB) $(TEST_LDFLAGS)

TESTS = test/complex test/concurrency test/deparse test/fingerprint test/fingerprint_opts test/normalize test/parse test/parse_opts test/parse_protobuf test/parse_protobuf_opts test/parse_plpgsql test/scan test/split
TESTS = test/complex test/concurrency test/deparse test/fingerprint test/fingerprint_opts test/normalize test/normalize_utility test/parse test/parse_opts test/parse_protobuf test/parse_protobuf_opts test/parse_plpgsql test/scan test/split
test: $(TESTS)
ifeq ($(VALGRIND),1)
$(VALGRIND_MEMCHECK) test/complex || (cat test/valgrind.log && false)
Expand All @@ -239,6 +239,7 @@ ifeq ($(VALGRIND),1)
$(VALGRIND_MEMCHECK) test/fingerprint || (cat test/valgrind.log && false)
$(VALGRIND_MEMCHECK) test/fingerprint_opts || (cat test/valgrind.log && false)
$(VALGRIND_MEMCHECK) test/normalize || (cat test/valgrind.log && false)
$(VALGRIND_MEMCHECK) test/normalize_utility || (cat test/valgrind.log && false)
$(VALGRIND_MEMCHECK) test/parse || (cat test/valgrind.log && false)
$(VALGRIND_MEMCHECK) test/parse_opts || (cat test/valgrind.log && false)
$(VALGRIND_MEMCHECK) test/parse_protobuf || (cat test/valgrind.log && false)
Expand All @@ -255,6 +256,7 @@ else
test/fingerprint
test/fingerprint_opts
test/normalize
test/normalize_utility
test/parse
test/parse_opts
test/parse_protobuf
Expand Down Expand Up @@ -287,6 +289,9 @@ test/fingerprint_opts: test/fingerprint_opts.c test/fingerprint_opts_tests.c $(A
test/normalize: test/normalize.c test/normalize_tests.c $(ARLIB)
$(CC) $(TEST_CFLAGS) -o $@ test/normalize.c $(ARLIB) $(TEST_LDFLAGS)

test/normalize_utility: test/normalize_utility.c test/normalize_utility_tests.c $(ARLIB)
$(CC) $(TEST_CFLAGS) -o $@ test/normalize_utility.c $(ARLIB) $(TEST_LDFLAGS)

test/parse: test/parse.c test/parse_tests.c $(ARLIB)
$(CC) $(TEST_CFLAGS) -o $@ test/parse.c $(ARLIB) $(TEST_LDFLAGS)

Expand Down
1 change: 1 addition & 0 deletions pg_query.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ extern "C" {
#endif

PgQueryNormalizeResult pg_query_normalize(const char* input);
PgQueryNormalizeResult pg_query_normalize_utility(const char* input);
PgQueryScanResult pg_query_scan(const char* input);
PgQueryParseResult pg_query_parse(const char* input);
PgQueryParseResult pg_query_parse_opts(const char* input, int parser_options);
Expand Down
43 changes: 42 additions & 1 deletion src/pg_query_normalize.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ typedef struct pgssConstLocations
int *param_refs;
int param_refs_buf_size;
int param_refs_count;

/* Should only utility statements be normalized? Set by pg_query_normalize_utility */
bool normalize_utility_only;
} pgssConstLocations;

/*
Expand Down Expand Up @@ -398,8 +401,10 @@ static bool const_record_walker(Node *node, pgssConstLocations *jstate)
case T_RawStmt:
return const_record_walker((Node *) ((RawStmt *) node)->stmt, jstate);
case T_VariableSetStmt:
if (jstate->normalize_utility_only) return false;
return const_record_walker((Node *) ((VariableSetStmt *) node)->args, jstate);
case T_CopyStmt:
if (jstate->normalize_utility_only) return false;
return const_record_walker((Node *) ((CopyStmt *) node)->query, jstate);
case T_ExplainStmt:
return const_record_walker((Node *) ((ExplainStmt *) node)->query, jstate);
Expand All @@ -408,10 +413,13 @@ static bool const_record_walker(Node *node, pgssConstLocations *jstate)
case T_AlterRoleStmt:
return const_record_walker((Node *) ((AlterRoleStmt *) node)->options, jstate);
case T_DeclareCursorStmt:
if (jstate->normalize_utility_only) return false;
return const_record_walker((Node *) ((DeclareCursorStmt *) node)->query, jstate);
case T_CreateFunctionStmt:
if (jstate->normalize_utility_only) return false;
return const_record_walker((Node *) ((CreateFunctionStmt *) node)->options, jstate);
case T_DoStmt:
if (jstate->normalize_utility_only) return false;
return const_record_walker((Node *) ((DoStmt *) node)->args, jstate);
case T_CreateSubscriptionStmt:
record_matching_string(jstate, ((CreateSubscriptionStmt *) node)->conninfo);
Expand All @@ -428,6 +436,7 @@ static bool const_record_walker(Node *node, pgssConstLocations *jstate)
return false;
case T_SelectStmt:
{
if (jstate->normalize_utility_only) return false;
SelectStmt *stmt = (SelectStmt *) node;
ListCell *lc;
List *fp_and_param_refs_list = NIL;
Expand Down Expand Up @@ -540,6 +549,26 @@ static bool const_record_walker(Node *node, pgssConstLocations *jstate)

return false;
}
case T_MergeStmt:
{
if (jstate->normalize_utility_only) return false;
return raw_expression_tree_walker(node, const_record_walker, (void*) jstate);
}
case T_InsertStmt:
{
if (jstate->normalize_utility_only) return false;
return raw_expression_tree_walker(node, const_record_walker, (void*) jstate);
}
case T_UpdateStmt:
{
if (jstate->normalize_utility_only) return false;
return raw_expression_tree_walker(node, const_record_walker, (void*) jstate);
}
case T_DeleteStmt:
{
if (jstate->normalize_utility_only) return false;
return raw_expression_tree_walker(node, const_record_walker, (void*) jstate);
}
default:
{
PG_TRY();
Expand All @@ -558,7 +587,7 @@ static bool const_record_walker(Node *node, pgssConstLocations *jstate)
return false;
}

PgQueryNormalizeResult pg_query_normalize(const char* input)
PgQueryNormalizeResult pg_query_normalize_ext(const char* input, bool normalize_utility_only)
{
MemoryContext ctx = NULL;
PgQueryNormalizeResult result = {0};
Expand Down Expand Up @@ -588,6 +617,7 @@ PgQueryNormalizeResult pg_query_normalize(const char* input)
jstate.param_refs = NULL;
jstate.param_refs_buf_size = 0;
jstate.param_refs_count = 0;
jstate.normalize_utility_only = normalize_utility_only;

/* Walk tree and record const locations */
const_record_walker((Node *) tree, &jstate);
Expand Down Expand Up @@ -621,6 +651,17 @@ PgQueryNormalizeResult pg_query_normalize(const char* input)
return result;
}

/*
 * Normalize the given query text without restricting normalization to
 * utility statements.
 *
 * Thin wrapper around pg_query_normalize_ext() with normalize_utility_only
 * disabled. The result is released with pg_query_free_normalize_result().
 */
PgQueryNormalizeResult pg_query_normalize(const char* input)
{
	PgQueryNormalizeResult result = pg_query_normalize_ext(input, false);

	return result;
}


/*
 * Normalize only the utility statements found in the given query text.
 *
 * Thin wrapper around pg_query_normalize_ext() with normalize_utility_only
 * enabled. The result is released with pg_query_free_normalize_result().
 */
PgQueryNormalizeResult pg_query_normalize_utility(const char* input)
{
	PgQueryNormalizeResult result = pg_query_normalize_ext(input, true);

	return result;
}

void pg_query_free_normalize_result(PgQueryNormalizeResult result)
{
if (result.error) {
Expand Down
8 changes: 4 additions & 4 deletions test/normalize.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ int main() {
for (i = 0; i < testsLength; i += 2) {
PgQueryNormalizeResult result = pg_query_normalize(tests[i]);

if (result.error) {
ret_code = -1;
printf("%s\n", result.error->message);
} else if (strcmp(result.normalized_query, tests[i + 1]) == 0) {
if (result.error) {
ret_code = -1;
printf("%s\n", result.error->message);
} else if (strcmp(result.normalized_query, tests[i + 1]) == 0) {
printf(".");
} else {
ret_code = -1;
Expand Down
9 changes: 6 additions & 3 deletions test/normalize_tests.c
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
// NOTE: make sure to keep this in sync with normalize_utility_tests.c
const char* tests[] = {
"SELECT 1",
"SELECT $1",
"SELECT $1, 1",
"SELECT $1, $2",
"SELECT $1, 1",
"SELECT $1, $2",
"CREATE ROLE postgres PASSWORD 'xyz'",
"CREATE ROLE postgres PASSWORD $1",
"CREATE ROLE postgres ENCRYPTED PASSWORD 'xyz'",
Expand Down Expand Up @@ -45,6 +46,8 @@ const char* tests[] = {
"FETCH 1000 FROM cursor_a",
"CLOSE cursor_a",
"CLOSE cursor_a",
"SELECT 1; ALTER USER a WITH PASSWORD 'b'",
"SELECT $1; ALTER USER a WITH PASSWORD $2",
};

size_t testsLength = __LINE__ - 6;
size_t testsLength = __LINE__ - 7;
35 changes: 35 additions & 0 deletions test/normalize_utility.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#include <pg_query.h>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>

#include "normalize_utility_tests.c"

/*
 * Test driver for pg_query_normalize_utility().
 *
 * `tests` holds consecutive (input, expected output) pairs. Each input is
 * normalized and compared against its expected string: a "." is printed for
 * a match, otherwise the error message or an "INVALID result" report is
 * printed and the run is marked as failed.
 *
 * Returns 0 when every pair matched, -1 otherwise.
 */
int main(void) {
  size_t i;
  /* Must be an int: a bool would turn the -1 failure code into 1 (true). */
  int ret_code = 0;

  /* Step by two: tests[i] is the input, tests[i + 1] the expected output. */
  for (i = 0; i < testsLength; i += 2) {
    PgQueryNormalizeResult result = pg_query_normalize_utility(tests[i]);

    if (result.error) {
      ret_code = -1;
      printf("%s\n", result.error->message);
    } else if (strcmp(result.normalized_query, tests[i + 1]) == 0) {
      printf(".");
    } else {
      ret_code = -1;
      printf("INVALID result for \"%s\"\nexpected: %s\nactual: %s\n", tests[i], tests[i + 1], result.normalized_query);
    }

    pg_query_free_normalize_result(result);
  }

  printf("\n");

  pg_query_exit();

  return ret_code;
}
54 changes: 54 additions & 0 deletions test/normalize_utility_tests.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// This contains all the same tests as normalize_tests.c, but only expecting that
// utility statements are normalized.
// Consecutive (input, expected output) pairs: tests[i] is the query text and
// tests[i + 1] is the string the test driver expects back for it.
const char* tests[] = {
  "SELECT 1",
  "SELECT 1",
  "SELECT $1, 1",
  "SELECT $1, 1",
  "CREATE ROLE postgres PASSWORD 'xyz'",
  "CREATE ROLE postgres PASSWORD $1",
  "CREATE ROLE postgres ENCRYPTED PASSWORD 'xyz'",
  "CREATE ROLE postgres ENCRYPTED PASSWORD $1",
  "ALTER ROLE foo WITH PASSWORD 'bar' VALID UNTIL 'infinity'",
  "ALTER ROLE foo WITH PASSWORD $1 VALID UNTIL $2",
  "ALTER ROLE postgres LOGIN SUPERUSER ENCRYPTED PASSWORD 'xyz'",
  "ALTER ROLE postgres LOGIN SUPERUSER ENCRYPTED PASSWORD $1",
  "SELECT a, SUM(b) FROM tbl WHERE c = 'foo' GROUP BY 1, 'bar' ORDER BY 1, 'cafe'",
  "SELECT a, SUM(b) FROM tbl WHERE c = 'foo' GROUP BY 1, 'bar' ORDER BY 1, 'cafe'",
  "select date_trunc($1, created_at at time zone $2), count(*) from users group by date_trunc('day', created_at at time zone 'US/Pacific')",
  "select date_trunc($1, created_at at time zone $2), count(*) from users group by date_trunc('day', created_at at time zone 'US/Pacific')",
  "select count(1), date_trunc('day', created_at at time zone 'US/Pacific'), 'something', 'somethingelse' from users group by date_trunc('day', created_at at time zone 'US/Pacific'), date_trunc('day', created_at), 'foobar', 'abcdef'",
  "select count(1), date_trunc('day', created_at at time zone 'US/Pacific'), 'something', 'somethingelse' from users group by date_trunc('day', created_at at time zone 'US/Pacific'), date_trunc('day', created_at), 'foobar', 'abcdef'",
  "SELECT CAST('abc' as varchar(50))",
  "SELECT CAST('abc' as varchar(50))",
  "CREATE OR REPLACE FUNCTION pg_temp.testfunc(OUT response \"mytable\", OUT sequelize_caught_exception text) RETURNS RECORD AS $func_12345$ BEGIN INSERT INTO \"mytable\" (\"mycolumn\") VALUES ('myvalue') RETURNING * INTO response; EXCEPTION WHEN unique_violation THEN GET STACKED DIAGNOSTICS sequelize_caught_exception = PG_EXCEPTION_DETAIL; END $func_12345$ LANGUAGE plpgsql; SELECT (testfunc.response).\"mycolumn\", testfunc.sequelize_caught_exception FROM pg_temp.testfunc(); DROP FUNCTION IF EXISTS pg_temp.testfunc();",
  "CREATE OR REPLACE FUNCTION pg_temp.testfunc(OUT response \"mytable\", OUT sequelize_caught_exception text) RETURNS RECORD AS $func_12345$ BEGIN INSERT INTO \"mytable\" (\"mycolumn\") VALUES ('myvalue') RETURNING * INTO response; EXCEPTION WHEN unique_violation THEN GET STACKED DIAGNOSTICS sequelize_caught_exception = PG_EXCEPTION_DETAIL; END $func_12345$ LANGUAGE plpgsql; SELECT (testfunc.response).\"mycolumn\", testfunc.sequelize_caught_exception FROM pg_temp.testfunc(); DROP FUNCTION IF EXISTS pg_temp.testfunc();",
  "CREATE PROCEDURE insert_data(a integer, b integer) LANGUAGE SQL AS $$ INSERT INTO tbl VALUES (a); INSERT INTO tbl VALUES (b); $$",
  "CREATE PROCEDURE insert_data(a integer, b integer) LANGUAGE SQL AS $$ INSERT INTO tbl VALUES (a); INSERT INTO tbl VALUES (b); $$",
  "DO $$DECLARE r record; BEGIN FOR r IN SELECT table_schema, table_name FROM information_schema.tables WHERE table_type = 'VIEW' AND table_schema = 'public' LOOP EXECUTE 'GRANT ALL ON ' || quote_ident(r.table_schema) || '.' || quote_ident(r.table_name) || ' TO webuser'; END LOOP; END$$",
  "DO $$DECLARE r record; BEGIN FOR r IN SELECT table_schema, table_name FROM information_schema.tables WHERE table_type = 'VIEW' AND table_schema = 'public' LOOP EXECUTE 'GRANT ALL ON ' || quote_ident(r.table_schema) || '.' || quote_ident(r.table_name) || ' TO webuser'; END LOOP; END$$",
  "CREATE SUBSCRIPTION mysub CONNECTION 'host=192.168.1.50 port=5432 user=foo dbname=foodb' PUBLICATION mypublication, insert_only",
  "CREATE SUBSCRIPTION mysub CONNECTION $1 PUBLICATION mypublication, insert_only",
  "ALTER SUBSCRIPTION mysub SET PUBLICATION insert_only",
  "ALTER SUBSCRIPTION mysub SET PUBLICATION insert_only",
  "ALTER SUBSCRIPTION mysub CONNECTION 'host=192.168.1.50 port=5432 user=foo dbname=foodb'",
  "ALTER SUBSCRIPTION mysub CONNECTION $1",
  "CREATE USER MAPPING FOR bob SERVER foo OPTIONS (user 'bob', password 'secret')",
  "CREATE USER MAPPING FOR bob SERVER foo OPTIONS (user $1, password $2)",
  "ALTER USER MAPPING FOR bob SERVER foo OPTIONS (SET password 'public')",
  "ALTER USER MAPPING FOR bob SERVER foo OPTIONS (SET password $1)",
  "MERGE into measurement m USING new_measurement nm ON (m.city_id = nm.city_id and m.logdate=nm.logdate) WHEN MATCHED AND nm.peaktemp IS NULL THEN DELETE WHEN MATCHED THEN UPDATE SET peaktemp = greatest(m.peaktemp, nm.peaktemp), unitsales = m.unitsales + coalesce(nm.unitsales, 0) WHEN NOT MATCHED THEN INSERT (city_id, logdate, peaktemp, unitsales) VALUES (city_id, logdate, peaktemp, unitsales)",
  "MERGE into measurement m USING new_measurement nm ON (m.city_id = nm.city_id and m.logdate=nm.logdate) WHEN MATCHED AND nm.peaktemp IS NULL THEN DELETE WHEN MATCHED THEN UPDATE SET peaktemp = greatest(m.peaktemp, nm.peaktemp), unitsales = m.unitsales + coalesce(nm.unitsales, 0) WHEN NOT MATCHED THEN INSERT (city_id, logdate, peaktemp, unitsales) VALUES (city_id, logdate, peaktemp, unitsales)",
  // These below are as expected, though questionable if upstream shouldn't be
  // fixed as this could bloat pg_stat_statements
  "DECLARE cursor_b CURSOR FOR SELECT * FROM x WHERE id = 123",
  "DECLARE cursor_b CURSOR FOR SELECT * FROM x WHERE id = 123",
  "FETCH 1000 FROM cursor_a",
  "FETCH 1000 FROM cursor_a",
  "CLOSE cursor_a",
  "CLOSE cursor_a",
  "SELECT 1; ALTER USER a WITH PASSWORD 'b'",
  "SELECT 1; ALTER USER a WITH PASSWORD $1",
};

// Number of strings in `tests` (two per test case). Computed from the array
// itself rather than from __LINE__, so adding or removing comment or blank
// lines in this file cannot change how many tests are run.
size_t testsLength = sizeof(tests) / sizeof(tests[0]);

0 comments on commit 43bad3c

Please sign in to comment.