Skip to content

Commit bae0fcd

Browse files
committed
Fixed lint problems and started implementing LIKE, BETWEEN, IN support
1 parent e4a10b7 commit bae0fcd

File tree

1 file changed

+76
-49
lines changed

1 file changed

+76
-49
lines changed

modules/fbs-sql-checker/api/distance/table_check.py

Lines changed: 76 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -2,36 +2,35 @@
22
from . import constants as c
33
from . import table_distance as tab_dist
44
from . import format as f
5-
from . import distance_calc as distance_calc
65
import re
76
from . import result_log as log
87

98

109
def extract_tables(ref, query):
1110
ref_tab_name: list[str] = []
1211
query_tab_name: list[str] = []
13-
12+
1413
ref_alias_map: dict[str, str] = {}
1514
query_alias_map: dict[str, str] = {}
16-
15+
1716
ref_join_list: list[str] = []
1817
query_join_list: list[str] = []
19-
18+
2019
ref_comp_list: list[str] = []
2120
query_comp_list: list[str] = []
22-
21+
2322
ref_order_list: list[str] = []
2423
query_order_list: list[str] = []
25-
24+
2625
ref_group_list: list[str] = []
2726
query_group_list: list[str] = []
28-
27+
2928
ref_having_list: list[str] = []
3029
query_having_list: list[str] = []
31-
_token_iteration(ref, ref_alias_map, ref_tab_name, ref_join_list,
30+
_token_iteration(ref, ref_alias_map, ref_tab_name, ref_join_list,
3231
ref_comp_list, ref_order_list, ref_group_list,
3332
ref_having_list)
34-
_token_iteration(query, query_alias_map, query_tab_name,
33+
_token_iteration(query, query_alias_map, query_tab_name,
3534
query_join_list, query_comp_list, query_order_list,
3635
query_group_list, query_having_list)
3736

@@ -46,7 +45,7 @@ def extract_tables(ref, query):
4645
log.write_to_log(f"order by attributes: reference: {ref_order_list}; query: {query_order_list}\n")
4746

4847
comparison_distance = tab_dist.comparison_distance(ref_comp_list, query_comp_list)
49-
48+
5049
order_distance = tab_dist.group_and_order_by_distance(ref_order_list, query_order_list)
5150

5251
group_by_distance = tab_dist.group_and_order_by_distance(ref_group_list, query_group_list)
@@ -67,30 +66,31 @@ def _token_iteration(tokens: sqlparse.sql.Statement, tab_map: dict, name_list: l
6766
_token_iteration(token,tab_map,name_list,join_list,comp_list,order_list,group_list,having_list)
6867
# check and extract tables used after the FROM keyword
6968
if token.ttype == sqlparse.tokens.Keyword and token.value == c.FROM:
70-
extractedFrom = _extract_from(tokens, i, tab_map, name_list)
71-
for fromToken in extractedFrom:
69+
extracted_from = _extract_from(tokens, i, tab_map, name_list)
70+
for from_token in extracted_from:
7271
#check if FROM contains subquery
73-
if isinstance(fromToken,sqlparse.sql.Parenthesis):
72+
if isinstance(from_token,sqlparse.sql.Parenthesis):
7473
#call the method recursively to iterate through the tokens of the subquery
75-
_token_iteration(fromToken.tokens, tab_map, name_list, join_list, comp_list, order_list, group_list, having_list)
74+
_token_iteration(from_token.tokens, tab_map, name_list, join_list, comp_list, order_list, group_list, having_list)
7675
# check and extract the JOIN keywords and tables used after it
7776
if token.ttype == sqlparse.tokens.Keyword and token.value in c.JOIN_TYPES:
78-
extractedJoin = _extract_join(token, tokens, i, tab_map, name_list, join_list)
77+
extracted_join = _extract_join(token, tokens, i, tab_map, name_list, join_list)
7978
#check if JOIN contains subquery
80-
for subquery in extractedJoin:
79+
for subquery in extracted_join:
8180
if isinstance(subquery,sqlparse.sql.Parenthesis):
8281
#call the method recursively to iterate through the tokens of the subquery
8382
_token_iteration(subquery.tokens, tab_map, name_list, join_list, comp_list, order_list, group_list, having_list)
8483

8584
# check and extract the comparison equations after ON condition
8685
if token.ttype == sqlparse.tokens.Keyword and token.value == c.ON:
87-
extractedOn = _extract_on(tokens, i, comp_list)
88-
for onToken in extractedOn:
89-
_token_iteration(onToken, tab_map, name_list, join_list, comp_list, order_list, group_list,having_list)
86+
extracted_on = _extract_on(tokens, i, comp_list)
87+
if(extracted_on != None):
88+
for onToken in extracted_on:
89+
_token_iteration(onToken, tab_map, name_list, join_list, comp_list, order_list, group_list,having_list)
9090
# check and extract the WHERE keyword and comparison equations after it
9191
if isinstance(token, sqlparse.sql.Where):
92-
extractedWhere = _extract_where(token, comp_list, join_list)
93-
for whereToken in extractedWhere:
92+
extracted_where = _extract_where(token, comp_list, join_list)
93+
for whereToken in extracted_where:
9494
if isinstance(whereToken, sqlparse.sql.Parenthesis):
9595
_token_iteration(whereToken.tokens, tab_map, name_list, join_list, comp_list, order_list,group_list, having_list)
9696
# check and extract attributes and iterate through group by clause
@@ -101,22 +101,30 @@ def _token_iteration(tokens: sqlparse.sql.Statement, tab_map: dict, name_list: l
101101
_extract_order_by(tokens, i, order_list)
102102

103103

104+
def _search_for_subqueries(tokens):
    """Return the parenthesised groups found directly inside ``tokens``.

    Only the given sequence itself is scanned; nested token groups are not
    descended into. The callers in this module treat every returned
    ``sqlparse.sql.Parenthesis`` as a potential subquery and iterate over
    its tokens recursively.

    Args:
        tokens: Iterable of sqlparse tokens (for example ``group.tokens``).

    Returns:
        A new list holding every element of ``tokens`` that is a
        ``sqlparse.sql.Parenthesis``, in original order. Empty if there are
        none.
    """
    return [
        cur_token
        for cur_token in tokens
        if isinstance(cur_token, sqlparse.sql.Parenthesis)
    ]
110+
111+
104112
def _extract_from(tokens, i, tab_map, name_list):
    """Register the tables that follow a FROM keyword.

    The token two positions after the FROM keyword is inspected (one
    position is skipped for the whitespace token in between). A single
    ``Identifier`` or every entry of an ``IdentifierList`` is handed to
    ``_extract_table_elements``, which fills ``tab_map`` and ``name_list``.

    Args:
        tokens: Indexable token sequence containing the FROM keyword.
        i: Index of the FROM keyword inside ``tokens``.
        tab_map: Mapping passed through to ``_extract_table_elements``.
        name_list: List passed through to ``_extract_table_elements``.

    Returns:
        List of ``sqlparse.sql.Parenthesis`` tokens found directly inside the
        inspected identifiers, so the caller can recurse into them as
        subqueries. Empty when nothing follows FROM or when the following
        token is neither an ``Identifier`` nor an ``IdentifierList``.
    """
    try:
        next_token = tokens[i + 2]  # +2 to bypass whitespace token
    except IndexError:
        # FROM is at the very end of the token sequence: nothing to extract.
        return []

    if isinstance(next_token, sqlparse.sql.Identifier):
        # singular table used
        from_items = [next_token]
    elif isinstance(next_token, sqlparse.sql.IdentifierList):
        # multiple tables used; materialise the generator before iterating,
        # exactly as the previous implementation did
        from_items = list(next_token.get_identifiers())
    else:
        from_items = []

    # list for all subqueries
    subquery_list = []
    for item in from_items:
        _extract_table_elements(item, tab_map, name_list)
        # get_identifiers() may also yield leaf tokens, which carry no
        # ``tokens`` attribute; those cannot contain a subquery.
        subquery_list += _search_for_subqueries(getattr(item, "tokens", ()))
    return subquery_list
120128

121129

122130

@@ -132,39 +140,64 @@ def _extract_on(tokens, i, comp_list):
132140
if isinstance(next_token, sqlparse.sql.Comparison):
133141
# If it is a Comparison, format it to remove whitespaces
134142
# The formatted comparison is appended to comp_list
135-
queryList = _search_for_subqueries(next_token.tokens)
143+
query_list = _search_for_subqueries(next_token.tokens)
136144
comp_list.append(f.format_like(f.format_whitespace(next_token.value)))
137145

138-
#check if AND exists
139-
if(len(tokens)>i+4):
146+
#check if AND or OR exists
147+
if len(tokens)>i+4:
140148
next_token = tokens[i+4]
141-
if(next_token.value == c.AND):
142-
queryList+= _extract_on(tokens, i+4,comp_list)
143-
elif (next_token.value == c.OR):
144-
queryList += _extract_on(tokens, i + 4, comp_list)
145-
return queryList
149+
if next_token.value == c.AND:
150+
query_list+= _extract_on(tokens, i+4,comp_list)
151+
elif next_token.value == c.OR:
152+
query_list += _extract_on(tokens, i + 4, comp_list)
153+
154+
for j, token in enumerate(tokens):
155+
if token.value == c.BETWEEN:
156+
between_str = ""
157+
for k in range(j - 2, j + 7): # BETWEEN Bereich festlegen
158+
between_str += tokens[k].value
159+
comp_list.append(between_str)
160+
161+
elif token.value in {c.IN, c.LIKE}:
162+
in_like_str = ""
163+
for k in range(j - 2, j + 2): # IN und LIKE Bereich festlegen
164+
in_like_str += tokens[k].value
165+
comp_list.append(in_like_str)
166+
return query_list
167+
146168

147169

148170
def _extract_where(token, comp_list, join_list):
    """Record the conditions of a WHERE clause.

    Args:
        token: The WHERE clause group; its direct child tokens are scanned.
        comp_list: Output list. Receives the formatted text of every
            comparison and parenthesis, plus the raw text around every
            BETWEEN, IN and LIKE keyword.
        join_list: Output list. Receives the value of the clause's first
            token (the WHERE keyword).

    Returns:
        Flat list of ``sqlparse.sql.Parenthesis`` tokens found directly
        inside the clause's comparisons, so the caller can recurse into them
        as subqueries.
    """
    # NOTE(review): this helper is still invoked as before; whether it records
    # the same BETWEEN expressions as the keyword scan below is not visible
    # here -- confirm that no duplicates end up in comp_list.
    _extract_and_format_between(token, comp_list)

    where_list = []
    clause_tokens = token.tokens
    for i, t in enumerate(clause_tokens):
        # add comparison to the list if found
        if isinstance(t, sqlparse.sql.Comparison):
            comp_list.append(f.format_like(f.format_whitespace(t.value)))
            # extend (not append) so the result stays a flat list of
            # Parenthesis tokens, which is what the caller type-checks for
            where_list.extend(_search_for_subqueries(t.tokens))
        # save everything inside a parenthesis
        if isinstance(t, sqlparse.sql.Parenthesis):
            comp_list.append(f.format_like(f.format_parenthesis(t.value)))
        if t.value == c.BETWEEN:
            # window: 2 tokens before the keyword up to 6 tokens after it
            # (presumably "<operand> BETWEEN <low> AND <high>" with single
            # whitespace tokens in between -- confirm). Slicing clamps at
            # both ends instead of wrapping around or raising IndexError.
            window = clause_tokens[max(i - 2, 0):i + 7]
            comp_list.append("".join(part.value for part in window))
        if t.value in (c.IN, c.LIKE):
            # window: 2 tokens before the keyword up to 1 token after it
            window = clause_tokens[max(i - 2, 0):i + 2]
            comp_list.append("".join(part.value for part in window))

    # append where keyword to the list of clauses MAYBE CHANGE IN DIFFERENT ARRAYS
    join_list.append(token.token_first().value)
    return where_list
168201

169202

170203
def _extract_group_by(tokens, i, group_list, having_list):
@@ -175,8 +208,8 @@ def _extract_group_by(tokens, i, group_list, having_list):
175208
if isinstance(t, sqlparse.sql.Token):
176209
# Check if the token is of a type that can be part of a GROUP BY clause
177210
if isinstance(tokens[j], (
178-
sqlparse.sql.IdentifierList, sqlparse.sql.Identifier,
179-
sqlparse.sql.Operation, sqlparse.sql.Function,
211+
sqlparse.sql.IdentifierList, sqlparse.sql.Identifier,
212+
sqlparse.sql.Operation, sqlparse.sql.Function,
180213
sqlparse.sql.Parenthesis)):
181214
# If so, extract the attributes from the token and add them to the group_list
182215
_extract_group_by_attributes(tokens[j], group_list)
@@ -262,7 +295,7 @@ def _extract_table_elements(token, tab_map, name_list: list):
262295
def _extract_order_by_attributes(token, order_list: list):
263296
# Check if the token is of a type that can be part of an ORDER BY clause
264297
if isinstance(token, (
265-
sqlparse.sql.Identifier, sqlparse.sql.Operation, sqlparse.sql.Function,
298+
sqlparse.sql.Identifier, sqlparse.sql.Operation, sqlparse.sql.Function,
266299
sqlparse.sql.Parenthesis, sqlparse.sql.Comparison)):
267300
# Check if the token contains the DESC (descending order) keyword
268301
if re.search(c.DESC, token.value):
@@ -282,7 +315,7 @@ def _extract_order_by_attributes(token, order_list: list):
282315

283316
def _extract_group_by_attributes(token, group_list: list):
284317
# Check if the token is one of the types that can be part of a GROUP BY clause
285-
if isinstance(token, (sqlparse.sql.Identifier, sqlparse.sql.Operation,
318+
if isinstance(token, (sqlparse.sql.Identifier, sqlparse.sql.Operation,
286319
sqlparse.sql.Function, sqlparse.sql.Parenthesis)):
287320
# If it is, format its value to remove excess whitespace and add it to the list
288321
group_list.append(f.format_whitespace(token.value))
@@ -310,12 +343,6 @@ def _extract_and_format_between(token, comp_list: list):
310343

311344

312345

313-
def _search_for_subqueries(tokens):
314-
subQueryList = []
315-
for curToken in tokens:
316-
if isinstance(curToken,sqlparse.sql.Parenthesis):
317-
subQueryList.append(curToken)
318-
return subQueryList
319346

320347

321348

0 commit comments

Comments
 (0)