2
2
from . import constants as c
3
3
from . import table_distance as tab_dist
4
4
from . import format as f
5
- from . import distance_calc as distance_calc
6
5
import re
7
6
from . import result_log as log
8
7
9
8
10
9
def extract_tables (ref , query ):
11
10
ref_tab_name : list [str ] = []
12
11
query_tab_name : list [str ] = []
13
-
12
+
14
13
ref_alias_map : dict [str , str ] = {}
15
14
query_alias_map : dict [str , str ] = {}
16
-
15
+
17
16
ref_join_list : list [str ] = []
18
17
query_join_list : list [str ] = []
19
-
18
+
20
19
ref_comp_list : list [str ] = []
21
20
query_comp_list : list [str ] = []
22
-
21
+
23
22
ref_order_list : list [str ] = []
24
23
query_order_list : list [str ] = []
25
-
24
+
26
25
ref_group_list : list [str ] = []
27
26
query_group_list : list [str ] = []
28
-
27
+
29
28
ref_having_list : list [str ] = []
30
29
query_having_list : list [str ] = []
31
- _token_iteration (ref , ref_alias_map , ref_tab_name , ref_join_list ,
30
+ _token_iteration (ref , ref_alias_map , ref_tab_name , ref_join_list ,
32
31
ref_comp_list , ref_order_list , ref_group_list ,
33
32
ref_having_list )
34
- _token_iteration (query , query_alias_map , query_tab_name ,
33
+ _token_iteration (query , query_alias_map , query_tab_name ,
35
34
query_join_list , query_comp_list , query_order_list ,
36
35
query_group_list , query_having_list )
37
36
@@ -46,7 +45,7 @@ def extract_tables(ref, query):
46
45
log .write_to_log (f"order by attributes: reference: { ref_order_list } ; query: { query_order_list } \n " )
47
46
48
47
comparison_distance = tab_dist .comparison_distance (ref_comp_list , query_comp_list )
49
-
48
+
50
49
order_distance = tab_dist .group_and_order_by_distance (ref_order_list , query_order_list )
51
50
52
51
group_by_distance = tab_dist .group_and_order_by_distance (ref_group_list , query_group_list )
@@ -67,30 +66,31 @@ def _token_iteration(tokens: sqlparse.sql.Statement, tab_map: dict, name_list: l
67
66
_token_iteration (token ,tab_map ,name_list ,join_list ,comp_list ,order_list ,group_list ,having_list )
68
67
# check and extract tables used after the FROM keyword
69
68
if token .ttype == sqlparse .tokens .Keyword and token .value == c .FROM :
70
- extractedFrom = _extract_from (tokens , i , tab_map , name_list )
71
- for fromToken in extractedFrom :
69
+ extracted_from = _extract_from (tokens , i , tab_map , name_list )
70
+ for from_token in extracted_from :
72
71
#check if FROM contains subquery
73
- if isinstance (fromToken ,sqlparse .sql .Parenthesis ):
72
+ if isinstance (from_token ,sqlparse .sql .Parenthesis ):
74
73
#call the method recursively to iterate through the tokens of the subquery
75
- _token_iteration (fromToken .tokens , tab_map , name_list , join_list , comp_list , order_list , group_list , having_list )
74
+ _token_iteration (from_token .tokens , tab_map , name_list , join_list , comp_list , order_list , group_list , having_list )
76
75
# check and extract the JOIN keywords and tables used after it
77
76
if token .ttype == sqlparse .tokens .Keyword and token .value in c .JOIN_TYPES :
78
- extractedJoin = _extract_join (token , tokens , i , tab_map , name_list , join_list )
77
+ extracted_join = _extract_join (token , tokens , i , tab_map , name_list , join_list )
79
78
#check if JOIN contains subquery
80
- for subquery in extractedJoin :
79
+ for subquery in extracted_join :
81
80
if isinstance (subquery ,sqlparse .sql .Parenthesis ):
82
81
#call the method recursively to iterate through the tokens of the subquery
83
82
_token_iteration (subquery .tokens , tab_map , name_list , join_list , comp_list , order_list , group_list , having_list )
84
83
85
84
# check and extract the comparison equations after ON condition
86
85
if token .ttype == sqlparse .tokens .Keyword and token .value == c .ON :
87
- extractedOn = _extract_on (tokens , i , comp_list )
88
- for onToken in extractedOn :
89
- _token_iteration (onToken , tab_map , name_list , join_list , comp_list , order_list , group_list ,having_list )
86
+ extracted_on = _extract_on (tokens , i , comp_list )
87
+ if (extracted_on != None ):
88
+ for onToken in extracted_on :
89
+ _token_iteration (onToken , tab_map , name_list , join_list , comp_list , order_list , group_list ,having_list )
90
90
# check and extract the WHERE keyword and comparison equations after it
91
91
if isinstance (token , sqlparse .sql .Where ):
92
- extractedWhere = _extract_where (token , comp_list , join_list )
93
- for whereToken in extractedWhere :
92
+ extracted_where = _extract_where (token , comp_list , join_list )
93
+ for whereToken in extracted_where :
94
94
if isinstance (whereToken , sqlparse .sql .Parenthesis ):
95
95
_token_iteration (whereToken .tokens , tab_map , name_list , join_list , comp_list , order_list ,group_list , having_list )
96
96
# check and extract attributes and iterate through group by clause
@@ -101,22 +101,30 @@ def _token_iteration(tokens: sqlparse.sql.Statement, tab_map: dict, name_list: l
101
101
_extract_order_by (tokens , i , order_list )
102
102
103
103
104
def _search_for_subqueries(tokens):
    """Collect the parenthesised groups found directly in ``tokens``.

    Only the given sequence is scanned; children of its items are not
    descended into, and no check is made that a group actually holds a
    SELECT statement.

    :param tokens: iterable of sqlparse tokens.
    :return: list of the ``sqlparse.sql.Parenthesis`` items, in input order.
    """
    return [
        candidate
        for candidate in tokens
        if isinstance(candidate, sqlparse.sql.Parenthesis)
    ]
110
+
111
+
104
112
def _extract_from(tokens, i, tab_map, name_list):
    """Register the table(s) that follow a FROM keyword and collect subqueries.

    :param tokens: indexable token sequence that contains the FROM keyword.
    :param i: index of the FROM keyword inside ``tokens``.
    :param tab_map: forwarded unchanged to ``_extract_table_elements``.
    :param name_list: forwarded unchanged to ``_extract_table_elements``.
    :return: list of ``sqlparse.sql.Parenthesis`` tokens found directly inside
        the table identifier(s); empty when nothing usable follows FROM.
    """
    # +2 bypasses the token right after FROM, which is expected to be a
    # single whitespace token.
    # NOTE(review): assumes whitespace was normalised upstream -- confirm.
    target_index = i + 2
    if target_index >= len(tokens):
        # FROM is (nearly) the last token: there is no table to extract, so
        # report "no subqueries" instead of raising IndexError.
        return []
    next_token = tokens[target_index]

    # List for all subqueries
    subquery_list = []

    # if singular table used, append it to list
    if isinstance(next_token, sqlparse.sql.Identifier):
        _extract_table_elements(next_token, tab_map, name_list)
        subquery_list += _search_for_subqueries(next_token.tokens)

    # if multiple tables used, iterate through them and save them to list
    elif isinstance(next_token, sqlparse.sql.IdentifierList):
        for t in next_token.get_identifiers():
            _extract_table_elements(t, tab_map, name_list)
            subquery_list += _search_for_subqueries(t.tokens)

    # return list of subqueries
    return subquery_list
120
128
121
129
122
130
@@ -132,39 +140,64 @@ def _extract_on(tokens, i, comp_list):
132
140
if isinstance (next_token , sqlparse .sql .Comparison ):
133
141
# If it is a Comparison, format it to remove whitespaces
134
142
# The formatted comparison is appended to comp_list
135
- queryList = _search_for_subqueries (next_token .tokens )
143
+ query_list = _search_for_subqueries (next_token .tokens )
136
144
comp_list .append (f .format_like (f .format_whitespace (next_token .value )))
137
145
138
- #check if AND exists
139
- if ( len (tokens )> i + 4 ) :
146
+ #check if AND or OR exists
147
+ if len (tokens )> i + 4 :
140
148
next_token = tokens [i + 4 ]
141
- if (next_token .value == c .AND ):
142
- queryList += _extract_on (tokens , i + 4 ,comp_list )
143
- elif (next_token .value == c .OR ):
144
- queryList += _extract_on (tokens , i + 4 , comp_list )
145
- return queryList
149
+ if next_token .value == c .AND :
150
+ query_list += _extract_on (tokens , i + 4 ,comp_list )
151
+ elif next_token .value == c .OR :
152
+ query_list += _extract_on (tokens , i + 4 , comp_list )
153
+
154
+ for j , token in enumerate (tokens ):
155
+ if token .value == c .BETWEEN :
156
+ between_str = ""
157
+ for k in range (j - 2 , j + 7 ): # BETWEEN Bereich festlegen
158
+ between_str += tokens [k ].value
159
+ comp_list .append (between_str )
160
+
161
+ elif token .value in {c .IN , c .LIKE }:
162
+ in_like_str = ""
163
+ for k in range (j - 2 , j + 2 ): # IN und LIKE Bereich festlegen
164
+ in_like_str += tokens [k ].value
165
+ comp_list .append (in_like_str )
166
+ return query_list
167
+
146
168
147
169
148
170
def _extract_where(token, comp_list, join_list):
    """Extract the conditions of a WHERE clause and return its subqueries.

    Side effects: formatted comparisons, parenthesised groups and the raw
    text around BETWEEN / IN / LIKE keywords are appended to ``comp_list``;
    the value of the clause's first token is appended to ``join_list``.

    :param token: the ``sqlparse.sql.Where`` token to inspect.
    :param comp_list: list that receives the extracted condition strings.
    :param join_list: list that receives the clause keyword.
    :return: flat list of ``sqlparse.sql.Parenthesis`` tokens found directly
        inside the clause's comparisons, so the caller can recurse into them.
    """
    _extract_and_format_between(token, comp_list)
    where_list = []
    clause_tokens = token.tokens

    for i, t in enumerate(clause_tokens):
        # add comparison to the list if found
        if isinstance(t, sqlparse.sql.Comparison):
            comp_list.append(f.format_like(f.format_whitespace(t.value)))
            # extend, not append: the caller tests each returned element with
            # isinstance(..., Parenthesis), which a nested list never passes.
            where_list.extend(_search_for_subqueries(t.tokens))

        # save everything inside a parenthesis
        if isinstance(t, sqlparse.sql.Parenthesis):
            comp_list.append(f.format_like(f.format_parenthesis(t.value)))

        # BETWEEN: join the values of the tokens from two before the keyword
        # up to six after it. Slicing with a clamped start avoids IndexError
        # and negative-index wraparound near the clause boundaries.
        if t.value == c.BETWEEN:
            window = clause_tokens[max(i - 2, 0):i + 7]
            comp_list.append("".join(part.value for part in window))

        # IN / LIKE: same idea with a window ending one token after the keyword.
        if t.value == c.IN or t.value == c.LIKE:
            window = clause_tokens[max(i - 2, 0):i + 2]
            comp_list.append("".join(part.value for part in window))

    # append where keyword to the list of clauses MAYBE CHANGE IN DIFFERENT ARRAYS
    # NOTE(review): placed after the loop so it runs once per clause -- the
    # original indentation was not recoverable; confirm.
    join_list.append(token.token_first().value)

    return where_list
168
201
169
202
170
203
def _extract_group_by (tokens , i , group_list , having_list ):
@@ -175,8 +208,8 @@ def _extract_group_by(tokens, i, group_list, having_list):
175
208
if isinstance (t , sqlparse .sql .Token ):
176
209
# Check if the token is of a type that can be part of a GROUP BY clause
177
210
if isinstance (tokens [j ], (
178
- sqlparse .sql .IdentifierList , sqlparse .sql .Identifier ,
179
- sqlparse .sql .Operation , sqlparse .sql .Function ,
211
+ sqlparse .sql .IdentifierList , sqlparse .sql .Identifier ,
212
+ sqlparse .sql .Operation , sqlparse .sql .Function ,
180
213
sqlparse .sql .Parenthesis )):
181
214
# If so, extract the attributes from the token and add them to the group_list
182
215
_extract_group_by_attributes (tokens [j ], group_list )
@@ -262,7 +295,7 @@ def _extract_table_elements(token, tab_map, name_list: list):
262
295
def _extract_order_by_attributes (token , order_list : list ):
263
296
# Check if the token is of a type that can be part of an ORDER BY clause
264
297
if isinstance (token , (
265
- sqlparse .sql .Identifier , sqlparse .sql .Operation , sqlparse .sql .Function ,
298
+ sqlparse .sql .Identifier , sqlparse .sql .Operation , sqlparse .sql .Function ,
266
299
sqlparse .sql .Parenthesis , sqlparse .sql .Comparison )):
267
300
# Check if the token contains the DESC (descending order) keyword
268
301
if re .search (c .DESC , token .value ):
@@ -282,7 +315,7 @@ def _extract_order_by_attributes(token, order_list: list):
282
315
283
316
def _extract_group_by_attributes (token , group_list : list ):
284
317
# Check if the token is one of the types that can be part of a GROUP BY clause
285
- if isinstance (token , (sqlparse .sql .Identifier , sqlparse .sql .Operation ,
318
+ if isinstance (token , (sqlparse .sql .Identifier , sqlparse .sql .Operation ,
286
319
sqlparse .sql .Function , sqlparse .sql .Parenthesis )):
287
320
# If it is, format its value to remove excess whitespace and add it to the list
288
321
group_list .append (f .format_whitespace (token .value ))
@@ -310,12 +343,6 @@ def _extract_and_format_between(token, comp_list: list):
310
343
311
344
312
345
313
- def _search_for_subqueries (tokens ):
314
- subQueryList = []
315
- for curToken in tokens :
316
- if isinstance (curToken ,sqlparse .sql .Parenthesis ):
317
- subQueryList .append (curToken )
318
- return subQueryList
319
346
320
347
321
348
0 commit comments