-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path20171091.py
670 lines (499 loc) · 14.3 KB
/
20171091.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
import csv #to read csv files
import traceback #to print error statements
import sys #to take cl input
import sqlparse #to parse sql queries
def init():
"""
Load data in dictionary tinfo
"""
basedr='./files/'
with open(basedr+'metadata.txt') as f:
metadata=f.read().splitlines()
tinfo={}
newtable=False
name=""
for line in metadata:
if(line=="<begin_table>"):
newtable=True
elif(newtable==True):
newtable=False
name=line
tinfo[name]={}
elif(line!="<end_table>"):
tinfo[name][line]=[]
for tname in tinfo:
with open(basedr+tname+'.csv','r') as csvfile:
csvreader=csv.reader(csvfile)
for record in csvreader:
ind=0
for att in tinfo[tname]:
tinfo[tname][att].append(record[ind])
ind+=1
return tinfo
def checkdistinct(query):
"""
checks if distinct keyword present or not
"""
if(len(query)>1) and (query[1]=='distinct'):
return True
return False
def checkincorrect(query,distinct):
"""
Checks if error in sql syntax
"""
flag=True
if not(6>=len(query)>=4):
flag=False
if(query[0]!='select'):
flag=False
if(query[2+distinct]!='from'):
flag=False
if(4+distinct<len(query)) and (query[4+distinct][0:6]!='where '):
flag=False
if not flag:
print("Error: Incorrect SQL syntax")
exit(-1)
def gettables(query,distinct):
"""
Get all the tables involved in query
"""
try:
tables=query[3+distinct].split(",")
for i in range(len(tables)):
tables[i]=tables[i].strip()
return tables
except:
print("Error: From clause not parsed")
exit(-1)
def checktables(tables):
"""
Check if tables in query are in db or not
"""
tablelist=[]
for i in tinfo:
tablelist.append(i)
for i in tables:
if(i not in tablelist):
print("Error: Tables specified not present in database")
exit(-1)
def jointable(tables,tinfo):
"""
Joins all tables which are mentioned in from
"""
fields=[]#stores col name like tablename.attribute
fields2=[]#stores col name like attribute
aux2=[]#stores list after converting aux to a list
aux=tinfo[tables[0]]#cumulative store of joins
#find no of records in aux and append column header
noraux=0
for k in aux:
fields.append(str(tables[0])+'.'+str(k))
fields2.append(str(k))
noraux=len(aux[k])
#covert aux to a list
for l in range(noraux):
row3=[]
for m in aux.keys():
row3.append(aux[m][l])
aux2.append(row3)
#copy aux2 back to aux
aux=aux2
#return if only 1 table
if(len(tables)==1):
return [aux,fields,fields2]
#for multiple tables
for i in range(1,len(tables),1):
aux2=[]#empty aux2 after every iteration
a=tinfo[tables[i]]#next table
#find no of records in a
nora=0
for k in a:
nora=len(a[k])
fields.append(str(tables[i])+'.'+str(k))
fields2.append(str(k))
#perform join on aux and a
for l in aux:
for k in range(nora):
row=[]
for j in a.keys():
row.append(a[j][k])
aux2.append(l+row)
#copy aux2 back to aux
aux=aux2
return [aux,fields,fields2]
def getcond(query,distinct):
"""
Get all the conditions specified in where clause
"""
if(4+distinct<len(query)) and (query[4+distinct][0:6]=='where '):
a=query[4+distinct]
a=a[6:]#remove "where "
a=a.strip()#remove leading and trailing space and convert to list
#check for and/or in condition
oparr=[" AND "," and "," OR "," or "]
for op in oparr:
if(op in a):
a=a.replace(op,',')
a=a.split(',')
return [a[0:2],op.split()]
#if no and/or return cond after removing where
return [a.split(','),[]]
#if no where clause then return empty lists
return [[],[]]
def checkcond(cond,fh,fh2,tables):
"""
Check if conditions specified in where clause are correct or not
"""
oparr=["<=",">=","<",">","="]
opused=[]#stores op used in each cond
#check for condition syntax
for i in cond:
correct=False
#if any one op present in cond then correct
for j in oparr:
if(j in i):
correct=True
opused.append(j)
break
if not correct:
print("Error: Operator in where clause not specified")
exit(-1)
#check if attributes specified exist or not
for i in range(len(cond)):
cond[i]=cond[i].replace(opused[i],',')#replace op with comma
a=cond[i].split(',')#convert to list
#convert str(int) to int
for i in range(len(a)):
try:
a[i]=int(a[i])
except:
pass
#check in fh and fh2
for att in a:
if(isinstance(att,str)):
att=att.strip()
if (fh.count(att)!=1) and (fh2.count(att)!=1):
print("Error: Attributes not specified properly in where clause")
exit(-1)
else:
pass
return opused
def preprocessatt(att,fh,fh2):
"""
Converts attribute from att to tablename.att
"""
att=att.strip()
if att not in fh:
for i in fh:
i=i.split(".")[0]
i=str(i)+'.'+str(att)
if(i in fh):
return i
return att
def findindex(att,fh):
"""
Find index of att in fh
"""
for i in range(len(fh)):
if(att==fh[i]):
return i
def processatt(att,fh,fh2):
"""
Handles wether att is int or str
If att is num then return str(att)
Else return int
"""
if(isinstance(att,str)):
att=preprocessatt(att,fh,fh2)
att=findindex(att,fh)
else:
att=str(att)
return att
def returnindex(cond,fh,fh2):
"""
Returns index of attributes used in cond
"""
cond=cond.split(",")
for i in range(len(cond)):
try:
cond[i]=int(cond[i])
except:
pass
a1=processatt(cond[0],fh,fh2)
a2=processatt(cond[1],fh,fh2)
return[a1,a2]
def checkexpr(a1,a2,i,opused):
"""
Generates expression according to condition
"""
#both are attributes
if isinstance(a1,int) and isinstance(a2,int):
return str(i[a1])+str(opused)+str(i[a2])
#a2 is not an attribute
elif isinstance(a1,int) and isinstance(a2,str):
return str(i[a1])+str(opused)+a2
#a1 is not an attribute
elif isinstance(a1,str) and isinstance(a2,int):
return a1+str(opused)+str(i[a2])
#both are not attributes
else:
return a1+str(opused)+a2
def applycond(jt,fh,fh2,cond,opused,andor):
"""
remove records form joined table if they do not meet conditions
"""
#convert = to == for evaluating
for i in range(len(opused)):
if(opused[i]=="="):
opused[i]="=="
#if no where clause
if(len(cond)==0):
ans=[]
for i in jt:
ans.append(i)
return ans
#if no and/or in where clause
elif(len(andor)==0 and len(cond)!=0):
a1,a2=returnindex(cond[0],fh,fh2)
ans=[]
for i in jt:
temp=checkexpr(a1,a2,i,opused[0])
if eval(temp):
ans.append(i)#if cond sattisfied then append
return ans
#if and/or in where clause
elif(len(andor)==1):
#process and/or
andor=''.join(andor)
andor=andor.lower()
andor=' '+str(andor)+' '
a1,a2=returnindex(cond[0],fh,fh2)
b1,b2=returnindex(cond[1],fh,fh2)
ans=[]
for i in jt:
temp=checkexpr(a1,a2,i,opused[0])
temp2=checkexpr(b1,b2,i,opused[1])
temp3=str(temp)+str(andor)+str(temp2)
if eval(temp3):
ans.append(i)
return ans
else:
print("Error: Too many conditions")
exit(-1)
def selectpreprocess(s):
"""
Remove leading and trailing spaces from attributes
"""
s=s.split(',')
for i in range(len(s)):
s[i]=s[i].strip()
s=",".join(s)
return s
def checkagg(att,temp,distinct):
"""
Check if aggregate functions used or not
"""
agg=[]
aggarray=["max()","min()","sum()","avg()"]
if(len(att)>5):
a=att[0:4]
a+=att[-1]
if a in aggarray:
#if more than 1 col selected for projection
if(len(temp)!=1):
print("Error: Error in aggregate function")
exit(-1)
#remove aggregate from att name
att=att[4:]
att=att[:-1]
#check for distinct in att name
att=att.strip()
if(att[0:9]=="distinct "):
distinct=1
att=att[9:]
att=att.strip()
agg.append(a)
return [att,agg,distinct]
def checkselect(query,fh,fh2,distinct):
"""
Check if select statement is correct or not
"""
#if select *
if(query[1+distinct]=="*"):
return [["*"],[],distinct]
else:
temp=query[1+distinct].split(",")
for i in range(len(temp)):
att=temp[i]
att,agg,distinct=checkagg(att,temp,distinct)
temp[i]=att
if (fh.count(att)!=1) and (fh2.count(att)!=1):
print("Error: Attributes not specified properly in select clause")
exit(-1)
for i in range(len(temp)):
temp[i]=preprocessatt(temp[i],fh,fh2)
return [temp,agg,distinct]
def removeduplicate(ans):
"""
Removes dupliacte columns before printing
"""
if(len(ans)==0):
return [[],[]]
else:
#transpose
ans=list(zip(*ans))
#get unique
uniqans=[]
index=[]
for i in range(len(ans)):
if ans[i] not in uniqans:
uniqans.append(ans[i])
else:
index.append(i)
#transpose back
uniqans=list(zip(*uniqans))
#convert to list
for i in range(len(uniqans)):
uniqans[i]=list(uniqans[i])
return [uniqans,index]
def avg(x):
"""
Return avg of list elements
"""
return(sum(x)/len(x))
def niceprint(x,agg):
"""
Prints a 1D list in a good way
"""
cellwidth=10
if(agg==1):
cellwidth=20
#print row seperator
head=' '
for i in range(len(x)):
head+='-'*cellwidth
head+=' '
print(head)
#print each record
print('|',end='')
for i in x:
i=str(i)
i=i.center(cellwidth)#ljust,rjust,center
print(i,end='|')
#print newline after each record
print()
def printagg(agg,arr):
"""
Apply aggregate functin and then print
"""
for i in range(len(arr)):
arr[i]=int(arr[i][0])
exp=agg[0][:-1]
exp+=str(arr)+")"
niceprint([eval(exp)],1)
def printresult(fh,ans,distinct,cols,agg):
"""
Print query result
"""
if(cols==["*"]):
#remove duplicate columns
ans,index=removeduplicate(ans)
#remove col header of duplicate cols
for i in index:
fh.pop(i)
#print col names
niceprint(fh,0)
#check if distinct present
if(distinct==1):
uniqans=[]
for i in ans:
if i not in uniqans:
uniqans.append(i)
for i in uniqans:
niceprint(i,0)
else:
for i in ans:
niceprint(i,0)
else:
#get index of attributes present in query
index=[]
for att in cols:
index.append(findindex(att,fh))
#print col names
if(len(agg)==0):
temp=[]
for i in index:
temp.append(fh[i])
niceprint(temp,0)
else:
a=str(fh[index[0]])
b=agg[0]
b=b[0:4]+a+')'
niceprint([b],1)
#extract cols from joined table
ans2=[]
for record in ans:
tr=[]
for i in index:
tr.append(record[i])
ans2.append(tr)
if(distinct==1):
uniqans2=[]
for i in ans2:
if i not in uniqans2:
uniqans2.append(i)
if(len(agg)==0):
for i in uniqans2:
niceprint(i,0)
else:
printagg(agg,uniqans2)
else:
if(len(agg)==0):
for i in ans2:
niceprint(i,0)
else:
printagg(agg,ans2)
def processquery(q,tinfo):
"""
Process the given query
"""
#convert query to a list
temp=sqlparse.parse(q)[0].tokens
query=[]
for i in temp:
if(str(i)!=' '):
query.append(str(i))
#check if distinct present
distinct=checkdistinct(query)
distinct=int(distinct)
#check if incorrect
checkincorrect(query,distinct)
#process from clause
tables=gettables(query,distinct)
checktables(tables)
jt,fh,fh2 = jointable(tables,tinfo)
#process where clause
cond,andor=getcond(query,distinct)
opused=checkcond(cond,fh,fh2,tables)
ans=applycond(jt,fh,fh2,cond,opused,andor)
#process select clause
query[1+distinct]=selectpreprocess(query[1+distinct])
cols,agg,distinct=checkselect(query,fh,fh2,distinct)
#print
printresult(fh,ans,distinct,cols,agg)
if __name__ == "__main__":
#store tables in dictionary
tinfo=init()
#take command line inputs
if(len(sys.argv)!=2):
print("Error: Incorrect usage")
print("Usage: python3 20171091.py <sql-query>")
else:
try:
q=sys.argv[1]
if(len(q)==0):
print("Error: Empty query encountered")
exit(-1)
processquery(q,tinfo)
except Exception:
traceback.print_exc()