-
Notifications
You must be signed in to change notification settings - Fork 0
/
queries.py
156 lines (138 loc) · 4.39 KB
/
queries.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#!/usr/bin/env python3
import pprint
import collections
from Chinook_Python import *
def project(relation, columns):
Result = collections.namedtuple('Result', columns)
finalResult = []
projectedColumns= set()
for t in relation:
result = []
for c in columns:
result.append(getattr(t,c))
finalResult.append(result)
for f in finalResult:
projectedColumns.add(Result(*f))
print('Total number of rows returned from project:', len(projectedColumns))
if len(projectedColumns) ==0:
return 'No rows returned'
else:
return projectedColumns
def select(relation, predicate):
Result = collections.namedtuple('Result', next(iter(relation))._fields)
SelectResult = set()
Select = set()
for t in relation:
if predicate(t):
SelectResult.add(t)
for s in SelectResult:
Select.add(Result(*s))
print('Total number of rows returned from select:', len(Select))
return Select
def rename(relation, new_columns=None, new_relation=None):
finalResult = set()
projectedColumns= set()
Result = collections.namedtuple('relation', new_columns, rename=True)
for t in relation:
finalResult.add(t)
for f in finalResult:
projectedColumns.add(Result(*f))
return projectedColumns
def cross(relation1, relation2):
Result = collections.namedtuple('Result', next(iter(relation1))._fields + next(iter(relation2))._fields, rename = True)
CrossResult = set()
CrossJoin = set()
for t1 in relation1:
for t2 in relation2:
CrossResult.add(t1+t2)
for c in CrossResult:
CrossJoin.add(Result(*c))
print('Total number of rows returned from cross join:', len(CrossResult))
return CrossJoin
def theta_join(relation1, relation2, predicate):
Result = collections.namedtuple('Result', next(iter(relation1))._fields + next(iter(relation2))._fields, rename = True)
Thetajoin = set()
ThetaJoinResult = set()
for t1 in relation1:
for t2 in relation2:
if predicate(t1,t2):
Thetajoin.add(t1+t2)
for t in Thetajoin:
ThetaJoinResult.add(Result(*t))
print('Total number of rows returned from theta join:', len(ThetaJoinResult))
return ThetaJoinResult
def natural_join(relation1, relation2):
NaturaljoinResult = []
NaturalJoin = []
cols1 = set(next(iter(relation1))._fields)
cols2 = set(next(iter(relation2))._fields)
merge_cols = cols1 & cols2
diff = set(cols2 - merge_cols)
finalcols=cols1|cols2
Result = collections.namedtuple('Result',finalcols)
t1map = {k: i for i, k in enumerate(next(iter(relation1))._fields)}
t2map = {k: i for i, k in enumerate(next(iter(relation2))._fields)}
for t1 in relation1:
for t2 in relation2:
row=list()
if (all(t1[t1map[m]] == t2[t2map[m]] for m in merge_cols)):
for col in finalcols:
if col in cols1:
row.append(t1[t1map[col]])
if col in diff:
row.append(t2[t2map[col]])
NaturalJoin.append(row)
for n in NaturalJoin:
NaturaljoinResult.append(Result(*n))
print('Total number of rows returned from natural join:', len(NaturaljoinResult))
return NaturaljoinResult
pprint.pprint(
project(
select(
select(
cross(
Album,
rename(Artist, ['Id', 'Name'])
),
lambda t: t.ArtistId == t.Id
),
lambda t: t.Name == 'Red Hot Chili Peppers'
),
['Title']
),
)
pprint.pprint(
project(
select(
theta_join(
Album,
rename(Artist, ['Id', 'Name']),
lambda t1, t2: t1.ArtistId == t2.Id
),
lambda t: t.Name == 'Red Hot Chili Peppers'
),
['Title']
)
)
pprint.pprint(
project(
theta_join(
Album,
rename(
select(Artist, lambda t: t.Name == 'Red Hot Chili Peppers'),
['Id', 'Name']
),
lambda t1, t2: t1.ArtistId == t2.Id
),
['Title']
)
)
pprint.pprint(
project(
natural_join(
Album,
select(Artist, lambda t: t.Name == 'Red Hot Chili Peppers')
),
['Title']
)
)