-
Notifications
You must be signed in to change notification settings - Fork 12
/
hash_tree.py
173 lines (142 loc) · 4.27 KB
/
hash_tree.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
from timing_wrapper import timeit
class Node:
'''
Class that implements a leaf node in the hash tree.
'''
def __init__(self, k, max_leaf_size, depth):
'''
Initialize the leaf node.
Parameters
----------
k : int, optional
denominator of the hash function.
max_leaf_size : int, optional
maximum number of itemsets that can be present in a leaf node.
depth : int, optional
the depth at which the leaf node is present in the hash tree.
'''
self.max_leaf_size = max_leaf_size
self.depth=depth
self.children={}
self.k=0
self.isTree=False
def add(self, candidate):
'''
Initialize the leaf node.
Parameters
----------
candidate : list
the candidate that needs to be inserted.
'''
self.children[tuple(candidate)] = 0
class Tree:
'''
Class that implements the hash tree.
'''
def __init__(self, c_list, k=3, max_leaf_size=3, depth=0):
'''
Hash tree constructor. It initializes a new hash tree and inserts the items present in the
c_list into the hash tree.
Parameters
----------
c_list : list
list of itemsets to be inserted into the hash tree.
k : int, optional
denominator of the hash function.
max_leaf_size : int, optional
maximum number of itemsets that can be present in a leaf node.
depth : int, optional
the current depth at which the tree is present.
Usage
-----
>>> t=Tree(c_list=[[1,2], [2,3], [3,4]], k=3, max_leaf_size=3, depth=0)
The tree has been created and the itemsets [1,2], [2,3] and [3,4] have been innserted into the tree.
'''
self.depth=depth
self.children={}
self.k=k
self.max_leaf_size=max_leaf_size
self.isTree=True
self.c_length=len(c_list[0])
self.build_tree(c_list)
def update_tree(self):
'''
Function which splits the leaf node of the tree if it contains more elements than self.max_leaf_size.
'''
for child in self.children:
if len(self.children[child].children) > self.max_leaf_size:
if self.depth+1 < self.c_length: # Make sure that only fewer than k divisions are performed
child=Tree(list(self.children[child].children.keys()), k=self.k, max_leaf_size=self.max_leaf_size, depth=self.depth+1)
def build_tree(self, c_list):
'''
Function that builds the tree and inserts the itemsets into the tree.
Parameters
----------
c_list : list
list of itemsets to be inserted into the hash tree.
'''
for candidate in c_list:
if candidate[self.depth]%self.k not in self.children:
self.children[candidate[self.depth]%self.k]=Node(k=self.k, max_leaf_size=self.max_leaf_size, depth=self.depth)
self.children[candidate[self.depth]%self.k].add(candidate)
self.update_tree()
def check(self, candidate, update=False):
'''
Function to check if candidate is present in the hash tree and to update support counts of tree elements.
Parameters
----------
candidate : list
the candidate that needs to be checked.
update : bool, optional
If true, the count of candidate is incremented in the tree.
Returns
----------
int
Support count of the candidate.
'''
support=0
if candidate[self.depth]%self.k in self.children:
child = self.children[candidate[self.depth]%self.k]
if child.isTree:
support = child.check(candidate)
else:
if tuple(candidate) in list(child.children.keys()):
if update:
child.children[tuple(candidate)]+=1
return child.children[tuple(candidate)]
else:
return 0
return support
def generate_subsets(transaction, k):
'''
Function to recursively generate k subsets of the transaction.
Parameters
----------
transaction : list
the transaction whose subsets have to be generated.
k : int
number of items in each subset.
Returns
----------
list
List containing subsets of the transaction.
'''
res=[]
n = len(transaction)
transaction.sort()
def recurse(transaction, k, i=0, curr=[]):
'''
Recursion function used in subset generation
'''
if k==1:
for j in range(i,n):
res.append(curr + [transaction[j]])
return None
for j in range(i,n-k+1):
temp= curr+ [transaction[j]]
recurse(transaction, k-1, j+1, temp[:])
recurse(transaction, k)
return res
if __name__=='__main__':
temp_list=[[1,2,3],[2,3,4],[3,5,6],[4,5,6],[5,7,9],[7,8,9],[4,7,9]]
t=Tree(temp_list_1, k=3, max_leaf_size=3, depth=0)