-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcloseAndMaxSetMining.py
55 lines (47 loc) · 1.55 KB
/
closeAndMaxSetMining.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from hashTreeRun import aprioriAlgorithmWithHashing as freqItemset
import basicOperation
# check if largeSet is immediate superset of smallSet
def isImmediateSuperSet(smallSet, largeSet):
    """Return True if largeSet contains smallSet plus exactly one more item.

    Args:
        smallSet: the candidate subset (a set, or any sized iterable
            accepted by ``set.issuperset``).
        largeSet: the candidate superset; must be a set or frozenset.

    Returns:
        True when largeSet is a superset of smallSet and has exactly one
        more element; False otherwise.
    """
    # Compare sizes first: it is cheap and rules out most pairs before the
    # element-by-element superset test runs.
    return (
        len(largeSet) - len(smallSet) == 1
        and largeSet.issuperset(smallSet)
    )
# Script: mine the frequent itemsets of the dataset, then report which of them
# are closed (no immediate frequent superset with the same count) and which
# are maximal (no immediate frequent superset at all).
data = basicOperation.loadDatabase('a1dataset.txt')
minsup = 400

# get frequent itemsets from the external library
freqItemSet = freqItemset(data, minsup)

# count the occurrence of each frequent itemset, keyed by its items as a tuple
dataWithFreq = {
    tuple(itemSet): basicOperation.count(itemSet, data)
    for itemSet in freqItemSet
}

# Build each itemset's set form once and bucket it by size. An immediate
# superset has exactly one more item, so only the next-larger bucket ever
# needs to be scanned (this also makes a self-comparison impossible).
itemSetsBySize = {}
for items, support in dataWithFreq.items():
    asSet = set(items)
    itemSetsBySize.setdefault(len(asSet), []).append((asSet, support))

# find closed and max itemsets
closedItemSet = []
maxItemSet = []

# go through every frequent itemset and check if it is closed or max
# NOTE(review): only immediate supersets are examined; this presumably relies
# on freqItemSet containing every frequent itemset -- confirm against the
# external Apriori implementation.
for items, freq in dataWithFreq.items():
    targetSet = set(items)
    isClosedFreqItemSet = True
    isMaxSet = True

    # compare the itemset to every itemset that is one item larger
    candidates = itemSetsBySize.get(len(targetSet) + 1, ())
    for comparingSet, comparingfreq in candidates:
        if isImmediateSuperSet(targetSet, comparingSet):
            # any frequent immediate superset rules out "maximal"
            isMaxSet = False
            if comparingfreq == freq:
                # a superset with the same count rules out "closed";
                # both flags are now False, so stop scanning
                isClosedFreqItemSet = False
                break

    if isClosedFreqItemSet:
        closedItemSet.append(list(targetSet))
    if isMaxSet:
        maxItemSet.append(list(targetSet))

print("\nclosedItemSet is")
print(closedItemSet)
print(len(closedItemSet))
print("\nmaxSet is")
print(maxItemSet)
print(len(maxItemSet))