-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpripel.py
64 lines (54 loc) · 2.66 KB
/
pripel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import datetime
import sys
from pm4py.objects.log.exporter.xes import exporter as xes_exporter
from pm4py.objects.log.importer.xes import importer as xes_import_factory
from attributeAnonymizier import AttributeAnonymizer as AttributeAnonymizer
from trace_variant_query import privatize_tracevariants
from tracematcher import TraceMatcher as TraceMatcher
def freq(lst):
    """Count how many times each element of ``lst`` occurs.

    Iterates ``lst`` once and returns a plain dict mapping every distinct
    element to its number of occurrences. Keys appear in the order in which
    the elements were first seen. Elements must be hashable.
    """
    counts = {}
    for item in lst:
        # A missing key counts as zero, so the first sighting and every
        # repeat go through the same update.
        counts[item] = counts.get(item, 0) + 1
    return counts
# Command-line driver. Reads an XES event log, runs the trace-variant query,
# the trace matcher and the attribute anonymizer in that order, exports the
# resulting log next to the input file and prints timing information.
#
# Usage: python pripel.py <log_path> <epsilon> <N> <k>
#   log_path  path of the XES log to read
#   epsilon   float, passed to both the trace-variant query and the attribute
#             anonymizer (presumably the privacy budget -- confirm there)
#   N         int, passed to the trace-variant query (meaning defined there)
#   k         int, pruning parameter of the trace-variant query
if len(sys.argv) < 5:
    # Exit with a readable usage line instead of a bare IndexError.
    raise SystemExit("Usage: " + sys.argv[0] + " <log_path> <epsilon> <N> <k>")

log_path = sys.argv[1]
epsilon = float(sys.argv[2])
N = int(sys.argv[3])
k = int(sys.argv[4])

new_ending = "_epsilon_" + str(epsilon) + "_k" + str(k) + "_anonymized.xes"
# Swap only a trailing ".xes" for the new ending. str.replace() would rewrite
# every ".xes" occurrence in the path (directory names included) and, when the
# path contains no ".xes" at all, hand back the input path unchanged -- the
# export at the end would then overwrite the original log.
if log_path.endswith(".xes"):
    result_log_path = log_path[:-len(".xes")] + new_ending
else:
    result_log_path = log_path + new_ending

starttime = datetime.datetime.now()
log = xes_import_factory.apply(log_path)

# Step 1: trace-variant query.
starttime_tv_query = datetime.datetime.now()
tv_query_log = privatize_tracevariants(log, epsilon, k, N)
if len(tv_query_log) == 0:
    raise ValueError(
        "Pruning parameter k is too high. The result of the trace variant query is empty. At least k traces must appear "
        "in a noisy variant count to be part of the result of the query.")
endtime_tv_query = datetime.datetime.now()
print("Time of TV Query: " + str((endtime_tv_query - starttime_tv_query)))

# Step 2: match the query result against the original log.
starttime_trace_matcher = datetime.datetime.now()
traceMatcher = TraceMatcher(tv_query_log, log)
matchedLog = traceMatcher.matchQueryToLog()
print(len(matchedLog))
endtime_trace_matcher = datetime.datetime.now()
print("Time of TraceMatcher: " + str((endtime_trace_matcher - starttime_trace_matcher)))

# Inputs for the attribute anonymizer, collected by the trace matcher.
distributionOfAttributes = traceMatcher.getAttributeDistribution()
occurredTimestamps, occurredTimestampDifferences = traceMatcher.getTimeStampData()
print(min(occurredTimestamps))

# Step 3: anonymize the attributes of the matched log.
starttime_attribute_anonymizer = datetime.datetime.now()
attributeAnonymizer = AttributeAnonymizer()
anonymizedLog, attributeDistribution = attributeAnonymizer.anonymize(
    matchedLog, distributionOfAttributes, epsilon,
    occurredTimestampDifferences, occurredTimestamps)
endtime_attribute_anonymizer = datetime.datetime.now()
print("Time of attribute anonymizer: " + str(endtime_attribute_anonymizer - starttime_attribute_anonymizer))

xes_exporter.apply(anonymizedLog, result_log_path)
endtime = datetime.datetime.now()

# Final summary: total run time, the three step timings again, the output
# path, and the element counts of the anonymizer's second return value.
print("Complete Time: " + str((endtime - starttime)))
print("Time of TV Query: " + str((endtime_tv_query - starttime_tv_query)))
print("Time of TraceMatcher: " + str((endtime_trace_matcher - starttime_trace_matcher)))
print("Time of attribute anonymizer: " + str(endtime_attribute_anonymizer - starttime_attribute_anonymizer))
print(result_log_path)
print(freq(attributeDistribution))