-
Notifications
You must be signed in to change notification settings - Fork 0
/
occ.py
44 lines (37 loc) · 1.58 KB
/
occ.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
from movielens.process import process_set
from utility import read_mapping
from sklearn import metrics
from random import shuffle
from statistics import mean, stdev
def occ(user, near_count, train_count, test_count, tree_count):
    """Run one randomized trial for ``user`` and return its ROC AUC.

    Steps, in order:

    1. Load the ``users``, ``movies`` and ``near`` mappings from ``./process``.
    2. Keep the first ``near_count`` entries of ``near[user]``.
    3. Draw a training set and a test set with ``process_set``.
    4. Build ``tree_count`` independently shuffled copies of the kept entries.
    5. Score every test item by its mean distance (``distance_total``) to the
       training items whose second field is truthy.

    Args:
        user: Key into the ``near`` mapping; also forwarded to ``process_set``.
        near_count: How many leading entries of ``near[user]`` to keep.
        train_count: ``count`` forwarded to ``process_set`` for the training set.
        test_count: ``count`` forwarded to ``process_set`` for the test set.
        tree_count: Number of shuffled orderings to build.

    Returns:
        The value of ``metrics.roc_auc_score(targets, predictions)``.
    """
    users = read_mapping("./process/users.txt")
    movies = read_mapping("./process/movies.txt")
    neighbours = read_mapping("./process/near.txt")[user][:near_count]

    train = process_set(user=user, count=train_count, users=users, movies=movies, mechanism=None)
    test = process_set(user=user, count=test_count, users=users, movies=movies, mechanism=None)

    # Every "tree" is the same set of neighbours in its own random order.
    trees = []
    for _ in range(tree_count):
        ordering = list(neighbours)
        shuffle(ordering)
        trees.append(ordering)

    # Each sample is indexed as (item, label); only truthy-labelled training
    # items serve as reference points.
    positives = [sample[0] for sample in train if sample[1]]

    # NOTE(review): the scores are mean *distances* to the positives, while
    # roc_auc_score treats larger scores as evidence for label 1 -- confirm
    # that this orientation is intended.
    predictions = [
        distance_total(sample[0], users, trees, positives) for sample in test
    ]
    targets = [int(bool(sample[1])) for sample in test]
    return metrics.roc_auc_score(targets, predictions)
def distance_total(movie0, users, trees, positives):
    """Return the mean ``distance_single`` between ``movie0`` and ``positives``.

    The distance is evaluated once for every (positive, tree) pair and the
    results are averaged over all ``len(trees) * len(positives)`` pairs.

    Args:
        movie0: The item being scored.
        users: Mapping forwarded unchanged to ``distance_single``.
        trees: Sequence of node orderings; each is passed to ``distance_single``.
        positives: Sequence of reference items ``movie0`` is compared against.

    Returns:
        The arithmetic mean of the pairwise distances.

    Raises:
        ZeroDivisionError: If ``trees`` or ``positives`` is empty.
    """
    pairs = ((reference, ordering) for reference in positives for ordering in trees)

    # Accumulate with plain ``+=`` in (positive, tree) order so the floating
    # point result is unchanged.
    accumulated = 0
    for reference, ordering in pairs:
        accumulated += distance_single(movie0, reference, users, ordering)

    return accumulated / (len(trees) * len(positives))
def distance_single(movie0, movie1, users, tree):
    """Return the distance between two movies along one ordering of nodes.

    The nodes of ``tree`` are visited in order. At each node the two movies
    "agree" when both are contained in ``users[node]`` or both are absent
    from it. The walk stops at the first node where they disagree.

    Args:
        movie0: First item to compare.
        movie1: Second item to compare.
        users: Mapping from node key to a container supporting ``in``.
        tree: Ordered iterable of keys into ``users``.

    Returns:
        ``0.5 ** depth`` where ``depth`` is the number of leading nodes on
        which the movies agree, or ``0`` if they agree on every node
        (including when ``tree`` is empty).

    Raises:
        KeyError: If a node in ``tree`` is not a key of ``users``.
    """
    depth = 0
    for node in tree:
        members = users[node]
        # Both membership tests must target the same node's container; the
        # earlier code tested ``movie1`` against the whole ``users`` mapping.
        if (movie0 in members) != (movie1 in members):
            return 0.5 ** depth
        depth += 1
    return 0
# User whose recommendations are evaluated.
USER = "Ariel-Garcé"
# Output location for this experiment; not referenced in the lines visible here.
PATH = f"./experiment/occ-{USER}"

# Repeat the randomized trial ten times with fixed settings, then report the
# mean and the sample standard deviation of the resulting AUC values, one per
# line.
a = [
    occ(
        user=USER,
        near_count=5,
        train_count=100,
        test_count=100,
        tree_count=5,
    )
    for _ in range(10)
]
print(mean(a), stdev(a), sep="\n")