-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtool.py
93 lines (72 loc) · 2.57 KB
/
tool.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import csv
import pickle as pk
import gzip
import random
import numpy as np
def _indexRatedIds(csvPath, keyColumn, ratedIds):
    """Assign a running index to every ID in *csvPath* that has ratings.

    Rows are read in file order; only rows whose ``keyColumn`` value is a
    key of ``ratedIds`` receive an index, so the indices are consecutive
    starting at 0. IDs that are rated but absent from the file get none.
    """
    indexOf = {}
    # newline='' lets the csv module do its own line handling, as its
    # documentation requires for correct parsing of quoted fields.
    with open(csvPath, 'rt', encoding="iso-8859-1", newline='') as csvfile:
        reader = csv.DictReader(csvfile, delimiter=';', quotechar='"')
        index = 0
        for row in reader:
            key = row[keyColumn]
            if key in ratedIds:
                indexOf[key] = index
                index += 1
    return indexOf


def dumpDataMat(dataDir='data', outDir=''):
    """Build the rating dictionaries from the BX CSV files and pickle them.

    Reads ``BX-Book-Ratings.csv``, ``BX-Books.csv`` and ``BX-Users.csv``
    from ``dataDir`` (semicolon-separated, ISO-8859-1) and writes four
    gzip-compressed pickles into ``outDir``:

    * ``Users.pklz``   -- ``{user_id: index}`` for users with ratings
    * ``Books.pklz``   -- ``{isbn: index}`` for books with ratings
    * ``UserMat.pklz`` -- ``{user_id: {isbn: rating}}``
    * ``BookMat.pklz`` -- ``{isbn: {user_id: rating}}``

    Rows whose rating is 0 are skipped (presumably implicit feedback in the
    data set -- confirm). IDs are kept as the strings found in the CSV;
    ratings are stored as ``int``.

    Args:
        dataDir: Directory holding the three input CSV files.
        outDir: Directory for the output pickles; the default ``''`` means
            the current working directory.

    Raises:
        OSError: If an input file cannot be read or an output written.
        ValueError: If a ``Book-Rating`` value is not an integer.
    """
    import os  # function-scope import: the module header does not import os

    userMat = {}
    bookMat = {}
    ratingsPath = os.path.join(dataDir, 'BX-Book-Ratings.csv')
    with open(ratingsPath, 'rt', encoding="iso-8859-1", newline='') as csvfile:
        reader = csv.DictReader(csvfile, delimiter=';', quotechar='"')
        for row in reader:
            rating = int(row["Book-Rating"])  # parse once per row
            if rating == 0:
                continue
            user = row["User-ID"]
            isbn = row["ISBN"]
            userMat.setdefault(user, {})[isbn] = rating
            bookMat.setdefault(isbn, {})[user] = rating

    books = _indexRatedIds(os.path.join(dataDir, 'BX-Books.csv'), "ISBN", bookMat)
    users = _indexRatedIds(os.path.join(dataDir, 'BX-Users.csv'), "User-ID", userMat)

    # Nothing is written until all three inputs have been read successfully.
    outputs = (
        ('Users.pklz', users),
        ('Books.pklz', books),
        ('UserMat.pklz', userMat),
        ('BookMat.pklz', bookMat),
    )
    for fileName, payload in outputs:
        with gzip.open(os.path.join(outDir, fileName), 'wb') as output:
            pk.dump(payload, output)
def loadUsersBooks(usersPath='Users.pklz', booksPath='Books.pklz'):
    """Load the user and book index dictionaries from gzip-compressed pickles.

    Args:
        usersPath: Pickle holding the user index (default: the
            ``Users.pklz`` file written by ``dumpDataMat``).
        booksPath: Pickle holding the book index (default: the
            ``Books.pklz`` file written by ``dumpDataMat``).

    Returns:
        A ``(users, books)`` tuple with the two unpickled objects.

    Raises:
        OSError: If either file cannot be opened.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with gzip.open(usersPath, 'rb') as usersFile:
        users = pk.load(usersFile)
    with gzip.open(booksPath, 'rb') as booksFile:
        books = pk.load(booksFile)
    return users, books
def loadUserMat(path='UserMat.pklz'):
    """Load the per-user rating dictionary from a gzip-compressed pickle.

    Args:
        path: File to read (default: the ``UserMat.pklz`` file written by
            ``dumpDataMat``).

    Returns:
        The unpickled object; ``dumpDataMat`` stores
        ``{user_id: {isbn: rating}}`` there.

    Raises:
        OSError: If the file cannot be opened.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with gzip.open(path, 'rb') as pickled:
        return pk.load(pickled)
def loadBookMat(path='BookMat.pklz'):
    """Load the per-book rating dictionary from a gzip-compressed pickle.

    Args:
        path: File to read (default: the ``BookMat.pklz`` file written by
            ``dumpDataMat``).

    Returns:
        The unpickled object; ``dumpDataMat`` stores
        ``{isbn: {user_id: rating}}`` there.

    Raises:
        OSError: If the file cannot be opened.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with gzip.open(path, 'rb') as pickled:
        return pk.load(pickled)
def loadUserTestMat(sampleSize=1000):
    """Return a random sample of users with every rating replaced by 0.

    Loads the user rating dictionary via ``loadUserMat``, draws
    ``sampleSize`` distinct users at random, and for each one builds a new
    dictionary with the same book keys and all values set to 0. The loaded
    data itself is not modified.

    Args:
        sampleSize: Number of users to draw. If fewer users are available,
            all of them are returned instead of raising.

    Returns:
        ``{user_id: {isbn: 0}}`` for the sampled users, in sample order.
    """
    dataMat = loadUserMat()
    # random.sample needs a sequence: dict views were deprecated as a
    # population in Python 3.9 and raise TypeError from 3.11 on.
    userIds = list(dataMat)
    chosen = random.sample(userIds, min(sampleSize, len(userIds)))
    return {user: dict.fromkeys(dataMat[user], 0) for user in chosen}
def loadBookTestMat(sampleSize=100):
    """Return a random sample of books with every rating replaced by 0.

    Loads the book rating dictionary via ``loadBookMat``, draws
    ``sampleSize`` distinct books at random, and for each one builds a new
    dictionary with the same user keys and all values set to 0. The loaded
    data itself is not modified.

    Args:
        sampleSize: Number of books to draw. If fewer books are available,
            all of them are returned instead of raising.

    Returns:
        ``{isbn: {user_id: 0}}`` for the sampled books, in sample order.
    """
    dataMat = loadBookMat()
    # random.sample needs a sequence: dict views were deprecated as a
    # population in Python 3.9 and raise TypeError from 3.11 on.
    isbns = list(dataMat)
    chosen = random.sample(isbns, min(sampleSize, len(isbns)))
    return {book: dict.fromkeys(dataMat[book], 0) for book in chosen}