-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathloadMails.m
68 lines (59 loc) · 1.56 KB
/
loadMails.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# Reset the session: drop workspace variables, close figures, clear the console.
clear; close all; clc

# Corpus folders, relative to the current working directory.
spam_dir = 'mail/all_spam';
ham_dir = 'mail/all_ham';

# Abort early when a corpus folder is missing (the former `cd` calls did the
# same implicitly); glob alone would just report zero entries.
if exist(spam_dir, 'dir') ~= 7
  error('loadMails: directory "%s" not found', spam_dir);
end
if exist(ham_dir, 'dir') ~= 7
  error('loadMails: directory "%s" not found', ham_dir);
end

# Count the entries of each folder without changing directory, so the working
# directory can never be left pointing somewhere else after a failure.
# numel gives a scalar count; size() would give a [rows cols] vector.
n_spam_mails = numel(glob([spam_dir '/*']));
n_ham_mails = numel(glob([ham_dir '/*']));
printf('n_spam_mails = %d\n', n_spam_mails);
printf('n_ham_mails = %d\n', n_ham_mails);

# Vocabulary handed to processEmail_manipulated for every mail.
vocabList = getVocabList();
# Build the spam part of the data set: one row per successfully processed
# mail, made of the transposed emailFeatures output followed by the flag 1.
#
# Per-step tic/toc readings noted by the original author:
#   generate filename 0.000128984, load mail 0.0222859,
#   create word indices 6.06974, create feature vector 0.00157595,
#   append spam flag 5.38826e-05, append to data matrix 3.40939e-05
# i.e. processEmail_manipulated dominates the run time.
spam_mails = {};   # raw text of every mail that made it into spam_data
spam_data = [];
# n_spam_mails(1): correct for a scalar count as well as a size()-style vector.
for i = 1:n_spam_mails(1)
  # Mails are addressed by a zero-padded 4-digit index.
  # No trailing semicolon on purpose: the index is echoed as progress output.
  ii = sprintf('%04d', i)
  try
    # assumes loadMail returns the text of one mail as a char array -- TODO confirm
    mail_text = loadMail('all_spam', ii);

    # Work on the freshly loaded text directly.  Stacking the texts into a
    # char matrix first fails with a dimension mismatch as soon as two mails
    # differ in length, and that error used to be swallowed by this try block.
    word_indices = processEmail_manipulated(mail_text, vocabList);
    features = emailFeatures(word_indices)';

    # Last column is the class label: 1 = spam.
    features = [features 1];

    # Append only after every step succeeded so that spam_mails{k} always
    # corresponds to spam_data(k, :).
    spam_data = [spam_data; features];
    spam_mails{end+1, 1} = mail_text;
  catch err
    # Best effort: report the failing mail and continue with the next one.
    disp(err.message)
    disp(ii)
  end
end
# Build the ham part of the data set: one row per successfully processed
# mail, made of the transposed emailFeatures output followed by the flag 0.
ham_mails = {};   # raw text of every mail that made it into ham_data
ham_data = [];
# n_ham_mails(1): correct for a scalar count as well as a size()-style vector.
for i = 1:n_ham_mails(1)
  # Mails are addressed by a zero-padded 4-digit index.
  # No trailing semicolon on purpose: the index is echoed as progress output.
  ii = sprintf('%04d', i)
  try
    # assumes loadMail returns the text of one mail as a char array -- TODO confirm
    mail_text = loadMail('all_ham', ii);

    # Work on the freshly loaded text directly.  Stacking the texts into a
    # char matrix first fails with a dimension mismatch as soon as two mails
    # differ in length, and that error used to be swallowed by this try block.
    word_indices = processEmail_manipulated(mail_text, vocabList);
    features = emailFeatures(word_indices)';

    # Last column is the class label: 0 = ham.
    features = [features 0];

    # Append only after every step succeeded so that ham_mails{k} always
    # corresponds to ham_data(k, :).
    ham_data = [ham_data; features];
    ham_mails{end+1, 1} = mail_text;
  catch err
    # Best effort: report the failing mail and continue with the next one.
    disp(err.message)
    disp(ii)
  end
end
# Write both labelled feature matrices to email_data.mat using the -mat format option.
save -mat email_data.mat spam_data ham_data