-
Notifications
You must be signed in to change notification settings - Fork 2
/
FRIQ_reduction_strategy_cluster_hierarchical.m
126 lines (99 loc) · 4.71 KB
/
FRIQ_reduction_strategy_cluster_hierarchical.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
function [leftBranch, rightBranch, reducedR] = FRIQ_reduction_strategy_cluster_hierarchical(RB)
% FRIQ-learning rule-base hierarchical clustering reduction strategy
% Based on hierarchical clustering of rules
% Developed by Tamas Tompa 2016 <tompa@iit.uni-miskolc.hu>
global numofstates finalReducedR U VE R R_tmp
global FRIQ_param_reward_good_above FRIQ_param_maxsteps FRIQ_param_epsilon FRIQ_param_alpha FRIQ_param_gamma
% initialize variables
[rows columns] = size(RB);
tempReducedR = zeros(rows,rows);
ruleDistance = zeros(rows,rows);
tempReducedMaxR = zeros(rows,rows);
tempReducedMinR = zeros(rows,rows);
% while the rules are existing
if(rows >= 4)
% distance matrix of the rule-base
for i=1:rows
observation = RB(i,1:(numofstates+1));
ruleDistance(i,:) = FIVERuleDist_fixres(U,VE,RB,observation);
end
% pivot objects search, the two furthest rule
% pivot1 object
[minDists minIndexes] = min(ruleDistance(:,:),[],2);
minDist = min(minDists);
minIndex = min(minIndexes);
Pivot1 = RB(minIndex,:);
% pivot2 object, the furthest rule from the pivot1
[maxDists maxIndexes] = max(ruleDistance(:,:),[],2);
maxDist = max(maxDists);
maxIndex = max(maxIndexes);
Pivot2 = RB(maxIndex,:);
% pivot objects
Pivots = [Pivot1; Pivot2];
% rules distance from the pivot1 and the pivot2 objects
distRP1s(:,:) = FIVERuleDist_fixres(U,VE,RB,Pivot1(1:(numofstates+1)));
distRP2s(:,:) = FIVERuleDist_fixres(U,VE,RB,Pivot2(1:(numofstates+1)));
% distance threshold, based on furthest rules
[maxThP1 maxThP1Index] = max(distRP1s(:,:),[],2);
[maxThP2 maxThP2Index] = max(distRP2s(:,:),[],2);
tresholdP1 = distRP1s(maxThP1Index) / 2;
tresholdP2 = distRP2s(maxThP2Index) / 2;
treshold = (tresholdP1 + tresholdP2) / 2;
% tree building
% initialize branches
tempLeftBranch = zeros(rows, columns);
tempRightBranch = zeros(rows, columns);
% create branches
for j=1:rows-2
if(distRP1s(j) <= treshold)
tempLeftBranch(j,:) = RB(j,:);
elseif(distRP1s(j) > treshold)
tempRightBranch(j,:) = RB(j,:);
end
end
% cleaning, 0 rows remove
leftBranch = tempLeftBranch(any(tempLeftBranch,2),:);
rightBranch = tempRightBranch(any(tempRightBranch,2),:);
% indexes of the min and max Q-value rules of the clusters
[leftBranchMinValue, leftBranchMinIndex] = min(leftBranch(:,numofstates+2));
[rightBranchMinValue, rightBranchMinIndex] = min(rightBranch(:,numofstates+2));
[leftBranchMaxValue, leftBranchMaxIndex] = max(leftBranch(:,numofstates+2));
[rightBranchMaxValue, rightBranchMaxIndex] = max(rightBranch(:,numofstates+2));
% max and min Q-value rules of the clusters
tempReducedMaxR = [leftBranch(leftBranchMaxIndex,:);
rightBranch(rightBranchMaxIndex,:)];
tempReducedMinR = [leftBranch(leftBranchMinIndex,:);
rightBranch(rightBranchMinIndex,:)];
% cleaning, remove the max and min Q-value rules from the branches, not
% add these rules again to the branches
if((leftBranchMaxIndex ~= leftBranchMinIndex) && (rightBranchMaxIndex ~= rightBranchMinIndex))
[leftBranch] = removerows(leftBranch, [leftBranchMaxIndex leftBranchMinIndex]);
[rightBranch] = removerows(rightBranch, [rightBranchMaxIndex rightBranchMinIndex]);
end
% cleaning, 0 rows remove
reducedMaxR = tempReducedMaxR(any(tempReducedMaxR,2),:);
reducedMinR = tempReducedMinR(any(tempReducedMinR,2),:);
reducedR = [reducedMaxR;reducedMinR];
% trying the rule-base
if(~isempty(finalReducedR))
R = finalReducedR;
end
[total_reward_friq, steps_friq] = FRIQ_episode(FRIQ_param_maxsteps, FRIQ_param_alpha, FRIQ_param_gamma, FRIQ_param_epsilon);
% bad rule-base, next iteration
if (total_reward_friq < FRIQ_param_reward_good_above || isempty(finalReducedR))
disp(['FRIQ_steps: ',int2str(steps_friq),' FRIQ_reward: ',num2str(total_reward_friq),' rules: ' num2str(size(finalReducedR,1))]);
disp('The rule-base did not solve the problem! Next iteration...');
disp(' ');
% merged the max and min Q-value rules of the clusters
finalReducedR = [finalReducedR;reducedR];
FRIQ_reduction_strategy_cluster_hierarchical(rightBranch);
FRIQ_reduction_strategy_cluster_hierarchical(leftBranch);
R = R_tmp;
else
% good rule-base, end
disp(['FRIQ_steps: ',int2str(steps_friq),' FRIQ_reward: ',num2str(total_reward_friq),' rules: ' num2str(size(finalReducedR,1))]);
disp('The rule-base solved the problem, smallest rule-base found. Exiting!');
return;
end
end % main if
end