-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBoard.cpp
82 lines (77 loc) · 2.86 KB
/
Board.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#include <algorithm>
#include <iostream>
#include <limits>
#include <vector>
#include "Board.h"
#include "util.cpp"
// Returns true when `location` has non-negative coordinates that are
// strictly below the board's width (x) and height (y).
bool Board::is_inside(const Point& location) {
    const bool x_in_range = (location.x >= 0) && (location.x < this->width);
    const bool y_in_range = (location.y >= 0) && (location.y < this->height);
    return x_in_range && y_in_range;
}
// Computes the value of attempting `direction` from `current_loc`.
//
// The attempted step is weighted by probs[0]; the two steps perpendicular to
// it are weighted by probs[1] (negative axis direction) and probs[2]
// (positive axis direction). Each weighted term comes from the three-argument
// move() overload.
//
// When the attempted step lands on an end state, only the perpendicular terms
// are scaled by gamma and no step reward is added. Otherwise the whole sum is
// scaled by gamma and `reward` is added on top.
float Board::move(const Point& current_loc, const Point& direction) {
    float expected = 0;

    // A step with no x component can slip left or right.
    if (direction.x == 0) {
        expected += move(current_loc, Point(-1, 0), this->probs[1]);
        expected += move(current_loc, Point(1, 0), this->probs[2]);
    }
    // A step with no y component can slip along the y axis.
    if (direction.y == 0) {
        expected += move(current_loc, Point(0, -1), this->probs[1]);
        expected += move(current_loc, Point(0, 1), this->probs[2]);
    }

    const float intended = Board::move(current_loc, direction, this->probs[0]);
    const bool heads_to_end_state =
        util::is_in_vector(current_loc + direction, this->end_states);

    // The order of the compound assignments below is significant: it decides
    // which terms get multiplied by gamma.
    if (heads_to_end_state) {
        expected *= gamma;
        expected += intended;
    } else {
        expected += intended;
        expected *= gamma;
        expected += this->reward;
    }
    return expected;
}
float Board::move(const Point& current_loc, const Point& direction,
float prob) {
Point new_loc = current_loc + direction;
// edge cases
if (util::is_in_vector(new_loc, this->obstacles) || !is_inside(new_loc)) {
return prob * best_value[current_loc.x][current_loc.y];
}
if (util::is_in_vector(new_loc, this->end_states)) {
return prob * best_value[new_loc.x][new_loc.y];
}
// end of edges cases
return prob * this->best_value[new_loc.x][new_loc.y];
}
int Board::run() {
for (int i = 0; i < 10; i++) {
this->schedule.push(start_state);
std::vector<Point> visited;
while (this->schedule.size() > 0) {
Point p = schedule.front();
this->schedule.pop();
visited.insert(visited.begin(), p);
float result, best_result = std::numeric_limits<float>::lowest();
Point best_direction;
for (auto direction : direction) {
Point new_loc = p + direction;
if (this->is_inside(new_loc)) {
if (!util::is_in_vector(new_loc, visited)
&& (!util::is_in_vector(new_loc, obstacles))
&& (!util::is_in_vector(new_loc, end_states))) {
schedule.push(new_loc);
}
}
result = move(p, direction);
if (result > best_result) {
best_result = result;
best_direction = direction;
}
}
best_value[p.x][p.y] = best_result;
best_policy[p.x][p.y] = best_direction;
}
util::print<float>(best_value);
util::print<Point>(best_policy);
}
return 0;
}