todo.txt

fix problem with undefined/null values
move the percent value to the left when the last fraction's percentage value is < 5%
maybe create an Agent object and use it in SimulationContent (see the sketch after this list)
put images in the documentation
deploy this project on Vercel
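
A minimal sketch of what that Agent object could look like, assuming it wraps the epsilon-greedy selection and the incremental action-value update used in the commented-out effect further below; the class name, fields, and methods here are hypothetical, not existing code:

// HYPOTHETICAL SKETCH (not existing code): an Agent that SimulationContent could delegate to
class Agent {
  constructor(numActions, epsilon) {
    this.epsilon = epsilon;
    this.Q = new Array(numActions).fill(0); // estimated action values
    this.clicks = new Array(numActions).fill(0); // times each action was chosen
  }

  // EPSILON-GREEDY: with probability epsilon explore a random action,
  // otherwise exploit the action with the highest estimated value
  selectAction() {
    if (Math.random() < this.epsilon) {
      return Math.floor(Math.random() * this.Q.length);
    }
    return this.Q.indexOf(Math.max(...this.Q));
  }

  // INCREMENTAL SAMPLE-AVERAGE UPDATE: Qn+1 = Qn + (1/n)(Rn - Qn)
  update(action, reward) {
    this.clicks[action] += 1;
    const stepSize = 1 / this.clicks[action];
    this.Q[action] += stepSize * (reward - this.Q[action]);
  }
}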
// const setUpSimulationData = () => {
//   const resetedBanditsData = banditsData.map((bandit, index) => {
//     const simulationBandit = {
//       ...bandit,
//       clicks: 0,
//       Q: 0,
//     };
//     return simulationBandit;
//   });
//   const expectedValues = resetedBanditsData.map((bandit) => bandit.q);
//   const bestActionIndex = findMaxValueIndex(expectedValues);
//   updateBestAction(bestActionIndex);
//   updateSimulationData(resetedBanditsData);
// };

// useEffect(() => {
//   setIsActive(true);
//   // RESET BANDITS DATA
//   setUpSimulationData();
// }, []);
// useEffect(() => {
//   if (!isActive) return;
//   // const initialEstimatedValues = simulationData.map((bandit) => 0);
//   // setEstimatedValues(initialEstimatedValues);
//   if (actions.length <= steps) {
//     const intervalId = setInterval(() => {
//       // THE AGENT DECIDES WHICH ACTION TO CHOOSE:
//       // IT FIRST FINDS THE ACTION WITH MAX Q(a) (GREEDY, EXPLOITATION);
//       // IF Math.random() < EPSILON IT CHOOSES ANOTHER ACTION INSTEAD (NON-GREEDY, EXPLORATION);
//       // WITH EPSILON === 0 THE AGENT IS PURELY GREEDY
//       const estimatedValues = simulationData.map((bandit) => bandit.Q);
//       const nextAction = getNextAction(estimatedValues, epsilon);
//       let nextReward = null;
//       updateSimulationData((prevBanditsData) => {
//         return prevBanditsData.map((bandit) => {
//           if (bandit.id === nextAction) {
//             const target = bandit.distribution.drawNumber();
//             const old_estimate = bandit.Q;
//             // THIS REWARD IS SAMPLE NUMBER clicks + 1 FOR THIS ACTION
//             const step_size = 1 / (bandit.clicks + 1);
//             nextReward = target;
//             return {
//               ...bandit,
//               clicks: bandit.clicks + 1,
//               // INCREMENTAL ACTION-VALUE ESTIMATION FORMULA
//               // Qn+1 = Qn + (1/n)(Rn - Qn)
//               Q:
//                 bandit.clicks === 0
//                   ? target
//                   : Number(
//                       (
//                         old_estimate +
//                         step_size * (target - old_estimate)
//                       ).toFixed(2)
//                     ),
//               lastDrawnNumber: target,
//             };
//           }
//           return bandit;
//         });
//       });
//       // UPDATE LAST REWARDS
//       setRewards((prevRewards) => [...prevRewards, nextReward]);
//       // ADD ACTION TO actions
//       setActions((prevActions) => [...prevActions, nextAction]);
//     }, delay * 1000); // delay IS IN SECONDS, setInterval TAKES ms
//     return () => clearInterval(intervalId);
//   } else {
//     setIsActive(false);
//     setIsSimulationFinished(true);
//   }
// }, [actions, isActive]);
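
getNextAction is called above but its body is not in this file; below is a minimal sketch of how it could implement the epsilon-greedy rule described in the comments, assuming the signature (estimatedValues, epsilon) and that the returned index matches bandit.id; the real helper may differ.

// HYPOTHETICAL SKETCH of the getNextAction helper referenced above (assumed, not the actual implementation)
const getNextAction = (estimatedValues, epsilon) => {
  // GREEDY CHOICE: index of the highest estimated value
  const greedyAction = estimatedValues.indexOf(Math.max(...estimatedValues));
  // EXPLORATION: with probability epsilon pick one of the other actions at random
  if (Math.random() < epsilon && estimatedValues.length > 1) {
    const otherActions = estimatedValues
      .map((_, index) => index)
      .filter((index) => index !== greedyAction);
    return otherActions[Math.floor(Math.random() * otherActions.length)];
  }
  // EXPLOITATION: otherwise take the greedy action
  return greedyAction;
};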