update lectures
MonkeyDoug committed Jan 29, 2025
1 parent 8ab5c36 commit 4c2dd0e
Showing 1 changed file with 24 additions and 18 deletions.
42 changes: 24 additions & 18 deletions _data/lectures.yml
@@ -63,10 +63,10 @@
notes:
readings:
- <a href="http://incompleteideas.net/book/RLbook2018.pdf" target="_blank">S & B Textbook</a>, Ch3, Ch4
- <a href="https://distill.pub/2019/paths-perspective-on-value-learning/" target="blank">The Path perspective on Value Learning </a> (blogpost)
- <a href="https://arxiv.org/abs/1312.5602">DQN</a>
- <a href="https://link.springer.com/chapter/10.1007/11564096_32">Neural Fitted Q-Iteration</a>
- <a href="https://jmlr.org/papers/v15/dann14a.html">Policy Evaluation with Temporal Differences&#58; A Survey and Comparison</a>
# - <a href="https://distill.pub/2019/paths-perspective-on-value-learning/" target="blank">The Path perspective on Value Learning </a> (blogpost)
# - <a href="https://arxiv.org/abs/1312.5602">DQN</a>
# - <a href="https://link.springer.com/chapter/10.1007/11564096_32">Neural Fitted Q-Iteration</a>
# - <a href="https://jmlr.org/papers/v15/dann14a.html">Policy Evaluation with Temporal Differences&#58; A Survey and Comparison</a>
logistics:


@@ -92,31 +92,37 @@
video:
notes:
readings:
- <a href="http://incompleteideas.net/book/RLbook2018.pdf" target="_blank">S & B Textbook</a>, Ch6, Ch7 7.1-7.3
- <a href="http://incompleteideas.net/book/RLbook2018.pdf" target="_blank">S & B Textbook</a>, Ch9.1-9.3, 9.6 and Ch10.1
- <a href="https://arxiv.org/abs/1312.5602">DQN</a>
- <a href="https://link.springer.com/chapter/10.1007/11564096_32">Neural Fitted Q-Iteration</a>
- <a href="https://jmlr.org/papers/v15/dann14a.html">Policy Evaluation with Temporal Differences&#58; A Survey and Comparison</a>
- <a href="http://incompleteideas.net/book/RLbook2018.pdf" target="_blank">S & B Textbook</a>, Ch5, Ch6
- <a href="https://distill.pub/2019/paths-perspective-on-value-learning/" target="blank">(optional) The Path perspective on Value Learning </a> (blogpost)
# - <a href="http://incompleteideas.net/book/RLbook2018.pdf" target="_blank">S & B Textbook</a>, Ch6, Ch7 7.1-7.3
# - <a href="http://incompleteideas.net/book/RLbook2018.pdf" target="_blank">S & B Textbook</a>, Ch9.1-9.3, 9.6 and Ch10.1
# - <a href="https://arxiv.org/abs/1312.5602">DQN</a>
# - <a href="https://link.springer.com/chapter/10.1007/11564096_32">Neural Fitted Q-Iteration</a>
# - <a href="https://jmlr.org/papers/v15/dann14a.html">Policy Evaluation with Temporal Differences&#58; A Survey and Comparison</a>
logistics:


- date: W 01/29
lecturer:
title: >
<strong>Evolutionary Methods for Policy Search, Policy Gradients</strong>
<strong>Value based methods cont. (DQN, MCTS)</strong>
slides:
slides2:
video:
notes:
readings:
# - <a href="http://incompleteideas.net/book/RLbook2018.pdf" target="_blank">S & B Textbook</a>, Ch5, Ch6
- <a href="http://incompleteideas.net/book/RLbook2018.pdf" target="_blank">S & B Textbook</a>, Ch13
- <a href="http://karpathy.github.io/2016/05/31/rl/" target = "_blank">http://karpathy.github.io/2016/05/31/rl/</a>
- Salimans et al. <a href="https://arxiv.org/abs/1703.03864" target="_blank">Evolution Strategies as a Scalable Alternative to Reinforcement Learning</a>
- (optional) Nikolaus Hansen. <a href="https://arxiv.org/pdf/1604.00772.pdf" target="_blank">The CMA Evolution Strategy - A Tutorial</a>
- <a href="https://proceedings.neurips.cc/paper_files/paper/1999/file/464d828b85b0bed98e80ade0a5c43b0f-Paper.pdf"> Policy gradients with function approximation </a>
- <a href="https://people.eecs.berkeley.edu/~pabbeel/cs287-fa09/readings/KakadeLangford-icml2002.pdf"> Approximately Optimal Approximate RL </a>
- <a href="https://arxiv.org/abs/1908.00261"> On the theory of policy gradient methods </a>
- <a href="https://arxiv.org/abs/1312.5602">DQN</a>
- <a href="https://papers.nips.cc/paper/5421-deep-learning-for-real-time-atari-game-play-using-offline-monte-carlo-tree-search-planning" target="_blank">Deep Learning for Real-Time Atari Game Play Using Offline Monte-Carlo Tree Search Planning</a>
- <a href="http://incompleteideas.net/book/RLbook2018.pdf" target="_blank">S & B Textbook</a>, Ch8.11
- <a href="https://link.springer.com/chapter/10.1007/11564096_32">(optional) Neural Fitted Q-Iteration</a>
- <a href="https://jmlr.org/papers/v15/dann14a.html">(optional) Policy Evaluation with Temporal Differences&#58; A Survey and Comparison</a>
# - <a href="http://incompleteideas.net/book/RLbook2018.pdf" target="_blank">S & B Textbook</a>, Ch13
# - <a href="http://karpathy.github.io/2016/05/31/rl/" target = "_blank">http://karpathy.github.io/2016/05/31/rl/</a>
# - Salimans et al. <a href="https://arxiv.org/abs/1703.03864" target="_blank">Evolution Strategies as a Scalable Alternative to Reinforcement Learning</a>
# - (optional) Nikolaus Hansen. <a href="https://arxiv.org/pdf/1604.00772.pdf" target="_blank">The CMA Evolution Strategy - A Tutorial</a>
# - <a href="https://proceedings.neurips.cc/paper_files/paper/1999/file/464d828b85b0bed98e80ade0a5c43b0f-Paper.pdf"> Policy gradients with function approximation </a>
# - <a href="https://people.eecs.berkeley.edu/~pabbeel/cs287-fa09/readings/KakadeLangford-icml2002.pdf"> Approximately Optimal Approximate RL </a>
# - <a href="https://arxiv.org/abs/1908.00261"> On the theory of policy gradient methods </a>
logistics:
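
For reference, each item in _data/lectures.yml that the hunks above edit follows the same schema. Below is a minimal sketch of one entry; the date, title, and reading are illustrative placeholders rather than values from the actual file, and only the field names are taken from the diff.

- date: W 02/05                      # lecture date (placeholder)
  lecturer:                          # left empty in the entries above
  title: >
    <strong>Placeholder Lecture Title</strong>
  slides:                            # link to the slide deck, filled in once available
  slides2:                           # optional second deck
  video:                             # recording link
  notes:                             # lecture notes link
  readings:
    - <a href="http://incompleteideas.net/book/RLbook2018.pdf" target="_blank">S & B Textbook</a>, Ch1
    # Prefixing a reading with '#' (as in the diff above) hides it without deleting it.
  logistics: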


