@article{silver_reward_2021,
title = {Reward is enough},
volume = {299},
issn = {0004-3702},
url = {https://www.sciencedirect.com/science/article/pii/S0004370221000862},
doi = {10.1016/j.artint.2021.103535},
abstract = {In this article we hypothesise that intelligence, and its associated abilities, can be understood as subserving the maximisation of reward. Accordingly, reward is enough to drive behaviour that exhibits abilities studied in natural and artificial intelligence, including knowledge, learning, perception, social intelligence, language, generalisation and imitation. This is in contrast to the view that specialised problem formulations are needed for each ability, based on other signals or objectives. Furthermore, we suggest that agents that learn through trial and error experience to maximise reward could learn behaviour that exhibits most if not all of these abilities, and therefore that powerful reinforcement learning agents could constitute a solution to artificial general intelligence.},
language = {en},
urldate = {2022-02-09},
journal = {Artificial Intelligence},
author = {Silver, David and Singh, Satinder and Precup, Doina and Sutton, Richard S.},
month = oct,
year = {2021},
keywords = {Reinforcement learning, Artificial general intelligence, Artificial intelligence, Reward},
pages = {103535},
file = {ScienceDirect Snapshot:/home/dferigo/Zotero/storage/XUXAKHJ9/S0004370221000862.html:text/html;Silver et al_2021_Reward is enough.pdf:/home/dferigo/Zotero/storage/DE8PXLYA/Silver et al_2021_Reward is enough.pdf:application/pdf},
}
@article{cleach_fast_2021,
title = {Fast {Contact}-{Implicit} {Model}-{Predictive} {Control}},
url = {http://arxiv.org/abs/2107.05616},
abstract = {We present a general approach for controlling robotic systems that make and break contact with their environments. Contact-implicit model-predictive control (CI-MPC) generalizes linear MPC to contact-rich settings by relying on linear complementarity problems (LCP) computed using strategic Taylor approximations about a reference trajectory and retaining non-smooth impact and friction dynamics, allowing the policy to not only reason about contact forces and timing, but also generate entirely new contact mode sequences online. To achieve reliable and fast numerical convergence, we devise a structure-exploiting, path-following solver for the LCP contact dynamics and a custom trajectory optimizer for trajectory-tracking MPC problems. We demonstrate CI-MPC at real-time rates in simulation, and show that it is robust to model mismatch and can respond to disturbances by discovering and exploiting new contact modes across a variety of robotic systems, including a pushbot, hopper, and planar quadruped and biped.},
language = {en},
urldate = {2021-11-26},
journal = {arXiv:2107.05616 [cs, eess]},
author = {Le Cleac'h, Simon and Howell, Taylor and Schwager, Mac and Manchester, Zachary},
month = sep,
year = {2021},
note = {arXiv: 2107.05616},
keywords = {Computer Science - Robotics, Electrical Engineering and Systems Science - Systems and Control},
file = {Cleac'h et al. - 2021 - Fast Contact-Implicit Model-Predictive Control.pdf:/home/dferigo/Zotero/storage/FCY6KR2I/Cleac'h et al. - 2021 - Fast Contact-Implicit Model-Predictive Control.pdf:application/pdf},
}
@article{roy_machine_2021,
title = {From {Machine} {Learning} to {Robotics}: {Challenges} and {Opportunities} for {Embodied} {Intelligence}},
shorttitle = {From {Machine} {Learning} to {Robotics}},
url = {http://arxiv.org/abs/2110.15245},
abstract = {Machine learning has long since become a keystone technology, accelerating science and applications in a broad range of domains. Consequently, the notion of applying learning methods to a particular problem set has become an established and valuable modus operandi to advance a particular field. In this article we argue that such an approach does not straightforwardly extend to robotics -- or to embodied intelligence more generally: systems which engage in a purposeful exchange of energy and information with a physical environment. In particular, the purview of embodied intelligent agents extends significantly beyond the typical considerations of mainstream machine learning approaches, which typically (i) do not consider operation under conditions significantly different from those encountered during training; (ii) do not consider the often substantial, long-lasting and potentially safety-critical nature of interactions during learning and deployment; (iii) do not require ready adaptation to novel tasks while at the same time (iv) effectively and efficiently curating and extending their models of the world through targeted and deliberate actions. In reality, therefore, these limitations result in learning-based systems which suffer from many of the same operational shortcomings as more traditional, engineering-based approaches when deployed on a robot outside a well defined, and often narrow operating envelope. Contrary to viewing embodied intelligence as another application domain for machine learning, here we argue that it is in fact a key driver for the advancement of machine learning technology. In this article our goal is to highlight challenges and opportunities that are specific to embodied intelligence and to propose research directions which may significantly advance the state-of-the-art in robot learning.},
language = {en},
urldate = {2021-11-03},
journal = {arXiv:2110.15245 [cs]},
author = {Roy, Nicholas and Posner, Ingmar and Barfoot, Tim and Beaudoin, Philippe and Bengio, Yoshua and Bohg, Jeannette and Brock, Oliver and Depatie, Isabelle and Fox, Dieter and Koditschek, Dan and Lozano-Perez, Tomas and Mansinghka, Vikash and Pal, Christopher and Richards, Blake and Sadigh, Dorsa and Schaal, Stefan and Sukhatme, Gaurav and Therien, Denis and Toussaint, Marc and Van de Panne, Michiel},
month = oct,
year = {2021},
note = {arXiv: 2110.15245},
keywords = {Computer Science - Machine Learning, Computer Science - Robotics},
file = {Roy et al_2021_From Machine Learning to Robotics.pdf:/home/dferigo/Zotero/storage/CA7DHLB4/Roy et al_2021_From Machine Learning to Robotics.pdf:application/pdf},
}
@techreport{traversaro_multibody_2019,
title = {Multibody dynamics notation},
url = {https://pure.tue.nl/ws/portalfiles/portal/139293126/A_Multibody_Dynamics_Notation_Revision_2_.pdf},
language = {en},
author = {Traversaro, Silvio and Saccon, Alessandro},
year = {2019},
pages = {24},
}
@inproceedings{liu_robot_2021,
title = {Robot {Reinforcement} {Learning} on the {Constraint} {Manifold}},
url = {https://openreview.net/forum?id=zwo1-MdMl1P},
abstract = {Reinforcement learning in robotics is extremely challenging due to many practical issues, including safety, mechanical constraints, and wear and tear. Typically, these issues are not considered in...},
language = {en},
urldate = {2021-10-21},
author = {Liu, Puze and Tateo, Davide and Ammar, Haitham Bou and Peters, Jan},
month = jun,
year = {2021},
file = {Liu et al_2021_Robot Reinforcement Learning on the Constraint Manifold.pdf:/home/dferigo/Zotero/storage/8VD2XRMC/Liu et al_2021_Robot Reinforcement Learning on the Constraint Manifold.pdf:application/pdf},
}
@article{acosta_validating_2021,
title = {Validating {Robotics} {Simulators} on {Real} {World} {Impacts}},
url = {http://arxiv.org/abs/2110.00541},
abstract = {A realistic simulation environment is an essential tool in every roboticist's toolkit, with uses ranging from planning and control to training policies with reinforcement learning. Despite the centrality of simulation in modern robotics, little work has been done to compare the performance of robotics simulators against real-world data, especially for scenarios involving dynamic motions with high speed impact events. Handling dynamic contact is the computational bottleneck for most simulations, and thus the modeling and algorithmic choices surrounding impacts and friction form the largest distinctions between popular tools. Here, we evaluate the ability of several simulators to reproduce real-world trajectories involving impacts. Using experimental data, we identify system-specific contact parameters of popular simulators Drake, MuJoCo, and Bullet, analyzing the effects of modeling choices around these parameters. For the simple example of a cube tossed onto a table, simulators capture inelastic impacts well while failing to capture elastic impacts. For the higher-dimensional case of a Cassie biped landing from a jump, the simulators capture the bulk motion well but the accuracy is limited by numerous model differences between the real robot and the simulators.},
urldate = {2021-10-15},
journal = {arXiv:2110.00541 [cs]},
author = {Acosta, Brian and Yang, William and Posa, Michael},
month = oct,
year = {2021},
note = {arXiv: 2110.00541},
keywords = {Computer Science - Robotics},
file = {Acosta et al_2021_Validating Robotics Simulators on Real World Impacts.pdf:/home/dferigo/Zotero/storage/D4WET8GW/Acosta et al_2021_Validating Robotics Simulators on Real World Impacts.pdf:application/pdf;arXiv.org Snapshot:/home/dferigo/Zotero/storage/LU98NNTF/2110.html:text/html},
}
@inproceedings{gronauer_successful_2021,
address = {Montreal, Canada},
title = {The {Successful} {Ingredients} of {Policy} {Gradient} {Algorithms}},
isbn = {978-0-9992411-9-6},
url = {https://www.ijcai.org/proceedings/2021/338},
doi = {10.24963/ijcai.2021/338},
abstract = {Despite the sublime success in recent years, the underlying mechanisms powering the advances of reinforcement learning are yet poorly understood. In this paper, we identify these mechanisms - which we call ingredients - in on-policy policy gradient methods and empirically determine their impact on the learning. To allow an equitable assessment, we conduct our experiments based on a unified and modular implementation. Our results underline the significance of recent algorithmic advances and demonstrate that reaching state-of-the-art performance may not need sophisticated algorithms but can also be accomplished by the combination of a few simple ingredients.},
language = {en},
urldate = {2021-10-14},
booktitle = {Proceedings of the {Thirtieth} {International} {Joint} {Conference} on {Artificial} {Intelligence}},
publisher = {International Joint Conferences on Artificial Intelligence Organization},
author = {Gronauer, Sven and Gottwald, Martin and Diepold, Klaus},
month = aug,
year = {2021},
pages = {2455--2461},
file = {Gronauer et al_2021_The Successful Ingredients of Policy Gradient Algorithms.pdf:/home/dferigo/Zotero/storage/B9A5Y9UV/Gronauer et al_2021_The Successful Ingredients of Policy Gradient Algorithms.pdf:application/pdf},
}
@article{engstrom_implementation_2020,
title = {Implementation {Matters} in {Deep} {Policy} {Gradients}: {A} {Case} {Study} on {PPO} and {TRPO}},
shorttitle = {Implementation {Matters} in {Deep} {Policy} {Gradients}},
url = {http://arxiv.org/abs/2005.12729},
abstract = {We study the roots of algorithmic progress in deep policy gradient algorithms through a case study on two popular algorithms: Proximal Policy Optimization (PPO) and Trust Region Policy Optimization (TRPO). Specifically, we investigate the consequences of "code-level optimizations:" algorithm augmentations found only in implementations or described as auxiliary details to the core algorithm. Seemingly of secondary importance, such optimizations turn out to have a major impact on agent behavior. Our results show that they (a) are responsible for most of PPO's gain in cumulative reward over TRPO, and (b) fundamentally change how RL methods function. These insights show the difficulty and importance of attributing performance gains in deep reinforcement learning. Code for reproducing our results is available at https://github.com/MadryLab/implementation-matters .},
urldate = {2021-05-27},
journal = {arXiv:2005.12729 [cs, stat]},
author = {Engstrom, Logan and Ilyas, Andrew and Santurkar, Shibani and Tsipras, Dimitris and Janoos, Firdaus and Rudolph, Larry and Madry, Aleksander},
month = may,
year = {2020},
note = {arXiv: 2005.12729},
keywords = {Computer Science - Machine Learning, Computer Science - Robotics, Statistics - Machine Learning},
file = {arXiv.org Snapshot:/home/dferigo/Zotero/storage/AI6US5L8/2005.html:text/html;Engstrom et al_2020_Implementation Matters in Deep Policy Gradients.pdf:/home/dferigo/Zotero/storage/HSDXPTLP/Engstrom et al_2020_Implementation Matters in Deep Policy Gradients.pdf:application/pdf},
}
@misc{schulman_nuts_2016,
title = {The {Nuts} and {Bolts} of {Deep} {RL} {Research}},
url = {https://rll.berkeley.edu/deeprlcourse/docs/nuts-and-bolts.pdf},
language = {en},
author = {Schulman, John},
month = dec,
year = {2016},
file = {Schulman - The Nuts and Bolts of Deep RL Research.pdf:/home/dferigo/Zotero/storage/NWLULPJC/Schulman - The Nuts and Bolts of Deep RL Research.pdf:application/pdf},
}
@article{andrychowicz_what_2020,
title = {What {Matters} {In} {On}-{Policy} {Reinforcement} {Learning}? {A} {Large}-{Scale} {Empirical} {Study}},
shorttitle = {What {Matters} {In} {On}-{Policy} {Reinforcement} {Learning}?},
url = {http://arxiv.org/abs/2006.05990},
abstract = {In recent years, on-policy reinforcement learning (RL) has been successfully applied to many different continuous control tasks. While RL algorithms are often conceptually simple, their state-of-the-art implementations take numerous low- and high-level design decisions that strongly affect the performance of the resulting agents. Those choices are usually not extensively discussed in the literature, leading to discrepancy between published descriptions of algorithms and their implementations. This makes it hard to attribute progress in RL and slows down overall progress [Engstrom'20]. As a step towards filling that gap, we implement {\textgreater}50 such ``choices'' in a unified on-policy RL framework, allowing us to investigate their impact in a large-scale empirical study. We train over 250'000 agents in five continuous control environments of different complexity and provide insights and practical recommendations for on-policy training of RL agents.},
urldate = {2021-05-27},
journal = {arXiv:2006.05990 [cs, stat]},
author = {Andrychowicz, Marcin and Raichuk, Anton and Stańczyk, Piotr and Orsini, Manu and Girgin, Sertan and Marinier, Raphael and Hussenot, Léonard and Geist, Matthieu and Pietquin, Olivier and Michalski, Marcin and Gelly, Sylvain and Bachem, Olivier},
month = jun,
year = {2020},
note = {arXiv: 2006.05990},
keywords = {Computer Science - Machine Learning, Statistics - Machine Learning},
file = {Andrychowicz et al_2020_What Matters In On-Policy Reinforcement Learning.pdf:/home/dferigo/Zotero/storage/WZMAKPPK/Andrychowicz et al_2020_What Matters In On-Policy Reinforcement Learning.pdf:application/pdf;arXiv.org Snapshot:/home/dferigo/Zotero/storage/U5JYD637/2006.html:text/html},
}
@article{korber_comparing_2021,
title = {Comparing {Popular} {Simulation} {Environments} in the {Scope} of {Robotics} and {Reinforcement} {Learning}},
url = {http://arxiv.org/abs/2103.04616},
abstract = {This letter compares the performance of four different, popular simulation environments for robotics and reinforcement learning (RL) through a series of benchmarks. The benchmarked scenarios are designed carefully with current industrial applications in mind. Given the need to run simulations as fast as possible to reduce the real-world training time of the RL agents, the comparison includes not only different simulation environments but also different hardware configurations, ranging from an entry-level notebook up to a dual CPU high performance server. We show that the chosen simulation environments benefit the most from single core performance. Yet, using a multi core system, multiple simulations could be run in parallel to increase the performance.},
language = {en},
urldate = {2022-04-20},
journal = {arXiv:2103.04616 [cs]},
author = {Körber, Marian and Lange, Johann and Rediske, Stephan and Steinmann, Simon and Glück, Roland},
month = mar,
year = {2021},
note = {arXiv: 2103.04616},
keywords = {Computer Science - Machine Learning, Computer Science - Robotics, Computer Science - Artificial Intelligence},
file = {Körber et al_2021_Comparing Popular Simulation Environments in the Scope of Robotics and.pdf:/home/dferigo/Zotero/storage/LWVBUMDD/Körber et al_2021_Comparing Popular Simulation Environments in the Scope of Robotics and.pdf:application/pdf},
}
@inproceedings{kim_survey_2021,
title = {A {Survey} on {Simulation} {Environments} for {Reinforcement} {Learning}},
doi = {10.1109/UR52253.2021.9494694},
abstract = {Most of the recent studies of reinforcement learning and robotics basically employ computer simulation due to the advantages of time and cost. For this reason, users have to spare time for investigation in order to choose optimal environment for their purposes. This paper presents a survey result that can be a guidance in user’s choice for simulation environments. The investigation result includes features, brief historical backgrounds, license policies and formats for robot and object description of the eight most popular environments in robot RL studies. We also propose a quantitative evaluation method for those simulation environments considering the features and a pragmatic point of view.},
booktitle = {2021 18th {International} {Conference} on {Ubiquitous} {Robots} ({UR})},
author = {Kim, Taewoo and Jang, Minsu and Kim, Jaehong},
month = jul,
year = {2021},
note = {ISSN: 2325-033X},
keywords = {Analytical models, Computer simulation, Lead, Licenses, Reinforcement learning, Rendering (computer graphics), Software},
pages = {63--67},
file = {IEEE Xplore Abstract Record:/home/dferigo/Zotero/storage/K5USS9RE/9494694.html:text/html;Kim et al_2021_A Survey on Simulation Environments for Reinforcement Learning.pdf:/home/dferigo/Zotero/storage/2HCEHSZB/Kim et al_2021_A Survey on Simulation Environments for Reinforcement Learning.pdf:application/pdf},
}
@inproceedings{belbute-peres_end--end_2018,
title = {End-to-{End} {Differentiable} {Physics} for {Learning} and {Control}},
language = {en},
author = {Belbute-Peres, Filipe de A and Smith, Kevin A and Allen, Kelsey R and Tenenbaum, Joshua B and Kolter, J Zico},
year = {2018},
pages = {12},
file = {Belbute-Peres et al_2018_End-to-End Differentiable Physics for Learning and Control.pdf:/home/dferigo/Zotero/storage/YX2WUZLM/Belbute-Peres et al_2018_End-to-End Differentiable Physics for Learning and Control.pdf:application/pdf},
}
@article{rackauckas_universal_2021,
title = {Universal {Differential} {Equations} for {Scientific} {Machine} {Learning}},
url = {http://arxiv.org/abs/2001.04385},
abstract = {In the context of science, the well-known adage “a picture is worth a thousand words” might well be “a model is worth a thousand datasets.” In this manuscript we introduce the SciML software ecosystem as a tool for mixing the information of physical laws and scientific models with data-driven machine learning approaches. We describe a mathematical object, which we denote universal differential equations (UDEs), as the unifying framework connecting the ecosystem. We show how a wide variety of applications, from automatically discovering biological mechanisms to solving high-dimensional Hamilton-Jacobi-Bellman equations, can be phrased and efficiently handled through the UDE formalism and its tooling. We demonstrate the generality of the software tooling to handle stochasticity, delays, and implicit constraints. This funnels the wide variety of SciML applications into a core set of training mechanisms which are highly optimized, stabilized for stiff equations, and compatible with distributed parallelism and GPU accelerators.},
language = {en},
urldate = {2022-04-20},
journal = {arXiv:2001.04385 [cs, math, q-bio, stat]},
author = {Rackauckas, Christopher and Ma, Yingbo and Martensen, Julius and Warner, Collin and Zubov, Kirill and Supekar, Rohit and Skinner, Dominic and Ramadhan, Ali and Edelman, Alan},
month = nov,
year = {2021},
note = {arXiv: 2001.04385},
keywords = {Computer Science - Machine Learning, Mathematics - Dynamical Systems, Quantitative Biology - Quantitative Methods, Statistics - Machine Learning},
file = {Rackauckas et al_2021_Universal Differential Equations for Scientific Machine Learning.pdf:/home/dferigo/Zotero/storage/3ALYEQ65/Rackauckas et al_2021_Universal Differential Equations for Scientific Machine Learning.pdf:application/pdf},
}
@article{singh_efficient_2022,
title = {Efficient {Analytical} {Derivatives} of {Rigid}-{Body} {Dynamics} using {Spatial} {Vector} {Algebra}},
volume = {7},
issn = {2377-3766, 2377-3774},
url = {http://arxiv.org/abs/2105.05102},
doi = {10.1109/LRA.2022.3141194},
abstract = {An essential need for many model-based robot control algorithms is the ability to quickly and accurately compute partial derivatives of the equations of motion. State of the art approaches to this problem often use analytical methods based on the chain rule applied to existing dynamics algorithms. Although these methods are an improvement over finite differences in terms of accuracy, they are not always the most efficient. In this paper, we contribute new closed-form expressions for the first-order partial derivatives of inverse dynamics, leading to a recursive algorithm. The algorithm is benchmarked against chain-rule approaches in Fortran and against an existing algorithm from the Pinocchio library in C++. Tests consider computing the partial derivatives of inverse and forward dynamics for robots ranging from kinematic chains to humanoids and quadrupeds. Compared to the previous open-source Pinocchio implementation, our new analytical results uncover a key computational restructuring that enables efficiency gains. Speedups of up to 1.4x are reported for calculating the partial derivatives of inverse dynamics for the 50-dof Talos humanoid.},
language = {en},
number = {2},
urldate = {2022-04-20},
journal = {IEEE Robotics and Automation Letters},
author = {Singh, Shubham and Russell, Ryan P. and Wensing, Patrick M.},
month = apr,
year = {2022},
note = {arXiv: 2105.05102},
keywords = {Computer Science - Robotics},
pages = {1776--1783},
file = {Singh et al_2022_Efficient Analytical Derivatives of Rigid-Body Dynamics using Spatial Vector.pdf:/home/dferigo/Zotero/storage/EU2RG38L/Singh et al_2022_Efficient Analytical Derivatives of Rigid-Body Dynamics using Spatial Vector.pdf:application/pdf},
}
@inproceedings{carpentier_analytical_2018,
title = {Analytical {Derivatives} of {Rigid} {Body} {Dynamics} {Algorithms}},
isbn = {978-0-9923747-4-7},
url = {http://www.roboticsproceedings.org/rss14/p38.pdf},
doi = {10.15607/RSS.2018.XIV.038},
abstract = {Rigid body dynamics is a well-established framework in robotics. It can be used to expose the analytic form of kinematic and dynamic functions of the robot model. So far, two major algorithms, namely the recursive Newton-Euler algorithm (RNEA) and the articulated body algorithm (ABA), have been proposed to compute the inverse dynamics and the forward dynamics in a few microseconds. Evaluating their derivatives is an important challenge for various robotic applications (optimal control, estimation, co-design or reinforcement learning). However it remains time consuming, whether using finite differences or automatic differentiation. In this paper, we propose new algorithms to efficiently compute them thanks to closed-form formulations. Using the chain rule and adequate algebraic differentiation of spatial algebra, we firstly differentiate explicitly RNEA. Then, using properties about the derivative of function composition, we show that the same algorithm can also be used to compute the derivatives of ABA with a marginal additional cost. For this purpose, we introduce a new algorithm to compute the inverse of the joint-space inertia matrix, without explicitly computing the matrix itself. All the algorithms are implemented in our open-source C++ framework called Pinocchio. Benchmarks show computational costs varying between 3 microseconds (for a 7-dof arm) up to 17 microseconds (for a 36-dof humanoid), outperforming the alternative approaches of the state of the art.},
language = {en},
urldate = {2021-05-18},
booktitle = {Robotics: {Science} and {Systems} {XIV}},
publisher = {Robotics: Science and Systems Foundation},
author = {Carpentier, Justin and Mansard, Nicolas},
month = jun,
year = {2018},
file = {Carpentier_Mansard_2018_Analytical Derivatives of Rigid Body Dynamics Algorithms.pdf:/home/dferigo/Zotero/storage/8XI5XBSA/Carpentier_Mansard_2018_Analytical Derivatives of Rigid Body Dynamics Algorithms.pdf:application/pdf},
}
@article{innes_differentiable_2019,
title = {A {Differentiable} {Programming} {System} to {Bridge} {Machine} {Learning} and {Scientific} {Computing}},
url = {http://arxiv.org/abs/1907.07587},
abstract = {Scientific computing is increasingly incorporating the advancements in machine learning and the ability to work with large amounts of data. At the same time, machine learning models are becoming increasingly sophisticated and exhibit many features often seen in scientific computing, stressing the capabilities of machine learning frameworks. Just as the disciplines of scientific computing and machine learning have shared common underlying infrastructure in the form of numerical linear algebra, we now have the opportunity to further share new computational infrastructure, and thus ideas, in the form of Differentiable Programming. We describe Zygote, a Differentiable Programming system that is able to take gradients of general program structures. We implement this system in the Julia programming language. Our system supports almost all language constructs (control flow, recursion, mutation, etc.) and compiles high-performance code without requiring any user intervention or refactoring to stage computations. This enables an expressive programming model for deep learning, but more importantly, it enables us to incorporate a large ecosystem of libraries in our models in a straightforward way. We discuss our approach to automatic differentiation, including its support for advanced techniques such as mixed-mode, complex and checkpointed differentiation, and present several examples of differentiating programs.},
urldate = {2019-09-14},
journal = {arXiv:1907.07587 [cs]},
author = {Innes, Mike and Edelman, Alan and Fischer, Keno and Rackauckas, Chris and Saba, Elliot and Shah, Viral B. and Tebbutt, Will},
month = jul,
year = {2019},
note = {arXiv: 1907.07587},
keywords = {Computer Science - Machine Learning, Computer Science - Programming Languages},
file = {arXiv.org Snapshot:/home/dferigo/Zotero/storage/GUIDB8WM/1907.html:text/html;Innes et al_2019_A Differentiable Programming System to Bridge Machine Learning and Scientific.pdf:/home/dferigo/Zotero/storage/5DV2YZ5K/Innes et al_2019_A Differentiable Programming System to Bridge Machine Learning and Scientific.pdf:application/pdf},
}
@misc{nvidia_nvidia_2011,
title = {Nvidia {PhysX}},
url = {https://developer.nvidia.com/physx-sdk},
author = {{NVIDIA}},
year = {2011},
}
@misc{nvidia_nvidia_2018,
title = {Nvidia {Isaac}},
url = {https://developer.nvidia.com/isaac-sdk},
author = {{NVIDIA}},
year = {2018},
}
@article{zhao_sim--real_2020,
title = {Sim-to-{Real} {Transfer} in {Deep} {Reinforcement} {Learning} for {Robotics}: a {Survey}},
shorttitle = {Sim-to-{Real} {Transfer} in {Deep} {Reinforcement} {Learning} for {Robotics}},
url = {http://arxiv.org/abs/2009.13303},
abstract = {Deep reinforcement learning has recently seen huge success across multiple areas in the robotics domain. Owing to the limitations of gathering real-world data, i.e., sample inefficiency and the cost of collecting it, simulation environments are utilized for training the different agents. This not only aids in providing a potentially infinite data source, but also alleviates safety concerns with real robots. Nonetheless, the gap between the simulated and real worlds degrades the performance of the policies once the models are transferred into real robots. Multiple research efforts are therefore now being directed towards closing this sim-to-real gap and accomplish more efficient policy transfer. Recent years have seen the emergence of multiple methods applicable to different domains, but there is a lack, to the best of our knowledge, of a comprehensive review summarizing and putting into context the different methods. In this survey paper, we cover the fundamental background behind sim-to-real transfer in deep reinforcement learning and overview the main methods being utilized at the moment: domain randomization, domain adaptation, imitation learning, meta-learning and knowledge distillation. We categorize some of the most relevant recent works, and outline the main application scenarios. Finally, we discuss the main opportunities and challenges of the different approaches and point to the most promising directions.},
language = {en},
urldate = {2020-10-02},
journal = {arXiv:2009.13303 [cs]},
author = {Zhao, Wenshuai and Queralta, Jorge Peña and Westerlund, Tomi},
month = sep,
year = {2020},
note = {arXiv: 2009.13303},
keywords = {Computer Science - Machine Learning, Computer Science - Robotics},
file = {Zhao et al_2020_Sim-to-Real Transfer in Deep Reinforcement Learning for Robotics.pdf:/home/dferigo/Zotero/storage/BA2MFGQZ/Zhao et al_2020_Sim-to-Real Transfer in Deep Reinforcement Learning for Robotics.pdf:application/pdf},
}
@article{muratore_robot_2022,
title = {Robot {Learning} from {Randomized} {Simulations}: {A} {Review}},
shorttitle = {Robot {Learning} from {Randomized} {Simulations}},
url = {http://arxiv.org/abs/2111.00956},
abstract = {The rise of deep learning has caused a paradigm shift in robotics research, favoring methods that require large amounts of data. Unfortunately, it is prohibitively expensive to generate such data sets on a physical platform. Therefore, state-of-the-art approaches learn in simulation where data generation is fast as well as inexpensive and subsequently transfer the knowledge to the real robot (sim-to-real). Despite becoming increasingly realistic, all simulators are by construction based on models, hence inevitably imperfect. This raises the question of how simulators can be modified to facilitate learning robot control policies and overcome the mismatch between simulation and reality, often called the ‘reality gap’. We provide a comprehensive review of sim-to-real research for robotics, focusing on a technique named ‘domain randomization’ which is a method for learning from randomized simulations.},
language = {en},
urldate = {2022-04-20},
journal = {arXiv:2111.00956 [cs]},
author = {Muratore, Fabio and Ramos, Fabio and Turk, Greg and Yu, Wenhao and Gienger, Michael and Peters, Jan},
month = jan,
year = {2022},
note = {arXiv: 2111.00956},
keywords = {Computer Science - Machine Learning, Computer Science - Robotics},
file = {Muratore et al_2022_Robot Learning from Randomized Simulations.pdf:/home/dferigo/Zotero/storage/BJEATQL9/Muratore et al_2022_Robot Learning from Randomized Simulations.pdf:application/pdf},
}
@article{li_reinforcement_2021,
title = {Reinforcement {Learning} for {Robust} {Parameterized} {Locomotion} {Control} of {Bipedal} {Robots}},
url = {http://arxiv.org/abs/2103.14295},
abstract = {Developing robust walking controllers for bipedal robots is a challenging endeavor. Traditional model-based locomotion controllers require simplifying assumptions and careful modelling; any small errors can result in unstable control. To address these challenges for bipedal locomotion, we present a model-free reinforcement learning framework for training robust locomotion policies in simulation, which can then be transferred to a real bipedal Cassie robot. To facilitate sim-to-real transfer, domain randomization is used to encourage the policies to learn behaviors that are robust across variations in system dynamics. The learned policies enable Cassie to perform a set of diverse and dynamic behaviors, while also being more robust than traditional controllers and prior learning-based methods that use residual control. We demonstrate this on versatile walking behaviors such as tracking a target walking velocity, walking height, and turning yaw.},
urldate = {2021-05-27},
journal = {arXiv:2103.14295 [cs, eess]},
author = {Li, Zhongyu and Cheng, Xuxin and Peng, Xue Bin and Abbeel, Pieter and Levine, Sergey and Berseth, Glen and Sreenath, Koushil},
month = mar,
year = {2021},
note = {arXiv: 2103.14295},
keywords = {Computer Science - Machine Learning, Computer Science - Robotics, Computer Science - Artificial Intelligence, Electrical Engineering and Systems Science - Systems and Control},
file = {Li et al. - 2021 - Reinforcement Learning for Robust Parameterized Lo.pdf:/home/dferigo/Zotero/storage/CG5A5LGU/Li et al. - 2021 - Reinforcement Learning for Robust Parameterized Lo.pdf:application/pdf},
}
@inproceedings{seung-joon_yi_learning_2011,
address = {Shanghai, China},
title = {Learning full body push recovery control for small humanoid robots},
isbn = {978-1-61284-386-5},
url = {http://ieeexplore.ieee.org/document/5980531/},
doi = {10.1109/ICRA.2011.5980531},
abstract = {Dynamic bipedal walking is susceptible to external disturbances and surface irregularities, requiring robust feedback control to remain stable. In this work, we present a practical hierarchical push recovery strategy that can be readily implemented on a wide range of humanoid robots. Our method consists of low level controllers that perform simple, biomechanically motivated push recovery actions and a high level controller that combines the low level controllers according to proprioceptive and inertial sensory signals and the current robot state. Reinforcement learning is used to optimize the parameters of the controllers in order to maximize the stability of the robot over a broad range of external disturbances. The controllers are learned on a physical simulation and implemented on the Darwin-HP humanoid robot platform, and the resulting experiments demonstrate effective full body push recovery behaviors during dynamic walking.},
urldate = {2021-05-27},
booktitle = {2011 {IEEE} {International} {Conference} on {Robotics} and {Automation}},
publisher = {IEEE},
author = {Yi, Seung-Joon and Zhang, Byoung-Tak and Hong, Dennis and Lee, Daniel D.},
month = may,
year = {2011},
pages = {2047--2052},
file = {Seung-Joon Yi et al. - 2011 - Learning full body push recovery control for small.pdf:/home/dferigo/Zotero/storage/N3N4IJG7/Seung-Joon Yi et al. - 2011 - Learning full body push recovery control for small.pdf:application/pdf},
}
@article{smith_legged_2021,
title = {Legged {Robots} that {Keep} on {Learning}: {Fine}-{Tuning} {Locomotion} {Policies} in the {Real} {World}},
shorttitle = {Legged {Robots} that {Keep} on {Learning}},
url = {http://arxiv.org/abs/2110.05457},
abstract = {Legged robots are physically capable of traversing a wide range of challenging environments, but designing controllers that are sufficiently robust to handle this diversity has been a long-standing challenge in robotics. Reinforcement learning presents an appealing approach for automating the controller design process and has been able to produce remarkably robust controllers when trained in a suitable range of environments. However, it is difficult to predict all likely conditions the robot will encounter during deployment and enumerate them at training-time. What if instead of training controllers that are robust enough to handle any eventuality, we enable the robot to continually learn in any setting it finds itself in? This kind of real-world reinforcement learning poses a number of challenges, including efficiency, safety, and autonomy. To address these challenges, we propose a practical robot reinforcement learning system for fine-tuning locomotion policies in the real world. We demonstrate that a modest amount of real-world training can substantially improve performance during deployment, and this enables a real A1 quadrupedal robot to autonomously fine-tune multiple locomotion skills in a range of environments, including an outdoor lawn and a variety of indoor terrains.},
urldate = {2021-10-18},
journal = {arXiv:2110.05457 [cs]},
author = {Smith, Laura and Kew, J. Chase and Peng, Xue Bin and Ha, Sehoon and Tan, Jie and Levine, Sergey},
month = oct,
year = {2021},
note = {arXiv: 2110.05457},
keywords = {Computer Science - Robotics},
file = {Smith et al. - 2021 - Legged Robots that Keep on Learning Fine-Tuning L.pdf:/home/dferigo/Zotero/storage/FGTZX2P6/Smith et al. - 2021 - Legged Robots that Keep on Learning Fine-Tuning L.pdf:application/pdf},
}
@inproceedings{bloesch_towards_2022,
title = {Towards {Real} {Robot} {Learning} in the {Wild}: {A} {Case} {Study} in {Bipedal} {Locomotion}},
shorttitle = {Towards {Real} {Robot} {Learning} in the {Wild}},
abstract = {Algorithms for self-learning systems have made considerable progress in recent years, yet safety concerns and the need for additional instrumentation have so far largely limited learning experiments with real robots to well controlled lab settings. In this paper, we demonstrate how a small bipedal robot can autonomously learn to walk with minimal human intervention and with minimal instrumentation of the environment. We employ data-efficient off-policy deep reinforcement learning to learn to walk end-to-end, directly on hardware, using rewards that are computed exclusively from proprioceptive sensing. To allow the robot to autonomously adapt its behaviour to its environment, we additionally provide the agent with raw RGB camera images as input. By deploying two robots in different geographic locations while sharing data in a distributed learning setup, we achieve higher throughput and greater diversity of the training data. Our learning experiments constitute a step towards the long-term vision of learning “in the wild” for legged robots, and, to our knowledge, represent the first demonstration of learning a deep neural network controller for bipedal locomotion directly on hardware.},
language = {en},
booktitle = {Proceedings of the 5th {Conference} on {Robot} {Learning}},
publisher = {PMLR},
author = {Bloesch, Michael and Humplik, Jan and Patraucean, Viorica and Hafner, Roland and Haarnoja, Tuomas and Byravan, Arunkumar and Siegel, Noah Yamamoto and Tunyasuvunakool, Saran and Casarini, Federico and Batchelor, Nathan and Romano, Francesco and Saliceti, Stefano and Riedmiller, Martin and Eslami, S. M. Ali and Heess, Nicolas},
month = jan,
year = {2022},
note = {ISSN: 2640-3498},
pages = {1502--1511},
file = {Bloesch et al_2022_Towards Real Robot Learning in the Wild.pdf:/home/dferigo/Zotero/storage/MUVLGR7I/Bloesch et al_2022_Towards Real Robot Learning in the Wild.pdf:application/pdf},
}
@inproceedings{castillo_robust_2021,
address = {Prague, Czech Republic},
title = {Robust {Feedback} {Motion} {Policy} {Design} {Using} {Reinforcement} {Learning} on a {3D} {Digit} {Bipedal} {Robot}},
isbn = {978-1-66541-714-3},
url = {https://ieeexplore.ieee.org/document/9636467/},
doi = {10.1109/IROS51168.2021.9636467},
abstract = {In this paper, a hierarchical and robust framework for learning bipedal locomotion is presented and successfully implemented on the 3D biped robot Digit built by Agility Robotics. We propose a cascade-structure controller that combines the learning process with intuitive feedback regulations. This design allows the framework to realize robust and stable walking with a reduced-dimensional state and action spaces of the policy, significantly simplifying the design and increasing the sampling efficiency of the learning method. The inclusion of feedback regulation into the framework improves the robustness of the learned walking gait and ensures the success of the sim-to-real transfer of the proposed controller with minimal tuning. We specifically present a learning pipeline that considers hardware-feasible initial poses of the robot within the learning process to ensure the initial state of the learning is replicated as close as possible to the initial state of the robot in hardware experiments. Finally, we demonstrate the feasibility of our method by successfully transferring the learned policy in simulation to the Digit robot hardware, realizing sustained walking gaits under external force disturbances and challenging terrains not incurred during the training process. To the best of our knowledge, this is the first time a learning-based policy is transferred successfully to the Digit robot in hardware experiments.},
language = {en},
urldate = {2022-04-19},
booktitle = {2021 {IEEE}/{RSJ} {International} {Conference} on {Intelligent} {Robots} and {Systems} ({IROS})},
publisher = {IEEE},
author = {Castillo, Guillermo A. and Weng, Bowen and Zhang, Wei and Hereid, Ayonga},
month = sep,
year = {2021},
pages = {5136--5143},
file = {Castillo et al. - 2021 - Robust Feedback Motion Policy Design Using Reinfor.pdf:/home/dferigo/Zotero/storage/43DSYDWU/Castillo et al. - 2021 - Robust Feedback Motion Policy Design Using Reinfor.pdf:application/pdf},
}
@inproceedings{rudin_learning_2022,
title = {Learning to {Walk} in {Minutes} {Using} {Massively} {Parallel} {Deep} {Reinforcement} {Learning}},
url = {https://proceedings.mlr.press/v164/rudin22a.html},
abstract = {In this work, we present and study a training set-up that achieves fast policy generation for real-world robotic tasks by using massive parallelism on a single workstation GPU. We analyze and discuss the impact of different training algorithm components in the massively parallel regime on the final policy performance and training times. In addition, we present a novel game-inspired curriculum that is well suited for training with thousands of simulated robots in parallel. We evaluate the approach by training the quadrupedal robot ANYmal to walk on challenging terrain. The parallel approach allows training policies for flat terrain in under four minutes, and in twenty minutes for uneven terrain. This represents a speedup of multiple orders of magnitude compared to previous work. Finally, we transfer the policies to the real robot to validate the approach. We open-source our training code to help accelerate further research in the field of learned legged locomotion: https://leggedrobotics.github.io/legged\_gym/.},
language = {en},
urldate = {2022-04-19},
booktitle = {Proceedings of the 5th {Conference} on {Robot} {Learning}},
publisher = {PMLR},
author = {Rudin, Nikita and Hoeller, David and Reist, Philipp and Hutter, Marco},
month = jan,
year = {2022},
note = {ISSN: 2640-3498},
pages = {91--100},
file = {Rudin et al_2022_Learning to Walk in Minutes Using Massively Parallel Deep Reinforcement Learning.pdf:/home/dferigo/Zotero/storage/UKAV6F4D/Rudin et al_2022_Learning to Walk in Minutes Using Massively Parallel Deep Reinforcement Learning.pdf:application/pdf},
}
@inproceedings{gangapurwala_real-time_2021,
address = {Xi'an, China},
title = {Real-{Time} {Trajectory} {Adaptation} for {Quadrupedal} {Locomotion} using {Deep} {Reinforcement} {Learning}},
isbn = {978-1-72819-077-8},
url = {https://ieeexplore.ieee.org/document/9561639/},
doi = {10.1109/ICRA48506.2021.9561639},
abstract = {We present a control architecture for real-time adaptation and tracking of trajectories generated using a terrain-aware trajectory optimization solver. This approach enables us to circumvent the computationally exhaustive task of online trajectory optimization, and further introduces a control solution robust to systems modeled with approximated dynamics. We train a policy using deep reinforcement learning (RL) to introduce additive deviations to a reference trajectory in order to generate a feedback-based trajectory tracking system for a quadrupedal robot. We train this policy across a multitude of simulated terrains and ensure its generality by introducing training methods that avoid overfitting and convergence towards local optima. Additionally, in order to capture terrain information, we include a latent representation of the height maps in the observation space of the RL environment as a form of exteroceptive feedback. We test the performance of our trained policy by tracking the corrected set points using a model-based whole-body controller and compare it with the tracking behavior obtained without the corrective feedback in several simulation environments, and show that introducing the corrective feedback results in increase of the success rate from 72.7\% to 92.4\% for tracking precomputed dynamic long horizon trajectories on flat terrain and from 47.5\% to 80.3\% on a complex modular uneven terrain. We also show successful transfer of our training approach to the real physical system and further present cogent arguments in support of our framework.},
language = {en},
urldate = {2022-04-19},
booktitle = {2021 {IEEE} {International} {Conference} on {Robotics} and {Automation} ({ICRA})},
publisher = {IEEE},
author = {Gangapurwala, Siddhant and Geisert, Mathieu and Orsolino, Romeo and Fallon, Maurice and Havoutis, Ioannis},
month = may,
year = {2021},
pages = {5973--5979},
file = {Gangapurwala et al_2021_Real-Time Trajectory Adaptation for Quadrupedal Locomotion using Deep.pdf:/home/dferigo/Zotero/storage/8MU3IKJR/Gangapurwala et al_2021_Real-Time Trajectory Adaptation for Quadrupedal Locomotion using Deep.pdf:application/pdf},
}
@article{bellegarda_robust_2021,
title = {Robust {High}-speed {Running} for {Quadruped} {Robots} via {Deep} {Reinforcement} {Learning}},
url = {http://arxiv.org/abs/2103.06484},
abstract = {Deep reinforcement learning has emerged as a popular and powerful way to develop locomotion controllers for quadruped robots. Common approaches have largely focused on learning actions directly in joint space, or learning to modify and offset foot positions produced by trajectory generators. Both approaches typically require careful reward shaping and training for millions of time steps, and with trajectory generators introduce human bias into the resulting control policies. In this paper, we instead explore learning foot positions in Cartesian space, which we track with impedance control, for a task of running as fast as possible subject to environmental disturbances. Compared with other action spaces, we observe less needed reward shaping, much improved sample efficiency, the emergence of natural gaits such as galloping and bounding, and ease of sim-to-sim transfer. Policies can be learned in only a few million time steps, even for challenging tasks of running over rough terrain with loads of over 100\% of the nominal quadruped mass. Training occurs in PyBullet, and we perform a sim-to-sim transfer to Gazebo, where our quadruped is able to run at over 4 m/s without a load, and 3.5 m/s with a 10 kg load, which is over 83\% of the nominal quadruped mass. Video results can be found at https://youtu.be/roE1vxpEWfw.},
language = {en},
urldate = {2021-05-27},
journal = {arXiv:2103.06484 [cs, eess]},
author = {Bellegarda, Guillaume and Nguyen, Quan},
month = mar,
year = {2021},
note = {sim-to-sim},
keywords = {Computer Science - Machine Learning, Computer Science - Robotics, Electrical Engineering and Systems Science - Systems and Control, gazebo, locomotion, pybullet, quadrupeds, rl, sim-to-sim, simulation},
file = {Bellegarda_Nguyen_2021_Robust High-speed Running for Quadruped Robots via Deep Reinforcement Learning.pdf:/home/dferigo/Zotero/storage/5NMBKYDC/Bellegarda_Nguyen_2021_Robust High-speed Running for Quadruped Robots via Deep Reinforcement Learning.pdf:application/pdf},
}
@article{haarnoja_soft_2018,
title = {Soft {Actor}-{Critic}: {Off}-{Policy} {Maximum} {Entropy} {Deep} {Reinforcement} {Learning} with a {Stochastic} {Actor}},
shorttitle = {Soft {Actor}-{Critic}},
url = {http://arxiv.org/abs/1801.01290},
abstract = {Model-free deep reinforcement learning (RL) algorithms have been demonstrated on a range of challenging decision making and control tasks. However, these methods typically suffer from two major challenges: very high sample complexity and brittle convergence properties, which necessitate meticulous hyperparameter tuning. Both of these challenges severely limit the applicability of such methods to complex, real-world domains. In this paper, we propose soft actor-critic, an off-policy actor-critic deep RL algorithm based on the maximum entropy reinforcement learning framework. In this framework, the actor aims to maximize expected reward while also maximizing entropy. That is, to succeed at the task while acting as randomly as possible. Prior deep RL methods based on this framework have been formulated as Q-learning methods. By combining off-policy updates with a stable stochastic actor-critic formulation, our method achieves state-of-the-art performance on a range of continuous control benchmark tasks, outperforming prior on-policy and off-policy methods. Furthermore, we demonstrate that, in contrast to other off-policy algorithms, our approach is very stable, achieving very similar performance across different random seeds.},
urldate = {2020-05-07},
journal = {arXiv:1801.01290 [cs, stat]},
author = {Haarnoja, Tuomas and Zhou, Aurick and Abbeel, Pieter and Levine, Sergey},
month = aug,
year = {2018},
note = {arXiv: 1801.01290},
keywords = {Computer Science - Machine Learning, Computer Science - Artificial Intelligence, Statistics - Machine Learning},
file = {arXiv.org Snapshot:/home/dferigo/Zotero/storage/53U5GB7V/1801.html:text/html;Haarnoja et al_2018_Soft Actor-Critic.pdf:/home/dferigo/Zotero/storage/KI69MXY9/Haarnoja et al_2018_Soft Actor-Critic.pdf:application/pdf},
}
@article{mnih_human-level_2015,
title = {Human-level control through deep reinforcement learning},
volume = {518},
issn = {0028-0836, 1476-4687},
url = {http://www.nature.com/articles/nature14236},
doi = {10.1038/nature14236},
language = {en},
number = {7540},
urldate = {2018-06-23},
journal = {Nature},
author = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A. and Veness, Joel and Bellemare, Marc G. and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K. and Ostrovski, Georg and Petersen, Stig and Beattie, Charles and Sadik, Amir and Antonoglou, Ioannis and King, Helen and Kumaran, Dharshan and Wierstra, Daan and Legg, Shane and Hassabis, Demis},
month = feb,
year = {2015},
keywords = {deepmind},
pages = {529--533},
file = {Mnih et al_2015_Human-level control through deep reinforcement learning.pdf:/home/dferigo/Zotero/storage/WC7QJIMF/Mnih et al_2015_Human-level control through deep reinforcement learning.pdf:application/pdf},
}
@inproceedings{kohl_policy_2004,
address = {New Orleans, LA, USA},
title = {Policy gradient reinforcement learning for fast quadrupedal locomotion},
isbn = {978-0-7803-8232-9},
url = {http://ieeexplore.ieee.org/document/1307456/},
doi = {10.1109/ROBOT.2004.1307456},
abstract = {This paper presents a machine learning approach to optimizing a quadrupedal trot gait for forward speed. Given a parameterized walk designed for a specific robot, we propose using a form of policy gradient reinforcement learning to automatically search the set of possible parameters with the goal of finding the fastest possible walk. We implement and test our approach on a commercially available quadrupedal robot platform, namely the Sony Aibo robot. After about three hours of learning, all on the physical robots and with no human intervention other than to change the batteries, the robots achieved a gait faster than any previously known gait for the Aibo, significantly outperforming a variety of existing hand-coded and learned solutions.},
language = {en},
urldate = {2022-04-19},
booktitle = {{IEEE} {International} {Conference} on {Robotics} and {Automation}, 2004. {Proceedings}. {ICRA} '04. 2004},
publisher = {IEEE},
author = {Kohl, N. and Stone, P.},
year = {2004},
pages = {2619--2624 Vol.3},
file = {Kohl_Stone_2004_Policy gradient reinforcement learning for fast quadrupedal locomotion.pdf:/home/dferigo/Zotero/storage/6VYM3T8R/Kohl_Stone_2004_Policy gradient reinforcement learning for fast quadrupedal locomotion.pdf:application/pdf},
}
@inproceedings{atkeson_robot_1997,
title = {Robot {Learning} {From} {Demonstration}},
abstract = {The goal of robot learning from demonstration is to have a robot learn from watching a demonstration of the task to be performed. In our approach to learning from demonstration the robot learns a reward function from the demonstration and a task model from repeated attempts to perform the task. A policy is computed based on the learned reward function and task model. Lessons learned from an implementation on an anthropomorphic robot arm using a pendulum swing up task include 1) simply mimicking demonstrated motions is not adequate to perform this task, 2) a task planner can use a learned model and reward function to compute an appropriate policy, 3) this model-based planning process supports rapid learning, 4) both parametric and nonparametric models can be learned and used, and 5) incorporating a task level direct learning component, which is non-model-based, in addition to the model-based planner, is useful in compensating for structural modeling errors and slow model learning.},
language = {en},
author = {Atkeson, Christopher G and Schaal, Stefan},
year = {1997},
pages = {9},
file = {Atkeson_Schaal_Robot Learning From Demonstration.pdf:/home/dferigo/Zotero/storage/GNWQGNQS/Atkeson_Schaal_Robot Learning From Demonstration.pdf:application/pdf},
}
@inproceedings{kolter_hierarchical_2007,
title = {Hierarchical {Apprenticeship} {Learning} with {Application} to {Quadruped} {Locomotion}},
abstract = {We consider apprenticeship learning—learning from expert demonstrations—in the setting of large, complex domains. Past work in apprenticeship learning requires that the expert demonstrate complete trajectories through the domain. However, in many problems even an expert has difficulty controlling the system, which makes this approach infeasible. For example, consider the task of teaching a quadruped robot to navigate over extreme terrain; demonstrating an optimal policy (i.e., an optimal set of foot locations over the entire terrain) is a highly non-trivial task, even for an expert. In this paper we propose a method for hierarchical apprenticeship learning, which allows the algorithm to accept isolated advice at different hierarchical levels of the control task. This type of advice is often feasible for experts to give, even if the expert is unable to demonstrate complete trajectories. This allows us to extend the apprenticeship learning paradigm to much larger, more challenging domains. In particular, in this paper we apply the hierarchical apprenticeship learning algorithm to the task of quadruped locomotion over extreme terrain, and achieve, to the best of our knowledge, results superior to any previously published work.},
language = {en},
author = {Kolter, J Z and Abbeel, Pieter and Ng, Andrew Y},
year = {2007},
file = {Kolter et al. - Hierarchical Apprenticeship Learning with Applicat.pdf:/home/dferigo/Zotero/storage/3YAK7MMU/Kolter et al. - Hierarchical Apprenticeship Learning with Applicat.pdf:application/pdf},
}
@article{theodorou_generalized_2010,
title = {A {Generalized} {Path} {Integral} {Control} {Approach} to {Reinforcement} {Learning}},
volume = {11},
abstract = {With the goal to generate more scalable algorithms with higher efficiency and fewer open parameters, reinforcement learning (RL) has recently moved towards combining classical techniques from optimal control and dynamic programming with modern learning techniques from statistical estimation theory. In this vein, this paper suggests to use the framework of stochastic optimal control with path integrals to derive a novel approach to RL with parameterized policies. While solidly grounded in value function estimation and optimal control based on the stochastic Hamilton-Jacobi-Bellman (HJB) equations, policy improvements can be transformed into an approximation problem of a path integral which has no open algorithmic parameters other than the exploration noise. The resulting algorithm can be conceived of as model-based, semi-model-based, or even model free, depending on how the learning problem is structured. The update equations have no danger of numerical instabilities as neither matrix inversions nor gradient learning rates are required. Our new algorithm demonstrates interesting similarities with previous RL research in the framework of probability matching and provides intuition why the slightly heuristically motivated probability matching approach can actually perform well. Empirical evaluations demonstrate significant performance improvements over gradient-based policy learning and scalability to high-dimensional control problems. Finally, a learning experiment on a simulated 12 degree-of-freedom robot dog illustrates the functionality of our algorithm in a complex robot learning scenario. We believe that Policy Improvement with Path Integrals (PI2) offers currently one of the most efficient, numerically robust, and easy to implement algorithms for RL based on trajectory roll-outs.},
language = {en},
journal = {The Journal of Machine Learning Research},
author = {Theodorou, Evangelos A and Buchli, Jonas and Schaal, Stefan},
year = {2010},
pages = {45},
file = {Theodorou et al_A Generalized Path Integral Control Approach to Reinforcement Learning.pdf:/home/dferigo/Zotero/storage/N8HXKEPV/Theodorou et al_A Generalized Path Integral Control Approach to Reinforcement Learning.pdf:application/pdf},
}
@inproceedings{peters_reinforcement_2003,
title = {Reinforcement {Learning} for {Humanoid} {Robotics}},
abstract = {Reinforcement learning offers one of the most general frameworks to take traditional robotics towards true autonomy and versatility. However, applying reinforcement learning to high dimensional movement systems like humanoid robots remains an unsolved problem. In this paper, we discuss different approaches of reinforcement learning in terms of their applicability in humanoid robotics. Methods can be coarsely classified into three different categories, i.e., greedy methods, ‘vanilla’ policy gradient methods, and natural gradient methods. We discuss that greedy methods are not likely to scale into the domain of humanoid robotics as they are problematic when used with function approximation. ‘Vanilla’ policy gradient methods on the other hand have been successfully applied on real-world robots including at least one humanoid robot [3]. We demonstrate that these methods can be significantly improved using the natural policy gradient instead of the regular policy gradient. A derivation of the natural policy gradient is provided, proving that the average policy gradient of Kakade [10] is indeed the true natural gradient. A general algorithm for estimating the natural gradient, the Natural Actor-Critic algorithm, is introduced. This algorithm converges to the nearest local minimum of the cost function with respect to the Fisher information metric under suitable conditions. The algorithm outperforms non-natural policy gradients by far in a cart-pole balancing evaluation, and for learning nonlinear dynamic motor primitives for humanoid robot control. It offers a promising route for the development of reinforcement learning for truly high-dimensionally continuous state-action systems.},
language = {en},
booktitle = {Proceedings of the third {IEEE}-{RAS} international conference on humanoid robots},
author = {Peters, Jan and Vijayakumar, Sethu and Schaal, Stefan},
year = {2003},
pages = {20},
file = {Peters et al_Reinforcement Learning for Humanoid Robotics.pdf:/home/dferigo/Zotero/storage/WVS83I5T/Peters et al_Reinforcement Learning for Humanoid Robotics.pdf:application/pdf},
}
@article{gullapalli_acquiring_1994,
title = {Acquiring robot skills via reinforcement learning},
volume = {14},
issn = {1941-000X},
doi = {10.1109/37.257890},
abstract = {Skill acquisition is a difficult, yet important problem in robot performance. The authors focus on two skills, namely robotic assembly and balancing and on two classic tasks to develop these skills via learning: the peg-in-hole insertion task, and the ball balancing task. A stochastic real-valued (SRV) reinforcement learning algorithm is described and used for learning control and the authors show how it can be used with nonlinear multilayer ANNs. In the peg-in-hole insertion task the SRV network successfully learns to insert a peg into a hole with extremely low clearance, in spite of high sensor noise. In the ball balancing task the SRV network successfully learns to balance the ball with minimal feedback.},
number = {1},
journal = {IEEE Control Systems Magazine},
author = {Gullapalli, V. and Franklin, J.A. and Benbrahim, H.},
month = feb,
year = {1994},
note = {Conference Name: IEEE Control Systems Magazine},
keywords = {Adaptive control, Control design, Control systems, Delay, Feedback, Robot control, Robotic assembly, Robust control, Supervised learning, Uncertainty},
pages = {13--24},
file = {Gullapalli et al_1994_Acquiring robot skills via reinforcement learning.pdf:/home/dferigo/Zotero/storage/D239HJKA/Gullapalli et al_1994_Acquiring robot skills via reinforcement learning.pdf:application/pdf},
}
@inproceedings{kober_policy_2008,
title = {Policy {Search} for {Motor} {Primitives} in {Robotics}},
volume = {21},
url = {https://proceedings.neurips.cc/paper/2008/hash/7647966b7343c29048673252e490f736-Abstract.html},
abstract = {Many motor skills in humanoid robotics can be learned using parametrized motor primitives as done in imitation learning. However, most interesting motor learning problems are high-dimensional reinforcement learning problems often beyond the reach of current methods. In this paper, we extend previous work on policy learning from the immediate reward case to episodic reinforcement learning. We show that this results in a general, common framework also connected to policy gradient methods and yielding a novel algorithm for policy learning by assuming a form of exploration that is particularly well-suited for dynamic motor primitives. The resulting algorithm is an EM-inspired algorithm applicable in complex motor learning tasks. We compare this algorithm to alternative parametrized policy search methods and show that it outperforms previous methods. We apply it in the context of motor learning and show that it can learn a complex Ball-in-a-Cup task using a real Barrett WAM robot arm.},
urldate = {2022-04-15},
booktitle = {Advances in {Neural} {Information} {Processing} {Systems}},
publisher = {Curran Associates, Inc.},
author = {Kober, Jens and Peters, Jan},
year = {2008},
file = {Kober_Peters_2008_Policy Search for Motor Primitives in Robotics.pdf:/home/dferigo/Zotero/storage/BFS6BPCJ/Kober_Peters_2008_Policy Search for Motor Primitives in Robotics.pdf:application/pdf},
}
@inproceedings{honglak_lee_quadruped_2006,
address = {Orlando, FL, USA},
title = {Quadruped robot obstacle negotiation via reinforcement learning},
isbn = {978-0-7803-9505-3},
url = {http://ieeexplore.ieee.org/document/1642158/},
doi = {10.1109/ROBOT.2006.1642158},
abstract = {Legged robots can, in principle, traverse a large variety of obstacles and terrains. In this paper, we describe a successful application of reinforcement learning to the problem of negotiating obstacles with a quadruped robot. Our algorithm is based on a two-level hierarchical decomposition of the task, in which the high-level controller selects the sequence of foot placement positions, and the low-level controller generates the continuous motions to move each foot to the specified positions. The high-level controller uses an estimate of the value function to guide its search; this estimate is learned partially from supervised data. The low-level controller is obtained via policy search. We demonstrate that our robot can successfully climb over a variety of obstacles which were not seen at training time.},
language = {en},
urldate = {2022-04-15},
booktitle = {Proceedings 2006 {IEEE} {International} {Conference} on {Robotics} and {Automation}, 2006. {ICRA} 2006.},
publisher = {IEEE},
author = {Lee, Honglak and Shen, Yirong and Yu, Chih-Han and Singh, G. and Ng, A.Y.},
year = {2006},
pages = {3003--3010},
file = {Honglak Lee et al_2006_Quadruped robot obstacle negotiation via reinforcement learning.pdf:/home/dferigo/Zotero/storage/LE662D9H/Honglak Lee et al_2006_Quadruped robot obstacle negotiation via reinforcement learning.pdf:application/pdf},
}
@inproceedings{kohl_machine_2004,
title = {Machine {Learning} for {Fast} {Quadrupedal} {Locomotion}},
abstract = {For a robot, the ability to get from one place to another is one of the most basic skills. However, locomotion on legged robots is a challenging multidimensional control problem. This paper presents a machine learning approach to legged locomotion, with all training done on the physical robots. The main contributions are a specification of our fully automated learning environment and a detailed empirical comparison of four different machine learning algorithms for learning quadrupedal locomotion. The resulting learned walk is considerably faster than all previously reported hand-coded walks for the same robot platform.},
language = {en},
author = {Kohl, Nate and Stone, Peter},
year = {2004},
pages = {6},
file = {Kohl_Stone_2004_Machine Learning for Fast Quadrupedal Locomotion.pdf:/home/dferigo/Zotero/storage/ZF4WFM4N/Kohl_Stone_2004_Machine Learning for Fast Quadrupedal Locomotion.pdf:application/pdf},
}
@article{zico_kolter_stanford_2011,
title = {The {Stanford} {LittleDog}: {A} learning and rapid replanning approach to quadruped locomotion},
volume = {30},
issn = {0278-3649, 1741-3176},
shorttitle = {The {Stanford} {LittleDog}},
url = {http://journals.sagepub.com/doi/10.1177/0278364910390537},
doi = {10.1177/0278364910390537},
abstract = {Legged robots offer the potential to navigate a wide variety of terrains that are inaccessible to wheeled vehicles. In this paper we consider the planning and control tasks of navigating a quadruped robot over a wide variety of challenging terrain, including terrain which it has not seen until run-time. We present a software architecture that makes use of both static and dynamic gaits, as well as specialized dynamic maneuvers, to accomplish this task. Throughout the paper we highlight two themes that have been central to our approach: 1) the prevalent use of learning algorithms, and 2) a focus on rapid recovery and replanning techniques; we present several novel methods and algorithms that we developed for the quadruped and that illustrate these two themes. We evaluate the performance of these different methods, and also present and discuss the performance of our system on the official Learning Locomotion tests.},
language = {en},
number = {2},
urldate = {2022-04-15},
journal = {The International Journal of Robotics Research},
author = {Zico Kolter, J. and Ng, Andrew Y},
month = feb,
year = {2011},
pages = {150--174},
file = {Zico Kolter_Ng_2011_The Stanford LittleDog.pdf:/home/dferigo/Zotero/storage/3SSTHW7F/Zico Kolter_Ng_2011_The Stanford LittleDog.pdf:application/pdf},
}
@article{schaal_is_1999,
title = {Is imitation learning the route to humanoid robots?},
volume = {3},
issn = {13646613},
url = {https://linkinghub.elsevier.com/retrieve/pii/S1364661399013273},
doi = {10.1016/S1364-6613(99)01327-3},
abstract = {This review investigates two recent developments in artificial intelligence and neural computation: learning from imitation and the development of humanoid robots. It will be postulated that the study of imitation learning offers a promising route to gain new insights into mechanisms of perceptual motor control that could ultimately lead to the creation of autonomous humanoid robots. Imitation learning focuses on three important issues: efficient motor learning, the connection between action and perception, and modular motor control in form of movement primitives. It will be reviewed how research on representations of, and functional connections between action and perception have contributed to our understanding of motor acts of other beings. The recent discovery that some areas in the primate brain are active during both movement perception and execution has provided a hypothetical neural basis of imitation. Computational approaches to imitation learning will also be described, initially from the perspective of traditional AI and robotics, but also from the perspective of neural network models and statistical learning research. Parallels and differences between biological and computational approaches to imitation will be highlighted and an overview of current projects that actually employ imitation learning for humanoid robots will be given.},
language = {en},
number = {6},
urldate = {2022-04-15},
journal = {Trends in Cognitive Sciences},
author = {Schaal, Stefan},
month = jun,
year = {1999},
pages = {233--242},
file = {Schaal_1999_Is imitation learning the route to humanoid robots.pdf:/home/dferigo/Zotero/storage/IN2PXD9V/Schaal_1999_Is imitation learning the route to humanoid robots.pdf:application/pdf},
}
@inproceedings{schaal_learning_1996,
title = {Learning from {Demonstration}},
volume = {9},
url = {https://proceedings.neurips.cc/paper/1996/hash/68d13cf26c4b4f4f932e3eff990093ba-Abstract.html},
abstract = {By now it is widely accepted that learning a task from scratch, i.e., without any prior knowledge, is a daunting undertaking. Humans, however, rarely attempt to learn from scratch. They extract initial biases as well as strategies how to approach a learning problem from instructions and/or demonstrations of other humans. For learning control, this paper investigates how learning from demonstration can be applied in the context of reinforcement learning. We consider priming the Q-function, the value function, the policy, and the model of the task dynamics as possible areas where demonstrations can speed up learning. In general nonlinear learning problems, only model-based reinforcement learning shows significant speed-up after a demonstration, while in the special case of linear quadratic regulator (LQR) problems, all methods profit from the demonstration. In an implementation of pole balancing on a complex anthropomorphic robot arm, we demonstrate that, when facing the complexities of real signal processing, model-based reinforcement learning offers the most robustness for LQR problems. Using the suggested methods, the robot learns pole balancing in just a single trial after a 30 second long demonstration of the human instructor.},
urldate = {2022-04-15},
booktitle = {Advances in {Neural} {Information} {Processing} {Systems}},
publisher = {MIT Press},
author = {Schaal, Stefan},
year = {1996},
}
@incollection{schaal_dynamic_2006,
address = {Tokyo},
title = {Dynamic {Movement} {Primitives} - {A} {Framework} for {Motor} {Control} in {Humans} and {Humanoid} {Robotics}},
isbn = {978-4-431-24164-5},
url = {http://link.springer.com/10.1007/4-431-31381-8_23},
abstract = {Given the continuous stream of movements that biological systems exhibit in their daily activities, an account for such versatility and creativity has to assume that movement sequences consist of segments, executed either in sequence or with partial or complete overlap. Therefore, a fundamental question that has pervaded research in motor control both in artificial and biological systems revolves around identifying movement primitives (a.k.a. units of actions, basis behaviors, motor schemas, etc.). What are the fundamental building blocks that are strung together, adapted to, and created for ever new behaviors? This paper summarizes results that led to the hypothesis of Dynamic Movement Primitives (DMP). DMPs are units of action that are formalized as stable nonlinear attractor systems. They are useful for autonomous robotics as they are highly flexible in creating complex rhythmic (e.g., locomotion) and discrete (e.g., a tennis swing) behaviors that can quickly be adapted to the inevitable perturbations of a dynamically changing, stochastic environment. Moreover, DMPs provide a formal framework that also lends itself to investigations in computational neuroscience. A recent finding that allows creating DMPs with the help of well-understood statistical learning methods has elevated DMPs from a more heuristic to a principled modeling approach. Theoretical insights, evaluations on a humanoid robot, and behavioral and brain imaging data will serve to outline the framework of DMPs for a general approach to motor control in robotics and biology.},
language = {en},
urldate = {2022-04-15},
booktitle = {Adaptive {Motion} of {Animals} and {Machines}},
publisher = {Springer-Verlag},
author = {Schaal, Stefan},
year = {2006},
doi = {10.1007/4-431-31381-8_23},
pages = {261--280},
file = {Schaal_2006_Dynamic Movement Primitives -A Framework for Motor Control in Humans and.pdf:/home/dferigo/Zotero/storage/NGSBJLMR/Schaal_2006_Dynamic Movement Primitives -A Framework for Motor Control in Humans and.pdf:application/pdf},
}
@inproceedings{peters_policy_2006,
address = {Beijing, China},
title = {Policy {Gradient} {Methods} for {Robotics}},
url = {http://ieeexplore.ieee.org/document/4058714/},
doi = {10.1109/IROS.2006.282564},
abstract = {The acquisition and improvement of motor skills and control policies for robotics from trial and error is of essential importance if robots should ever leave precisely pre-structured environments. However, to date only few existing reinforcement learning methods have been scaled into the domains of high-dimensional robots such as manipulator, legged or humanoid robots. Policy gradient methods remain one of the few exceptions and have found a variety of applications. Nevertheless, the application of such methods is not without peril if done in an uninformed manner. In this paper, we give an overview on learning with policy gradient methods for robotics with a strong focus on recent advances in the field. We outline previous applications to robotics and show how the most recently developed methods can significantly improve learning performance. Finally, we evaluate our most promising algorithm in the application of hitting a baseball with an anthropomorphic arm.},
language = {en},
urldate = {2022-04-15},
booktitle = {2006 {IEEE}/{RSJ} {International} {Conference} on {Intelligent} {Robots} and {Systems}},
publisher = {IEEE},
author = {Peters, Jan and Schaal, Stefan},
month = oct,
year = {2006},
pages = {2219--2225},
file = {Peters_Schaal_2006_Policy Gradient Methods for Robotics.pdf:/home/dferigo/Zotero/storage/MAL8FFI9/Peters_Schaal_2006_Policy Gradient Methods for Robotics.pdf:application/pdf},
}
@article{benbrahim_biped_1997,
title = {Biped dynamic walking using reinforcement learning},
volume = {22},
issn = {09218890},
doi = {10.1016/S0921-8890(97)00043-2},
number = {3-4},
journal = {Robotics and Autonomous Systems},
author = {Benbrahim, Hamid and Franklin, Judy A.},
month = dec,
year = {1997},
pages = {283--302},
file = {Benbrahim_Franklin_1997_Biped dynamic walking using reinforcement learning.pdf:/home/dferigo/Zotero/storage/2JYIXAGY/Benbrahim_Franklin_1997_Biped dynamic walking using reinforcement learning.pdf:application/pdf},
}
@phdthesis{watkins_christopher_learning_1989,
title = {Learning from {Delayed} {Rewards}},
school = {King's College},
author = {Watkins, Christopher},
year = {1989},
file = {Watkins, Christopher John Cornish Hellaby_Learning from Delayed Rewards.pdf:/home/dferigo/Zotero/storage/BV9UA3K9/Watkins, Christopher John Cornish Hellaby_Learning from Delayed Rewards.pdf:application/pdf},
}
@article{hinton_fast_2006,
title = {A {Fast} {Learning} {Algorithm} for {Deep} {Belief} {Nets}},
volume = {18},
issn = {0899-7667, 1530-888X},
url = {https://direct.mit.edu/neco/article/18/7/1527-1554/7065},
doi = {10.1162/neco.2006.18.7.1527},
abstract = {We show how to use “complementary priors” to eliminate the explaining-away effects that make inference difficult in densely connected belief nets that have many hidden layers. Using complementary priors, we derive a fast, greedy algorithm that can learn deep, directed belief networks one layer at a time, provided the top two layers form an undirected associative memory. The fast, greedy algorithm is used to initialize a slower learning procedure that fine-tunes the weights using a contrastive version of the wake-sleep algorithm. After fine-tuning, a network with three hidden layers forms a very good generative model of the joint distribution of handwritten digit images and their labels. This generative model gives better digit classification than the best discriminative learning algorithms. The low-dimensional manifolds on which the digits lie are modeled by long ravines in the free-energy landscape of the top-level associative memory, and it is easy to explore these ravines by using the directed connections to display what the associative memory has in mind.},
language = {en},
number = {7},
urldate = {2022-04-15},
journal = {Neural Computation},
author = {Hinton, Geoffrey E. and Osindero, Simon and Teh, Yee-Whye},
month = jul,
year = {2006},
pages = {1527--1554},
file = {Hinton et al_2006_A Fast Learning Algorithm for Deep Belief Nets.pdf:/home/dferigo/Zotero/storage/IMEASWQY/Hinton et al_2006_A Fast Learning Algorithm for Deep Belief Nets.pdf:application/pdf},
}
@techreport{rummery_-line_1994,
title = {On-{Line} {Q}-{Learning} {Using} {Connectionist} {Systems}},
abstract = {Reinforcement learning algorithms are a powerful machine learning technique. However, much of the work on these algorithms has been developed with regard to discrete finite-state Markovian problems, which is too restrictive for many real-world environments. Therefore, it is desirable to extend these methods to high dimensional continuous state-spaces, which requires the use of function approximation to generalise the information learnt by the system. In this report, the use of back-propagation neural networks (Rumelhart, Hinton and Williams 1986) is considered in this context. We consider a number of different algorithms based around Q-Learning (Watkins 1989) combined with the Temporal Difference algorithm (Sutton 1988), including a new algorithm (Modified Connectionist Q-Learning), and Q(λ) (Peng and Williams 1994). In addition, we present algorithms for applying these updates on-line during trials, unlike backward replay used by Lin (1993) that requires waiting until the end of each t...},
author = {Rummery, G. A. and Niranjan, M.},
year = {1994},
file = {Citeseer - Snapshot:/home/dferigo/Zotero/storage/IBKMIYHG/summary.html:text/html;Rummery_Niranjan_1994_On-Line Q-Learning Using Connectionist Systems.pdf:/home/dferigo/Zotero/storage/RP9VWYGZ/Rummery_Niranjan_1994_On-Line Q-Learning Using Connectionist Systems.pdf:application/pdf},
}
@article{williams_simple_1992,
title = {Simple statistical gradient-following algorithms for connectionist reinforcement learning},
volume = {8},
issn = {1573-0565},
url = {https://doi.org/10.1007/BF00992696},
doi = {10.1007/BF00992696},
abstract = {This article presents a general class of associative reinforcement learning algorithms for connectionist networks containing stochastic units. These algorithms, called REINFORCE algorithms, are shown to make weight adjustments in a direction that lies along the gradient of expected reinforcement in both immediate-reinforcement tasks and certain limited forms of delayed-reinforcement tasks, and they do this without explicitly computing gradient estimates or even storing information from which such estimates could be computed. Specific examples of such algorithms are presented, some of which bear a close relationship to certain existing algorithms while others are novel but potentially interesting in their own right. Also given are results that show how such algorithms can be naturally integrated with backpropagation. We close with a brief discussion of a number of additional issues surrounding the use of such algorithms, including what is known about their limiting behaviors as well as further considerations that might be used to help develop similar but potentially more powerful reinforcement learning algorithms.},
language = {en},
number = {3},
urldate = {2022-04-15},
journal = {Machine Learning},
author = {Williams, Ronald J.},
month = may,
year = {1992},
pages = {229--256},
file = {Williams_1992_Simple statistical gradient-following algorithms for connectionist.pdf:/home/dferigo/Zotero/storage/TICDQE49/Williams_1992_Simple statistical gradient-following algorithms for connectionist.pdf:application/pdf},
}
@article{sutton_learning_1988,
title = {Learning to predict by the methods of temporal differences},
volume = {3},
issn = {0885-6125, 1573-0565},
url = {http://link.springer.com/10.1007/BF00115009},
doi = {10.1007/BF00115009},
abstract = {This article introduces a class of incremental learning procedures specialized for prediction, that is, for using past experience with an incompletely known system to predict its future behavior. Whereas conventional prediction-learning methods assign credit by means of the difference between predicted and actual outcomes, the new methods assign credit by means of the difference between temporally successive predictions. Although such temporal-difference methods have been used in Samuel's checker player, Holland's bucket brigade, and the author's Adaptive Heuristic Critic, they have remained poorly understood. Here we prove their convergence and optimality for special cases and relate them to supervised-learning methods. For most real-world prediction problems, temporal-difference methods require less memory and less peak computation than conventional methods and they produce more accurate predictions. We argue that most problems to which supervised learning is currently applied are really prediction problems of the sort to which temporal-difference methods can be applied to advantage.},
language = {en},
number = {1},
urldate = {2022-04-15},
journal = {Machine Learning},
author = {Sutton, Richard S.},
month = aug,
year = {1988},
pages = {9--44},
file = {Sutton_1988_Learning to predict by the methods of temporal differences.pdf:/home/dferigo/Zotero/storage/XBWG7D4V/Sutton_1988_Learning to predict by the methods of temporal differences.pdf:application/pdf},
}
@article{tesauro_td-gammon_1994,
title = {{TD}-{Gammon}, a {Self}-{Teaching} {Backgammon} {Program}, {Achieves} {Master}-{Level} {Play}},
volume = {6},
issn = {0899-7667, 1530-888X},
url = {https://direct.mit.edu/neco/article/6/2/215-219/5771},
doi = {10.1162/neco.1994.6.2.215},
abstract = {TD-Gammon is a neural network that is able to teach itself to play backgammon solely by playing against itself and learning from the results, based on the TD(λ) reinforcement learning algorithm (Sutton, 1988). Despite starting from random initial weights (and hence random initial strategy), TD-Gammon achieves a surprisingly strong level of play. With zero knowledge built in at the start of learning (i.e. given only a "raw" description of the board state), the network learns to play at a strong intermediate level. Furthermore, when a set of hand-crafted features is added to the network’s input representation, the result is a truly staggering level of performance: the latest version of TD-Gammon is now estimated to play at a strong master level that is extremely close to the world’s best human players.},
language = {en},
number = {2},
urldate = {2022-04-15},
journal = {Neural Computation},
author = {Tesauro, Gerald},
month = mar,
year = {1994},
pages = {215--219},
file = {Tesauro_1994_TD-Gammon, a Self-Teaching Backgammon Program, Achieves Master-Level Play.pdf:/home/dferigo/Zotero/storage/9C9X9HWP/Tesauro_1994_TD-Gammon, a Self-Teaching Backgammon Program, Achieves Master-Level Play.pdf:application/pdf},
}
@phdthesis{lin_reinforcement_1993,
type = {{PhD} thesis},
title = {Reinforcement {Learning} for {Robots} {Using} {Neural} {Networks}},
language = {en},
school = {Carnegie Mellon University},
author = {Lin, Long-Ji},
year = {1993},
file = {Lin_1993_Reinforcement Learning for Robots Using Neural Networks.pdf:/home/dferigo/Zotero/storage/Y2VMYRV8/Lin_1993_Reinforcement Learning for Robots Using Neural Networks.pdf:application/pdf},
}
@article{narendra_identification_1990,
title = {Identification and control of dynamical systems using neural networks},
volume = {1},
issn = {10459227},
url = {http://ieeexplore.ieee.org/document/80202/},
doi = {10.1109/72.80202},
abstract = {The paper demonstrates that neural networks can be used effectively for the identification and control of nonlinear dynamical systems. The emphasis of the paper is on models for both identification and control. Static and dynamic back-propagation methods for the adjustment of parameters are discussed. In the models that are introduced, multilayer and recurrent networks are interconnected in novel configurations and hence there is a real need to study them in a unified fashion. Simulation results reveal that the identification and adaptive control schemes suggested are practically feasible. Basic concepts and definitions are introduced throughout the paper, and theoretical questions which have to be addressed are also described.},
language = {en},
number = {1},
urldate = {2022-04-15},
journal = {IEEE Transactions on Neural Networks},
author = {Narendra, K.S. and Parthasarathy, K.},
month = mar,
year = {1990},
pages = {4--27},
file = {Narendra_Parthasarathy_1990_Identification and control of dynamical systems using neural networks.pdf:/home/dferigo/Zotero/storage/T9ITSBDR/Narendra_Parthasarathy_1990_Identification and control of dynamical systems using neural networks.pdf:application/pdf},
}
@article{koberReinforcementLearningRobotics2013,
title = {Reinforcement {Learning} in {Robotics}: {A} {Survey}},
language = {en},
journal = {International Journal of Robotics Research},
author = {Kober, Jens and Bagnell, J Andrew and Peters, Jan},
year = {2013},
pages = {38},
file = {Kober2013-Reinforcement_Learning_in_Robotics.pdf:/home/dferigo/Zotero/storage/858JLQET/Kober2013-Reinforcement_Learning_in_Robotics.pdf:application/pdf;Kober2013-Reinforcement_Learning_in_Robotics.pdf:/home/dferigo/Zotero/storage/W3LHRYT7/Kober2013-Reinforcement_Learning_in_Robotics.pdf:application/pdf},
}
@article{chatzilygeroudis_survey_2020,
title = {A {Survey} on {Policy} {Search} {Algorithms} for {Learning} {Robot} {Controllers} in a {Handful} of {Trials}},
volume = {36},
issn = {1941-0468},
doi = {10.1109/TRO.2019.2958211},
abstract = {Most policy search (PS) algorithms require thousands of training episodes to find an effective policy, which is often infeasible with a physical robot. This survey article focuses on the extreme other end of the spectrum: how can a robot adapt with only a handful of trials (a dozen) and a few minutes? By analogy with the word “big-data,” we refer to this challenge as “micro-data reinforcement learning.” In this article, we show that a first strategy is to leverage prior knowledge on the policy structure (e.g., dynamic movement primitives), on the policy parameters (e.g., demonstrations), or on the dynamics (e.g., simulators). A second strategy is to create data-driven surrogate models of the expected reward (e.g., Bayesian optimization) or the dynamical model (e.g., model-based PS), so that the policy optimizer queries the model instead of the real system. Overall, all successful micro-data algorithms combine these two strategies by varying the kind of model and prior knowledge. The current scientific challenges essentially revolve around scaling up to complex robots, designing generic priors, and optimizing the computing time.},
number = {2},
journal = {IEEE Transactions on Robotics},
author = {Chatzilygeroudis, Konstantinos and Vassiliades, Vassilis and Stulp, Freek and Calinon, Sylvain and Mouret, Jean-Baptiste},
month = apr,
year = {2020},
keywords = {Autonomous agents, learning and adaptive systems, micro-data policy search (MDPS), robot learning},
pages = {328--347},
file = {Chatzilygeroudis et al_2020_A Survey on Policy Search Algorithms for Learning Robot Controllers in a.pdf:/home/dferigo/Zotero/storage/VLFIF5C6/Chatzilygeroudis et al_2020_A Survey on Policy Search Algorithms for Learning Robot Controllers in a.pdf:application/pdf;IEEE Xplore Abstract Record:/home/dferigo/Zotero/storage/I636FFXG/8944013.html:text/html},
}
@article{fabisch_survey_2019,
title = {A {Survey} of {Behavior} {Learning} {Applications} in {Robotics} -- {State} of the {Art} and {Perspectives}},
url = {http://arxiv.org/abs/1906.01868},
abstract = {Recent success of machine learning in many domains has been overwhelming, which often leads to false expectations regarding the capabilities of behavior learning in robotics. In this survey, we analyze the current state of machine learning for robotic behaviors. We will give a broad overview of behaviors that have been learned and used on real robots. Our focus is on kinematically or sensorially complex robots. That includes humanoid robots or parts of humanoid robots, for example, legged robots or robotic arms. We will classify presented behaviors according to various categories and we will draw conclusions about what can be learned and what should be learned. Furthermore, we will give an outlook on problems that are challenging today but might be solved by machine learning in the future and argue that classical robotics and other approaches from artificial intelligence should be integrated more with machine learning to form complete, autonomous systems.},
language = {en},
urldate = {2019-06-27},
journal = {arXiv:1906.01868 [cs]},
author = {Fabisch, Alexander and Petzoldt, Christoph and Otto, Marc and Kirchner, Frank},
month = jun,
year = {2019},
note = {arXiv: 1906.01868},
keywords = {Computer Science - Machine Learning, Computer Science - Robotics},
file = {Fabisch et al_2019_A Survey of Behavior Learning Applications in Robotics -- State of the Art and.pdf:/home/dferigo/Zotero/storage/R9NX3LR5/Fabisch et al_2019_A Survey of Behavior Learning Applications in Robotics -- State of the Art and.pdf:application/pdf},
}
@article{busoniu_reinforcement_2018,
title = {Reinforcement learning for control: {Performance}, stability, and deep approximators},
volume = {46},
issn = {13675788},
shorttitle = {Reinforcement learning for control},
url = {https://linkinghub.elsevier.com/retrieve/pii/S1367578818301184},
doi = {10.1016/j.arcontrol.2018.09.005},
abstract = {Reinforcement learning (RL) offers powerful algorithms to search for optimal controllers of systems with nonlinear, possibly stochastic dynamics that are unknown or highly uncertain. This review mainly covers artificial-intelligence approaches to RL, from the viewpoint of the control engineer. We explain how approximate representations of the solution make RL feasible for problems with continuous states and control actions. Stability is a central concern in control, and we argue that while the control-theoretic RL subfield called adaptive dynamic programming is dedicated to it, stability of RL largely remains an open question. We also cover in detail the case where deep neural networks are used for approximation, leading to the field of deep RL, which has shown great success in recent years. With the control practitioner in mind, we outline opportunities and pitfalls of deep RL; and we close the survey with an outlook that – among other things – points out some avenues for bridging the gap between control and artificial-intelligence RL techniques.},
language = {en},
urldate = {2018-12-17},
journal = {Annual Reviews in Control},
author = {Buşoniu, Lucian and de Bruin, Tim and Tolić, Domagoj and Kober, Jens and Palunko, Ivana},
year = {2018},
pages = {8--28},
file = {Buşoniu et al_2018_Reinforcement learning for control.pdf:/home/dferigo/Zotero/storage/KWV5Z2VL/Buşoniu et al_2018_Reinforcement learning for control.pdf:application/pdf},
}
@inproceedings{stulp_reinforcement_2010,
title = {Reinforcement learning of full-body humanoid motor skills},
isbn = {978-1-4244-8688-5},
url = {http://ieeexplore.ieee.org/document/5686320/},
doi = {10.1109/ICHR.2010.5686320},
abstract = {Applying reinforcement learning to humanoid robots is challenging because humanoids have a large number of degrees of freedom and state and action spaces are continuous. Thus, most reinforcement learning algorithms would become computationally infeasible and require a prohibitive amount of trials to explore such high-dimensional spaces. In this paper, we present a probabilistic reinforcement learning approach, which is derived from the framework of stochastic optimal control and path integrals. The algorithm, called Policy Improvement with Path Integrals (PI2), has a surprisingly simple form, has no open tuning parameters besides the exploration noise, is model-free, and performs numerically robustly in high dimensional learning problems. We demonstrate how PI2 is able to learn full-body motor skills on a 34-DOF humanoid robot. To demonstrate the generality of our approach, we also apply PI2 in the context of variable impedance control, where both planned trajectories and gain schedules for each joint are optimized simultaneously.},
language = {en},
urldate = {2018-06-17},
publisher = {IEEE},
author = {Stulp, Freek and Buchli, Jonas and Theodorou, Evangelos and Schaal, Stefan},
month = dec,
year = {2010},
pages = {405--410},
file = {Stulp et al_2010_Reinforcement learning of full-body humanoid motor skills.pdf:/home/dferigo/Zotero/storage/4H6NFGV6/Stulp et al_2010_Reinforcement learning of full-body humanoid motor skills.pdf:application/pdf},
}
@article{sola_micro_2020,
title = {A micro {Lie} theory for state estimation in robotics},
url = {http://arxiv.org/abs/1812.01537},
abstract = {A Lie group is an old mathematical abstract object dating back to the XIX century, when mathematician Sophus Lie laid the foundations of the theory of continuous transformation groups. Its influence has spread over diverse areas of science and technology many years later. In robotics, we are recently experiencing an important trend in its usage, at least in the fields of estimation, and particularly in motion estimation for navigation. Yet for a vast majority of roboticians, Lie groups are highly abstract constructions and therefore difficult to understand and to use.},
language = {en},
urldate = {2021-11-26},
journal = {arXiv:1812.01537 [cs]},
author = {Solà, Joan and Deray, Jeremie and Atchuthan, Dinesh},
month = nov,
year = {2020},
note = {arXiv: 1812.01537},
keywords = {Computer Science - Robotics},
file = {Solà et al_2020_A micro Lie theory for state estimation in robotics.pdf:/home/dferigo/Zotero/storage/IBW6W28A/Solà et al_2020_A micro Lie theory for state estimation in robotics.pdf:application/pdf},
}
@article{dulac-arnold_empirical_2021,
title = {An empirical investigation of the challenges of real-world reinforcement learning},
url = {http://arxiv.org/abs/2003.11881},
abstract = {Reinforcement learning (RL) has proven its worth in a series of artificial domains, and is beginning to show some successes in real-world scenarios. However, much of the research advances in RL are hard to leverage in real-world systems due to a series of assumptions that are rarely satisfied in practice. In this work, we identify and formalize a series of independent challenges that embody the difficulties that must be addressed for RL to be commonly deployed in real-world systems. For each challenge, we define it formally in the context of a Markov Decision Process, analyze the effects of the challenge on state-of-the-art learning algorithms, and present some existing attempts at tackling it. We believe that an approach that addresses our set of proposed challenges would be readily deployable in a large number of real world problems. Our proposed challenges are implemented in a suite of continuous control environments called realworldrl-suite which we propose as an open-source benchmark.},
language = {en},
urldate = {2022-04-13},
journal = {arXiv:2003.11881 [cs]},
author = {Dulac-Arnold, Gabriel and Levine, Nir and Mankowitz, Daniel J. and Li, Jerry and Paduraru, Cosmin and Gowal, Sven and Hester, Todd},
month = mar,
year = {2021},
note = {arXiv: 2003.11881},
keywords = {Computer Science - Machine Learning, Computer Science - Artificial Intelligence},
file = {Dulac-Arnold et al_2021_An empirical investigation of the challenges of real-world reinforcement.pdf:/home/dferigo/Zotero/storage/FN6Z7NFN/Dulac-Arnold et al_2021_An empirical investigation of the challenges of real-world reinforcement.pdf:application/pdf},
}
@article{dulac-arnold_empirical_2020,
title = {An empirical investigation of the challenges of real-world reinforcement learning},
url = {http://arxiv.org/abs/2003.11881},
abstract = {Reinforcement learning (RL) has proven its worth in a series of artificial domains, and is beginning to show some successes in real-world scenarios. However, much of the research advances in RL are hard to leverage in real-world systems due to a series of assumptions that are rarely satisfied in practice. In this work, we identify and formalize a series of independent challenges that embody the difficulties that must be addressed for RL to be commonly deployed in real-world systems. For each challenge, we define it formally in the context of a Markov Decision Process, analyze the effects of the challenge on state-of-the-art learning algorithms, and present some existing attempts at tackling it. We believe that an approach that addresses our set of proposed challenges would be readily deployable in a large number of real world problems. Our proposed challenges are implemented in a suite of continuous control environments called realworldrl-suite which we propose as an open-source benchmark.},
language = {en},
urldate = {2020-07-25},
journal = {arXiv:2003.11881 [cs]},
author = {Dulac-Arnold, Gabriel and Levine, Nir and Mankowitz, Daniel J. and Li, Jerry and Paduraru, Cosmin and Gowal, Sven and Hester, Todd},
month = mar,
year = {2020},
note = {arXiv: 2003.11881},
keywords = {Computer Science - Machine Learning, Computer Science - Artificial Intelligence},
file = {Dulac-Arnold et al_2020_An empirical investigation of the challenges of real-world reinforcement.pdf:/home/dferigo/Zotero/storage/X77JWG9W/Dulac-Arnold et al_2020_An empirical investigation of the challenges of real-world reinforcement.pdf:application/pdf},
}
@article{haarnoja_learning_2019,
title = {Learning to {Walk} via {Deep} {Reinforcement} {Learning}},
url = {http://arxiv.org/abs/1812.11103},
abstract = {Deep reinforcement learning (deep RL) holds the promise of automating the acquisition of complex controllers that can map sensory inputs directly to low-level actions. In the domain of robotic locomotion, deep RL could enable learning locomotion skills with minimal engineering and without an explicit model of the robot dynamics. Unfortunately, applying deep RL to real-world robotic tasks is exceptionally difficult, primarily due to poor sample complexity and sensitivity to hyperparameters. While hyperparameters can be easily tuned in simulated domains, tuning may be prohibitively expensive on physical systems, such as legged robots, that can be damaged through extensive trial-and-error learning. In this paper, we propose a sample-efficient deep RL algorithm based on maximum entropy RL that requires minimal per-task tuning and only a modest number of trials to learn neural network policies. We apply this method to learning walking gaits on a real-world Minitaur robot. Our method can acquire a stable gait from scratch directly in the real world in about two hours, without relying on any model or simulation, and the resulting policy is robust to moderate variations in the environment. We further show that our algorithm achieves state-of-the-art performance on simulated benchmarks with a single set of hyperparameters. Videos of training and the learned policy can be found on the project website.},
urldate = {2020-05-07},
journal = {arXiv:1812.11103 [cs, stat]},
author = {Haarnoja, Tuomas and Ha, Sehoon and Zhou, Aurick and Tan, Jie and Tucker, George and Levine, Sergey},
month = jun,
year = {2019},
note = {arXiv: 1812.11103},
keywords = {Computer Science - Machine Learning, Computer Science - Robotics, Computer Science - Artificial Intelligence, Statistics - Machine Learning},
file = {arXiv.org Snapshot:/home/dferigo/Zotero/storage/NVW3BT23/1812.html:text/html;Haarnoja et al_2019_Learning to Walk via Deep Reinforcement Learning.pdf:/home/dferigo/Zotero/storage/X3F83MC6/Haarnoja et al_2019_Learning to Walk via Deep Reinforcement Learning.pdf:application/pdf},
}
@inproceedings{rohmer_v-rep_2013,
title = {V-{REP}: {A} versatile and scalable robot simulation framework},
shorttitle = {V-{REP}},
doi = {10.1109/IROS.2013.6696520},
abstract = {From exploring planets to cleaning homes, the reach and versatility of robotics is vast. The integration of actuation, sensing and control makes robotics systems powerful, but complicates their simulation. This paper introduces a versatile, scalable, yet powerful general-purpose robot simulation framework called V-REP. The paper discusses the utility of a portable and flexible simulation framework that allows for direct incorporation of various control techniques. This renders simulations and simulation models more accessible to a general-public, by reducing the simulation model deployment complexity. It also increases productivity by offering built-in and ready-to-use functionalities, as well as a multitude of programming approaches. This allows for a multitude of applications including rapid algorithm development, system verification, rapid prototyping, and deployment for cases such as safety/remote monitoring, training and education, hardware control, and factory automation simulation.},
booktitle = {2013 {IEEE}/{RSJ} {International} {Conference} on {Intelligent} {Robots} and {Systems}},
author = {Rohmer, Eric and Singh, Surya P. N. and Freese, Marc},
month = nov,
year = {2013},
note = {ISSN: 2153-0866},
keywords = {Computational modeling, Hardware, Joints, Load modeling, Robots, Sensors, Shape},
pages = {1321--1326},
file = {Rohmer et al_2013_V-REP.pdf:/home/dferigo/Zotero/storage/P46T7D6L/Rohmer et al_2013_V-REP.pdf:application/pdf},
}
@article{ibarz_how_2021,
title = {How to {Train} {Your} {Robot} with {Deep} {Reinforcement} {Learning}; {Lessons} {We}'ve {Learned}},
issn = {0278-3649, 1741-3176},
url = {http://arxiv.org/abs/2102.02915},
doi = {10.1177/0278364920987859},
abstract = {Deep reinforcement learning (RL) has emerged as a promising approach for autonomously acquiring complex behaviors from low level sensor observations. Although a large portion of deep RL research has focused on applications in video games and simulated control, which does not connect with the constraints of learning in real environments, deep RL has also demonstrated promise in enabling physical robots to learn complex skills in the real world. At the same time, real-world robotics provides an appealing domain for evaluating such algorithms, as it connects directly to how humans learn: as an embodied agent in the real world. Learning to perceive and move in the real world presents numerous challenges, some of which are easier to address than others, and some of which are often not considered in RL research that focuses only on simulated domains. In this review article, we present a number of case studies involving robotic deep RL. Building off of these case studies, we discuss commonly perceived challenges in deep RL and how they have been addressed in these works. We also provide an overview of other outstanding challenges, many of which are unique to the real-world robotics setting and are not often the focus of mainstream RL research. Our goal is to provide a resource both for roboticists and machine learning researchers who are interested in furthering the progress of deep RL in the real world.},
urldate = {2021-04-08},
journal = {The International Journal of Robotics Research},
author = {Ibarz, Julian and Tan, Jie and Finn, Chelsea and Kalakrishnan, Mrinal and Pastor, Peter and Levine, Sergey},
month = jan,
year = {2021},
note = {arXiv: 2102.02915},
keywords = {Computer Science - Machine Learning, Computer Science - Robotics},
pages = {027836492098785},
file = {arXiv.org Snapshot:/home/dferigo/Zotero/storage/89CMU29H/2102.html:text/html;Ibarz et al_2021_How to Train Your Robot with Deep Reinforcement Learning\; Lessons We've Learned.pdf:/home/dferigo/Zotero/storage/YXS79SEC/Ibarz et al_2021_How to Train Your Robot with Deep Reinforcement Learning\; Lessons We've Learned.pdf:application/pdf},
}
@article{da_learning_2020,
title = {Learning a {Contact}-{Adaptive} {Controller} for {Robust}, {Efficient} {Legged} {Locomotion}},
url = {http://arxiv.org/abs/2009.10019},
abstract = {We present a hierarchical framework that combines model-based control and reinforcement learning (RL) to synthesize robust controllers for a quadruped (the Unitree Laikago). The system consists of a high-level controller that learns to choose from a set of primitives in response to changes in the environment and a low-level controller that utilizes an established control method to robustly execute the primitives. Our framework learns a controller that can adapt to challenging environmental changes on the fly, including novel scenarios not seen during training. The learned controller is up to 85 percent more energy efficient and is more robust compared to baseline methods. We also deploy the controller on a physical robot without any randomization or adaptation scheme.},
language = {en},
urldate = {2020-10-01},
journal = {arXiv:2009.10019 [cs]},
author = {Da, Xingye and Xie, Zhaoming and Hoeller, David and Boots, Byron and Anandkumar, Animashree and Zhu, Yuke and Babich, Buck and Garg, Animesh},
month = sep,
year = {2020},
note = {arXiv: 2009.10019},
keywords = {Computer Science - Machine Learning, Computer Science - Robotics},
file = {Da et al_2020_Learning a Contact-Adaptive Controller for Robust, Efficient Legged Locomotion.pdf:/home/dferigo/Zotero/storage/UABUZD58/Da et al_2020_Learning a Contact-Adaptive Controller for Robust, Efficient Legged Locomotion.pdf:application/pdf},
}
@article{yang_data_2019,
title = {Data {Efficient} {Reinforcement} {Learning} for {Legged} {Robots}},
url = {http://arxiv.org/abs/1907.03613},
abstract = {We present a model-based reinforcement learning framework for robot locomotion that achieves walking based on only 4.5 minutes of data collected on a quadruped robot. To accurately model the robot’s dynamics over a long horizon, we introduce a loss function that tracks the model’s prediction over multiple timesteps. We adapt model predictive control to account for planning latency, which allows the learned model to be used for real time control. Additionally, to ensure safe exploration during model learning, we embed prior knowledge of leg trajectories into the action space. The resulting system achieves fast and robust locomotion. Unlike model-free methods, which optimize for a particular task, our planner can use the same learned dynamics for various tasks, simply by changing the reward function. To the best of our knowledge, our approach is more than an order of magnitude more sample efficient than current model-free methods.},
language = {en},
urldate = {2022-04-13},
journal = {arXiv:1907.03613 [cs]},
author = {Yang, Yuxiang and Caluwaerts, Ken and Iscen, Atil and Zhang, Tingnan and Tan, Jie and Sindhwani, Vikas},
month = oct,
year = {2019},
note = {arXiv: 1907.03613},
keywords = {Computer Science - Machine Learning, Computer Science - Robotics, Computer Science - Artificial Intelligence},
file = {Yang et al_2019_Data Efficient Reinforcement Learning for Legged Robots.pdf:/home/dferigo/Zotero/storage/XQMX3SIP/Yang et al_2019_Data Efficient Reinforcement Learning for Legged Robots.pdf:application/pdf},
}
@article{hwangboLearningAgileDynamic2019s,
title = {Learning agile and dynamic motor skills for legged robots},
issn = {2470-9476},
doi = {10.1126/scirobotics.aau5872},
language = {en},
urldate = {2019-05-20},
journal = {Science Robotics},
author = {Hwangbo, Jemin and Lee, Joonho and Dosovitskiy, Alexey and Bellicoso, Dario and Tsounis, Vassilios and Koltun, Vladlen and Hutter, Marco},
month = jan,
year = {2019},
file = {Hwangbo et al_2019_Learning agile and dynamic motor skills for legged robots.pdf:/home/dferigo/Zotero/storage/GDPEWKB2/Hwangbo et al_2019_Learning agile and dynamic motor skills for legged robots.pdf:application/pdf},
}
@article{tsounis_deepgait_2020,
title = {{DeepGait}: {Planning} and {Control} of {Quadrupedal} {Gaits} using {Deep} {Reinforcement} {Learning}},
shorttitle = {{DeepGait}},
url = {http://arxiv.org/abs/1909.08399},
abstract = {This paper addresses the problem of legged locomotion in non-flat terrain. As legged robots such as quadrupeds are to be deployed in terrains with geometries which are difficult to model and predict, the need arises to equip them with the capability to generalize well to unforeseen situations. In this work, we propose a novel technique for training neural-network policies for terrain-aware locomotion, which combines state-of-the-art methods for model-based motion planning and reinforcement learning. Our approach is centered on formulating Markov decision processes using the evaluation of dynamic feasibility criteria in place of physical simulation. We thus employ policy-gradient methods to independently train policies which respectively plan and execute foothold and base motions in 3D environments using both proprioceptive and exteroceptive measurements. We apply our method within a challenging suite of simulated terrain scenarios which contain features such as narrow bridges, gaps and stepping-stones, and train policies which succeed in locomoting effectively in all cases.},
urldate = {2020-07-28},
journal = {arXiv:1909.08399 [cs]},
author = {Tsounis, Vassilios and Alge, Mitja and Lee, Joonho and Farshidian, Farbod and Hutter, Marco},
month = jan,
year = {2020},