1k steps
An implementation of Phasic Policy Gradient, a proposed improvement on top of Proximal Policy Optimization (PPO), in PyTorch. This is my very first project in Reinforcement Learning.
$ pip install -r requirements.txt
$ python train.py --render
@misc{cobbe2020phasic,
  title         = {Phasic Policy Gradient},
  author        = {Cobbe, Karl and Hilton, Jacob and Klimov, Oleg and Schulman, John},
  year          = {2020},
  eprint        = {2009.04416},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
}
@misc{Nauman2024BiggerRO,
  title         = {Bigger, Regularized, Optimistic: Scaling for Compute and Sample-Efficient Continuous Control},
  author        = {Nauman, Micha{\l} and Ostaszewski, Mateusz and Jankowski, Krzysztof and Mi{\l}o{\'s}, Piotr and Cygan, Marek},
  year          = {2024},
  eprint        = {2405.16158},
  archivePrefix = {arXiv},
  url           = {https://api.semanticscholar.org/CorpusID:270063045},
}
@misc{Zhang2024ReLU2WD,
  title         = {{ReLU2} Wins: Discovering Efficient Activation Functions for Sparse {LLMs}},
  author        = {Zhang, Zhengyan and Song, Yixin and Yu, Guanghui and Han, Xu and Lin, Yankai and Xiao, Chaojun and Song, Chenyang and Liu, Zhiyuan and Mi, Zeyu and Sun, Maosong},
  year          = {2024},
  eprint        = {2402.03804},
  archivePrefix = {arXiv},
  url           = {https://api.semanticscholar.org/CorpusID:267499856},
}
@misc{Shleifer2021NormFormerIT,
  title         = {{NormFormer}: Improved {Transformer} Pretraining with Extra Normalization},
  author        = {Shleifer, Sam and Weston, Jason and Ott, Myle},
  year          = {2021},
  eprint        = {2110.09456},
  archivePrefix = {arXiv},
  url           = {https://api.semanticscholar.org/CorpusID:239016890},
}