@phdthesis{oai:oist.repo.nii.ac.jp:00002805,
  author = {Han, Dongqi},
  month  = {oct},
  year   = {2022},
  school = {Okinawa Institute of Science and Technology Graduate University},
  title  = {Toward Building a Cognitive Brain-Inspired Robot Capable of Abstraction, Inference, and Action Planning: Reinforcement Learning and Active Inference in Hierarchical and Partially Observable Environments},
  note   = {The thesis aims to advance cognitive decision-making and motor control using reinforcement learning (RL) with stochastic recurrent neural networks (RNNs). RL is a computational framework for training an agent, such as a robot, to select actions that maximize immediate or future rewards. Recently, RL has undergone rapid development through the introduction of artificial neural networks as function approximators. RL using neural networks, also known as deep RL, has shown superhuman performance on a wide range of virtual and real-world tasks, such as games, robotic control, and the control of nuclear fusion devices. Such success would not have been possible without the efforts of numerous researchers who developed and improved deep RL algorithms. Most of this work, however, has focused on designing or revising RL objective functions through mathematical analysis and heuristic ideas. While well-formulated loss functions are critical to RL performance, relatively little effort has been devoted to developing and improving the architectures of the neural network models used in deep RL. The thesis discusses the benefits of novel network architectures for deep RL. In particular, it includes two of the author's original studies on developing novel stochastic RNN architectures for RL in partially observable environments. The first work proposes a novel multiple-level stochastic RNN model for solving tasks that require hierarchical control. It is shown that an action hierarchy, characterized by consistent representations of abstracted sub-goals at the higher level, self-develops during learning in several challenging continuous robotic control tasks. The emergent action hierarchy is also observed to enable faster relearning when the sub-goals are recomposed. The second work introduces a variational RNN model for predicting state transitions in continuous robotic control tasks in which the environmental state is partially observable. By predicting subsequent observations, the model learns to represent the underlying states of the environment, which are indispensable but not directly observable. A corresponding algorithm is proposed to facilitate efficient learning in partially observable environments. These studies suggest that the performance of RL agents can be improved by the appropriate use of stochastic RNN structures, providing novel insights for designing better model architectures for future deep RL studies.}
}