{"created":"2023-06-26T11:00:56.977089+00:00","id":1545,"links":{},"metadata":{"_buckets":{"deposit":"7834fe9c-71c6-4832-ba67-5cc64e7e684b"},"_deposit":{"created_by":29,"id":"1545","owners":[29],"pid":{"revision_id":0,"type":"depid","value":"1545"},"status":"published"},"_oai":{"id":"oai:oist.repo.nii.ac.jp:00001545","sets":["6:26","6:78"]},"author_link":["9352","9353","9354"],"item_10001_biblio_info_7":{"attribute_name":"Bibliographic Information","attribute_value_mlt":[{"bibliographicIssueDates":{"bibliographicIssueDate":"2020-06-06","bibliographicIssueDateType":"Issued"},"bibliographicPageEnd":"162","bibliographicPageStart":"149","bibliographicVolumeNumber":"129","bibliographic_titles":[{},{"bibliographic_title":"Neural Networks","bibliographic_titleLang":"en"}]}]},"item_10001_creator_3":{"attribute_name":"Author","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Han, Dongqi"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Doya, Kenji"}],"nameIdentifiers":[{}]},{"creatorNames":[{"creatorName":"Tani, Jun"}],"nameIdentifiers":[{}]}]},"item_10001_description_5":{"attribute_name":"Abstract","attribute_value_mlt":[{"subitem_description":"Recurrent neural networks (RNNs) for reinforcement learning (RL) have shown distinct advantages, e.g., solving memory-dependent tasks and meta-learning. However, little effort has been spent on improving RNN architectures and on understanding the underlying neural mechanisms for performance gain. In this paper, we propose a novel, multiple-timescale, stochastic RNN for RL. Empirical results show that the network can autonomously learn to abstract sub-goals and can self-develop an action hierarchy using internal dynamics in a challenging continuous control task. Furthermore, we show that the self-developed compositionality of the network enhances faster re-learning when adapting to a new task that is a re-composition of previously learned sub-goals, than when starting from scratch. We also found that improved performance can be achieved when neural activities are subject to stochastic rather than deterministic dynamics.","subitem_description_type":"Other"}]},"item_10001_publisher_8":{"attribute_name":"Publisher","attribute_value_mlt":[{"subitem_publisher":"Elsevier"}]},"item_10001_relation_14":{"attribute_name":"DOI","attribute_value_mlt":[{"subitem_relation_type":"isIdenticalTo","subitem_relation_type_id":{"subitem_relation_type_id_text":"info:doi/10.1016/j.neunet.2020.06.002","subitem_relation_type_select":"DOI"}}]},"item_10001_relation_16":{"attribute_name":"情報源","attribute_value_mlt":[{"subitem_relation_name":[{"subitem_relation_name_text":"https://creativecommons.org/licenses/by-nc-nd/4.0/"}]}]},"item_10001_relation_17":{"attribute_name":"Related site","attribute_value_mlt":[{"subitem_relation_type_id":{"subitem_relation_type_id_text":"https://doi.org/10.1016/j.neunet.2020.06.002","subitem_relation_type_select":"DOI"}}]},"item_10001_rights_15":{"attribute_name":"Rights","attribute_value_mlt":[{"subitem_rights":"© 2020 The Authors."}]},"item_10001_source_id_9":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"0893-6080","subitem_source_identifier_type":"ISSN"}]},"item_10001_version_type_20":{"attribute_name":"Author's flag","attribute_value_mlt":[{"subitem_version_resource":"http://purl.org/coar/version/c_970fb48d4fbd8a85","subitem_version_type":"VoR"}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2020-06-12"}],"displaytype":"detail","filename":"Han-2020-Self-organization of action hierarchy.pdf","filesize":[{"value":"2.5 MB"}],"format":"application/pdf","license_note":"Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International(https://creativecommons.org/licenses/by-nc-nd/4.0/)","licensetype":"license_note","mimetype":"application/pdf","url":{"label":"Han-2020-Self-organization of action hierarchy","url":"https://oist.repo.nii.ac.jp/record/1545/files/Han-2020-Self-organization of action hierarchy.pdf"},"version_id":"e73da711-69a9-4871-8a9b-768ee3eee086"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourcetype":"journal article","resourceuri":"http://purl.org/coar/resource_type/c_6501"}]},"item_title":"Self-organization of action hierarchy and compositionality by reinforcement learning with recurrent neural networks","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Self-organization of action hierarchy and compositionality by reinforcement learning with recurrent neural networks","subitem_title_language":"en"}]},"item_type_id":"10001","owner":"29","path":["26","78"],"pubdate":{"attribute_name":"公開日","attribute_value":"2020-06-12"},"publish_date":"2020-06-12","publish_status":"0","recid":"1545","relation_version_is_last":true,"title":["Self-organization of action hierarchy and compositionality by reinforcement learning with recurrent neural networks"],"weko_creator_id":"29","weko_shared_id":29},"updated":"2023-06-26T11:48:30.512207+00:00"}