{"created":"2023-06-26T11:00:11.511683+00:00","id":415,"links":{},"metadata":{"_buckets":{"deposit":"55ecd599-daaa-4b21-803d-7b9de1f08170"},"_deposit":{"created_by":22,"id":"415","owners":[22],"pid":{"revision_id":0,"type":"depid","value":"415"},"status":"published"},"_oai":{"id":"oai:oist.repo.nii.ac.jp:00000415","sets":["7:84"]},"author_link":[],"item_10006_creator_3":{"attribute_name":"Author","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"Reinke, Chris","creatorNameLang":"en"}]}]},"item_10006_date_granted_11":{"attribute_name":"Degree Conferral Date","attribute_value_mlt":[{"subitem_dategranted":"2018-06-30"}]},"item_10006_degree_grantor_9":{"attribute_name":"Degree Conferrral Institution","attribute_value_mlt":[{"subitem_degreegrantor":[{"subitem_degreegrantor_name":"Okinawa Institute of Science and Technology Graduate University"}],"subitem_degreegrantor_identifier":[{"subitem_degreegrantor_identifier_name":"38005","subitem_degreegrantor_identifier_scheme":"kakenhi"}]}]},"item_10006_degree_name_8":{"attribute_name":"Degree","attribute_value_mlt":[{"subitem_degreename":"Doctor of Philosophy"}]},"item_10006_description_7":{"attribute_name":"Abstract","attribute_value_mlt":[{"subitem_description":"Reinforcement learning allows artificial agents to learn complex tasks, such as playing Go on an expert level. Still, unlike humans, artificial agents lack the ability to adapt learned behavior to task changes, or to new objectives, such as to capture as many opponent pieces within a given number of moves, instead of simply winning. The Independent Gamma-Ensemble (IGE), a new brain-inspired framework, allows such adaptations. It is composed of several Q-learning modules, each with a different discount factor. The off-policy nature of Q-learning allows modules to learn several policies in parallel, each representing a different solution for the payoff between a high reward sum and the time to gain it. The IGE adapts to new task conditions by switching between its policies (transfer learning). It can also decode the expected reward sum and the required time for each policy, allowing it to immediately select the most appropriate policy for a new task objective (zero-shot learning). Additionally, this allows to optimize the average reward in discrete MDPs where non-zero reward is only given in goal states. The convergence to the optimal policy can be proven for such MDPs. 
The modular structure behind the IGE can be combined with many reinforcement learning algorithms and applied to various tasks, allowing to improve the adaptive abilities of artificial agents in general.","subitem_description_type":"Other"}]},"item_10006_dissertation_number_12":{"attribute_name":"Degree Referral Number","attribute_value_mlt":[{"subitem_dissertationnumber":"甲第17号"}]},"item_10006_identifier_registration":{"attribute_name":"ID登録","attribute_value_mlt":[{"subitem_identifier_reg_text":"10.15102/1394.00000369","subitem_identifier_reg_type":"JaLC"}]},"item_10006_rights_13":{"attribute_name":"Copyright Information","attribute_value_mlt":[{"subitem_rights":"© 2018 The Author."}]},"item_10006_text_24":{"attribute_name":"Exam Date","attribute_value_mlt":[{"subitem_text_value":"2018/05/29"}]},"item_10006_version_type_18":{"attribute_name":"Version Format","attribute_value_mlt":[{"subitem_version_resource":"http://purl.org/coar/version/c_970fb48d4fbd8a85","subitem_version_type":"VoR"}]},"item_access_right":{"attribute_name":"アクセス権","attribute_value_mlt":[{"subitem_access_right":"open access","subitem_access_right_uri":"http://purl.org/coar/access_right/c_abf2"}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2018-07-04"}],"displaytype":"detail","filename":"Full-Text.pdf","filesize":[{"value":"14.1 MB"}],"format":"application/pdf","licensetype":"license_note","mimetype":"application/pdf","url":{"label":"Full-Text","objectType":"fulltext","url":"https://oist.repo.nii.ac.jp/record/415/files/Full-Text.pdf"},"version_id":"7daae70b-dcf1-462e-a3b9-e541001f0c98"},{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2018-07-04"}],"displaytype":"detail","filename":"Final Exam Abstract.pdf","filesize":[{"value":"42.9 kB"}],"format":"application/pdf","licensetype":"license_note","mimetype":"application/pdf","url":{"label":"Final Exam Abstract","objectType":"abstract","url":"https://oist.repo.nii.ac.jp/record/415/files/Final Exam Abstract.pdf"},"version_id":"f541ba84-f85f-42cc-a73a-0915c8a46d25"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourcetype":"doctoral thesis","resourceuri":"http://purl.org/coar/resource_type/c_db06"}]},"item_title":"ガンマアンサンブル:多様な時間割引モジュールによる適応的強化学習","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"ガンマアンサンブル:多様な時間割引モジュールによる適応的強化学習","subitem_title_language":"ja"},{"subitem_title":"The Gamma-Ensemble - Adaptive Reinforcement Learning via Modular Discounting","subitem_title_language":"en"}]},"item_type_id":"10006","owner":"22","path":["84"],"pubdate":{"attribute_name":"PubDate","attribute_value":"2018-07-04"},"publish_date":"2018-07-04","publish_status":"0","recid":"415","relation_version_is_last":true,"title":["ガンマアンサンブル:多様な時間割引モジュールによる適応的強化学習"],"weko_creator_id":"22","weko_shared_id":-1},"updated":"2023-08-28T05:52:43.654090+00:00"}