WEKO3
アイテム
{"_buckets": {"deposit": "9ade34d5-b3ac-4dc1-9645-b98bc5b97787"}, "_deposit": {"created_by": 3, "id": "3710", "owners": [3], "pid": {"revision_id": 0, "type": "depid", "value": "3710"}, "status": "published"}, "_oai": {"id": "oai:tokyo-metro-u.repo.nii.ac.jp:00003710", "sets": ["624"]}, "author_link": ["10847", "10842", "10844", "10846", "10845", "10841", "10848", "10843", "10840"], "item_6_alternative_title_19": {"attribute_name": "その他のタイトル", "attribute_value_mlt": [{"subitem_alternative_title": "Parallel Reinforcement Learning Systems Using Exploration Agents"}]}, "item_6_biblio_info_7": {"attribute_name": "書誌情報", "attribute_value_mlt": [{"bibliographicIssueDates": {"bibliographicIssueDate": "2008-03-25", "bibliographicIssueDateType": "Issued"}, "bibliographicIssueNumber": "739", "bibliographicPageEnd": "701", "bibliographicPageStart": "692", "bibliographicVolumeNumber": "74", "bibliographic_titles": [{"bibliographic_title": "日本機械学會論文集. C編"}]}]}, "item_6_creator_2": {"attribute_name": "著者(ヨミ)", "attribute_type": "creator", "attribute_value_mlt": [{"creatorNames": [{"creatorName": "タテヤマ, タケシ"}], "nameIdentifiers": [{"nameIdentifier": "10843", "nameIdentifierScheme": "WEKO"}]}, {"creatorNames": [{"creatorName": "カワタ, セイイチ"}], "nameIdentifiers": [{"nameIdentifier": "10844", "nameIdentifierScheme": "WEKO"}]}, {"creatorNames": [{"creatorName": "シマムラ, ヨシキ"}], "nameIdentifiers": [{"nameIdentifier": "10845", "nameIdentifierScheme": "WEKO"}]}]}, "item_6_creator_3": {"attribute_name": "著者別名", "attribute_type": "creator", "attribute_value_mlt": [{"creatorNames": [{"creatorName": "Tateyama, Takeshi"}], "nameIdentifiers": [{"nameIdentifier": "10846", "nameIdentifierScheme": "WEKO"}]}, {"creatorNames": [{"creatorName": "Kawata, Seiichi"}], "nameIdentifiers": [{"nameIdentifier": "10847", "nameIdentifierScheme": "WEKO"}]}, {"creatorNames": [{"creatorName": "Shimomura, Yoshiki"}], "nameIdentifiers": [{"nameIdentifier": "10848", "nameIdentifierScheme": "WEKO"}]}]}, 
"item_6_description_4": {"attribute_name": "抄録", "attribute_value_mlt": [{"subitem_description": "We propose a new strategy for parallel reinforcement learning ; using this strategy, the optimal value function and policy can be constructed more quickly than by using traditional strategies. We define two types of agents : the exploitation agents and the exploration agents. The exploitation agents select actions mainly for exploitation, and the exploration agents concentrate on exploration using the extended k-certainty exploration method. These agents learn in the same environment in parallel and combine each value function periodically. By using this strategy, the construction of the optimal value function is expected, and the optimal actions can be selected by the exploitation agents quickly. The experimental results of the mobile robot simulation showed the availability of our method.", "subitem_description_type": "Abstract"}]}, "item_6_publisher_33": {"attribute_name": "出版者", "attribute_value_mlt": [{"subitem_publisher": "社団法人 日本機械学会"}]}, "item_6_relation_46": {"attribute_name": "異版である", "attribute_value_mlt": [{"subitem_relation_type": "isVersionOf", "subitem_relation_type_id": {"subitem_relation_type_id_text": "http://ci.nii.ac.jp/naid/110006643686", "subitem_relation_type_select": "URI"}}]}, "item_6_rights_13": {"attribute_name": "権利", "attribute_value_mlt": [{"subitem_rights": "社団法人日本機械学会"}, {"subitem_rights": "本文データは学協会の許諾に基づきCiNiiから複製したものである"}]}, "item_6_source_id_10": {"attribute_name": "書誌レコードID", "attribute_value_mlt": [{"subitem_source_identifier": "AN00187463", "subitem_source_identifier_type": "NCID"}]}, "item_6_source_id_8": {"attribute_name": "ISSN", "attribute_value_mlt": [{"subitem_source_identifier": "03875024", "subitem_source_identifier_type": "ISSN"}]}, "item_6_version_type_16": {"attribute_name": "著者版フラグ", "attribute_value_mlt": [{"subitem_version_resource": "http://purl.org/coar/version/c_970fb48d4fbd8a85", "subitem_version_type": "VoR"}]}, 
"item_creator": {"attribute_name": "著者", "attribute_type": "creator", "attribute_value_mlt": [{"creatorNames": [{"creatorName": "舘山, 武史"}], "nameIdentifiers": [{"nameIdentifier": "10840", "nameIdentifierScheme": "WEKO"}]}, {"creatorNames": [{"creatorName": "川田, 誠一"}], "nameIdentifiers": [{"nameIdentifier": "10841", "nameIdentifierScheme": "WEKO"}]}, {"creatorNames": [{"creatorName": "下村, 芳樹"}], "nameIdentifiers": [{"nameIdentifier": "10842", "nameIdentifierScheme": "WEKO"}]}]}, "item_files": {"attribute_name": "ファイル情報", "attribute_type": "file", "attribute_value_mlt": [{"accessrole": "open_date", "date": [{"dateType": "Available", "dateValue": "2016-08-04"}], "displaytype": "detail", "download_preview_message": "", "file_order": 0, "filename": "10153-001.pdf", "filesize": [{"value": "1.2 MB"}], "format": "application/pdf", "future_date_message": "", "is_thumbnail": false, "licensetype": "license_free", "mimetype": "application/pdf", "size": 1200000, "url": {"label": "10153-001.pdf", "url": "https://tokyo-metro-u.repo.nii.ac.jp/record/3710/files/10153-001.pdf"}, "version_id": "6feb3650-2f99-4aa6-9e36-5ed731fb89c9"}]}, "item_keyword": {"attribute_name": "キーワード", "attribute_value_mlt": [{"subitem_subject": "Parallel Reinforcement Learning", "subitem_subject_scheme": "Other"}, {"subitem_subject": "Q-Learning", "subitem_subject_scheme": "Other"}, {"subitem_subject": "Extended k-certainty Exploration Method", "subitem_subject_scheme": "Other"}, {"subitem_subject": "Policy Iteration", "subitem_subject_scheme": "Other"}, {"subitem_subject": "Exploration-Exploitation Dilemma", "subitem_subject_scheme": "Other"}, {"subitem_subject": "Dyna-Q", "subitem_subject_scheme": "Other"}]}, "item_language": {"attribute_name": "言語", "attribute_value_mlt": [{"subitem_language": "jpn"}]}, "item_resource_type": {"attribute_name": "資源タイプ", "attribute_value_mlt": [{"resourcetype": "journal article", "resourceuri": "http://purl.org/coar/resource_type/c_6501"}]}, "item_title": 
"探索エージェントを導入した学習経験を共有するマルチエージェント強化学習システムの提案", "item_titles": {"attribute_name": "タイトル", "attribute_value_mlt": [{"subitem_title": "探索エージェントを導入した学習経験を共有するマルチエージェント強化学習システムの提案"}]}, "item_type_id": "6", "owner": "3", "path": ["624"], "permalink_uri": "http://hdl.handle.net/10748/4040", "pubdate": {"attribute_name": "公開日", "attribute_value": "2011-03-07"}, "publish_date": "2011-03-07", "publish_status": "0", "recid": "3710", "relation": {}, "relation_version_is_last": true, "title": ["探索エージェントを導入した学習経験を共有するマルチエージェント強化学習システムの提案"], "weko_shared_id": -1}
探索エージェントを導入した学習経験を共有するマルチエージェント強化学習システムの提案
http://hdl.handle.net/10748/4040
http://hdl.handle.net/10748/4040
a11c2561-cbb2-4d29-ad39-22a8ce64be66
名前 / ファイル | ライセンス | アクション |
---|---|---|
10153-001.pdf (1.2 MB)
|
|
Item type | 学術雑誌論文 / Journal Article(1) | |||||
---|---|---|---|---|---|---|
公開日 | 2011-03-07 | |||||
タイトル | ||||||
タイトル | 探索エージェントを導入した学習経験を共有するマルチエージェント強化学習システムの提案 | |||||
言語 | ||||||
言語 | jpn | |||||
キーワード | ||||||
主題Scheme | Other | |||||
主題 | Parallel Reinforcement Learning | |||||
キーワード | ||||||
主題Scheme | Other | |||||
主題 | Q-Learning | |||||
キーワード | ||||||
主題Scheme | Other | |||||
主題 | Extended k-certainty Exploration Method | |||||
キーワード | ||||||
主題Scheme | Other | |||||
主題 | Policy Iteration | |||||
キーワード | ||||||
主題Scheme | Other | |||||
主題 | Exploration-Exploitation Dilemma | |||||
キーワード | ||||||
主題Scheme | Other | |||||
主題 | Dyna-Q | |||||
資源タイプ | ||||||
資源タイプ識別子 | http://purl.org/coar/resource_type/c_6501 | |||||
資源タイプ | journal article | |||||
著者 |
舘山, 武史
× 舘山, 武史× 川田, 誠一× 下村, 芳樹 |
|||||
著者(ヨミ) |
タテヤマ, タケシ
× タテヤマ, タケシ× カワタ, セイイチ× シマムラ, ヨシキ |
|||||
著者別名 |
Tateyama, Takeshi
× Tateyama, Takeshi× Kawata, Seiichi× Shimomura, Yoshiki |
|||||
抄録 | ||||||
内容記述タイプ | Abstract | |||||
内容記述 | We propose a new strategy for parallel reinforcement learning ; using this strategy, the optimal value function and policy can be constructed more quickly than by using traditional strategies. We define two types of agents : the exploitation agents and the exploration agents. The exploitation agents select actions mainly for exploitation, and the exploration agents concentrate on exploration using the extended k-certainty exploration method. These agents learn in the same environment in parallel and combine each value function periodically. By using this strategy, the construction of the optimal value function is expected, and the optimal actions can be selected by the exploitation agents quickly. The experimental results of the mobile robot simulation showed the availability of our method. | |||||
書誌情報 |
日本機械学會論文集. C編 巻 74, 号 739, p. 692-701, 発行日 2008-03-25 |
|||||
ISSN | ||||||
収録物識別子タイプ | ISSN | |||||
収録物識別子 | 03875024 | |||||
書誌レコードID | ||||||
収録物識別子タイプ | NCID | |||||
収録物識別子 | AN00187463 | |||||
権利 | ||||||
権利情報 | 社団法人日本機械学会 | |||||
権利 | ||||||
権利情報 | 本文データは学協会の許諾に基づきCiNiiから複製したものである | |||||
著者版フラグ | ||||||
出版タイプ | VoR | |||||
出版タイプResource | http://purl.org/coar/version/c_970fb48d4fbd8a85 | |||||
その他のタイトル | ||||||
その他のタイトル | Parallel Reinforcement Learning Systems Using Exploration Agents | |||||
出版者 | ||||||
出版者 | 社団法人 日本機械学会 | |||||
異版である | ||||||
関連タイプ | isVersionOf | |||||
識別子タイプ | URI | |||||
関連識別子 | http://ci.nii.ac.jp/naid/110006643686 |