Matches in SemOpenAlex for { <https://semopenalex.org/work/W2895557109> ?p ?o ?g. }
- W2895557109 endingPage "455" @default.
- W2895557109 startingPage "438" @default.
- W2895557109 abstract "Policy search algorithms have facilitated application of Reinforcement Learning (RL) to dynamic systems, such as control of robots. Many policy search algorithms are based on the policy gradient, and thus may suffer from slow convergence or local optima complications. In this paper, we take a Bayesian approach to policy search under RL paradigm, for the problem of controlling a discrete time Markov decision process with continuous state and action spaces and with a multiplicative reward structure. For this purpose, we assume a prior over policy parameters and aim for the ‘posterior’ distribution where the ‘likelihood’ is the expected reward. We propound a Markov chain Monte Carlo algorithm as a method of generating samples for policy parameters from this posterior. The proposed algorithm is compared with certain well-known policy gradient-based RL methods and exhibits more appropriate performance in terms of time response and convergence rate, when applied to a nonlinear model of a Cart-Pole benchmark." @default.
- W2895557109 created "2018-10-12" @default.
- W2895557109 creator A5000375496 @default.
- W2895557109 creator A5015003622 @default.
- W2895557109 creator A5016930658 @default.
- W2895557109 date "2018-01-01" @default.
- W2895557109 modified "2023-10-16" @default.
- W2895557109 title "A Markov chain Monte Carlo algorithm for Bayesian policy search" @default.
- W2895557109 cites W1483307070 @default.
- W2895557109 cites W1501586228 @default.
- W2895557109 cites W1585575029 @default.
- W2895557109 cites W1588514005 @default.
- W2895557109 cites W1970789124 @default.
- W2895557109 cites W1988071341 @default.
- W2895557109 cites W2038885294 @default.
- W2895557109 cites W2073995413 @default.
- W2895557109 cites W2080039641 @default.
- W2895557109 cites W2088413745 @default.
- W2895557109 cites W2091860746 @default.
- W2895557109 cites W2106706098 @default.
- W2895557109 cites W2107662876 @default.
- W2895557109 cites W2119717200 @default.
- W2895557109 cites W2233997862 @default.
- W2895557109 cites W3103182070 @default.
- W2895557109 cites W3125893104 @default.
- W2895557109 cites W4233487859 @default.
- W2895557109 cites W9583197 @default.
- W2895557109 doi "https://doi.org/10.1080/21642583.2018.1528483" @default.
- W2895557109 hasPublicationYear "2018" @default.
- W2895557109 type Work @default.
- W2895557109 sameAs 2895557109 @default.
- W2895557109 citedByCount "5" @default.
- W2895557109 countsByYear W28955571092019 @default.
- W2895557109 countsByYear W28955571092021 @default.
- W2895557109 countsByYear W28955571092022 @default.
- W2895557109 countsByYear W28955571092023 @default.
- W2895557109 crossrefType "journal-article" @default.
- W2895557109 hasAuthorship W2895557109A5000375496 @default.
- W2895557109 hasAuthorship W2895557109A5015003622 @default.
- W2895557109 hasAuthorship W2895557109A5016930658 @default.
- W2895557109 hasBestOaLocation W28955571091 @default.
- W2895557109 hasConcept C105795698 @default.
- W2895557109 hasConcept C106189395 @default.
- W2895557109 hasConcept C107673813 @default.
- W2895557109 hasConcept C111350023 @default.
- W2895557109 hasConcept C11413529 @default.
- W2895557109 hasConcept C119857082 @default.
- W2895557109 hasConcept C126255220 @default.
- W2895557109 hasConcept C127162648 @default.
- W2895557109 hasConcept C13280743 @default.
- W2895557109 hasConcept C154945302 @default.
- W2895557109 hasConcept C159886148 @default.
- W2895557109 hasConcept C162324750 @default.
- W2895557109 hasConcept C185798385 @default.
- W2895557109 hasConcept C205649164 @default.
- W2895557109 hasConcept C2777303404 @default.
- W2895557109 hasConcept C31258907 @default.
- W2895557109 hasConcept C33923547 @default.
- W2895557109 hasConcept C41008148 @default.
- W2895557109 hasConcept C50522688 @default.
- W2895557109 hasConcept C57830394 @default.
- W2895557109 hasConcept C57869625 @default.
- W2895557109 hasConcept C97541855 @default.
- W2895557109 hasConcept C98763669 @default.
- W2895557109 hasConceptScore W2895557109C105795698 @default.
- W2895557109 hasConceptScore W2895557109C106189395 @default.
- W2895557109 hasConceptScore W2895557109C107673813 @default.
- W2895557109 hasConceptScore W2895557109C111350023 @default.
- W2895557109 hasConceptScore W2895557109C11413529 @default.
- W2895557109 hasConceptScore W2895557109C119857082 @default.
- W2895557109 hasConceptScore W2895557109C126255220 @default.
- W2895557109 hasConceptScore W2895557109C127162648 @default.
- W2895557109 hasConceptScore W2895557109C13280743 @default.
- W2895557109 hasConceptScore W2895557109C154945302 @default.
- W2895557109 hasConceptScore W2895557109C159886148 @default.
- W2895557109 hasConceptScore W2895557109C162324750 @default.
- W2895557109 hasConceptScore W2895557109C185798385 @default.
- W2895557109 hasConceptScore W2895557109C205649164 @default.
- W2895557109 hasConceptScore W2895557109C2777303404 @default.
- W2895557109 hasConceptScore W2895557109C31258907 @default.
- W2895557109 hasConceptScore W2895557109C33923547 @default.
- W2895557109 hasConceptScore W2895557109C41008148 @default.
- W2895557109 hasConceptScore W2895557109C50522688 @default.
- W2895557109 hasConceptScore W2895557109C57830394 @default.
- W2895557109 hasConceptScore W2895557109C57869625 @default.
- W2895557109 hasConceptScore W2895557109C97541855 @default.
- W2895557109 hasConceptScore W2895557109C98763669 @default.
- W2895557109 hasIssue "1" @default.
- W2895557109 hasLocation W28955571091 @default.
- W2895557109 hasLocation W28955571092 @default.
- W2895557109 hasOpenAccess W2895557109 @default.
- W2895557109 hasPrimaryLocation W28955571091 @default.
- W2895557109 hasRelatedWork W1549519846 @default.
- W2895557109 hasRelatedWork W1626977535 @default.
- W2895557109 hasRelatedWork W1662842982 @default.
- W2895557109 hasRelatedWork W1985560493 @default.
- W2895557109 hasRelatedWork W2128702080 @default.
- W2895557109 hasRelatedWork W2145363145 @default.