Matches in SemOpenAlex for { <https://semopenalex.org/work/W2895095767> ?p ?o ?g. }
- W2895095767 endingPage "59" @default.
- W2895095767 startingPage "41" @default.
- W2895095767 abstract "Delayed Q-learning is an efficient model-free reinforcement-learning algorithm. This algorithm is guaranteed to converge in polynomial time to near optimal policies in Markov decision processes. However, Delayed Q-learning performs very poorly in some stochastic environments because it overestimates action values. Overestimated action values are caused by a positive bias that is a result of using the maximum value function to update the maximum expected action value. This paper applies the double-estimator method to Delayed Q-learning to construct a new algorithm called Double Delayed Q-learning (2D Q-learning). The 2D Q-learning was tested using the gambling game of roulette. The experimental results showed that 2D Q-learning converges to an optimal policy and that it performs better than Delayed Q-learning in some settings where Delayed Q-learning has a poor performance because of its large overestimation." @default.
- W2895095767 created "2018-10-12" @default.
- W2895095767 creator A5032488540 @default.
- W2895095767 creator A5079015282 @default.
- W2895095767 date "2018-09-15" @default.
- W2895095767 modified "2023-09-26" @default.
- W2895095767 title "Double Delayed Q-learning" @default.
- W2895095767 cites W1705315349 @default.
- W2895095767 cites W1861050369 @default.
- W2895095767 cites W1975174639 @default.
- W2895095767 cites W2015527784 @default.
- W2895095767 cites W2027591961 @default.
- W2895095767 cites W2072953419 @default.
- W2895095767 cites W2075832535 @default.
- W2895095767 cites W2115211925 @default.
- W2895095767 cites W2118686230 @default.
- W2895095767 cites W2120346334 @default.
- W2895095767 cites W2121863487 @default.
- W2895095767 cites W2123890518 @default.
- W2895095767 cites W2129670787 @default.
- W2895095767 cites W2146917784 @default.
- W2895095767 cites W2149166950 @default.
- W2895095767 cites W2153351685 @default.
- W2895095767 cites W2167489871 @default.
- W2895095767 cites W2214934461 @default.
- W2895095767 cites W2340989005 @default.
- W2895095767 cites W2344944957 @default.
- W2895095767 cites W2565952796 @default.
- W2895095767 cites W2595292627 @default.
- W2895095767 cites W2618289908 @default.
- W2895095767 cites W2701355419 @default.
- W2895095767 cites W2762849146 @default.
- W2895095767 cites W2766795553 @default.
- W2895095767 cites W2777065655 @default.
- W2895095767 cites W3011120880 @default.
- W2895095767 cites W952777547 @default.
- W2895095767 cites W1707466736 @default.
- W2895095767 cites W2168551972 @default.
- W2895095767 hasPublicationYear "2018" @default.
- W2895095767 type Work @default.
- W2895095767 sameAs 2895095767 @default.
- W2895095767 citedByCount "2" @default.
- W2895095767 countsByYear W28950957672019 @default.
- W2895095767 countsByYear W28950957672020 @default.
- W2895095767 crossrefType "journal-article" @default.
- W2895095767 hasAuthorship W2895095767A5032488540 @default.
- W2895095767 hasAuthorship W2895095767A5079015282 @default.
- W2895095767 hasConcept C105795698 @default.
- W2895095767 hasConcept C106189395 @default.
- W2895095767 hasConcept C119857082 @default.
- W2895095767 hasConcept C121332964 @default.
- W2895095767 hasConcept C126255220 @default.
- W2895095767 hasConcept C14646407 @default.
- W2895095767 hasConcept C154945302 @default.
- W2895095767 hasConcept C159886148 @default.
- W2895095767 hasConcept C185429906 @default.
- W2895095767 hasConcept C188116033 @default.
- W2895095767 hasConcept C195502155 @default.
- W2895095767 hasConcept C199360897 @default.
- W2895095767 hasConcept C2524010 @default.
- W2895095767 hasConcept C2776291640 @default.
- W2895095767 hasConcept C2780791683 @default.
- W2895095767 hasConcept C2780801425 @default.
- W2895095767 hasConcept C33923547 @default.
- W2895095767 hasConcept C41008148 @default.
- W2895095767 hasConcept C62520636 @default.
- W2895095767 hasConcept C97541855 @default.
- W2895095767 hasConceptScore W2895095767C105795698 @default.
- W2895095767 hasConceptScore W2895095767C106189395 @default.
- W2895095767 hasConceptScore W2895095767C119857082 @default.
- W2895095767 hasConceptScore W2895095767C121332964 @default.
- W2895095767 hasConceptScore W2895095767C126255220 @default.
- W2895095767 hasConceptScore W2895095767C14646407 @default.
- W2895095767 hasConceptScore W2895095767C154945302 @default.
- W2895095767 hasConceptScore W2895095767C159886148 @default.
- W2895095767 hasConceptScore W2895095767C185429906 @default.
- W2895095767 hasConceptScore W2895095767C188116033 @default.
- W2895095767 hasConceptScore W2895095767C195502155 @default.
- W2895095767 hasConceptScore W2895095767C199360897 @default.
- W2895095767 hasConceptScore W2895095767C2524010 @default.
- W2895095767 hasConceptScore W2895095767C2776291640 @default.
- W2895095767 hasConceptScore W2895095767C2780791683 @default.
- W2895095767 hasConceptScore W2895095767C2780801425 @default.
- W2895095767 hasConceptScore W2895095767C33923547 @default.
- W2895095767 hasConceptScore W2895095767C41008148 @default.
- W2895095767 hasConceptScore W2895095767C62520636 @default.
- W2895095767 hasConceptScore W2895095767C97541855 @default.
- W2895095767 hasIssue "2" @default.
- W2895095767 hasLocation W28950957671 @default.
- W2895095767 hasOpenAccess W2895095767 @default.
- W2895095767 hasPrimaryLocation W28950957671 @default.
- W2895095767 hasRelatedWork W114528961 @default.
- W2895095767 hasRelatedWork W1605318140 @default.
- W2895095767 hasRelatedWork W1812824539 @default.
- W2895095767 hasRelatedWork W1983486975 @default.
- W2895095767 hasRelatedWork W2129670787 @default.
- W2895095767 hasRelatedWork W2230868825 @default.
- W2895095767 hasRelatedWork W2340989005 @default.