@COMMENT This file was generated by bib2html.pl version 0.94 @COMMENT written by Patrick Riley @COMMENT This file came from Bikramjit Banerjee's publication pages at @COMMENT http://www.cs.usm.edu/~banerjee/publications/class_type.html @InCollection{Banerjee06:Unifying, author = {B. Banerjee and J. Peng}, title = {Unifying Convergence and No-regret in Multiagent Learning}, booktitle = {Learning and Adaption in Multi-Agent Systems}, pages = {100 - 114}, publisher = {Springer-Verlag}, year = {2006}, editor = {Karl Tuyls and Pieter Jan 't Hoen and Sandip Sen and Katja Verbeeck}, volume = {LNAI 3898}, abstract = { We present a new multiagent learning algorithm, RVσ(t) , that builds on an earlier version, ReDVaLeR . ReDVaLeR could guarantee (a) convergence to best response against stationary opponents and either (b) constant bounded re- gret against arbitrary opponents, or (c) convergence to Nash equilibrium policies in self-play. But it makes two strong assumptions: (1) that it can distinguish be- tween self-play and otherwise non-stationary agents and (2) that all agents know their portions of the same equilibrium in self-play. We show that the adaptive learnng rate of RVσ(t) that is explicitly dependent on time can overcome both of these assumptions. Consequently, RVσ(t) theoretically achieves (a’) convergence to near-best response against eventually stationary opponents, (b’) no-regret pay- off against arbitrary opponents and (c’) convergence to some Nash equilibrium policy in some classes of games, in self-play. Each agent now needs to know its portion of any equilibrium, and does not need to distinguish among non- stationary opponent types. This is also the first successful attempt (to our knowl- edge) at convergence of a no-regret algorithm in the Shapley game. } }