% Journal article in Automatica, volume 112 (February 2020).
% The exporter-supplied blob (paper summary, author biographies, funding and
% copyright statements) is stored in the annote field, which the standard
% bibliography styles do not typeset, instead of the printed note field.
% The ampersand inside that text is escaped so that annotating styles can
% typeset it without a LaTeX alignment-tab error.
% NOTE(review): the record carries no pages/article-number field; the DOI
% suffix suggests article number 108672 -- confirm before adding.
@article{0de6f710b227479eb4c3bfc46233a0a0,
  author    = {Odekunle, Adedapo and Gao, Weinan and Davari, Masoud and Jiang, Zhong-Ping},
  title     = {Reinforcement learning and non-zero-sum game output regulation for multi-player linear uncertain systems},
  journal   = {Automatica},
  volume    = {112},
  year      = {2020},
  month     = feb,
  publisher = {Elsevier Limited},
  issn      = {0005-1098},
  doi       = {10.1016/j.automatica.2019.108672},
  language  = {English (US)},
  keywords  = {Adaptive optimal control, Data-Driven control, Game theory, Output regulation, Reinforcement learning (RL)},
  abstract  = {This paper studies the non-zero-sum game output regulation problem (GORP) for a class of continuous-time multi-player linear systems. Without the knowledge of state and input matrices, the Nash equilibrium solution, N-tuple of feedback control policy, is learned through online data collected along the system trajectories. A key strategy is, for the first time, to combine techniques from reinforcement learning (RL), differential game theory, and output regulation for data-driven control design. Different from the existing literature of adaptive optimal output regulation, the feedforward matrices are considered nontrivial. Theoretical analysis shows the disturbance rejection and tracking ability of the closed-loop system. Simulation results demonstrate the efficacy of the developed data-driven control approach.},
  annote    = {Funding Information: This paper proposes a novel reinforcement-learning-based approach to the output regulation problem of linear systems with non-zero-sum differential games. A systematic data-driven control scheme is proposed for designing asymptotic trackers with the guaranteed rejection of non-vanishing disturbances. Numerical simulation results show the effectiveness of the proposed learning approach. Adedapo Odekunle received the B.Sc. degree in computer engineering from University of Lagos, Lagos, Nigeria, in 2010, the M.Sc. degree in electrical engineering from Georgia Southern University, Statesboro, Georgia, in 2019 with a research concentration in reinforcement learning, adaptive dynamic programming, optimal control, adaptive control, connected and autonomous vehicles and output regulation theory. He is currently working with Intel corporation as a Yield analysis Engineer. Weinan Gao received the B.Sc. degree in Automation from Northeastern University, Shenyang, China, in 2011, the M.Sc. degree in Control Theory and Control Engineering from Northeastern University, Shenyang, China, in 2013, and the Ph.D. degree in Electrical Engineering from New York University, in 2017. He is currently a tenure-track Assistant Professor of Electrical and Computer Engineering with Allen E. Paulson College of Engineering and Computing, Georgia Southern University. His research interests include reinforcement learning, adaptive dynamic programming (ADP), optimal control, cooperative adaptive cruise control (CACC), intelligent transportation systems, sampled-data control systems, and output regulation theory. He is the recipient of the best paper award in IEEE International Conference on Real-time Computing and Robotics (RCAR) in 2018, and David Goodman Research Award at New York University in 2019. Dr.
Gao serves as an Associate Editor of Neurocomputing, a member of Editorial Board of Neural Computing and Applications, and a technical committee member in IEEE Control Systems Society on Nonlinear Systems and Control. He has also served as a web chair in IEEE INFOCOM MobiSec Workshop in 2018. Masoud Davari was born in Isfahan, Iran, on September 14, 1985. He received the B.Sc. degree (with Distinction) in electrical engineering-power from the Isfahan University of Technology, Isfahan, Iran, in September 2007, the M.Sc. degree (with Distinction) in electrical engineering-power from Amirkabir University of Technology-Tehran Polytechnic, Tehran, Iran, in January 2010, and the Ph.D. degree in electrical engineering-energy systems from the University of Alberta, Edmonton, AB, Canada, in January 2016. He has been working with Iran{\textquoteright}s Grid Secure Operation Research Center and Iran{\textquoteright}s Electric Power Research Institute (EPRI) in Tehran, Iran, from January 2010 to December 2011. From April 2015 to June 2017, he has been collaborating with Quanta-Technology Company in the field of the dynamic interaction of renewable energy systems with smart grids as well as control, protection, and automation of microgrids as a Senior R \& D Specialist and Senior Consultant. Afterward, since July 2017, he has joined the Department of Electrical and Computer Engineering in Allen E. Paulson College of Engineering and Computing at Georgia Southern University, Statesboro, GA, USA as a tenure-track Assistant Professor faculty member. His research interest includes the dynamics, controls, and protections of different types of power electronic converters (which are employed in the hybrid ac/dc smart grids) and hardware-in-the-loop (HIL) testing of modernized power systems. Dr. Davari is an invited member of the Golden Key International Honour Society.
He is an active member and a chapter lead (for Chapter 3) in the IEEE WG P2004, a newly established IEEE working group on the Hardware-In-the-Loop (HIL) simulation for IEEE Standards Association, from June 2017 until now. He served as the chair of the Literature Review Subgroup of Standards for IEEE Standards Association from April 2014 to October 2015. He has developed and implemented several experimental test rigs for both research universities and the industry. He is also the author, the invited reviewer, and the invited speaker of several IEEE Transactions and journals, IET journals, Energies journal, various conferences, and diverse universities and places from different societies. Zhong-Ping Jiang received the M.Sc. degree in statistics from the University of Paris XI, France, in 1989, and the Ph.D. degree in automatic control and mathematics from the Ecole des Mines de Paris (now, called ParisTech-Mines), France, in 1993, under the direction of Prof. Laurent Praly. Currently, he is a Professor of Electrical and Computer Engineering at the Tandon School of Engineering, New York University. His main research interests include stability theory, robust/adaptive/distributed nonlinear control, robust adaptive dynamic programming, learning-based control and their applications to information, mechanical and biological systems. In these fields, he has written four books and is author/co-author of over 400 peer-reviewed journal and conference papers. Dr. Jiang has served as Senior Editor and Associate Editor for numerous journals. Prof. Jiang is a Fellow of the IEEE, a Fellow of the IFAC and a Clarivate Analytics Researcher. Funding Information: This work has been partly supported by the U.S. National Science Foundation grants ECCS-1903781, ECCS-1808279, and ECCS-1902787. The material in this paper was not presented at any conference. This paper was recommended for publication in revised form by Associate Editor Kyriakos G.
Vamvoudakis under the direction of Editor Miroslav Krstic. Publisher Copyright: {\textcopyright} 2019 Elsevier Ltd},
}