Learning to Control the Linear Quadratic Regulator. Benjamin Recht, University of California, Berkeley.
Collaborators: joint work with Sarah Dean, Horia Mania, Nikolai Matni, Max Simchowitz, and Stephen Tu.
trustable, scalable, predictable
What are the fundamental limits of learning systems that interact with the physical environment? How well must we understand a system in order to control it? Theoretical foundations: • statistical learning theory • robust control theory • core optimization
Optimal control

$$
\begin{aligned}
\text{minimize} \quad & \mathbb{E}_{e}\!\left[\, \sum_{t=1}^{T} C_t(x_t, u_t) \right] \\
\text{s.t.} \quad & x_{t+1} = f_t(x_t, u_t, e_t), \\
& u_t = \pi_t(\tau_t)
\end{aligned}
$$

- $C_t$ is the cost. If you maximize, it's called a reward.
- $x_t$ is the state, $u_t$ is the input, and $e_t$ is a noise process.
- $f_t$ is the state-transition function.
- $\tau_t = (u_1, \ldots, u_{t-1}, x_0, \ldots, x_t)$ is an observed trajectory.
- $\pi_t(\tau_t)$ is the policy. This is the optimization decision variable.
Recommend
More recommend