language modeling machine translation
play

Language modeling, Machine translation CS 4803 / 7643 Deep Learning - PowerPoint PPT Presentation

Language modeling, Machine translation CS 4803 / 7643 Deep Learning Georgia Tech, 31 March 2020 James Cross, Jean Maillard Facebook AI


  1. Language modeling, Machine translation CS 4803 / 7643 Deep Learning 
 Georgia Tech, 31 March 2020 James Cross, Jean Maillard Facebook AI

  2. Language modelling aims to assign probabilities to word sequences $\mathbf{w} = (w_1, w_2, \ldots, w_n)$: $$P(\mathbf{w}) = P(w_1, w_2, \ldots, w_n) = P(w_1)\, P(w_2 \mid w_1)\, P(w_3 \mid w_2, w_1) \cdots P(w_n \mid w_{n-1}, \ldots, w_1) = \prod_{i=1}^{n} P(w_i \mid w_{i-1}, \ldots, w_1)$$ In each factor, $w_i$ is the next word and $w_{i-1}, \ldots, w_1$ is the history.

  3. Search Engine. Query: "how do I stop my backpack from ruining" [Search]. Suggested completions: ruining my life; ruining my relationship; ruining my meal; ruining your day; ruining my hair; ruining my carpet; ruining your hair; ruining my garden.

  4. $$P(\mathbf{w}) = \prod_{i=1}^{n} P(w_i \mid w_{i-1}, \ldots, w_1) = P(w_1)\, P(w_2 \mid w_1)\, P(w_3 \mid w_2, w_1)\, P(w_4 \mid w_3, w_2, w_1) \cdots$$ Under a conditional independence (Markov) assumption, each word depends only on the word immediately before it: $$P(\mathbf{w}) \simeq P(w_1)\, P(w_2 \mid w_1)\, P(w_3 \mid w_2)\, P(w_4 \mid w_3) \cdots = \prod_{i=1}^{n} P(w_i \mid w_{i-1})$$ This is the bigram language model. Its conditional probabilities are estimated from counts: $$P(w_i \mid w_{i-1}) = \frac{P(w_i, w_{i-1})}{P(w_{i-1})} \simeq \frac{\mathrm{count}(w_i, w_{i-1})}{\mathrm{count}(w_{i-1})}$$ The numerator is how many times "$w_{i-1}\, w_i$" appears, and the denominator is how many times "$w_{i-1}$" appears, in a large body of text, like Wikipedia.

  5. RNN language models don’t need a Markov assumption. The words $w_1$, $w_2$, $w_3$, $w_4$ are fed one at a time into a chain of RNN cells (RNN cell → RNN cell → RNN cell → RNN cell), and the final cell outputs $P(w_5 \mid w_4, w_3, w_2, w_1)$. The more context we have to predict the next word, the better!

Recommend


More recommend