Power-Driven DNN Dataflow Optimization on FPGA Qi Sun 1 , Tinghuan Chen 1 , Jin Miao 2 , Bei Yu 1 1 The Chinese University of Hong Kong 2 Cadence Design Systems, Inc. 1 / 22
FPGA-Targeted DNN Accelerator Design Flow Dataflow Synthesis HLS Model DNN Model Design Space & Description Exploration Deployment FPGA Architecture Design ◮ Design power-efficient dataflow with limited latency loss ◮ Enabled by proposed power modeling and a hierarchical strategy 2 / 22
Dataflow Optimization: Basic Techniques for Loop Nest Optimization

for to in range(0, M):
  for row in range(0, H):
    for col in range(0, W):
      for ti in range(0, N):
        for k1 in range(0, K):
          for k2 in range(0, K):
            $\mathrm{OUT}_{to,\,row,\,col} \mathrel{+}= \mathrm{WT}_{to,\,ti,\,k1,\,k2} \times \mathrm{IN}_{ti,\,row+k1,\,col+k2}$

◮ Notations:
  N : # input channel
  M : # output channel
  K : kernel size
  H : height of feature
  W : width of feature

A 6-level loop convolutional layer 3 / 22
Recommend
More recommend