1
Changelog Changes made in this version not seen in first lecture: 10 October 2017: remove duplication of stall logic slides 10 October 2017: slide 6: use d_dstE and reg_dstE instead of dstE, use P_pc and p_pc consistently 10 October 2017: move pipeline stages slide after mention of the stall for ret 10 October 2017: slide 8: show version without moved wires for dstE/dstM 10 October 2017: slide 16: add valA and valB, not valB and valB 1
addq processor timing 3 1700 11 1100 1000 13 12 0x6 9 4 900 800 11 10 0x4 2 9 9 9 0x2 2500 execute/writeback decode/execute fetch/decode 8 2500 6 13 8 8 800 1700 5 11 2100 13 1300 1200 8 1 PC next R[dstE] add 2 ADD ADD 0xF split next R[dstM] dstM dstE // R[srcB] R[srcA] srcB srcA register file Mem. Instr. // initially %r8 = 800, %r9 = 900, etc. 0x0 addq %r8, %r9 0 dstE next R[dstE] rB rA PC cycle dstE next R[dstE] rB rA fetch addq %r9, %r8 addq %r12, %r13 addq %r10, %r11 2 R[srcA] R[srcB] dstE R[srcA] R[srcB] dstE
addq processor timing 3 1700 11 1100 1000 13 12 0x6 9 4 900 800 11 10 0x4 2 9 9 9 0x2 2500 execute/writeback decode/execute fetch/decode 8 2500 6 13 8 8 800 1700 5 11 2100 13 1300 1200 8 1 PC next R[dstE] add 2 ADD ADD 0xF split next R[dstM] dstM dstE // R[srcB] R[srcA] srcB srcA register file Mem. Instr. // initially %r8 = 800, %r9 = 900, etc. 0x0 addq %r8, %r9 0 dstE next R[dstE] rB rA PC cycle dstE next R[dstE] rB rA fetch addq %r9, %r8 addq %r12, %r13 addq %r10, %r11 2 R[srcA] R[srcB] dstE R[srcA] R[srcB] dstE
addq processor timing 3 1700 11 1100 1000 13 12 0x6 9 4 900 800 11 10 0x4 2 9 9 9 0x2 2500 execute/writeback decode/execute fetch/decode 8 2500 6 13 8 8 800 1700 5 11 2100 13 1300 1200 8 1 PC next R[dstE] add 2 ADD ADD 0xF split next R[dstM] dstM dstE // R[srcB] R[srcA] srcB srcA register file Mem. Instr. // initially %r8 = 800, %r9 = 900, etc. 0x0 addq %r8, %r9 0 dstE next R[dstE] rB rA PC cycle dstE next R[dstE] rB rA fetch addq %r9, %r8 addq %r12, %r13 addq %r10, %r11 2 R[srcA] R[srcB] dstE R[srcA] R[srcB] dstE
addq processor timing 3 1700 11 1100 1000 13 12 0x6 9 4 900 800 11 10 0x4 2 9 9 9 0x2 2500 execute/writeback decode/execute fetch/decode 8 2500 6 13 8 8 800 1700 5 11 2100 13 1300 1200 8 1 PC next R[dstE] add 2 ADD ADD 0xF split next R[dstM] dstM dstE // R[srcB] R[srcA] srcB srcA register file Mem. Instr. // initially %r8 = 800, %r9 = 900, etc. 0x0 addq %r8, %r9 0 dstE next R[dstE] rB rA PC cycle dstE next R[dstE] rB rA fetch addq %r9, %r8 addq %r12, %r13 addq %r10, %r11 2 R[srcA] R[srcB] dstE R[srcA] R[srcB] dstE
addq processor timing 3 1700 11 1100 1000 13 12 0x6 9 4 900 800 11 10 0x4 2 9 9 9 0x2 2500 execute/writeback decode/execute fetch/decode 8 2500 6 13 8 8 800 1700 5 11 2100 13 1300 1200 8 1 PC next R[dstE] add 2 ADD ADD 0xF split next R[dstM] dstM dstE // R[srcB] R[srcA] srcB srcA register file Mem. Instr. // initially %r8 = 800, %r9 = 900, etc. 0x0 addq %r8, %r9 0 dstE next R[dstE] rB rA PC cycle dstE next R[dstE] rB rA fetch addq %r9, %r8 addq %r12, %r13 addq %r10, %r11 2 R[srcA] R[srcB] dstE R[srcA] R[srcB] dstE
pipeline register naming convention split W_dstE e_dstE E_dstE d_dstE D_rA f_rA add 2 ADD ADD 0xF next R[dstM] PC dstM next R[dstE] dstE R[srcB] R[srcA] srcB srcA register file Mem. Instr. 3
pipeline register naming convention f — fetch sends values here D — decode receives values here d — decode sends values here … 4
addq HCL /* use register file: */ ... } valB : 64 = 0; valA : 64 = 0; dstE : 4 = REG_NONE; register dE { /* decode to execute */ ... d_valA = reg_outputA; reg_srcA = D_rA; d_dstE = D_rB; ... d: from decode */ /* D: to decode } rB : 4 = REG_NONE; rA : 4 = REG_NONE; register fD { /* f_rA -> D_rA, etc. */ /* fetch to decode */ f_rB = i10bytes[8..12]; f_rA = i10bytes[12..16]; /* f: from fetch */ 5
addq fetch/decode pc = P_pc; pipelined d_valB = reg_outputB; d_valA = reg_outputA; d_dstE = D_rB; reg_srcB = D_rB; reg_srcA = D_rA; /* Decode */ f_rB = i10bytes[8..12]; f_rA = i10bytes[12..16]; p_pc = pc + 2; /* Fetch+PC Update*/ /* Fetch+PC Update*/ unpipelined valB = reg_outputB; valA = reg_outputA; reg_dstE = rB; reg_srcB = rB; reg_srcA = rA; /* Decode */ rB = i10bytes[8..12]; rA = i10bytes[12..16]; p_pc = pc + 2; pc = P_pc; 6
addq pipeline registers valA : 64 = 0; valB : 64 = 0; dstE : 4 = REG_NONE; /* Writeback */ } valE : 64 = 0; dstE : 4 = REG_NONE; register eW { /* Execute */ } register dE { register pP { /* Decode */ }; rA : 4 = REG_NONE; rB : 4 = REG_NONE; register fD { /* Fetch+PC Update*/ }; pc : 64 = 0; 7
SEQ without stages add/sub %rsp rA rB ALU aluA aluB valE 8 0 xor/and 0xF (function of instr.) write? function of opcode PC+9 instr. length + 0xF %rsp PC dstM Instr. Mem. register file srcA srcB R[srcA] R[srcB] dstE next R[dstE] next R[dstM] %rsp Data Mem. ZF/SF Stat Data in Addr in Data out valC 0xF 0xF 8
SEQ with stages write? ALU aluA aluB valE 8 0 add/sub xor/and (function of instr.) function rA of opcode PC+9 instr. length + fetch decode execute memory writeback rule: signal to next stage (except flow control) rB %rsp PC Data Instr. Mem. register file srcA srcB R[srcA] R[srcB] dstE next R[dstE] dstM next R[dstM] Mem. 0xF ZF/SF Stat Data in Addr in Data out valC 0xF 0xF %rsp %rsp 0xF 9
SEQ with stages write? ALU aluA aluB valE 8 0 add/sub xor/and (function of instr.) function rA of opcode PC+9 instr. length + fetch decode execute memory writeback rule: signal to next stage (except flow control) rB %rsp PC Data Instr. Mem. register file srcA srcB R[srcA] R[srcB] dstE next R[dstE] dstM next R[dstM] Mem. 0xF ZF/SF Stat Data in Addr in Data out valC 0xF 0xF %rsp %rsp 0xF 9
SEQ with stages write? ALU aluA aluB valE 8 0 add/sub xor/and (function of instr.) function rA of opcode PC+9 instr. length + fetch decode execute memory writeback rule: signal to next stage (except flow control) rB %rsp PC Data Instr. Mem. register file srcA srcB R[srcA] R[srcB] dstE next R[dstE] dstM next R[dstM] Mem. 0xF ZF/SF Stat Data in Addr in Data out valC 0xF 0xF %rsp %rsp 0xF 9
SEQ with stages (actually sequential) of instr.) rB ALU aluA aluB valE 8 0 add/sub xor/and (function write? %rsp function of opcode PC+9 instr. length + fetch decode execute memory writeback rA 0xF PC next R[dstM] Instr. Mem. register file srcA srcB R[srcA] R[srcB] dstE next R[dstE] dstM Data 0xF Mem. ZF/SF Stat Data in Addr in Data out valC 0xF 0xF %rsp %rsp 10
adding pipeline registers write? ALU aluA aluB valE 8 0 add/sub xor/and (function of instr.) function rA of opcode PC+9 instr. length + fetch decode execute memory writeback not shown — control logic rB %rsp PC Data Instr. Mem. register file srcA srcB R[srcA] R[srcB] dstE next R[dstE] dstM next R[dstM] Mem. 0xF ZF/SF Stat Data in Addr in Data out valC 0xF 0xF %rsp %rsp 0xF 11
adding pipeline registers write? ALU aluA aluB valE 8 0 add/sub xor/and (function of instr.) function rA of opcode PC+9 instr. length + fetch decode execute memory writeback not shown — control logic rB %rsp PC Data Instr. Mem. register file srcA srcB R[srcA] R[srcB] dstE next R[dstE] dstM next R[dstM] Mem. 0xF ZF/SF Stat Data in Addr in Data out valC 0xF 0xF %rsp %rsp 0xF 11
passing values in pipeline read prior stage’s outputs e.g. decode: get from fetch via pipeline registers (D_icode, …) e.g. decode: send to execute via pipeline registers (d_icode, …) via register file/memory/etc. via control flow instructions 12 send inputs for next stage exceptions: deliberate sharing between instructions
memory read/write logic data memory address data input data output is read? is write? icode from instr. mem from instr. mem. 13
memory read/write logic data memory address data input data output is read? is write? icode from instr. mem from instr. mem. 13
memory read/write logic data memory address data input data output is read? is write? icode from instr. mem from instr. mem. 13
memory read/write: SEQ code icode = i10bytes[4..8]; mem_readbit = [ icode == MRMOVQ || ...: 1; 0; ]; 14
memory read/write: PIPE code f_icode = i10bytes[4..8]; register fD { /* and dE and eM and mW */ icode : 4 = NOP; } d_icode = D_icode ... e_icode = E_icode; mem_readbit = [ M_icode == MRMOVQ || ...: 1; 0; ]; 15
memory read/write: PIPE code f_icode = i10bytes[4..8]; register fD { /* and dE and eM and mW */ icode : 4 = NOP; } d_icode = D_icode ... e_icode = E_icode; mem_readbit = [ M_icode == MRMOVQ || ...: 1; 0; ]; 15
Recommend
More recommend