From dacbd5e1f3bfe1828aa5ac1c5c4ab03466df6256 Mon Sep 17 00:00:00 2001 From: Roberto Hexsel <roberto@inf.ufpr.br> Date: Mon, 24 Aug 2015 16:08:27 -0300 Subject: [PATCH] fixed load delay slot --- cMIPS/tests/jr_2.expected | 5 ++ cMIPS/tests/jr_2.s | 102 ++++++++++++++++++++++++++++++++-- cMIPS/tests/lwFWDsw2.expected | 40 ++++++------- cMIPS/tests/lwFWDsw2.s | 1 + cMIPS/vhdl/core.vhd | 49 +++++++++++----- cMIPS/vhdl/pipestages.vhd | 37 ++++++------ 6 files changed, 178 insertions(+), 56 deletions(-) diff --git a/cMIPS/tests/jr_2.expected b/cMIPS/tests/jr_2.expected index 09972f8..6b4c665 100644 --- a/cMIPS/tests/jr_2.expected +++ b/cMIPS/tests/jr_2.expected @@ -7,6 +7,7 @@ 00000054 00000060 0000006c + 0000000c 00000018 00000024 @@ -16,3 +17,7 @@ 00000054 00000060 0000006c +ok +ok +ok +ok diff --git a/cMIPS/tests/jr_2.s b/cMIPS/tests/jr_2.s index b5f9208..78a6501 100644 --- a/cMIPS/tests/jr_2.s +++ b/cMIPS/tests/jr_2.s @@ -31,8 +31,11 @@ snd: #sw $31, 0($16) # $31 <- 0,snd+4 ## ## now let's check stalls for lw r1 ; jr r1 ## - -trd: la $10, loop # start of loop address +trd: la $20, x_IO_BASE_ADDR # print out a separator + li $19, '\n' + sw $19, x_IO_ADDR_RANGE($20) + + la $10, loop # start of loop address la $11, addr # keep it in in memory sw $10, 0($11) move $8, $zero @@ -45,20 +48,109 @@ loop: add $9, $8, $8 # $9 <- 12,24,36,48,60,72, sw $9, 0($16) slt $28, $9, $29 - beq $28, $0, .L1 + beq $28, $0, four nop la $11, addr # keep it in in memory lw $9, 0($11) jr $9 nop -.L1: -end: nop + +four: la $20, x_IO_BASE_ADDR # print out a separator + + la $10, f4 # destination address + la $11, addr # keep it in in memory + sw $10, 0($11) + nop + + lw $12, 0($11) + addiu $12, $12, 4 + addiu $12, $12, -4 + nop + jr $12 + nop + .align 8,0 + +f4: li $19, 'o' + sw $19, x_IO_ADDR_RANGE($20) + li $19, 'k' + sw $19, x_IO_ADDR_RANGE($20) + li $19, '\n' + sw $19, x_IO_ADDR_RANGE($20) + + +five: la $20, x_IO_BASE_ADDR # print out a separator + + la $10, f5 # destination address + la $11, addr # keep it in in memory + sw $10, 0($11) + nop + + lw $12, 0($11) + addiu $12, $12, 4 + addiu $12, $12, -4 + jr $12 + nop + .align 8,0 + +f5: li $19, 'o' + sw $19, x_IO_ADDR_RANGE($20) + li $19, 'k' + sw $19, x_IO_ADDR_RANGE($20) + li $19, '\n' + sw $19, x_IO_ADDR_RANGE($20) + + +six: la $20, x_IO_BASE_ADDR # print out a separator + + la $10, f6 # destination address + la $11, addr # keep it in in memory + addiu $10, $10, 4 + sw $10, 0($11) + nop + + lw $12, 0($11) + addiu $12, $12, -4 + jr $12 + nop + .align 4,0 + +f6: li $19, 'o' + sw $19, x_IO_ADDR_RANGE($20) + li $19, 'k' + sw $19, x_IO_ADDR_RANGE($20) + li $19, '\n' + sw $19, x_IO_ADDR_RANGE($20) + +seven: la $20, x_IO_BASE_ADDR # print out a separator + + la $10, f7 # destination address + la $11, addr # keep it in in memory + sw $10, 0($11) + nop + + lw $12, 0($11) + jr $12 + nop + .align 4,0 + +f7: li $19, 'o' + sw $19, x_IO_ADDR_RANGE($20) + li $19, 'k' + sw $19, x_IO_ADDR_RANGE($20) + li $19, '\n' + sw $19, x_IO_ADDR_RANGE($20) + + + + .align 4,0 +_end: nop nop wait nop .end _start + .data .align 4 .space 128 diff --git a/cMIPS/tests/lwFWDsw2.expected b/cMIPS/tests/lwFWDsw2.expected index 5438082..08575e2 100644 --- a/cMIPS/tests/lwFWDsw2.expected +++ b/cMIPS/tests/lwFWDsw2.expected @@ -1,41 +1,41 @@ 00040010 fffffff6 -00040014 +00000014 fffffff7 -00040018 +00000018 fffffff8 -0004001c +0000001c fffffff9 -00040020 +00000020 fffffffa -00040024 +00000024 fffffffb -00040028 +00000028 fffffffc -0004002c +0000002c fffffffd -00040030 +00000030 fffffffe -00040034 +00000034 ffffffff -00040038 +00000038 00000000 -0004003c +0000003c 00000001 -00040040 +00000040 00000002 -00040044 +00000044 00000003 -00040048 +00000048 00000004 -0004004c +0000004c 00000005 -00040050 +00000050 00000006 -00040054 +00000054 00000007 -00040058 +00000058 00000008 -0004005c +0000005c 00000009 -00040060 +00000060 diff --git a/cMIPS/tests/lwFWDsw2.s b/cMIPS/tests/lwFWDsw2.s index 72119a0..f71dc69 100644 --- a/cMIPS/tests/lwFWDsw2.s +++ b/cMIPS/tests/lwFWDsw2.s @@ -28,6 +28,7 @@ snd: lw $15, 0($17) # reload pointer from RAM sw $15, 0($15) # store pointer to RAM[i] nop lw $15, 0($15) # reload pointer from RAM[i] + andi $15, $15, 0x00ff # mask off address, keep least sign bits sw $15, 0($16) # and print it out, forwarding $15 nop slt $8,$3,$9 # done? diff --git a/cMIPS/vhdl/core.vhd b/cMIPS/vhdl/core.vhd index 4b1ca94..896e2fc 100644 --- a/cMIPS/vhdl/core.vhd +++ b/cMIPS/vhdl/core.vhd @@ -332,7 +332,8 @@ architecture rtl of core is signal RF_EX_ld, EX_MM_ld, MM_WB_ld: std_logic; signal a_rs,EX_a_rs, a_rt,EX_a_rt,MM_a_rt, a_rd: reg5; signal a_c,EX_a_c,MM_a_c,WB_a_c: reg5; - signal move,EX_move,MM_move, is_load,EX_is_load : std_logic; + signal move,EX_move,MM_move : std_logic; + signal is_load,EX_is_load,MM_is_load : boolean; signal muxC,EX_muxC,MM_muxC,WB_muxC: reg3; signal wreg,EX_wreg_pre,EX_wreg,MM_wreg_cond,MM_wreg,WB_wreg: std_logic; signal aVal,EX_aVal,EX_aVal_cond,MM_aVal: std_logic; @@ -356,7 +357,7 @@ architecture rtl of core is signal rd_data_raw, rd_data, WB_rd_data, WB_mem_data: reg32; signal MM_B_data, WB_B_data: reg32; signal d_addr_pre: reg2; - signal jr_stall, br_stall, fwd_lwlr, sw_stall : std_logic; + signal jr_stall, br_stall, fwd_lwlr, sw_stall, lw_stall : std_logic; signal fwd_mem, WB_addr2: reg2; @@ -388,8 +389,8 @@ architecture rtl of core is EX_wrmem: out std_logic; mem_t: in std_logic_vector; EX_mem_t: out std_logic_vector; - is_load: in std_logic; - EX_is_load: out std_logic; + is_load: in boolean; + EX_is_load: out boolean; A: in std_logic_vector; EX_A: out std_logic_vector; B: in std_logic_vector; @@ -416,6 +417,8 @@ architecture rtl of core is MM_wrmem: out std_logic; EX_mem_t: in std_logic_vector; MM_mem_t: out std_logic_vector; + EX_is_load: in boolean; + MM_is_load: out boolean; EX_A: in std_logic_vector; MM_A: out std_logic_vector; EX_B: in std_logic_vector; @@ -733,7 +736,7 @@ begin -- PROGRAM COUNTER AND INSTRUCTION FETCH ------------------ pipe_stall <= rom_stall or ram_stall or jr_stall or br_stall or - sw_stall or tr_stall or exception_stall; + sw_stall or lw_stall or tr_stall or exception_stall; PCload <= '1' when pipe_stall = '1' else '0'; @@ -920,8 +923,8 @@ begin jr_stall <= '1'; i_dbg_jr_stall := 2; elsif ( (funct_word.PCsel = b"11")and -- 2nd load-delay slot - (MM_a_c = a_rs)and(MM_wreg = '0')and(MM_a_c /= b"00000") and - (MM_aVal = '0') ) then + MM_is_load and + (MM_a_c = a_rs)and(MM_wreg = '0')and(MM_a_c /= b"00000") ) then jr_stall <= '1'; i_dbg_jr_stall := 3; else @@ -932,19 +935,33 @@ begin end process RF_JR_STALL; + RF_LD_DELAY_SLOT: process (a_rs,a_rt,EX_a_c,EX_wreg,EX_is_load) + begin + if ( EX_is_load and + (EX_wreg = '0') and (EX_a_c /= b"00000") and + ( (EX_a_c = a_rs)or(EX_a_c = a_rt) ) ) then + lw_stall <= '1'; + else + lw_stall <= '0'; + end if; + end process RF_LD_DELAY_SLOT; + + RF_SW_STALL: process (ctrl_word,a_rs,EX_a_c,EX_wreg,EX_is_load) variable is_store : boolean := false; begin case ctrl_word.i is when LB | LH | LWL | LW | LBU | LHU | LWR => - is_load <= '1'; + is_load <= TRUE; is_store := FALSE; when SB | SH | SW => is_store := TRUE; - is_load <= '0'; - when others => is_load <= '0'; is_store := FALSE; + is_load <= FALSE; + when others => + is_load <= FALSE; + is_store := FALSE; end case; - if ( is_store and (EX_is_load = '1') and + if ( is_store and EX_is_load and (EX_a_c = a_rs)and(EX_wreg = '0')and(EX_a_c /= b"00000") ) then sw_stall <= '1'; else @@ -1382,13 +1399,17 @@ begin end process U_EX_ADDR_ERR_EXCP; ---------------------------------- - EX_addr <= phy_d_addr; -- with TLB + + -- uncomment this when making use of the TLB CHANGE + EX_addr <= phy_d_addr; -- with TLB + + -- uncomment this when NOT making use of the TLB + -- EX_addr <= v_addr; -- without TLB -- assert ( (phy_d_addr = v_addr) and (EX_aVal = '0') ) -- DEBUG -- report "mapping mismatch V:P "&SLV32HEX(v_addr)&":"&SLV32HEX(phy_d_addr); - EX_wreg <= EX_wreg_pre -- movz,movn, move/DO_NOT move or ( BOOL2SL(nullify) and not(MM_is_delayslot) ); -- abort wr if prev excep in EX @@ -1413,7 +1434,7 @@ begin port map (clk,rst, EX_MM_ld, EX_a_rt,MM_a_rt, EX_a_c,MM_a_c, EX_wreg,MM_wreg, EX_muxC,MM_muxC, EX_aVal_cond,MM_aVal, EX_wrmem_cond,MM_wrmem, - EX_mem_t,MM_mem_t, + EX_mem_t,MM_mem_t, EX_is_load,MM_is_load, EX_A,MM_A, alu_fwd_B,MM_B, result,MM_result, EX_addr,MM_addr, HI,MM_HI, LO,MM_LO, diff --git a/cMIPS/vhdl/pipestages.vhd b/cMIPS/vhdl/pipestages.vhd index a927519..a0bcc03 100644 --- a/cMIPS/vhdl/pipestages.vhd +++ b/cMIPS/vhdl/pipestages.vhd @@ -89,8 +89,8 @@ entity reg_RF_EX is EX_wrmem: out std_logic; mem_t: in reg4; EX_mem_t: out reg4; - is_load: in std_logic; - EX_is_load: out std_logic; + is_load: in boolean; + EX_is_load: out boolean; A: in reg32; EX_A: out reg32; B: in reg32; @@ -158,6 +158,8 @@ entity reg_EX_MM is MM_wrmem: out std_logic; EX_mem_t: in reg4; MM_mem_t: out reg4; + EX_is_load: in boolean; + MM_is_load: out boolean; EX_A: in reg32; MM_A: out reg32; EX_B: in reg32; @@ -188,22 +190,23 @@ begin MM_aVal <= '1'; elsif rising_edge(clk) then if ld = '0' then - MM_a_rt <= EX_a_rt ; - MM_a_c <= EX_a_c ; - MM_wreg <= EX_wreg ; - MM_muxC <= EX_muxC ; - MM_aVal <= EX_aVal ; - MM_wrmem <= EX_wrmem ; - MM_mem_t <= EX_mem_t ; - MM_A <= EX_A ; - MM_B <= EX_B ; - MM_result <= EX_result ; - MM_addr <= EX_addr ; - MM_HI <= HI ; - MM_LO <= LO ; + MM_a_rt <= EX_a_rt ; + MM_a_c <= EX_a_c ; + MM_wreg <= EX_wreg ; + MM_muxC <= EX_muxC ; + MM_aVal <= EX_aVal ; + MM_wrmem <= EX_wrmem ; + MM_mem_t <= EX_mem_t ; + MM_is_load <= EX_is_load ; + MM_A <= EX_A ; + MM_B <= EX_B ; + MM_result <= EX_result ; + MM_addr <= EX_addr ; + MM_HI <= HI ; + MM_LO <= LO ; MM_alu_move_ok <= EX_alu_move_ok ; - MM_move <= EX_move ; - MM_pc_p8 <= EX_pc_p8 ; + MM_move <= EX_move ; + MM_pc_p8 <= EX_pc_p8 ; end if; end if; end process; -- GitLab