From dacbd5e1f3bfe1828aa5ac1c5c4ab03466df6256 Mon Sep 17 00:00:00 2001
From: Roberto Hexsel <roberto@inf.ufpr.br>
Date: Mon, 24 Aug 2015 16:08:27 -0300
Subject: [PATCH] fixed load delay slot

---
 cMIPS/tests/jr_2.expected     |   5 ++
 cMIPS/tests/jr_2.s            | 102 ++++++++++++++++++++++++++++++++--
 cMIPS/tests/lwFWDsw2.expected |  40 ++++++-------
 cMIPS/tests/lwFWDsw2.s        |   1 +
 cMIPS/vhdl/core.vhd           |  49 +++++++++++-----
 cMIPS/vhdl/pipestages.vhd     |  37 ++++++------
 6 files changed, 178 insertions(+), 56 deletions(-)

diff --git a/cMIPS/tests/jr_2.expected b/cMIPS/tests/jr_2.expected
index 09972f8..6b4c665 100644
--- a/cMIPS/tests/jr_2.expected
+++ b/cMIPS/tests/jr_2.expected
@@ -7,6 +7,7 @@
 00000054
 00000060
 0000006c
+
 0000000c
 00000018
 00000024
@@ -16,3 +17,7 @@
 00000054
 00000060
 0000006c
+ok
+ok
+ok
+ok
diff --git a/cMIPS/tests/jr_2.s b/cMIPS/tests/jr_2.s
index b5f9208..78a6501 100644
--- a/cMIPS/tests/jr_2.s
+++ b/cMIPS/tests/jr_2.s
@@ -31,8 +31,11 @@ snd:	#sw   $31, 0($16)  # $31 <- 0,snd+4
 	##
 	## now let's check stalls for lw r1 ; jr r1
 	##
-	
-trd:	la   $10, loop	# start of loop address
+trd:	la   $20, x_IO_BASE_ADDR	# print out a separator
+        li   $19, '\n'
+        sw   $19, x_IO_ADDR_RANGE($20)
+
+	la   $10, loop	# start of loop address
 	la   $11, addr	# keep it in in memory
 	sw   $10, 0($11)
 	move $8, $zero
@@ -45,20 +48,109 @@ loop:
 	add  $9, $8, $8    # $9  <- 12,24,36,48,60,72,
 	sw   $9, 0($16)
 	slt  $28, $9, $29
-        beq  $28, $0, .L1
+        beq  $28, $0, four
 	nop
 	la   $11, addr	# keep it in in memory
 	lw   $9, 0($11)
 	jr   $9
 	nop
 
-.L1:
-end:	nop
+
+four:	la   $20, x_IO_BASE_ADDR	# I/O base address, used at f4 to print "ok"
+
+	la   $10, f4	# destination address
+	la   $11, addr	# keep it in memory
+	sw   $10, 0($11)
+	nop
+
+	lw    $12, 0($11)
+	addiu $12, $12, 4
+	addiu $12, $12, -4
+	nop
+	jr $12
+	nop
+	.align 8,0
+
+f4:	li   $19, 'o'
+        sw   $19, x_IO_ADDR_RANGE($20)
+        li   $19, 'k'
+        sw   $19, x_IO_ADDR_RANGE($20)
+        li   $19, '\n'
+        sw   $19, x_IO_ADDR_RANGE($20)
+
+
+five:	la   $20, x_IO_BASE_ADDR	# I/O base address, used at f5 to print "ok"
+
+	la   $10, f5	# destination address
+	la   $11, addr	# keep it in memory
+	sw   $10, 0($11)
+	nop
+
+	lw    $12, 0($11)
+	addiu $12, $12, 4
+	addiu $12, $12, -4
+	jr $12
+	nop
+	.align 8,0
+
+f5:	li   $19, 'o'
+        sw   $19, x_IO_ADDR_RANGE($20)
+        li   $19, 'k'
+        sw   $19, x_IO_ADDR_RANGE($20)
+        li   $19, '\n'
+        sw   $19, x_IO_ADDR_RANGE($20)
+
+
+six:	la   $20, x_IO_BASE_ADDR	# I/O base address, used at f6 to print "ok"
+
+	la    $10, f6	# destination address
+	la    $11, addr	# keep it in memory
+	addiu $10, $10, 4
+	sw    $10, 0($11)
+	nop
+
+	lw    $12, 0($11)
+	addiu $12, $12, -4
+	jr $12
+	nop
+	.align 4,0
+
+f6:	li   $19, 'o'
+        sw   $19, x_IO_ADDR_RANGE($20)
+        li   $19, 'k'
+        sw   $19, x_IO_ADDR_RANGE($20)
+        li   $19, '\n'
+        sw   $19, x_IO_ADDR_RANGE($20)
+
+seven:	la   $20, x_IO_BASE_ADDR	# I/O base address, used at f7 to print "ok"
+
+	la    $10, f7	# destination address
+	la    $11, addr	# keep it in memory
+	sw    $10, 0($11)
+	nop
+
+	lw    $12, 0($11)
+	jr $12
+	nop
+	.align 4,0
+
+f7:	li   $19, 'o'
+        sw   $19, x_IO_ADDR_RANGE($20)
+        li   $19, 'k'
+        sw   $19, x_IO_ADDR_RANGE($20)
+        li   $19, '\n'
+        sw   $19, x_IO_ADDR_RANGE($20)
+
+
+	
+	.align 4,0
+_end:	nop
 	nop
 	wait
 	nop
 	.end _start
 
+	
 	.data
 	.align 4
 	.space 128
diff --git a/cMIPS/tests/lwFWDsw2.expected b/cMIPS/tests/lwFWDsw2.expected
index 5438082..08575e2 100644
--- a/cMIPS/tests/lwFWDsw2.expected
+++ b/cMIPS/tests/lwFWDsw2.expected
@@ -1,41 +1,41 @@
 00040010
 fffffff6
-00040014
+00000014
 fffffff7
-00040018
+00000018
 fffffff8
-0004001c
+0000001c
 fffffff9
-00040020
+00000020
 fffffffa
-00040024
+00000024
 fffffffb
-00040028
+00000028
 fffffffc
-0004002c
+0000002c
 fffffffd
-00040030
+00000030
 fffffffe
-00040034
+00000034
 ffffffff
-00040038
+00000038
 00000000
-0004003c
+0000003c
 00000001
-00040040
+00000040
 00000002
-00040044
+00000044
 00000003
-00040048
+00000048
 00000004
-0004004c
+0000004c
 00000005
-00040050
+00000050
 00000006
-00040054
+00000054
 00000007
-00040058
+00000058
 00000008
-0004005c
+0000005c
 00000009
-00040060
+00000060
diff --git a/cMIPS/tests/lwFWDsw2.s b/cMIPS/tests/lwFWDsw2.s
index 72119a0..f71dc69 100644
--- a/cMIPS/tests/lwFWDsw2.s
+++ b/cMIPS/tests/lwFWDsw2.s
@@ -28,6 +28,7 @@ snd:	lw   $15, 0($17)          # reload pointer from RAM
 	sw   $15, 0($15)          # store pointer to RAM[i]
 	nop
 	lw   $15, 0($15)          # reload pointer from RAM[i]
+	andi $15, $15, 0x00ff	  # mask off address, keep 8 least significant bits
 	sw   $15, 0($16)          #  and print it out, forwarding $15
 	nop
 	slt  $8,$3,$9             # done?
diff --git a/cMIPS/vhdl/core.vhd b/cMIPS/vhdl/core.vhd
index 4b1ca94..896e2fc 100644
--- a/cMIPS/vhdl/core.vhd
+++ b/cMIPS/vhdl/core.vhd
@@ -332,7 +332,8 @@ architecture rtl of core is
   signal RF_EX_ld, EX_MM_ld, MM_WB_ld: std_logic;
   signal a_rs,EX_a_rs, a_rt,EX_a_rt,MM_a_rt, a_rd: reg5;
   signal a_c,EX_a_c,MM_a_c,WB_a_c: reg5;
-  signal move,EX_move,MM_move, is_load,EX_is_load : std_logic;
+  signal move,EX_move,MM_move : std_logic;
+  signal is_load,EX_is_load,MM_is_load : boolean;
   signal muxC,EX_muxC,MM_muxC,WB_muxC: reg3;
   signal wreg,EX_wreg_pre,EX_wreg,MM_wreg_cond,MM_wreg,WB_wreg: std_logic;
   signal aVal,EX_aVal,EX_aVal_cond,MM_aVal: std_logic;
@@ -356,7 +357,7 @@ architecture rtl of core is
   signal rd_data_raw, rd_data, WB_rd_data, WB_mem_data: reg32;
   signal MM_B_data, WB_B_data: reg32;
   signal d_addr_pre: reg2;
-  signal jr_stall, br_stall, fwd_lwlr, sw_stall : std_logic;
+  signal jr_stall, br_stall, fwd_lwlr, sw_stall, lw_stall : std_logic;
   signal fwd_mem, WB_addr2: reg2;
 
 
@@ -388,8 +389,8 @@ architecture rtl of core is
          EX_wrmem:   out std_logic;
          mem_t:      in  std_logic_vector;
          EX_mem_t:   out std_logic_vector;
-         is_load:    in  std_logic;
-         EX_is_load: out std_logic;
+         is_load:    in  boolean;
+         EX_is_load: out boolean;
          A:          in  std_logic_vector;
          EX_A:       out std_logic_vector;
          B:          in  std_logic_vector;
@@ -416,6 +417,8 @@ architecture rtl of core is
          MM_wrmem:   out std_logic;
          EX_mem_t:   in  std_logic_vector;
          MM_mem_t:   out std_logic_vector;
+         EX_is_load: in  boolean;
+         MM_is_load: out boolean;
          EX_A:       in  std_logic_vector;
          MM_A:       out std_logic_vector;
          EX_B:       in  std_logic_vector;
@@ -733,7 +736,7 @@ begin
   -- PROGRAM COUNTER AND INSTRUCTION FETCH ------------------
 
   pipe_stall <= rom_stall or ram_stall or jr_stall or br_stall or
-                sw_stall  or tr_stall  or exception_stall;
+                sw_stall  or lw_stall  or tr_stall  or exception_stall;
 
   
   PCload   <= '1' when pipe_stall = '1' else '0';
@@ -920,8 +923,8 @@ begin
       jr_stall <= '1';
       i_dbg_jr_stall := 2;
     elsif ( (funct_word.PCsel = b"11")and       -- 2nd load-delay slot
-            (MM_a_c = a_rs)and(MM_wreg = '0')and(MM_a_c /= b"00000") and
-            (MM_aVal = '0') ) then
+            MM_is_load and
+            (MM_a_c = a_rs)and(MM_wreg = '0')and(MM_a_c /= b"00000") ) then
       jr_stall <= '1';
       i_dbg_jr_stall := 3;
     else
@@ -932,19 +935,33 @@ begin
   end process RF_JR_STALL;
   
   
+  RF_LD_DELAY_SLOT: process (a_rs,a_rt,EX_a_c,EX_wreg,EX_is_load)
+  begin
+    if ( EX_is_load and
+         (EX_wreg = '0') and (EX_a_c /= b"00000") and
+         ( (EX_a_c =  a_rs)or(EX_a_c = a_rt) ) ) then
+      lw_stall <= '1';
+    else
+      lw_stall <= '0';
+    end if;
+  end process RF_LD_DELAY_SLOT;
+  
+
   RF_SW_STALL: process (ctrl_word,a_rs,EX_a_c,EX_wreg,EX_is_load)
     variable is_store : boolean := false;
   begin
     case ctrl_word.i is
       when LB | LH | LWL | LW | LBU | LHU | LWR =>
-        is_load <= '1';
+        is_load  <= TRUE;
         is_store := FALSE;
       when SB | SH | SW  =>
         is_store := TRUE;
-        is_load <= '0';
-      when others => is_load <= '0'; is_store := FALSE;
+        is_load  <= FALSE;
+      when others =>
+        is_load <= FALSE;
+        is_store := FALSE;
     end case;
-    if ( is_store and (EX_is_load = '1') and
+    if ( is_store and EX_is_load and
          (EX_a_c =  a_rs)and(EX_wreg = '0')and(EX_a_c /= b"00000") ) then
       sw_stall <= '1';
     else
@@ -1382,13 +1399,17 @@ begin
 
   end process U_EX_ADDR_ERR_EXCP; ----------------------------------
 
-  EX_addr <= phy_d_addr;                  -- with TLB  
+
+  -- uncomment this when making use of the TLB CHANGE
+  EX_addr <= phy_d_addr;                -- with TLB
+
+  -- uncomment this when NOT making use of the TLB
+  -- EX_addr <= v_addr;                    -- without TLB  
 
   -- assert ( (phy_d_addr = v_addr) and (EX_aVal = '0') )  -- DEBUG
   --  report "mapping mismatch V:P "&SLV32HEX(v_addr)&":"&SLV32HEX(phy_d_addr);
 
 
-
   EX_wreg <= EX_wreg_pre                  -- movz,movn, move/DO_NOT move
              or ( BOOL2SL(nullify) and not(MM_is_delayslot) );
                                           -- abort wr if prev excep in EX
@@ -1413,7 +1434,7 @@ begin
     port map (clk,rst, EX_MM_ld,
               EX_a_rt,MM_a_rt, EX_a_c,MM_a_c, EX_wreg,MM_wreg,
               EX_muxC,MM_muxC, EX_aVal_cond,MM_aVal, EX_wrmem_cond,MM_wrmem,
-              EX_mem_t,MM_mem_t,
+              EX_mem_t,MM_mem_t, EX_is_load,MM_is_load, 
               EX_A,MM_A, alu_fwd_B,MM_B,
               result,MM_result, EX_addr,MM_addr,
               HI,MM_HI, LO,MM_LO,
diff --git a/cMIPS/vhdl/pipestages.vhd b/cMIPS/vhdl/pipestages.vhd
index a927519..a0bcc03 100644
--- a/cMIPS/vhdl/pipestages.vhd
+++ b/cMIPS/vhdl/pipestages.vhd
@@ -89,8 +89,8 @@ entity reg_RF_EX is
        EX_wrmem:   out std_logic;
        mem_t:      in  reg4;
        EX_mem_t:   out reg4;              
-       is_load:    in  std_logic;
-       EX_is_load: out std_logic;
+       is_load:    in  boolean;
+       EX_is_load: out boolean;
        A:          in  reg32;
        EX_A:       out reg32;
        B:          in  reg32;
@@ -158,6 +158,8 @@ entity reg_EX_MM is
        MM_wrmem:   out std_logic;
        EX_mem_t:   in  reg4;
        MM_mem_t:   out reg4;              
+       EX_is_load: in  boolean;
+       MM_is_load: out boolean;
        EX_A:       in  reg32;
        MM_A:       out reg32;
        EX_B:       in  reg32;
@@ -188,22 +190,23 @@ begin
       MM_aVal  <= '1';
     elsif rising_edge(clk) then
       if ld = '0' then
-        MM_a_rt     <= EX_a_rt   ;
-        MM_a_c      <= EX_a_c    ;
-        MM_wreg     <= EX_wreg   ;
-        MM_muxC     <= EX_muxC   ;
-        MM_aVal     <= EX_aVal   ;
-        MM_wrmem    <= EX_wrmem  ;
-        MM_mem_t    <= EX_mem_t  ;
-        MM_A        <= EX_A      ;
-        MM_B        <= EX_B      ;
-        MM_result   <= EX_result ;
-        MM_addr     <= EX_addr   ;
-        MM_HI       <= HI        ;
-        MM_LO       <= LO        ;
+        MM_a_rt     <= EX_a_rt    ;
+        MM_a_c      <= EX_a_c     ;
+        MM_wreg     <= EX_wreg    ;
+        MM_muxC     <= EX_muxC    ;
+        MM_aVal     <= EX_aVal    ;
+        MM_wrmem    <= EX_wrmem   ;
+        MM_mem_t    <= EX_mem_t   ;
+        MM_is_load  <= EX_is_load ;
+        MM_A        <= EX_A       ;
+        MM_B        <= EX_B       ;
+        MM_result   <= EX_result  ;
+        MM_addr     <= EX_addr    ;
+        MM_HI       <= HI         ;
+        MM_LO       <= LO         ;
         MM_alu_move_ok <= EX_alu_move_ok ;
-        MM_move     <= EX_move   ;        
-        MM_pc_p8    <= EX_pc_p8  ;
+        MM_move     <= EX_move    ;        
+        MM_pc_p8    <= EX_pc_p8   ;
       end if;
     end if;
   end process;
-- 
GitLab