From 1b3be08babd0ab42f2c564f4bbf1fe0d2f1989b1 Mon Sep 17 00:00:00 2001
From: Roberto Hexsel <roberto@inf.ufpr.br>
Date: Fri, 29 May 2015 21:41:30 -0300
Subject: [PATCH] faster memory interface

---
 cMIPS/vhdl/core.vhd     | 169 ++++++++++++++++++++++------------------
 cMIPS/vhdl/tb_cMIPS.vhd |   4 +-
 2 files changed, 94 insertions(+), 79 deletions(-)

diff --git a/cMIPS/vhdl/core.vhd b/cMIPS/vhdl/core.vhd
index 750ebf0..8e173c8 100644
--- a/cMIPS/vhdl/core.vhd
+++ b/cMIPS/vhdl/core.vhd
@@ -845,7 +845,7 @@ begin
 
       -- simulation aborted by instruction "wait N"
       assert not(exception = exWAIT and syscall_n /= x"80000")
-        report LF & "INVALID REFERENCE at PC="& SLV32HEX(EPC) &
+        report LF & "INVALID REFERENCE at EPC="& SLV32HEX(EPC) &
         " opc="& SLV2STR(opcode) & " fun=" & SLV2STR(func) &
         " instr=" & SLV32HEX(RF_instruction) & 
         LF & "SIMULATION ABORTED AT EXCEPTION HANDLER;"
@@ -935,57 +935,50 @@ begin
   begin
     br_stall <= '0';
 
-    if ( (is_branch = '1') and          -- forward_A:
+    if ( (is_branch = '1') and          -- forward_A
          (EX_wreg = '0') and (EX_a_c = a_rs) and (EX_a_c /= b"00000") ) then
       br_stall <= '1';
       eq_fwd_A <= regs_A;
-    elsif ((MM_wreg = '0') and (MM_a_c = a_rs) and (MM_a_c /= b"00000")) then
-      if (MM_aVal = '0') then    -- LW load-delay slot
-        if (is_branch = '1') then
-          br_stall <= '1';
-        end if;
+    elsif ( (MM_wreg = '0') and (MM_a_c = a_rs) and (MM_a_c /= b"00000") ) then
+      if ( (MM_aVal = '0') and (is_branch = '1') ) then   -- LW load-delay slot
+        br_stall <= '1';
         eq_fwd_A <= regs_A;
-      else    -- non-LW
-        if MM_mfc0 /= '1' then
-          eq_fwd_A <= MM_result;
-        else
-          eq_fwd_A <= MM_cop0_val;
-        end if;
+      elsif MM_mfc0 = '1' then          -- non-LW
+        eq_fwd_A <= MM_cop0_val;
+      else
+        eq_fwd_A <= MM_result; 
       end if;
     else
       eq_fwd_A <= regs_A;
     end if;
 
-
-    if ( (is_branch = '1') and          -- forward_B:
+    if ( (is_branch = '1') and          -- forward_B
          (EX_wreg = '0') and (EX_a_c = a_rt) and (EX_a_c /= b"00000") ) then
       br_stall <= '1';
       eq_fwd_B <= regs_B;
-    elsif ((MM_wreg = '0') and (MM_a_c = a_rt) and (MM_a_c /= b"00000")) then
-      if (MM_aVal = '0') then    -- LW load-delay slot
-        if (is_branch = '1') then
-          br_stall <= '1';
-        end if;
+    elsif ( (MM_wreg = '0') and (MM_a_c = a_rt) and (MM_a_c /= b"00000") ) then
+      if ( (MM_aVal = '0') and (is_branch = '1') ) then   -- LW load-delay slot
+        br_stall <= '1';
         eq_fwd_B <= regs_B;
-      else    -- non-LW
-        if MM_mfc0 /= '1' then
-          eq_fwd_B <= MM_result;
-        else
-          eq_fwd_B <= MM_cop0_val;
-        end if;
+      elsif MM_mfc0 = '1' then          -- non-LW
+        eq_fwd_B <= MM_cop0_val;
+      else
+        eq_fwd_B <= MM_result;
       end if;
     else
       eq_fwd_B <= regs_B;
     end if;
+
   end process RF_FORWARDING_BRANCH;
 
+  
   br_equal    <= (eq_fwd_A = eq_fwd_B);
   br_negative <= (eq_fwd_A(31) = '1');
   br_eq_zero  <= (eq_fwd_A = x"00000000");
   
 
   RF_BR_tgt_select: process (br_equal,br_negative,br_eq_zero,
-                        ctrl_word,rimm_word) 
+                             ctrl_word,rimm_word) 
     variable branch_type, regimm_br_type : t_comparison;
     variable i_br_opr : reg2;
   begin
@@ -995,26 +988,26 @@ begin
     i_br_opr := b"01";          -- assume not taken, PC+4 + 4 (delay slot)
     case branch_type is
       when cNOP =>              -- no branch, PC+4
-        i_br_opr := b"00";      -- x"00000000";
+        i_br_opr := b"00";
       when cEQU =>              -- beq
         if br_equal then i_br_opr := b"10";  -- br_target;
         end if;
       when cNEQ =>              -- bne
-        if (not br_equal) then i_br_opr := b"10";  -- br_target;
+        if not(br_equal) then i_br_opr := b"10";  -- br_target;
         end if;
       when cLEZ =>
         if (br_negative or br_eq_zero) then i_br_opr := b"10";  -- br_target;
         end if;
       when cGTZ =>
-        if not(br_negative or br_eq_zero) then i_br_opr := b"10";  -- br_tgt;
+        if not(br_negative or br_eq_zero) then i_br_opr := b"10";  -- br_target;
         end if;
       when cOTH =>              -- bltz,blez,bgtz,bgez
         case regimm_br_type is
           when cLTZ =>
-            if (br_negative) then i_br_opr := b"10";  -- br_target;
+            if br_negative then i_br_opr := b"10";  -- br_target;
             end if;
           when cGEZ =>
-            if (not br_negative) then i_br_opr := b"10";  -- br_target;
+            if not(br_negative) then i_br_opr := b"10";  -- br_target;
             end if;
           when others => 
             i_br_opr := b"00";    -- x"00000000";
@@ -1033,7 +1026,7 @@ begin
 
   -- branch target computation is in the citical path; add early, select late
   br_addend <= displ32(29 downto 0) & b"00";
-  U_BR_tgt_pl_4:     mf_alt_add_4 port map (RF_PCincd, br_tgt_pl4 );
+  U_BR_tgt_pl_4:     mf_alt_add_4 port map (RF_PCincd, br_tgt_pl4);
   U_BR_tgt_pl_displ: mf_alt_adder port map (RF_PCincd, br_addend, br_tgt_displ);
     
   with br_opr select
@@ -1394,13 +1387,49 @@ begin
   rd_data_raw <= data_inp when (MM_wrmem = '1' and MM_aVal = '0') else
                  (others => 'X');
   
-  d_addr <= d_addr_pre;  -- without TLB
-
+  d_addr <= d_addr_pre;
 
-  MM_MEM_INTERFACE: process(MM_mem_t,MM_aVal,MM_wrmem, MM_addr, rd_data_raw)
+  MM_MEM_CTRL_INTERFACE: process(MM_mem_t, MM_addr)
     variable i_d_addr : reg32;
-    variable bytes_read : reg32;
     variable i_byte_sel : reg4;
+  begin
+    -- derive the aligned address (d_addr_pre) and byte selects (b_sel) from MM_mem_t, MM_addr
+    case MM_mem_t(1 downto 0) is                      -- xx,by,hf,wd
+      when b"11" =>
+        i_byte_sel := b"1111";                        -- LW, SW, LWL, LWR
+        i_d_addr   := MM_addr(31 downto 2) & b"00";   -- align reference
+
+      when b"10" =>
+        i_d_addr     := MM_addr(31 downto 1) & '0';   -- align reference
+        if MM_addr(1) = '0' then                      -- LH*, SH
+          i_byte_sel := b"0011";                      -- select bytes 1,0
+        else
+          i_byte_sel := b"1100";                      -- select bytes 3,2
+        end if;
+
+      when b"01" =>                                   -- LB*, SB
+        i_d_addr := MM_addr;                          -- byte address used as is
+        case MM_addr(1 downto 0) is                   -- one select bit per byte
+          when b"00"  => i_byte_sel := b"0001";
+          when b"01"  => i_byte_sel := b"0010";
+          when b"10"  => i_byte_sel := b"0100";
+          when others => i_byte_sel := b"1000";
+        end case;
+
+      when others =>
+        i_d_addr   := (others => 'X');  -- not a byte/half/word reference
+        i_byte_sel := b"0000";          -- no byte selected
+
+    end case;
+
+    d_addr_pre <= i_d_addr;
+    b_sel      <= i_byte_sel;
+
+  end process MM_MEM_CTRL_INTERFACE; ---------------------------------
+
+
+  MM_MEM_DATA_INTERFACE: process(MM_mem_t, MM_addr, rd_data_raw)
+    variable bytes_read : reg32;
     variable i_byte : reg8;
     variable i_half : reg16;
     constant c_24_ones  : reg24 := b"111111111111111111111111";
@@ -1411,17 +1440,12 @@ begin
 
     case MM_mem_t(1 downto 0) is  -- 10:xx,by,hf,wd
       when b"11" =>
-        i_byte_sel := b"1111";              -- LW, SW, LWL, LWR
         bytes_read := rd_data_raw;
-        i_d_addr   := MM_addr(31 downto 2) & b"00";   -- align reference
         
       when b"10" =>
-        i_d_addr     := MM_addr(31 downto 1) & '0' ;    -- align reference
         if MM_addr(1) = '0' then                      -- LH*, SH
-          i_byte_sel := b"0011";
           i_half     := rd_data_raw(15 downto 0);
         else
-          i_byte_sel := b"1100";
           i_half     := rd_data_raw(31 downto 16);
         end if;
         if MM_mem_t(2) = '1' and i_half(15) = '1' then  -- mem_t(2):signed=1
@@ -1431,16 +1455,11 @@ begin
         end if;
 
       when b"01" =>                                     -- LB*, SB
-        i_d_addr := MM_addr;
         case MM_addr(1 downto 0) is
-          when b"00"  => i_byte_sel := b"0001";
-                         i_byte     := rd_data_raw(7  downto  0);
-          when b"01"  => i_byte_sel := b"0010";
-                         i_byte     := rd_data_raw(15 downto  8);
-          when b"10"  => i_byte_sel := b"0100";
-                         i_byte     := rd_data_raw(23 downto 16);
-          when others => i_byte_sel := b"1000";
-                         i_byte     := rd_data_raw(31 downto 24);
+          when b"00"  => i_byte := rd_data_raw(7  downto  0);
+          when b"01"  => i_byte := rd_data_raw(15 downto  8);
+          when b"10"  => i_byte := rd_data_raw(23 downto 16);
+          when others => i_byte := rd_data_raw(31 downto 24);
         end case;
         if MM_mem_t(2) = '1' and i_byte(7) = '1' then -- mem_t(2):signed=1
           bytes_read := c_24_ones  & i_byte;
@@ -1449,19 +1468,15 @@ begin
         end if;
         
       when others =>
-        i_d_addr   := "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";  -- MM_addr;
-        i_byte_sel := b"0000";
         bytes_read := (others => 'X');
 
     end case;
 
-    d_addr_pre <= i_d_addr;
-
-    b_sel    <= i_byte_sel;
     rd_data  <= bytes_read;
 
-  end process MM_MEM_INTERFACE; ---------------------------------
+  end process MM_MEM_DATA_INTERFACE; ---------------------------------
 
+  
   -- forwarding for LW -> SW 
   MM_FORWARDING_MEM: process (MM_aVal,MM_wrmem,MM_a_rt,WB_a_c,WB_wreg,WB_C,MM_B)
     variable f_m: reg2;
@@ -1571,7 +1586,7 @@ begin
     WB_LO        when b"101",           -- MFLO
     WB_cop0_val  when b"110",           -- from COP0 registers
     (x"0000000" & b"000" & WB_LLbit) when b"111",  -- from LLbit
-    x"00000000"  when others;           -- invalid selection
+    (others => 'X') when others;           -- invalid selection
 
   --++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
   -- end of data pipeline 
@@ -2684,7 +2699,7 @@ begin
                  PC(VA_HI_BIT downto VA_LO_BIT);
 
 
-  -- TLB entry 0 -- initialized to 1st page of ROM
+  -- TLB entry 0 -- initialized to 1st,2nd pages of ROM
   --   this mapping must be pinned down at all times (Wired >= 2, see next entry)
   
   MMU_TAG0: register32 generic map(MMU_ini_tag_ROM0)
@@ -2730,14 +2745,14 @@ begin
 
 
   
-  -- TLB entry 2 -- initialized to 3rd page of ROM
+  -- TLB entry 2 -- initialized to 3rd,4th pages of ROM
   
-  MMU_TAG2: register32 generic map(MMU_ini_tag_ROM4)
+  MMU_TAG2: register32 generic map(MMU_ini_tag_ROM2)
     port map (clk, rst, tlb_tag2_updt, tlb_tag_inp, tlb_tag2);
 
-  MMU_DAT2_0: registerN generic map(DAT_REG_BITS, MMU_ini_dat_ROM4)  -- d=1,v=1,g=1
+  MMU_DAT2_0: registerN generic map(DAT_REG_BITS, MMU_ini_dat_ROM2)  -- d=1,v=1,g=1
     port map (clk, rst, tlb_dat2_updt, tlb_dat0_inp, tlb_dat2_0);
-  MMU_DAT2_1: registerN generic map(DAT_REG_BITS, MMU_ini_dat_ROM5)  -- d=1,v=1,g=1
+  MMU_DAT2_1: registerN generic map(DAT_REG_BITS, MMU_ini_dat_ROM3)  -- d=1,v=1,g=1
     port map (clk, rst, tlb_dat2_updt, tlb_dat1_inp, tlb_dat2_1);
 
   hit2_pc <= TRUE when (tlb_tag2(VA_HI_BIT downto VA_LO_BIT) = PC(VA_HI_BIT downto VA_LO_BIT)
@@ -2752,14 +2767,14 @@ begin
 
 
 
-  -- TLB entry 3 -- initialized to 4th page of ROM
+  -- TLB entry 3 -- initialized to 5th,6th pages of ROM
   
-  MMU_TAG3: register32 generic map(MMU_ini_tag_ROM6)
+  MMU_TAG3: register32 generic map(MMU_ini_tag_ROM4)
     port map (clk, rst, tlb_tag3_updt, tlb_tag_inp, tlb_tag3);
 
-  MMU_DAT3_0: registerN generic map(DAT_REG_BITS, MMU_ini_dat_ROM6)  -- d=1,v=1,g=1
+  MMU_DAT3_0: registerN generic map(DAT_REG_BITS, MMU_ini_dat_ROM4)  -- d=1,v=1,g=1; same index as tag
     port map (clk, rst, tlb_dat3_updt, tlb_dat0_inp, tlb_dat3_0);
-  MMU_DAT3_1: registerN generic map(DAT_REG_BITS, MMU_ini_dat_ROM7)  -- d=1,v=1,g=1
+  MMU_DAT3_1: registerN generic map(DAT_REG_BITS, MMU_ini_dat_ROM5)  -- d=1,v=1,g=1; tag index + 1
     port map (clk, rst, tlb_dat3_updt, tlb_dat1_inp, tlb_dat3_1);
 
   hit3_pc <= TRUE when (tlb_tag3(VA_HI_BIT downto VA_LO_BIT) = PC(VA_HI_BIT downto VA_LO_BIT)
@@ -2774,7 +2789,7 @@ begin
 
 
   
-  -- TLB entry 4 -- initialized to 1st page of RAM
+  -- TLB entry 4 -- initialized to 1st,2nd pages of RAM
 
   MMU_TAG4: register32 generic map(MMU_ini_tag_RAM0)
     port map (clk, rst, tlb_tag4_updt, tlb_tag_inp, tlb_tag4);
@@ -2796,7 +2811,7 @@ begin
 
 
   
-  -- TLB entry 5 -- initialized to 2nd page of RAM
+  -- TLB entry 5 -- initialized to 3rd,4th pages of RAM
   
   MMU_TAG5: register32 generic map(MMU_ini_tag_RAM2)
     port map (clk, rst, tlb_tag5_updt, tlb_tag_inp, tlb_tag5);
@@ -2818,14 +2833,14 @@ begin
 
 
 
-  -- TLB entry 6 -- initialized to top of RAM =  stack
+  -- TLB entry 6 -- initialized to 5th,6th pages of RAM
   
-  MMU_TAG6: register32 generic map(MMU_ini_tag_RAM6)
+  MMU_TAG6: register32 generic map(MMU_ini_tag_RAM4)
     port map (clk, rst, tlb_tag6_updt, tlb_tag_inp, tlb_tag6);
 
-  MMU_DAT6_0: registerN generic map(DAT_REG_BITS, MMU_ini_dat_RAM6)  -- d=1,v=1,g=1
+  MMU_DAT6_0: registerN generic map(DAT_REG_BITS, MMU_ini_dat_RAM4)  -- d=1,v=1,g=1
     port map (clk, rst, tlb_dat6_updt, tlb_dat0_inp, tlb_dat6_0);
-  MMU_DAT6_1: registerN generic map(DAT_REG_BITS, MMU_ini_dat_RAM7)  -- d=1,v=1,g=1
+  MMU_DAT6_1: registerN generic map(DAT_REG_BITS, MMU_ini_dat_RAM5)  -- d=1,v=1,g=1
     port map (clk, rst, tlb_dat6_updt, tlb_dat1_inp, tlb_dat6_1);
 
   hit6_pc <= TRUE when (tlb_tag6(VA_HI_BIT downto VA_LO_BIT) = PC(VA_HI_BIT downto VA_LO_BIT)
@@ -2839,14 +2854,14 @@ begin
              else FALSE;
 
 
-  -- TLB entry 7 -- initialized to 3rd page of ROM  
+  -- TLB entry 7 -- initialized to 7th,8th pages of RAM = stack
   
-  MMU_TAG7: register32 generic map(MMU_ini_tag_ROM2)
+  MMU_TAG7: register32 generic map(MMU_ini_tag_RAM6)
     port map (clk, rst, tlb_tag7_updt, tlb_tag_inp, tlb_tag7);
 
-  MMU_DAT7_0: registerN generic map(DAT_REG_BITS, MMU_ini_dat_ROM2)  -- d=1,v=1,g=1
+  MMU_DAT7_0: registerN generic map(DAT_REG_BITS, MMU_ini_dat_RAM6)  -- d=1,v=1,g=1
     port map (clk, rst, tlb_dat7_updt, tlb_dat0_inp, tlb_dat7_0);
-  MMU_DAT7_1: registerN generic map(DAT_REG_BITS, MMU_ini_dat_ROM3)  -- d=1,v=1,g=1
+  MMU_DAT7_1: registerN generic map(DAT_REG_BITS, MMU_ini_dat_RAM7)  -- d=1,v=1,g=1
     port map (clk, rst, tlb_dat7_updt, tlb_dat1_inp, tlb_dat7_1);
 
   hit7_pc <= TRUE when (tlb_tag7(VA_HI_BIT downto VA_LO_BIT) = PC(VA_HI_BIT downto VA_LO_BIT)
diff --git a/cMIPS/vhdl/tb_cMIPS.vhd b/cMIPS/vhdl/tb_cMIPS.vhd
index 9ac9957..0060278 100644
--- a/cMIPS/vhdl/tb_cMIPS.vhd
+++ b/cMIPS/vhdl/tb_cMIPS.vhd
@@ -522,7 +522,7 @@ begin  -- TB
     port map (rst, clk, mem_i_sel,rom_rdy, phi3, mem_i_addr,datrom);
 
   U_IO_ADDR_DEC: io_addr_decode
-    port map (phi0,rst, cpu_d_aVal, d_addr, dev_select_io,
+    port map (phi0, rst, cpu_d_aVal, d_addr, dev_select_io,
               io_print_sel, io_stdout_sel, io_stdin_sel,io_read_sel, 
               io_write_sel, io_counter_sel, io_fpu_sel, io_uart_sel,
               io_sstats_sel, io_7seg_sel, io_keys_sel, io_lcd_sel,
@@ -557,7 +557,7 @@ begin  -- TB
 
   U_RAM: simul_RAM generic map ("data.bin", "dump.data")
   -- U_RAM: fpga_RAM generic map ("data.bin", "dump.data")
-    port map (rst, clk, mem_d_sel, ram_rdy, mem_wr, phi3,
+    port map (rst, clk, mem_d_sel, ram_rdy, mem_wr, phi1,
               mem_addr, datram_out, datram_inp, mem_xfer, dump_ram);
   
   U_read_inp: read_data_file generic map ("input.data")
-- 
GitLab