diff --git a/cMIPS/tests/lwFWDsw.s b/cMIPS/tests/lwFWDsw.s
index 242839113f5d8277a3724b135836a354fdcc709b..13662d92ce1d65d5c88ecd8784aa4a75c691e13e 100644
--- a/cMIPS/tests/lwFWDsw.s
+++ b/cMIPS/tests/lwFWDsw.s
@@ -21,6 +21,11 @@ snd:	sw   $3, 4($15)		# mem[i+1] <= count
 	slt  $8, $3, $9		# reached 10 rounds?
         bne  $8, $0, snd	#    no, continue
         nop
+
+	nop
+	nop
+	nop
+	nop
         wait
         nop
 	.end _start
diff --git a/cMIPS/tests/mmu_tlbwi.expected b/cMIPS/tests/mmu_tlbwi.expected
index 582e4e8e1ff93a62b6e86cd4e731b9e902ce641c..c642d6473c336e6a40c8a904a720af81d81f05eb 100644
--- a/cMIPS/tests/mmu_tlbwi.expected
+++ b/cMIPS/tests/mmu_tlbwi.expected
@@ -8,9 +8,9 @@
 00000007
 00000047
 1
-0f000000
-003c0007
-003c0047
+3c000000
+00f00007
+00f00047
 2
 00014000
 00001016
diff --git a/cMIPS/vhdl/packageMemory.vhd b/cMIPS/vhdl/packageMemory.vhd
index 16752497dbaeb26a2f02e04d5453bc4c8f7ef85b..36a866b80c742750ec9b7ef94223fd3110697923 100644
--- a/cMIPS/vhdl/packageMemory.vhd
+++ b/cMIPS/vhdl/packageMemory.vhd
@@ -44,9 +44,11 @@ package p_MEMORY is
   constant x_INST_MEM_SZ    : reg32   := x"00004000";
   constant x_DATA_BASE_ADDR : reg32   := x"00010000";
   constant x_DATA_MEM_SZ    : reg32   := x"00008000";
-  constant x_IO_BASE_ADDR   : reg32   := x"0F000000";
+  constant x_IO_BASE_ADDR   : reg32   := x"3c000000";
   constant x_IO_MEM_SZ      : reg32   := x"00002000";
   constant x_IO_ADDR_RANGE  : reg32   := x"00000020";
+  constant x_SDRAM_BASE_ADDR : reg32  := x"04000000";
+  constant x_SDRAM_MEM_SZ    : reg32  := x"02000000";
   constant x_EXCEPTION_0000 : reg32   := x"00000130";  -- TLBrefill
   constant x_EXCEPTION_0100 : reg32   := x"00000200";  -- CacheError
   constant x_EXCEPTION_0180 : reg32   := x"00000280";  -- generalExcpHandler
@@ -63,6 +65,9 @@ package p_MEMORY is
   constant DATA_BASE_ADDR  : integer := to_integer(signed(x_DATA_BASE_ADDR));
   constant DATA_MEM_SZ     : integer := to_integer(signed(x_DATA_MEM_SZ));
 
+  constant SDRAM_BASE_ADDR  : integer := to_integer(signed(x_SDRAM_BASE_ADDR));
+  constant SDRAM_MEM_SZ     : integer := to_integer(signed(x_SDRAM_MEM_SZ));
+
   constant IO_BASE_ADDR    : integer := to_integer(signed(x_IO_BASE_ADDR));
   constant IO_MEM_SZ       : integer := to_integer(signed(x_IO_MEM_SZ));
   constant IO_ADDR_RANGE   : integer := to_integer(signed(x_IO_ADDR_RANGE));
diff --git a/cMIPS/vhdl/sdram.vhd b/cMIPS/vhdl/sdram.vhd
index dad1dc439e9fcb27480467dd4f4f99e8abb4d513..4b6191b33dcb28113025ad3f8d234db92a0e780e 100644
--- a/cMIPS/vhdl/sdram.vhd
+++ b/cMIPS/vhdl/sdram.vhd
@@ -56,8 +56,13 @@ entity SDRAM_controller is
         saddr    : out   reg12;         -- ram side address
         sdata    : inout  reg16);       -- ram side data
 
-  constant REFRESH_doit : integer := 704;  -- force a refresh every 704 cycles
+--  constant RESET_INTERVAL : integer := 5000; -- reset after 100us = 5.000*20n
+--  constant REFRESH_INTERVAL : integer := 704;-- do a refresh every 704 cycles
+  constant RESET_INTERVAL : integer := 5; -- only 5 cycles; 100us needs 5000
+  constant REFRESH_INTERVAL : integer := 7;-- only 7 cycles; nominal is 704
 
+
+  
   subtype cmd_index is integer range 0 to 13;
   
   constant cDSL  : cmd_index := 0;
@@ -85,10 +90,40 @@ entity SDRAM_controller is
   end record;
 
   type t_cmd_mem is array (0 to 12) of t_cmd_type;
-
   
 end entity SDRAM_controller;
 
+
+-- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+-- fake SDRAM controller for Macnica's development board Mercurio IV
+--       IS42S16320B, 512Mbit SDRAM, 146MHz, 32Mx16bit
+-- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+architecture fake of SDRAM_controller is
+begin
+  -- host side: rdy is held at '1' and hDout is driven to all 'X'
+  hDout <= (others => 'X');
+  rdy   <= '1';
+  -- RAM side: every single-bit pin is tied to '1', both buses are all 'X'
+  sdata <= (others => 'X');
+  saddr <= (others => 'X');
+  ba1   <= '1';
+  ba0   <= '1';
+  dqm1  <= '1';
+  dqm0  <= '1';
+  we    <= '1';
+  cas   <= '1';
+  ras   <= '1';
+  scs   <= '1';
+  cke   <= '1';
+
+end architecture fake;
+-- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+
+-- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+-- real SDRAM controller for Macnica's development board Mercurio IV
+--       IS42S16320B, 512Mbit SDRAM, 146MHz, 32Mx16bit
+-- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 architecture simple of SDRAM_controller is
 
   constant cmd_table : t_cmd_mem := (  -- page 9
@@ -133,7 +168,7 @@ architecture simple of SDRAM_controller is
      st_idle);                          -- 54
   
   signal curr_st, next_st : ctrl_state;
-  signal ctrl_dbg_st : integer;         -- for debugging only
+  signal ctrl_dbg_st, cmd_dbg : integer;      -- for debugging only
   
   signal reset_done, same_row, do_refresh, refresh_done : boolean := FALSE;
   signal is_accs, is_rd, is_wr : boolean := FALSE;
@@ -153,8 +188,13 @@ begin  -- simple
   cas  <= command.cas;
   we   <= command.we;
 
+  cmd_dbg <= command.cmd;               -- DEBUG only
+  
   saddr(10) <= addr(10) when command.cmd = cACT else
                command.a10;
+
+  saddr(9 downto 0) <= b"1000100000" when command.cmd = cMRS else
+                       addr(9 downto 0);
   
   U_address: registerN  generic map (26, b"00"&x"000000")
     port map (clk2x, rst, hcs, haddr, addr);
@@ -344,7 +384,7 @@ begin  -- simple
         elsif is_rd then
           next_st <= st_rdcol;
         elsif is_wr then
-          next_st <= st_rdcol;
+          next_st <= st_wrcol;
         else
           next_st <= st_idle;
         end if;
@@ -352,7 +392,7 @@ begin  -- simple
         assert false report "CTRL stateMachine broken"
           & integer'image(ctrl_state'pos(curr_st)) severity failure;
     end case;
-  end process U_CTRL_st_transitions;   ------------------------------------
+  end process U_CTRL_st_transitions;   ---------------------------
 
 
   
@@ -395,13 +435,17 @@ begin  -- simple
       do_refresh <= FALSE;
       cnt := 0;
     elsif rising_edge(clk2x) then
-      if cnt > REFRESH_doit then
+      if cnt > REFRESH_INTERVAL then
         if refresh_done then
           do_refresh <= FALSE;
           cnt := 0;
         else
           do_refresh <= TRUE;           -- add some hysteresis
-          cnt := cnt + 1;               --   to accomodate slow commands
+          if cnt = 1023 then
+            cnt := 0;
+          else
+            cnt := cnt + 1;               --   to accomodate slow commands
+          end if;
         end if;
       else
         do_refresh <= FALSE;
@@ -412,17 +456,19 @@ begin  -- simple
   
 
   -- do wait for 100us after reset
-  U_rst_100us: process
-    variable cnt : integer range 0 to 16383:= 0;
-  begin  -- process clk2x
-    reset_done <= FALSE;
-    wait until rst = '1';
-    cnt := 0;
-    wait until rising_edge(clk2x);
-    cnt := cnt + 1;
-    if cnt = 10000 then               -- 100us elapsed
-      reset_done <= TRUE;
-      wait;
+  U_rst_100us: process(rst, clk2x)
+    variable ticks : integer range 0 to (8*1024 - 1):= 0;
+  begin
+    if rst = '0' then                   -- asynchronous clear while rst is '0'
+      reset_done <= FALSE;
+      ticks := 0;
+    elsif rising_edge(clk2x) then
+      if ticks < RESET_INTERVAL then    -- still inside the start-up delay
+        ticks := ticks + 1;
+      else                              -- RESET_INTERVAL clk2x edges counted
+        reset_done <= TRUE;
+        ticks := 0;
+      end if;
     end if;
   end process U_rst_100us;
  
@@ -431,30 +477,3 @@ end simple;
 -- ---------------------------------------------------------------------
     
 
-
--- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
--- fake SDRAM controller for Macnica's development board Mercurio IV
---       IS42S16320B, 512Mbit SDRAM, 146MHz, 32Mx16bit
--- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-
-architecture fake of SDRAM_controller is
-begin
-  
-  rdy <= '1';
-  hDout <= (others => 'X');
-
-  cke      <= '1';
-  scs      <= '1';
-  ras      <= '1';
-  cas      <= '1';
-  we       <= '1';
-  dqm0     <= '1';
-  dqm1     <= '1';
-  ba0      <= '1';
-  ba1      <= '1';
-  saddr    <= (others => 'X');
-  sdata    <= (others => 'X');
-
-end architecture fake;
-
-
diff --git a/cMIPS/vhdl/tb_cMIPS.vhd b/cMIPS/vhdl/tb_cMIPS.vhd
index 226e0c1025876416cb804f2fbc34f68968b57b0e..2ccd1757bf2aeab841191de7f022fc87d4757470 100644
--- a/cMIPS/vhdl/tb_cMIPS.vhd
+++ b/cMIPS/vhdl/tb_cMIPS.vhd
@@ -187,6 +187,14 @@ architecture TB of tb_cMIPS is
           dev_select  : out std_logic_vector);
   end component ram_addr_decode;
 
+  component sdram_addr_decode is
+    port (rst         : in  std_logic;
+          cpu_d_aVal  : in  std_logic;
+          addr        : in  std_logic_vector;
+          aVal        : out std_logic;
+          dev_select  : out std_logic_vector);
+  end component sdram_addr_decode;
+
   component io_addr_decode is
     port (clk         : in  std_logic;
           rst         : in  std_logic;
@@ -373,6 +381,26 @@ architecture TB of tb_cMIPS is
     outclk : OUT STD_LOGIC); 
   end component mf_altclkctrl;
 
+  
+  -- use fake / behavioral
+    for U_I_CACHE : I_cache use entity work.I_cache(fake);
+
+  -- use simulation / rtl
+  for U_ROM : ROM         use entity work.ROM(simulation);
+
+  -- use simulation / rtl
+  for U_RAM : RAM         use entity work.RAM(simulation);
+
+  -- use fake / behavioral
+  for U_D_CACHE : D_cache use entity work.D_cache(fake);
+
+  -- use fake / rtl
+  for U_FPU: FPU          use entity work.FPU(rtl);
+
+  -- use fake / simple
+  for U_SDRAMc : SDRAM_controller
+                          use entity work.SDRAM_controller(simple);
+
 
 
   
@@ -384,9 +412,11 @@ architecture TB of tb_cMIPS is
   signal cpu_i_aVal, cpu_i_wait, wr, cpu_d_aVal, cpu_d_wait : std_logic;
   signal nmi, i_busError, d_busError : std_logic;
   signal irq : reg6;
-  signal inst_aVal, inst_wait, rom_rdy : std_logic := '1';
+  signal inst_aVal, inst_wait, rom_rdy : std_logic;
   signal data_aVal, data_wait, ram_rdy, mem_wr : std_logic;
-  signal cpu_xfer, mem_xfer, dev_select, dev_select_ram, dev_select_io : reg4;
+  signal sdram_aVal, sdram_rdy, sdram_wr : std_logic;
+  signal cpu_xfer, mem_xfer : reg4;
+  signal dev_select, dev_select_ram, dev_select_io, dev_select_sdram : reg4;
   signal io_print_sel   : std_logic := '1';
   signal io_stdout_sel  : std_logic := '1';
   signal io_stdin_sel   : std_logic := '1';
@@ -401,7 +431,7 @@ architecture TB of tb_cMIPS is
   signal io_lcd_sel,     io_lcd_wait     : std_logic := '1';
   signal d_cache_d_out, stdin_d_out, read_d_out, counter_d_out : reg32;
   signal fpu_d_out, uart_d_out, sstats_d_out, keybd_d_out : reg32;
-  signal lcd_d_out : reg32;
+  signal lcd_d_out, sdram_d_out : reg32;
 
   signal counter_irq : std_logic;
   signal io_wait, not_waiting : std_logic;
@@ -428,7 +458,6 @@ architecture TB of tb_cMIPS is
 
 
   signal        hcs      :     std_logic;     -- host side chip select (=0)
-  signal   sdram_rdy    :     std_logic;     -- host side chip select (=0)
   signal          haddr    :     reg26;         -- host side address
   signal          hDinp    :     reg32;         -- host side data input
   signal          hDout    :    reg32;         -- host side data output
@@ -525,7 +554,10 @@ begin  -- TB
   U_DATA_ADDR_DEC: ram_addr_decode
     port map (rst, cpu_d_aVal, d_addr,data_aVal, dev_select_ram);
 
-  dev_select <= dev_select_io or dev_select_ram;
+  U_SDRAM_ADDR_DEC: sdram_addr_decode
+    port map (rst, cpu_d_aVal, d_addr,sdram_aVal, dev_select_sdram);
+
+  dev_select <= dev_select_io or dev_select_ram; --  or dev_select_sdram;
   
   with dev_select select
     cpu_data_inp <= d_cache_d_out   when b"0001",
@@ -537,6 +569,7 @@ begin  -- TB
                     sstats_d_out    when b"1010",
                     keybd_d_out     when b"1100",
                     lcd_d_out       when b"1101",
+--                    sdram_d_out     when b"1110",
                     (others => 'X') when others;
   
   U_D_CACHE: d_cache
@@ -552,7 +585,8 @@ begin  -- TB
               mem_addr, datram_out, datram_inp, mem_xfer, dump_ram);
 
   U_SDRAMc: SDRAM_controller port map 
-    (rst, clk,hcs,sdram_rdy,wr,cpu_xfer,haddr,hDinp,hDout,
+    (rst, clk, sdram_aVal, sdram_rdy, wr, cpu_xfer, d_addr(25 downto 0),
+     hDinp,hDout,
      sdcke,sdscs,sdras,sdcas,sdwe,sddqm0,sddqm1,sdba0,sdba1,sdaddr,sddata); 
 
   
@@ -859,6 +893,7 @@ end architecture behavioral;
 --++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 
 
+
 --++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 -- I/O address decoding 
 --++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
@@ -996,31 +1031,98 @@ end architecture behavioral;
 
 
 
--- +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-use work.all;
+--++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+-- SDRAM address decoding 
+--++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+library IEEE;
+use IEEE.std_logic_1164.all;
+use IEEE.numeric_std.all;
+use work.p_wires.all;
+use work.p_memory.all;
 
-configuration CFG_TB of TB_CMIPS is
-  for TB
+entity sdram_addr_decode is               -- CPU side triggers access
+  port (rst         : in  std_logic;    -- not read by architecture behavioral
+        cpu_d_aVal  : in  std_logic;    -- CPU data addr valid (active=0)
+        addr        : in  reg32;        -- CPU address
+        aVal        : out std_logic;    -- '0' when an SDRAM access is decoded
+        dev_select  : out reg4);        -- CPU input select: b"1110" or b"0000"
+  constant LO_ADDR  : integer := log2_ceil(SDRAM_BASE_ADDR);
+  constant HI_ADDR  : integer := log2_ceil(SDRAM_BASE_ADDR + SDRAM_MEM_SZ - 1);
+  constant in_r : std_logic_vector(HI_ADDR downto LO_ADDR) := (others => '1');
+  constant ng_r : std_logic_vector(HI_ADDR downto LO_ADDR) := (others => '0');
+  constant oth  : std_logic_vector(HI_SEL_BITS downto HI_ADDR+1):=(others=>'1');
+  constant ng_o : std_logic_vector(HI_SEL_BITS downto HI_ADDR+1):=(others=>'0');
+end entity sdram_addr_decode;
+
+architecture behavioral of sdram_addr_decode is
   
-  -- use fake / behavioral
-    for U_I_CACHE : I_cache use entity work.I_cache(fake); end for;
+  constant all_0  : std_logic_vector(31 downto 0)         := (others=>'0');
+  
+  constant a_hi   : std_logic_vector(31 downto HI_ADDR+1) := (others=>'0');
+  constant a_lo   : std_logic_vector(LO_ADDR-1 downto 0)  := (others=>'0');
+  constant a_bits : std_logic_vector(HI_ADDR downto LO_ADDR) := (others=>'1');
+  constant a_mask : std_logic_vector := a_hi & a_bits & a_lo;
 
-  -- use simulation / rtl
-  for U_ROM : ROM         use entity work.ROM(simulation); end for;
+  constant LO_RAM : natural := 0;
+  constant HI_RAM : natural := log2_ceil(SDRAM_MEM_SZ-1);
+  constant r_hi   : std_logic_vector(31 downto HI_RAM+1)   := (others=>'1');
+  constant r_lo   : std_logic_vector(HI_RAM downto LO_RAM) := (others=>'0');
+  constant r_mask : std_logic_vector := r_hi & r_lo;
+    
+  signal in_range : boolean;
 
-  -- use simulation / rtl
-  for U_RAM : RAM         use entity work.RAM(simulation); end for;
+  -- selector-bit view of the SDRAM window: [BOTTOM, TOP) = [base, base+size)
+  constant SDRAM_ADDR_BOTTOM : natural :=
+        to_integer(signed(x_SDRAM_BASE_ADDR(HI_SEL_BITS downto LO_SEL_BITS)));
+  -- RANGE is the size alone; adding the base here too made TOP = 2*base+size
+  constant SDRAM_ADDR_RANGE : natural :=
+        to_integer(signed(x_SDRAM_MEM_SZ(HI_SEL_BITS downto LO_SEL_BITS)));
+  constant SDRAM_ADDR_TOP : natural := SDRAM_ADDR_BOTTOM + SDRAM_ADDR_RANGE;
+  
+begin
 
-  -- use fake / behavioral
-  for U_D_CACHE : D_cache use entity work.D_cache(fake); end for;
+  -- this is ONLY acceptable for simulations;
+  -- computing these differences is TOO expensive for synthesis
+  in_range <= ( (to_integer(signed(addr(HI_SEL_BITS downto LO_SEL_BITS)))
+                 >= 
+                 SDRAM_ADDR_BOTTOM)
+                and
+                (to_integer(signed(addr(HI_SEL_BITS downto LO_SEL_BITS)))
+                 <
+                 SDRAM_ADDR_TOP)
+              );
+  
+  aVal <= '0' when (cpu_d_aVal = '0' and in_range) else '1';
 
-  -- use fake / rtl
-  for U_FPU: FPU          use entity work.FPU(rtl); end for;
-    
-  -- use fake / simple
-  for U_SDRAMc : SDRAM_controller
-                          use entity work.SDRAM_controller(fake);  end for;
+  dev_select <= b"1110" when (cpu_d_aVal = '0' and in_range) else b"0000";
 
+  assert true --  cpu_d_aVal = '1'
+    report  "e "  & SLV32HEX(addr) & 
+    " addr " & SLV2str(addr(15 downto 0)) & LF & 
+    " LO_AD " & integer'image(LO_ADDR) &
+    " HI_AD " & integer'image(HI_ADDR) &
+    " a_hi "    & SLV2STR(a_hi) &
+    " a_lo "    & SLV2STR(a_lo) &
+    " a_bits "  & SLV2STR(a_bits) &
+    " a_mask "  & SLV32HEX(a_mask) & LF &
+    " LO_RAM " & integer'image(LO_RAM) &
+    " HI_RAM " & integer'image(HI_RAM) &
+    " r_hi "    & SLV2STR(r_hi) &
+    " r_lo "    & SLV2STR(r_lo) &
+    " r_mask "  & SLV32HEX(r_mask)
+    severity NOTE;
+  
+end architecture behavioral;
+--++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+
+
+
+-- +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+use work.all;
+
+configuration CFG_TB of TB_CMIPS is
+  for TB
   end for;
 end configuration CFG_TB;
 -- +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++