diff --git a/cMIPS/vhdl/core.vhd b/cMIPS/vhdl/core.vhd
index 133163c875e303020ea26da81a0f0085391435fa..dbbb8e592f6c484e882da256aa17365b208c141a 100644
--- a/cMIPS/vhdl/core.vhd
+++ b/cMIPS/vhdl/core.vhd
@@ -115,10 +115,6 @@ architecture rtl of core is
          MM_tlb_excp_type: out exception_type;
          EX_tlb_stage_MM:  in  boolean;
          MM_tlb_stage_MM:  out boolean;
-         EX_nmi:           in  std_logic;
-         MM_nmi:           out std_logic;       
-         EX_interrupt:     in  std_logic;
-         MM_interrupt:     out std_logic;
          EX_int_req:       in  reg8;
          MM_int_req:       out reg8;
          EX_is_SC:         in  boolean;
@@ -146,7 +142,7 @@ architecture rtl of core is
   end component reg_excp_MM_WB;
 
   signal annul_1, annul_2, annul_twice : std_logic;
-  signal interrupt,MM_interrupt, exception_stall : std_logic;
+  signal interrupt, exception_stall : std_logic;
   signal exception_taken, interrupt_taken : std_logic;
   signal nullify_fetch, nullify, MM_nullify : boolean;
   signal addrError, MM_addrError, abort_ref, MM_ll_sc_abort : boolean;
@@ -160,7 +156,6 @@ architecture rtl of core is
   signal LL_update, LL_SC_abort, LL_SC_differ: std_logic;
   signal EX_trapped, MM_trapped, EX_ovfl, trap_taken: boolean;
   signal int_req, MM_int_req: reg8;
-  signal EX_nmi,MM_nmi : std_logic;
   signal can_trap,EX_can_trap : reg2;
   signal is_trap, tr_signed, tr_stall: std_logic;
   signal tr_is_equal, tr_less_than: std_logic;
@@ -364,8 +359,8 @@ architecture rtl of core is
   -- data memory --
   signal rd_data_raw, rd_data, WB_rd_data, WB_mem_data: reg32;
   signal MM_B_data, WB_B_data: reg32;
-  signal d_addr_pre: reg2;
-  signal jr_stall, br_stall, fwd_lwlr, sw_stall, lw_stall : std_logic;
+  signal jr_stall, br_stall, sw_stall, lw_stall : std_logic;
+  signal fwd_lwlr : boolean;
   signal fwd_mem, WB_addr2: reg2;
 
 
@@ -918,7 +913,8 @@ begin
   
   j_target <= RF_PCincd(31 downto 28) & RF_instruction(25 downto 0) & b"00";
 
-  RF_JR_STALL: process (funct_word,a_rs,EX_a_c,MM_a_c,EX_wreg,MM_wreg)
+  RF_JR_STALL: process (funct_word,a_rs,EX_a_c,MM_a_c,EX_wreg,MM_wreg,
+                        MM_is_load)
     variable i_dbg_jr_stall : integer := 0;  -- debug only
   begin
     if ( (funct_word.PCsel = b"11")and          -- load-delay slot
@@ -1482,9 +1478,7 @@ begin
   rd_data_raw <= data_inp when (MM_wrmem = '1' and MM_aVal = '0') else
                  (others => 'X');
   
-  d_addr <= MM_addr(31 downto 2) & d_addr_pre;
-
-  MM_MEM_CTRL_INTERFACE: process(MM_mem_t, MM_aVal, MM_addr(1 downto 0))
+  MM_MEM_CTRL_INTERFACE: process(MM_mem_t, MM_addr)
     variable i_d_addr   : reg2;
     variable i_byte_sel : reg4;
   begin
@@ -1517,7 +1511,7 @@ begin
 
     end case;
 
-    d_addr_pre <= i_d_addr;
+    d_addr     <= MM_addr(31 downto 2) & i_d_addr;
     b_sel      <= i_byte_sel;
 
   end process MM_MEM_CTRL_INTERFACE; ---------------------------------
@@ -1577,6 +1571,7 @@ begin
     variable f_m: reg2;
     variable i_data : reg32;
   begin
+    f_m := "XX";
     if ( (MM_wrmem = '0') and (MM_aVal = '0') ) then
       if ( (MM_a_rt = WB_a_c) and (WB_wreg = '0') and (WB_a_c /= b"00000")) then
         f_m    := "01";                 -- forward from WB
@@ -1596,19 +1591,19 @@ begin
 
   -- forwarding for LWL, LWR
   MM_FWD_LWLR: process (MM_aVal,MM_wreg,MM_a_rt,WB_a_c,WB_wreg,WB_C,MM_B)
-    variable f_m: std_logic;
+    variable f_m: boolean;            -- TRUE when WB_C is forwarded
     variable i_data : reg32;
   begin
     if ( (MM_wreg = '0') and (MM_aVal = '0') and
          (MM_a_rt = WB_a_c) and (WB_wreg = '0') and
          (WB_a_c /= b"00000") ) then
-      f_m    := '1';                  -- forward from WB
+      f_m    := TRUE;                 -- rt matches WB_a_c: forward WB_C
       i_data := WB_C;
     else
-      f_m    := '0';                  -- not forwarding
+      f_m    := FALSE;                -- not forwarding: keep MM_B
       i_data := MM_B;
     end if;
-    fwd_lwlr <= f_m;                  -- for debugging
+    fwd_lwlr  <= f_m;                 -- exported for debugging only
     MM_B_data <= i_data;
   end process MM_FWD_LWLR;
 
@@ -1847,7 +1842,7 @@ begin
               tlb_exception,MM_tlb_exception,
               tlb_excp_type,MM_tlb_excp_type, 
               tlb_stage_mm,MM_tlb_stage_mm,
-              EX_nmi,MM_nmi, interrupt,MM_interrupt, int_req,MM_int_req,
+              int_req,MM_int_req,
               is_SC, MM_is_SC, is_MFC0, MM_is_MFC0,
               ext_event, MM_ext_event,
               ext_event_type, MM_ext_event_type,
@@ -1871,7 +1866,7 @@ begin
   -- STATUS -- pg 79 -- cop0_12 --------------------
   COP0_DECODE_EXCEPTION_AND_UPDATE_STATUS:
   process (MM_a_rt, is_exception, cop0_inp,
-           MM_cop0_reg, MM_cop0_sel, MM_nmi, MM_interrupt,MM_int_req,
+           MM_cop0_reg, MM_cop0_sel, MM_int_req,
            RF_is_delayslot, EX_is_delayslot, MM_is_delayslot, WB_is_delayslot,
            rom_stall,ram_stall, MM_is_mfc0,
            INDEX, RANDOM, EntryLo0, EntryLo1, CONTEXT, PAGEMASK, WIRED,
@@ -2260,8 +2255,7 @@ begin
 
 
   -- Select input to PC on an exception --------------------
-  COP0_SEL_EPC: process (is_exception, MM_nmi, MM_interrupt, STATUS, CAUSE,
-                         MM_trapped, rom_stall, ram_stall)
+  COP0_SEL_EPC: process (is_exception, STATUS, CAUSE, MM_trapped)
     variable i_excp_PCsel : reg3;
   begin
 
@@ -2275,7 +2269,7 @@ begin
            | exTLBdblFaultIF | exTLBdblFaultRD | exTLBdblFaultWR 
            | exTLBinvalIF | exTLBinvalRD | exTLBinvalWR | exTLBmod
            | exIBE | exDBE =>
-        i_excp_PCsel := PCsel_EXC_0180;   -- PC <= exception_180
+        i_excp_PCsel := PCsel_EXC_0180;    -- PC <= exception_180
 
        when exTRAP =>
          if MM_trapped then
@@ -2291,16 +2285,14 @@ begin
         i_excp_PCsel := PCsel_EXC_BFC0; -- PC <= 0xBFC0.0000
 
       when exInterr =>                  -- normal interrupt
-        if (rom_stall = '0' and ram_stall = '0') then
-          if CAUSE(CAUSE_IV) = '1' then
-            i_excp_PCsel := PCsel_EXC_0200; -- PC <= exception_0200
-          else
-            i_excp_PCsel := PCsel_EXC_0180; -- PC <= exception_0180
-          end if;
+        if CAUSE(CAUSE_IV) = '1' then
+          i_excp_PCsel := PCsel_EXC_0200; -- PC <= exception_0200
+        else
+          i_excp_PCsel := PCsel_EXC_0180; -- PC <= exception_0180
         end if;
 
-      when exNOP =>
-        i_excp_PCsel := PCsel_EXC_none; -- no exception, do nothing to PC
+      -- when exNOP =>
+      --   i_excp_PCsel := PCsel_EXC_none; -- no exception, do nothing to PC
 
       when others =>                    -- should never get here
         i_excp_PCsel := PCsel_EXC_none;
diff --git a/cMIPS/vhdl/exception.vhd b/cMIPS/vhdl/exception.vhd
index ca089e6f24d7218a7a8c5ef5feaf033224879b45..b6c60f5f0bfd44a9d2dc96198e786d0029bfafad 100644
--- a/cMIPS/vhdl/exception.vhd
+++ b/cMIPS/vhdl/exception.vhd
@@ -146,10 +146,6 @@ entity reg_excp_EX_MM is
        MM_tlb_excp_type: out exception_type;
        EX_tlb_stage_mm:  in  boolean;
        MM_tlb_stage_mm:  out boolean;
-       EX_nmi:           in  std_logic;
-       MM_nmi:           out std_logic;
-       EX_interrupt:     in  std_logic;
-       MM_interrupt:     out std_logic;
        EX_int_req:       in  reg8;
        MM_int_req:       out reg8;
        EX_is_SC:         in  boolean;
@@ -193,12 +189,10 @@ begin
         MM_tlb_exception <= EX_tlb_exception;
         MM_tlb_excp_type <= EX_tlb_excp_type;
         MM_tlb_stage_MM  <= EX_tlb_stage_MM;
-        MM_nmi           <= EX_nmi         ;
-        MM_interrupt     <= EX_interrupt   ;
         MM_int_req       <= EX_int_req     ;
         MM_is_SC         <= EX_is_SC       ;
         MM_is_MFC0       <= EX_is_MFC0     ;
-        MM_ext_event     <= EX_ext_event;
+        MM_ext_event     <= EX_ext_event   ;
         MM_ext_event_type <= EX_ext_event_type;
         MM_int_event_type <= EX_int_event_type;
       end if;
diff --git a/cMIPS/vhdl/fpu.vhd b/cMIPS/vhdl/fpu.vhd
index b612728754196be5570b69c66488a099bb3aab62..452e5eb60aea6779ebd7ff5463d5b2e4a3c4fbdc 100644
--- a/cMIPS/vhdl/fpu.vhd
+++ b/cMIPS/vhdl/fpu.vhd
@@ -846,6 +846,7 @@ begin
   U_Mult_float: mult32float
     port map (data_inp,clk,rst,'0',selA_mul,selB_mul,selC_mul,RES_MUL,pt0,wt0);
 
+  RES_SUM <= (others => 'X');  -- placeholder while U_Sum_float (below) is commented out
   -- U_Sum_float : sum32float
   -- port map (data_inp,clk,rst,wt,selA_sum,selB_sum,selC_sum,RES_SUM,pt1,wt1);
 
diff --git a/cMIPS/vhdl/packageMemory.vhd b/cMIPS/vhdl/packageMemory.vhd
index 2458190015c4ee163930f69074af127ad86a42c9..8edf7bcc3d775edf7bb1ff433ecf95cc6b611cb2 100644
--- a/cMIPS/vhdl/packageMemory.vhd
+++ b/cMIPS/vhdl/packageMemory.vhd
@@ -41,7 +41,7 @@ package p_MEMORY is
   -- begin DO NOT change these names as several scripts depend on them --
   --  you may change the values, not names neither formatting          --
   constant x_INST_BASE_ADDR : reg32   := x"00000000";
-  constant x_INST_MEM_SZ    : reg32   := x"00010000";
+  constant x_INST_MEM_SZ    : reg32   := x"00004000";
   constant x_DATA_BASE_ADDR : reg32   := x"00010000";
   constant x_DATA_MEM_SZ    : reg32   := x"00008000";
   constant x_IO_BASE_ADDR   : reg32   := x"0F000000";
diff --git a/cMIPS/vhdl/tb_cMIPS.vhd b/cMIPS/vhdl/tb_cMIPS.vhd
index 2643bc6a00305b2f4f4acdcc62d30a036e7a1816..c86a12ccfde8d7b961f596a95b9c4c4115b8a737 100644
--- a/cMIPS/vhdl/tb_cMIPS.vhd
+++ b/cMIPS/vhdl/tb_cMIPS.vhd
@@ -476,9 +476,11 @@ begin  -- TB
   pll : mf_altpll port map (areset => a_reset, inclk0 => clock_50mhz,
    c0 => phi0in, c1 => phi1in, c2 => phi2in, c3 => phi3in, c4 => clkin);
 
-  -- clk and clk4x must be in opposite phases
-  pll_io : mf_altpll_io port map (areset => a_reset, inclk0 => clock_50mhz,
-   c0 => clk2x, c1 => clk4x0, c2 => clk4x180);
+  -- pll_io : mf_altpll_io port map (areset => a_reset, inclk0 => clock_50mhz,
+  --  c0 => clk2x, c1 => clk4x0, c2 => clk4x180);
+  clk2x    <= '0';                      -- pll_io is commented out above, so
+  clk4x0   <= '0';                      -- the three clocks it used to drive
+  clk4x180 <= '0';                      -- are tied to constant '0'
 
   mf_altclkctrl_inst_clk : mf_altclkctrl port map (
     inclk => clkin, outclk => clk);
@@ -563,8 +565,8 @@ begin  -- TB
                     lcd_d_out       when b"1101",
                     (others => 'X') when others;
   
-  U_D_CACHE: fake_d_cache   -- or d_cache
-  -- U_D_CACHE: d_cache   -- or fake_d_cache
+  U_D_CACHE: fake_d_cache  -- or d_cache
+  -- U_D_CACHE: d_cache  -- or fake_d_cache
     port map (rst, clk4x,
               data_aVal, data_wait, wr,
               d_addr, cpu_data, d_cache_d_out, cpu_xfer,
@@ -676,7 +678,7 @@ architecture behavioral of inst_addr_decode is
   signal in_range : boolean;
 begin
 
-  in_range <= (addr(HI_ADDR downto LO_ADDR) = PREFIX); 
+  in_range <= (addr(HI_ADDR downto LO_ADDR) = PREFIX);
 
   aVal <= '0' when ( cpu_i_aVal = '0' and rst = '1' and in_range ) else
           '1';
@@ -704,11 +706,17 @@ entity ram_addr_decode is               -- CPU side triggers access
         addr        : in  reg32;        -- CPU address
         aVal        : out std_logic;    -- data address (act=0)
         dev_select  : out reg4);        -- select input to CPU
+  constant LO_ADDR  : integer := log2_ceil(DATA_BASE_ADDR);
+  constant HI_ADDR  : integer := log2_ceil(DATA_BASE_ADDR + DATA_MEM_SZ - 1);
+  constant in_r : std_logic_vector(HI_ADDR downto LO_ADDR) := (others => '1');
+  constant ng_r : std_logic_vector(HI_ADDR downto LO_ADDR) := (others => '0');
+  constant oth  : std_logic_vector(HI_SEL_BITS downto HI_ADDR+1):=(others=>'1');
+  constant ng_o : std_logic_vector(HI_SEL_BITS downto HI_ADDR+1):=(others=>'0');
 end entity ram_addr_decode;
 
 architecture behavioral of ram_addr_decode is
-  constant LO_ADDR : natural := log2_ceil(DATA_BASE_ADDR);
-  constant HI_ADDR : natural := log2_ceil(DATA_BASE_ADDR + DATA_MEM_SZ - 1);
+--   constant LO_ADDR : natural := log2_ceil(DATA_BASE_ADDR);
+--   constant HI_ADDR : natural := log2_ceil(DATA_BASE_ADDR + DATA_MEM_SZ - 1);
   
   constant all_0  : std_logic_vector(31 downto 0)         := (others=>'0');
   
@@ -727,11 +735,15 @@ architecture behavioral of ram_addr_decode is
   
 begin
 
-  in_range <= ( rst = '1'
-                and ((addr and a_mask) = x_DATA_BASE_ADDR)
-                and ((addr and r_mask) = x_DATA_BASE_ADDR) );
+--   in_range <= ( rst = '1'
+--                 and ((addr and a_mask) = x_DATA_BASE_ADDR)
+--                 and ((addr and r_mask) = x_DATA_BASE_ADDR) );
 
-  aVal <= '0' when (cpu_d_aVal = '0' and in_range) else '1';
+  in_range <= ( addr(HI_SEL_BITS downto LO_SEL_BITS)
+                =
+                x_DATA_BASE_ADDR(HI_SEL_BITS downto LO_SEL_BITS) );
+  
+  aVal <= '0' when (rst = '1' and cpu_d_aVal = '0' and in_range) else '1';
 
   dev_select <= b"0001" when (cpu_d_aVal = '0' and in_range) else b"0000";
 
@@ -877,10 +889,16 @@ architecture behavioral of io_addr_decode is
   constant LO_SEL_ADDR : integer := log2_ceil(IO_ADDR_RANGE);
   constant HI_SEL_ADDR : integer := LO_SEL_ADDR + (IO_MAX_NUM_DEVS - 1);
 
+  constant IO_RANGE : integer := IO_ADDR_RANGE * IO_MAX_NUM_DEVS;
+  constant LO_ADDR  : integer := log2_ceil(IO_BASE_ADDR);
+  constant HI_ADDR  : integer := log2_ceil(IO_BASE_ADDR + IO_RANGE - 1);
+  constant in_r : std_logic_vector(HI_ADDR downto LO_ADDR) := (others => '1');
+  constant ng_r : std_logic_vector(HI_ADDR downto LO_ADDR) := (others => '0');
+  constant oth  : std_logic_vector(HI_SEL_BITS downto HI_ADDR+1):=(others=>'1');
+  constant ng_o : std_logic_vector(HI_SEL_BITS downto HI_ADDR+1):=(others=>'0');
   constant all_0  : std_logic_vector(31 downto 0)         := (others=>'0');
 
   -- I/O constants
-  constant IO_RANGE : integer := IO_ADDR_RANGE * IO_MAX_NUM_DEVS;
   constant LO_DEV : natural := 0;
   constant HI_DEV : natural := log2_ceil(IO_RANGE-1);
 
@@ -892,11 +910,17 @@ architecture behavioral of io_addr_decode is
   signal dev  : integer;                    -- DEBUGGING only
 begin
 
+--   aVal <= '0' when ( cpu_d_aVal = '0' and rst = '1' and not_waiting = '1' and
+--                      ((addr and x_mask) = x_IO_BASE_ADDR) ) else
+--           '1';
+
+  dev <= to_integer(signed(addr(HI_SEL_ADDR downto LO_SEL_ADDR)));
+   
   aVal <= '0' when ( cpu_d_aVal = '0' and rst = '1' and not_waiting = '1' and
-                     ((addr and x_mask) = x_IO_BASE_ADDR) ) else
+                     ((addr(HI_ADDR downto LO_ADDR) and in_r) /= ng_r) and
+                     ((addr(HI_SEL_BITS downto HI_ADDR+1) and oth) = ng_o)
+                   ) else
           '1';
-          
-  dev <= to_integer(signed(addr(HI_SEL_ADDR downto LO_SEL_ADDR)));
 
   U_decode: process(clk, aVal, addr, dev)
     variable dev_sel    : reg4;
diff --git a/cMIPS/vhdl/uart.vhd b/cMIPS/vhdl/uart.vhd
index 37aedafbe764aeed07c0362780934d4a683a2b7d..bb3e3228eeca0c702edafb7483ea653759ec5019 100644
--- a/cMIPS/vhdl/uart.vhd
+++ b/cMIPS/vhdl/uart.vhd
@@ -69,19 +69,19 @@ architecture estrutural of uart_int is
   constant CLOCK_DIVIDER : integer := 50;
   
   component register8 is
-    port(rel, rst, ld: in  std_logic;
+    port(clk, rst, ld: in  std_logic;
          D:            in  std_logic_vector;
          Q:            out std_logic_vector);
   end component register8;
 
   component par_ser10 is
-    port(rel, rst, ld, desl: in  std_logic;
+    port(clk, rst, ld, desl: in  std_logic;
          D:            in  std_logic_vector;
          Q:            out std_logic);
   end component par_ser10;
 
   component ser_par10 is
-    port(rel, rst, desl: in  std_logic;
+    port(clk, rst, desl: in  std_logic;
          D:            in  std_logic;
          Q:            out std_logic_vector);
   end component ser_par10;
@@ -659,7 +659,7 @@ library ieee; use ieee.std_logic_1164.all;
 use work.p_WIRES.all;
 
 entity register8 is
-  port(rel, rst, ld: in  std_logic;
+  port(clk, rst, ld: in  std_logic;
         D:           in  reg8;
         Q:           out reg8);
 end register8;
@@ -668,11 +668,11 @@ architecture functional of register8 is
   signal value: reg8;
 begin
 
-  process(rel, rst, ld)
+  process(clk, rst, ld)                 -- rst is tested before the clock edge
   begin
     if rst = '0' then
       value <= x"00";
-    elsif ld = '1' and rising_edge(rel) then
+    elsif ld = '1' and rising_edge(clk) then  -- load D on a rising clk edge while ld = '1'
       value <= D;
     end if;
   end process;
@@ -691,7 +691,7 @@ library ieee; use ieee.std_logic_1164.all;
 use work.p_WIRES.all;
 
 entity par_ser10 is
-  port(rel, rst, ld, desl: in  std_logic;
+  port(clk, rst, ld, desl: in  std_logic;
        D:                  in  reg10;
        Q:                  out std_logic);
 end par_ser10;
@@ -699,15 +699,15 @@ end par_ser10;
 architecture functional of par_ser10 is
 begin
 
-  process(rel, rst, ld, desl, D)
+  process(clk, rst, ld, desl, D)
     variable value: reg10;
   begin
     if rst = '0' then
       value := b"1111111111";
       Q <= '1';
-    elsif ld = '1' and rising_edge(rel) then
+    elsif ld = '1' and rising_edge(clk) then
       value := D;
-    elsif desl = '1' and rising_edge(rel) then
+    elsif desl = '1' and rising_edge(clk) then
       Q <= value(0);
       value(8 downto 0) := value(9 downto 1);
       value(9) := '1';                  -- when idle, send stop-bits
@@ -726,7 +726,7 @@ library ieee; use ieee.std_logic_1164.all;
 use work.p_WIRES.all;
 
 entity ser_par10 is
-  port(rel, rst, desl: in  std_logic;
+  port(clk, rst, desl: in  std_logic;
        D:              in  std_logic;
        Q:              out reg10);
 end ser_par10;
@@ -734,12 +734,12 @@ end ser_par10;
 architecture functional of ser_par10 is
 begin
 
-  process(rel, rst, desl)
+  process(clk, rst, desl)
     variable value: reg10;
   begin
     if rst = '0' then
       value := b"0000000000";
-    elsif desl = '1' and rising_edge(rel) then
+    elsif desl = '1' and rising_edge(clk) then
       value(8 downto 0) := value(9 downto 1);
       value(9) := D;
     end if;
@@ -772,7 +772,7 @@ end remota;
 architecture behavior of remota is
 
   component counter8 is
-    port(rel, rst, ld, en: in  std_logic;
+    port(clk, rst, ld, en: in  std_logic;
          D:            in  std_logic_vector;
          Q:            out std_logic_vector);
   end component counter8;
diff --git a/cMIPS/vhdl/units.vhd b/cMIPS/vhdl/units.vhd
index dff8a7d38e1a13b8912b34a60277d6608bf50282..bd43137874ac5b7b20c9b2b05d9210f6cded6b93 100644
--- a/cMIPS/vhdl/units.vhd
+++ b/cMIPS/vhdl/units.vhd
@@ -667,9 +667,9 @@ begin
   U_OLD_CYCLE: FFD port map
     (clk => clk, rst => rst, set => '1', D => cycle, Q => this);
 
-  w_d <= this xor cycle;                -- active for ONE cycle only
+  -- w_d <= this xor cycle;                -- active for ONE cycle only
 
-  waiting <= not(w_d) and n_sel and will_wait;
+  waiting <= not(this xor cycle) and n_sel and will_wait;
 
 end;
 -- ++ wait_states +++++++++++++++++++++++++++++++++++++++++++++++++++++