diff --git a/cMIPS/altera/tb_cMIPS.vhd b/cMIPS/altera/tb_cMIPS.vhd
index 2082ee677892be77d9b75d36ffa3b3035f8cf2b4..67f0b7e22a046fc651587ce8ec17e6a267a07d96 100644
--- a/cMIPS/altera/tb_cMIPS.vhd
+++ b/cMIPS/altera/tb_cMIPS.vhd
@@ -185,7 +185,6 @@ architecture ppl_type of tb_cMIPS is
     port (rst      : in  std_logic;
           clk      : in  std_logic;
           sel      : in  std_logic;
-          rdy      : out std_logic;
           wr       : in  std_logic;
           data     : in  std_logic_vector;
           display0 : out std_logic_vector;
@@ -197,20 +196,16 @@ architecture ppl_type of tb_cMIPS is
     port (rst      : in  std_logic;
           clk      : in  std_logic;
           sel      : in  std_logic;
-          rdy      : out std_logic;
           data     : out reg32;
           kbd      : in  std_logic_vector (11 downto 0);
           sw       : in  std_logic_vector (3 downto 0));
   end component read_keys;
 
-
   component print_data is
     port (rst     : in  std_logic;
           clk     : in  std_logic;
           sel     : in  std_logic;
-          rdy     : out std_logic;
           wr      : in  std_logic;
-          addr    : in  std_logic_vector;
           data    : in  std_logic_vector);
   end component print_data;
 
@@ -218,9 +213,7 @@ architecture ppl_type of tb_cMIPS is
     port (rst     : in  std_logic;
           clk     : in  std_logic;
           sel     : in  std_logic;
-          rdy     : out std_logic;
           wr      : in  std_logic;
-          addr    : in  std_logic_vector;
           data    : in  std_logic_vector);
   end component to_stdout;
 
@@ -229,7 +222,6 @@ architecture ppl_type of tb_cMIPS is
     port (rst      : in  std_logic;
           clk      : in  std_logic;
           sel      : in  std_logic;
-          rdy      : out std_logic;
           wr       : in  std_logic;
           addr     : in  std_logic_vector;
           data     : in  std_logic_vector;
@@ -242,7 +234,6 @@ architecture ppl_type of tb_cMIPS is
     port (rst     : in  std_logic;
           clk     : in  std_logic;
           sel     : in  std_logic;
-          rdy     : out std_logic;
           wr      : in  std_logic;
           addr    : in  std_logic_vector;
           data    : out std_logic_vector;
@@ -253,9 +244,7 @@ architecture ppl_type of tb_cMIPS is
     port (rst     : in    std_logic;
           clk     : in    std_logic;
           sel     : in    std_logic;
-          rdy     : out   std_logic;
           wr      : in    std_logic;
-          addr    : in    std_logic_vector;
           data_inp : in   std_logic_vector;
           data_out : out  std_logic_vector;
           irq      : out  std_logic);
@@ -265,7 +254,6 @@ architecture ppl_type of tb_cMIPS is
     port (rst     : in    std_logic;
           clk     : in    std_logic;
           sel     : in    std_logic;
-          rdy     : out   std_logic;
           wr      : in    std_logic;
           addr    : in    std_logic;
           data_inp : in   std_logic_vector;
@@ -278,6 +266,28 @@ architecture ppl_type of tb_cMIPS is
           bit_rt  : out   std_logic_vector);-- communication speed - TB only
   end component simple_uart;
 
+  component FPU is                        -- instantiated below as U_FPU
+    port (rst      : in   std_logic;
+          clk      : in   std_logic;
+          sel      : in   std_logic;     -- driven by io_fpu_sel in U_FPU
+          rdy      : out  std_logic;     -- drives io_fpu_wait in U_FPU
+          wr       : in   std_logic;
+          addr     : in   std_logic_vector;  -- d_addr(5 downto 2) in U_FPU
+          data_inp : in   std_logic_vector;  -- cpu_data in U_FPU
+          data_out : out  std_logic_vector); -- fpu_d_out in U_FPU
+  end component FPU;
+
+  component fake_FPU is  -- same port list as FPU; its use as U_FPU is commented out
+    port (rst      : in   std_logic;
+          clk      : in   std_logic;
+          sel      : in   std_logic;
+          rdy      : out  std_logic;
+          wr       : in   std_logic;
+          addr     : in   std_logic_vector;
+          data_inp : in   std_logic_vector;
+          data_out : out  std_logic_vector);
+  end component fake_FPU;
+
   component remota is
     generic(OUTPUT_FILE_NAME : string; INPUT_FILE_NAME : string);
     port(rst, clk  : in  std_logic;
@@ -291,7 +301,6 @@ architecture ppl_type of tb_cMIPS is
     port (rst     : in    std_logic;
           clk     : in    std_logic;
           sel     : in    std_logic;
-          rdy     : out   std_logic;
           wr      : in    std_logic;
           addr    : in    std_logic_vector;
           data    : out   std_logic_vector;
@@ -313,7 +322,7 @@ architecture ppl_type of tb_cMIPS is
 
   component io_addr_decode is
     port (rst         : in  std_logic;
-          clk         : in  std_logic;    -- no use, except in synch-ing asserts
+          clk         : in  std_logic;
           cpu_d_aVal  : in  std_logic;
           addr        : in  std_logic_vector;
           dev_select  : out std_logic_vector;
@@ -332,11 +341,19 @@ architecture ppl_type of tb_cMIPS is
           not_waiting : in  std_logic);
   end component io_addr_decode;
 
+  component busError_addr_decode is
+    port (rst         : in  std_logic;
+          cpu_d_aVal  : in  std_logic;
+          addr        : in  reg32;
+          d_busError  : out std_logic); -- decoded address not in range (act=0)
+  end component busError_addr_decode;
+  
   component inst_addr_decode is
     port (rst         : in  std_logic;
           cpu_i_aVal  : in  std_logic;
           addr        : in  std_logic_vector;
-          aVal        : out std_logic);
+          aVal        : out std_logic;
+          i_busError  : out std_logic);
   end component inst_addr_decode;
     
   component simul_ROM is 
@@ -489,7 +506,9 @@ architecture ppl_type of tb_cMIPS is
   component core is
     port (rst    : in    std_logic;
           clk    : in    std_logic;
+          phi1   : in    std_logic;
           phi2   : in    std_logic;
+          phi3   : in    std_logic;
           i_aVal : out   std_logic;
           i_wait : in    std_logic;
           i_addr : out   std_logic_vector;
@@ -502,7 +521,9 @@ architecture ppl_type of tb_cMIPS is
           wr     : out   std_logic;
           b_sel  : out   std_logic_vector;
           nmi    : in    std_logic;
-          irq    : in    std_logic_vector);
+          irq    : in    std_logic_vector;
+          i_busErr : in  std_logic;
+          d_busErr : in  std_logic);
   end component core;
 
   component mf_altpll port (
@@ -530,26 +551,26 @@ architecture ppl_type of tb_cMIPS is
 
   signal clk,clkin,clk_locked,clk_50mhz : std_logic;
   signal clk2x, clk4x,clk4x0,clk4x180 : std_logic;
-  signal phi0,phi2,phi3,phi0in,phi2in,phi3in : std_logic;
+  signal phi0,phi1,phi2,phi3,phi0in,phi1in,phi2in,phi3in : std_logic;
   signal cpu_i_aVal, cpu_i_wait, wr, cpu_d_aVal, cpu_d_wait : std_logic;
   signal rst, ic_reset, cpu_reset : std_logic;
   signal a_reset, a_rst0,a_rst1,a_rst2,a_rst3,a_rst4,a_rst5,a_rst6,a_rst7,a_rst8,a_rst9, a_rstA, a_rstB, a_rst :std_logic;
-  signal nmi : std_logic;
+  signal nmi, i_busError, d_busError : std_logic;
   signal irq : reg6;
   signal inst_aVal, inst_wait, rom_rdy : std_logic := '1';
-  signal data_aVal, data_wait, ram_rdy, mem_wr, mem_strobe : std_logic;
+  signal data_aVal, data_wait, ram_rdy, mem_wr : std_logic;
   signal cpu_xfer, mem_xfer, dev_select, dev_select_ram, dev_select_io : reg4;
-  signal io_print_sel,   io_print_wait   : std_logic := '1';
-  signal io_stdout_sel,  io_stdout_wait  : std_logic := '1';
-  signal io_stdin_sel,   io_stdin_wait   : std_logic := '1';
-  signal io_write_sel,   io_write_wait   : std_logic := '1';
-  signal io_read_sel,    io_read_wait    : std_logic := '1';
-  signal io_counter_sel, io_counter_wait : std_logic := '1';
+  signal io_print_sel   : std_logic := '1';
+  signal io_stdout_sel  : std_logic := '1';
+  signal io_stdin_sel   : std_logic := '1';
+  signal io_write_sel   : std_logic := '1';
+  signal io_read_sel    : std_logic := '1';
+  signal io_counter_sel : std_logic := '1';
+  signal io_uart_sel    : std_logic := '1';
+  signal io_sstats_sel  : std_logic := '1';
+  signal io_7seg_sel    : std_logic := '1';
+  signal io_keys_sel    : std_logic := '1';
   signal io_fpu_sel,     io_fpu_wait     : std_logic := '1';
-  signal io_uart_sel,    io_uart_wait    : std_logic := '1';
-  signal io_sstats_sel,  io_sstats_wait  : std_logic := '1';
-  signal io_7seg_sel,    io_7seg_wait    : std_logic := '1';
-  signal io_keys_sel,    io_keys_wait    : std_logic := '1';
   signal io_lcd_sel,     io_lcd_wait     : std_logic := '1';
   signal d_cache_d_out, stdin_d_out, read_d_out, counter_d_out : reg32;
   signal fpu_d_out, uart_d_out, sstats_d_out, keybd_d_out : reg32;
@@ -567,7 +588,7 @@ architecture ppl_type of tb_cMIPS is
 
   signal dump_ram : std_logic;
   
-  signal uart_irq, start_remota : std_logic;
+  signal uart_irq, start_remota, uart_inp, uart_out : std_logic;
   signal bit_rt : reg3;
 
 
@@ -576,7 +597,7 @@ begin
 -- {ALTERA_INSTANTIATION_END} DO NOT REMOVE THIS LINE!
   
   pll : mf_altpll port map (inclk0 => clock_50mhz, locked => clk_locked,
-    c0 => phi0in, c1 => mem_strobe, c2 => phi2in, c3 => phi3in, c4 => clkin);
+    c0 => phi0in, c1 => phi1in, c2 => phi2in, c3 => phi3in, c4 => clkin);
 
   -- pll_io : mf_altpll_io port map (areset => rst, inclk0 => clock_50mhz,
   --   c0 => clk2x, c1 => clk4x0, c2 => clk4x180);
@@ -590,13 +611,14 @@ begin
   mf_altclkctrl_inst_clk4x : mf_altclkctrl port map (
     inclk => clk4x180, outclk => clk4x);
 
+  mf_altclkctrl_inst_phi0 : mf_altclkctrl port map (
+    inclk => phi0in, outclk => phi0);
+  mf_altclkctrl_inst_phi1 : mf_altclkctrl port map (
+    inclk => phi1in, outclk => phi1);
   mf_altclkctrl_inst_phi2 : mf_altclkctrl port map (
     inclk => phi2in, outclk => phi2);
-
-  -- mf_altclkctrl_inst_phi0 : mf_altclkctrl port map (
-  --   inclk => phi0in, outclk => phi0);
-  -- mf_altclkctrl_inst_phi3 : mf_altclkctrl port map (
-  --   inclk => phi3in, outclk => phi3);
+  mf_altclkctrl_inst_phi3 : mf_altclkctrl port map (
+    inclk => phi3in, outclk => phi3);
 
   -- synchronize external asynchronous reset = key(9) at lower left
   a_reset <= not(key(9));
@@ -624,30 +646,24 @@ begin
 
   cpu_i_wait <= inst_wait;
   cpu_d_wait <= data_wait and io_wait;
-  io_wait    <= io_lcd_wait;
-                -- '1'; io_print_wait and io_stdout_wait and io_stdin_wait and
-                -- io_write_wait and io_read_wait and
-                -- io_counter_wait and -- io_uart_wait and
-                -- io_sstats_wait and --  io_fpu_wait
-                -- io_7seg_wait and  io_keys_wait;
-
-  not_waiting <= (inst_wait and data_wait); -- and io_wait);
+  io_wait    <= io_lcd_wait and io_fpu_wait;
 
+  not_waiting <= (inst_wait and data_wait);  -- for I/O references
   
   -- irq <= b"000000"; -- NO interrupt requests
-  irq <= b"0000" & uart_irq & counter_irq; -- uart+counter interrupts
-  -- irq <= b"00000" & counter_irq; -- counter interrupts
-  -- irq <= b"000000"; -- no interrupts
+  irq <= uart_irq & counter_irq & b"0000"; -- uart+counter interrupts
+  -- irq <= counter_irq & b"00000"; -- counter interrupts
   nmi <= '0'; -- input port to TB
 
 
-  U_CORE: core port map (cpu_reset, clk, phi2,
+  U_CORE: core port map (cpu_reset, clk, phi1,phi2,phi3,
                          cpu_i_aVal, cpu_i_wait, i_addr, cpu_instr,
                          cpu_d_aVal, cpu_d_wait, d_addr, cpu_data_inp, cpu_data,
-                         wr, cpu_xfer, nmi, irq);
+                         wr, cpu_xfer, nmi, irq, i_busError, d_busError);
 
+  
   U_INST_ADDR_DEC: inst_addr_decode
-    port map (rst, cpu_i_aVal, i_addr, inst_aVal);
+    port map (rst, cpu_i_aVal, i_addr, inst_aVal, i_busError);
   
   U_I_CACHE: fake_i_cache   -- or i_cache
   -- U_I_CACHE: i_cache  -- or fake_i_cache
@@ -658,38 +674,35 @@ begin
 
   -- U_ROM: simul_ROM generic map ("prog.bin")
   U_ROM: fpga_ROM generic map ("prog.bin")
-    port map (rst,clk, mem_i_sel,rom_rdy, phi2, mem_i_addr,datrom);
+    port map (rst,clk, mem_i_sel,rom_rdy, phi3, mem_i_addr,datrom);
 
+  U_DATA_BUS_ERROR_DEC: busError_addr_decode
+    port map (rst, cpu_d_aVal, d_addr, d_busError);
+  
   U_IO_ADDR_DEC: io_addr_decode
-    port map (rst,clk, cpu_d_aVal, d_addr, dev_select_io,
+    port map (rst, phi0, cpu_d_aVal, d_addr, dev_select_io, -- ports: rst, clk
               io_print_sel, io_stdout_sel, io_stdin_sel,io_read_sel, 
               io_write_sel, io_counter_sel, io_fpu_sel, io_uart_sel,
               io_sstats_sel, io_7seg_sel, io_keys_sel, io_lcd_sel,
               not_waiting);
 
   U_DATA_ADDR_DEC: ram_addr_decode
-    port map (rst, cpu_d_aVal, d_addr, data_aVal, dev_select_ram);
+    port map (rst, cpu_d_aVal, d_addr,data_aVal, dev_select_ram);
 
   dev_select <= dev_select_io or dev_select_ram;
   
   with dev_select select
-    cpu_data_inp <= (others => 'X') when b"0000",
-                    d_cache_d_out   when b"0001",
+    cpu_data_inp <= d_cache_d_out   when b"0001",
                  --    stdin_d_out     when b"0100",
                  --    read_d_out      when b"0101",
                     counter_d_out   when b"0111",
-                 --    fpu_d_out       when b"1000",
+                    fpu_d_out       when b"1000",
                     uart_d_out      when b"1001",
-                 --    sstats_d_out    when b"1010",
-                 --    ext_data_inp    when b"1100",
                  --    sstats_d_out    when b"1010",
                     keybd_d_out     when b"1100",
                     lcd_d_out       when b"1101",
                     (others => 'X') when others;
 
-  -- U_D_MMU: mem_d_addr <=        -- access Dcache with physical addresses
-  --   std_logic_vector(unsigned(d_addr) - unsigned(x_DATA_BASE_ADDR));
-
   U_D_CACHE: fake_d_cache  -- or d_cache
   -- U_D_CACHE: d_cache  -- or fake_d_cache
     port map (rst, clk4x,
@@ -706,60 +719,70 @@ begin
   dump_ram <= '0';
   
   -- U_read_inp: read_data_file generic map ("input.data")
-  --   port map (rst,clk, io_read_sel,  io_read_wait,  wr, d_addr, read_d_out,
-  --             cpu_xfer);
+  --   port map (rst,clk, io_read_sel, wr, d_addr, read_d_out, cpu_xfer);
 
   -- U_write_out: write_data_file generic map ("output.data")
-  --   port map (rst,clk, io_write_sel, io_write_wait, wr, d_addr, cpu_data,
-  --             cpu_xfer, dump_ram);
+  --   port map (rst,clk, io_write_sel, wr, d_addr,cpu_data,cpu_xfer,dump_ram);
 
   -- U_print_data: print_data
-  --   port map (rst,clk, io_print_sel, io_print_wait, wr, d_addr, cpu_data);
+  --   port map (rst,clk, io_print_sel, wr, cpu_data);
 
   -- U_to_stdout: to_stdout
-  --  port map (rst,clk, io_stdout_sel, io_stdout_wait, wr, d_addr, cpu_data);
-
-  U_simple_uart: simple_uart
-    port map (rst,clk, io_uart_sel, open, -- io_uart_wait,
-              wr, d_addr(2), cpu_data, uart_d_out,
-              uart_txd, uart_rxd, uart_rts, uart_cts, uart_irq, bit_rt);
-
-  -- start_remota <= '0', '1' after 100*CLOCK_PER;
-  
-  -- U_uart_remota: remota generic map ("serial.out","serial.inp")
-  --   port map (rst, clk, start_remota, txdat, rxdat, bit_rt);
-
-  -- U_FPU: FPU
-  --   port map (rst,clk, io_FPU_sel, io_FPU_wait,
-  --             wr, d_addr, cpu_data);
+  --  port map (rst,clk, io_stdout_sel, wr, cpu_data);
 
   U_interrupt_counter: do_interrupt     -- external counter+interrupt
-    port map (rst,clk, io_counter_sel, open, -- io_counter_wait,
-              wr, d_addr, cpu_data, counter_d_out, counter_irq);
+    port map (rst,clk, io_counter_sel, wr, cpu_data,
+              counter_d_out, counter_irq);
 
   U_to_7seg: to_7seg
-    port map (rst,clk,io_7seg_sel,io_7seg_wait,wr,cpu_data,disp0,disp1);
-
-  U_read_keys: read_keys generic map (1000)
-    port map (rst,clk, io_keys_sel,io_keys_wait,keybd_d_out,key,sw);
+    port map (rst,clk,io_7seg_sel, wr, cpu_data, disp0, disp1);
 
-  led_r <= sw(2); -- keybd_d_out(6);
-  led_g <= sw(1); -- keybd_d_out(5);
-  led_b <= sw(0); -- keybd_d_out(4);
+  U_read_keys: read_keys
+    generic map (1000)       -- debouncing interval, in clock cycles
+    port map (rst,clk, io_keys_sel, keybd_d_out, key, sw);
 
+  lcd_backlight <= sw(3);
+  led_g <= sw(2);
+  led_r <= sw(1);
+  led_b <= sw(0);
 
   U_LCD_display: LCD_display
     port map (rst, clk, io_lcd_sel, io_lcd_wait,
               wr, d_addr(2), cpu_data, lcd_d_out,
               lcd_d, lcd_rs, lcd_rw, lcd_en, open);
-  lcd_backlight <= sw(3);
+
+  U_simple_uart: simple_uart
+    port map (rst,clk, io_uart_sel, wr, d_addr(2), cpu_data, uart_d_out,
+              uart_txd, uart_rxd, uart_rts, uart_cts, uart_irq, bit_rt);
+  -- loopback test: uncomment next line & replace 2nd port-map line with the one below
+  -- uart_inp <= uart_out;                 -- looping back;
+  --            uart_out, uart_inp, uart_rts, uart_cts, uart_irq, bit_rt);
+
+  -- uart_cts <= '1';
+
+  -- start_remota <= '0', '1' after 200*CLOCK_PER;
   
+  -- U_uart_remota: remota generic map ("serial.out","serial.inp")
+  --   port map (rst, clk, start_remota, txdat, rxdat, bit_rt);
+
+  -- U_FPU: fake_FPU
+  U_FPU: FPU
+    port map (rst,clk, io_FPU_sel,io_FPU_wait, wr, d_addr(5 downto 2),
+              cpu_data,fpu_d_out);
+
   -- U_sys_stats: sys_stats                -- CPU reads system counters
-  --   port map (cpu_reset,clk, io_sstats_sel, io_sstats_wait,
-  --            wr, d_addr, sstats_d_out,
+  --   port map (cpu_reset,clk, io_sstats_sel, wr, d_addr, sstats_d_out,
   --             cnt_d_ref,cnt_d_rd_hit,cnt_d_wr_hit,cnt_d_flush,
   --             cnt_i_ref,cnt_i_hit);
 
+  -- U_clock: process    -- simulate external clock
+  -- begin
+  --   clock_50mhz <= '1';
+  --   wait for CLOCK_PER / 2;
+  --   clock_50mhz <= '0';
+  --   wait for CLOCK_PER / 2;
+  -- end process;  -- -------------------------------------------------------
+  
 end architecture ppl_type;
 -- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 
@@ -778,24 +801,30 @@ entity inst_addr_decode is              -- CPU side triggers access
   port (rst         : in  std_logic;
         cpu_i_aVal  : in  std_logic;    -- CPU instr addr valid (act=0)
         addr        : in  reg32;        -- CPU address
-        aVal        : out std_logic);   -- decoded address in range (act=0)
-  constant LO_ADDR  : integer := 0;
-  constant HI_ADDR  : integer := log2_ceil(INST_MEM_SZ);
+        aVal        : out std_logic;    -- decoded address in range (act=0)
+        i_busError  : out std_logic);   -- decoded address not in range (act=0)
 end entity inst_addr_decode;
 
 architecture behavioral of inst_addr_decode is
+  signal in_range : boolean;
 begin
 
-  aVal <= '0' when ( cpu_i_aVal = '0' and rst = '1'
-                     and (addr(HI_SEL_BITS downto LO_SEL_BITS)
-                          =
-                          x_INST_BASE_ADDR(HI_SEL_BITS downto LO_SEL_BITS)) )
-          else '1';
-      
+  in_range <= (addr(HI_SEL_BITS downto LO_SEL_BITS)
+               =
+               x_INST_BASE_ADDR(HI_SEL_BITS downto LO_SEL_BITS));
+
+  aVal <= '0' when ( cpu_i_aVal = '0' and rst = '1' and in_range ) else
+          '1';
+
+  i_busError <= '0' when ( cpu_i_aVal = '0' and rst = '1'
+                           and not(in_range) ) else
+                '1';
+     
 end architecture behavioral;
 --++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 
 
+
 --++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 -- RAM address decoding 
 --++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
@@ -811,32 +840,73 @@ entity ram_addr_decode is              -- CPU side triggers access
         addr        : in  reg32;        -- CPU address
         aVal        : out std_logic;    -- data address (act=0)
         dev_select  : out reg4);        -- select input to CPU
-  constant LO_ADDR  : integer := 0;
-  constant HI_ADDR  : integer := log2_ceil(DATA_MEM_SZ);
+  constant LO_ADDR  : integer := log2_ceil(DATA_BASE_ADDR);
+  constant HI_ADDR  : integer := log2_ceil(DATA_BASE_ADDR + DATA_MEM_SZ - 1);
+  constant in_r : std_logic_vector(HI_ADDR downto LO_ADDR) := (others => '1');
+  constant ng_r : std_logic_vector(HI_ADDR downto LO_ADDR) := (others => '0');
+  constant oth  : std_logic_vector(HI_SEL_BITS downto HI_ADDR+1):=(others=>'1');
+  constant ng_o : std_logic_vector(HI_SEL_BITS downto HI_ADDR+1):=(others=>'0');
 end entity ram_addr_decode;
 
 architecture behavioral of ram_addr_decode is
+--   constant LO_ADDR : natural := log2_ceil(DATA_BASE_ADDR);
+--   constant HI_ADDR : natural := log2_ceil(DATA_BASE_ADDR + DATA_MEM_SZ - 1);
+  
+  constant all_0  : std_logic_vector(31 downto 0)         := (others=>'0');
+  
+  constant a_hi   : std_logic_vector(31 downto HI_ADDR+1) := (others=>'0');
+  constant a_lo   : std_logic_vector(LO_ADDR-1 downto 0)  := (others=>'0');
+  constant a_bits : std_logic_vector(HI_ADDR downto LO_ADDR) := (others=>'1');
+  constant a_mask : std_logic_vector := a_hi & a_bits & a_lo;
+
+  constant LO_RAM : natural := 0;
+  constant HI_RAM : natural := log2_ceil(DATA_MEM_SZ-1);
+  constant r_hi   : std_logic_vector(31 downto HI_RAM+1)   := (others=>'1');
+  constant r_lo   : std_logic_vector(HI_RAM downto LO_RAM) := (others=>'0');
+  constant r_mask : std_logic_vector := r_hi & r_lo;
+    
+  signal in_range : boolean;
+  
 begin
 
-  aVal <= '0' when ( cpu_d_aVal = '0' and rst = '1'
-                     and (addr(HI_SEL_BITS downto LO_SEL_BITS)
-                          =
-                          x_DATA_BASE_ADDR(HI_SEL_BITS downto LO_SEL_BITS)) )
-          else '1';
-
-  dev_select <= b"0001"
-                when(cpu_d_aVal = '0' and rst = '1' and
-                     (addr(HI_SEL_BITS downto LO_SEL_BITS)
-                      =
-                      x_DATA_BASE_ADDR(HI_SEL_BITS downto LO_SEL_BITS)))
-          else b"0000";
+--   in_range <= ( rst = '1'
+--                 and ((addr and a_mask) = x_DATA_BASE_ADDR)
+--                 and ((addr and r_mask) = x_DATA_BASE_ADDR) );
+
+--   in_range <= ( ((addr(HI_ADDR downto LO_ADDR) and in_r) /= ng_r) and
+--                 ((addr(HI_SEL_BITS downto HI_ADDR+1) and oth) = ng_o) );
+
+  in_range <= ( addr(HI_SEL_BITS downto LO_SEL_BITS)
+                =
+                x_DATA_BASE_ADDR(HI_SEL_BITS downto LO_SEL_BITS) );
+  
+  aVal <= '0' when (rst = '1' and cpu_d_aVal = '0' and in_range) else '1';
+
+  dev_select <= b"0001" when (cpu_d_aVal = '0' and in_range) else b"0000";
+
+--   assert true --  cpu_d_aVal = '1'
+--     report  "e "  & SLV32HEX(addr) & 
+--     " addr " & SLV2str(addr(15 downto 0)) & LF & 
+--     " LO_AD " & integer'image(LO_ADDR) &
+--     " HI_AD " & integer'image(HI_ADDR) &
+--     " a_hi "    & SLV2STR(a_hi) &
+--     " a_lo "    & SLV2STR(a_lo) &
+--     " a_bits "  & SLV2STR(a_bits) &
+--     " a_mask "  & SLV32HEX(a_mask) & LF &
+--     " LO_RAM " & integer'image(LO_RAM) &
+--     " HI_RAM " & integer'image(HI_RAM) &
+--     " r_hi "    & SLV2STR(r_hi) &
+--     " r_lo "    & SLV2STR(r_lo) &
+--     " r_mask "  & SLV32HEX(r_mask)
+--     severity NOTE;
       
 end architecture behavioral;
 --++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 
 
+
 --++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
--- I/O address decoding 
+-- busError address decoding 
 --++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 library IEEE;
 use IEEE.std_logic_1164.all;
@@ -844,9 +914,120 @@ use IEEE.numeric_std.all;
 use work.p_wires.all;
 use work.p_memory.all;
 
-entity io_addr_decode is              -- CPU side triggers access
+entity busError_addr_decode is          -- CPU side triggers access
   port (rst         : in  std_logic;
-        clk         : in  std_logic;    -- no use, except synch-ing asserts
+        cpu_d_aVal  : in  std_logic;    -- CPU data addr valid (active=0)
+        addr        : in  reg32;        -- CPU address
+        d_busError  : out std_logic);   -- decoded address not in range (act=0)
+end entity busError_addr_decode;
+
+architecture behavioral of busError_addr_decode is
+  -- Drives d_busError to '0' for data references matching neither RAM nor I/O base.
+  constant all_0  : std_logic_vector(31 downto 0) := (others=>'0'); -- not referenced here
+
+  -- I/O constants (used only by the disabled decoders and the debug reports below)
+  constant IO_RANGE : integer := IO_ADDR_RANGE * IO_MAX_NUM_DEVS;
+  constant LO_DEV : natural := 0;
+  constant HI_DEV : natural := log2_ceil(IO_RANGE-1);
+  constant IO_LO_ADDR  : integer := log2_ceil(IO_BASE_ADDR);
+  constant IO_HI_ADDR  : integer := log2_ceil(IO_BASE_ADDR + IO_RANGE - 1);
+  constant iin_r:std_logic_vector(IO_HI_ADDR downto IO_LO_ADDR) := (others=>'1');
+  constant ing_r:std_logic_vector(IO_HI_ADDR downto IO_LO_ADDR) := (others=>'0');
+  constant ioth:std_logic_vector(HI_SEL_BITS downto IO_HI_ADDR+1):=(others=>'1');
+  constant ing_o:std_logic_vector(HI_SEL_BITS downto IO_HI_ADDR+1):=(others=>'0');
+  constant x_hi   : std_logic_vector(31 downto HI_DEV)  := (others=>'1');
+  constant x_lo   : std_logic_vector(HI_DEV-1 downto 0) := (others=>'0');
+  constant x_mask : std_logic_vector := x_hi & x_lo;  -- 1..10..0
+
+  -- RAM constants (not used by the active range comparisons; see debug reports)
+  constant LO_ADDR : natural := log2_ceil(DATA_BASE_ADDR);
+  constant HI_ADDR : natural := log2_ceil(DATA_BASE_ADDR + DATA_MEM_SZ - 1);
+    
+  constant a_hi   : std_logic_vector(31 downto HI_ADDR+1) := (others=>'0');
+  constant a_lo   : std_logic_vector(LO_ADDR-1 downto 0)  := (others=>'0');
+  constant a_bits : std_logic_vector(HI_ADDR downto LO_ADDR) := (others=>'1');
+  constant a_mask : std_logic_vector := a_hi & a_bits & a_lo;  -- 0..0110..0
+
+  constant LO_RAM : natural := 0;
+  constant HI_RAM : natural := log2_ceil(DATA_MEM_SZ-1);
+  constant r_hi   : std_logic_vector(31 downto HI_RAM+1)   := (others=>'1');
+  constant r_lo   : std_logic_vector(HI_RAM downto LO_RAM) := (others=>'0');
+  constant r_mask : std_logic_vector := r_hi & r_lo;  -- 1..10..0
+
+  constant in_r : std_logic_vector(HI_ADDR downto LO_ADDR) := (others => '1');
+  constant ng_r : std_logic_vector(HI_ADDR downto LO_ADDR) := (others => '0');
+  constant oth  : std_logic_vector(HI_SEL_BITS downto HI_ADDR+1):=(others=>'1');
+  constant ng_o : std_logic_vector(HI_SEL_BITS downto HI_ADDR+1):=(others=>'0');
+
+  signal in_range, io_in_range : boolean;  -- address matches RAM / I/O base
+
+begin
+
+  -- in_range <= ( rst = '1' and
+  --               ((addr and a_mask) = x_DATA_BASE_ADDR) and
+  --               ((addr and r_mask) = x_DATA_BASE_ADDR) );
+
+  -- io_in_range <= ( (rst = '1') and ((addr and x_mask) = x_IO_BASE_ADDR) );
+  -- active decoders: compare the address select bits against each base address
+  in_range <= ( addr(HI_SEL_BITS downto LO_SEL_BITS)
+                =
+                x_DATA_BASE_ADDR(HI_SEL_BITS downto LO_SEL_BITS) );
+
+  io_in_range <= (addr(HI_SEL_BITS downto LO_SEL_BITS)
+                  =
+                  x_IO_BASE_ADDR(HI_SEL_BITS downto LO_SEL_BITS));
+
+  -- in_range <= ((addr(HI_ADDR downto LO_ADDR) and in_r) /= ng_r) and
+  --             ((addr(HI_SEL_BITS downto HI_ADDR+1) and oth) = ng_o);
+
+  -- io_in_range <= ((addr(IO_HI_ADDR downto IO_LO_ADDR) and iin_r)/=ing_r)and
+  --                ((addr(HI_SEL_BITS downto IO_HI_ADDR+1) and ioth) = ing_o);
+  -- error ('0') only while rst = '1', cpu_d_aVal = '0', and neither range matched
+  d_busError <= '0' when ( (rst = '1') and (cpu_d_aVal = '0') and
+                           (not(in_range) and not(io_in_range)) ) else '1';
+
+  assert true -- never reports (condition is constant true); was: cpu_d_aVal = '1'
+    report  "e "  & SLV32HEX(addr) & 
+    " addr " & SLV2str(addr(15 downto 0)) & LF & 
+    " LO_AD " & integer'image(LO_ADDR) &
+    " HI_AD " & integer'image(HI_ADDR) &
+    " a_hi "    & SLV2STR(a_hi) &
+    " a_lo "    & SLV2STR(a_lo) &
+    " a_bits "  & SLV2STR(a_bits) &
+    " a_mask "  & SLV32HEX(a_mask) & LF &
+    " LO_RAM " & integer'image(LO_RAM) &
+    " HI_RAM " & integer'image(HI_RAM) &
+    " r_hi "    & SLV2STR(r_hi) &
+    " r_lo "    & SLV2STR(r_lo) &
+    " r_mask "  & SLV32HEX(r_mask)
+    severity NOTE;
+  
+  assert true -- never reports (constant true); was: cpu_d_aVal = '1' and io_busError
+    report  "e "  & SLV32HEX(addr) & 
+    " addr " & SLV2str(addr(15 downto 0)) & LF & 
+    " x_hi "    & SLV2STR(x_hi) &
+    " x_lo "    & SLV2STR(x_lo) &
+    " x_mask "  & SLV32HEX(x_mask) & LF &
+    " LO_DEV " & integer'image(LO_DEV) &
+    " HI_DEV " & integer'image(HI_DEV)
+    severity NOTE;
+  
+end architecture behavioral;
+--++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+
+
+--++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+-- I/O address decoding 
+--++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+library IEEE;
+use IEEE.std_logic_1164.all;
+use IEEE.numeric_std.all;
+use work.p_wires.all;
+use work.p_memory.all;
+
+entity io_addr_decode is                -- CPU side triggers access
+  port (rst, clk    : in  std_logic;    -- clk separates back-to-back refs
         cpu_d_aVal  : in  std_logic;    -- CPU data addr valid (active=0)
         addr        : in  reg32;        -- CPU address
         dev_select  : out reg4;         -- select input to CPU
@@ -863,21 +1044,52 @@ entity io_addr_decode is              -- CPU side triggers access
         keybd_sel   : out std_logic;    -- telephone keyboard (act=0)
         lcd_sel     : out std_logic;    -- telephone keyboard (act=0)
         not_waiting : in  std_logic);   -- no other device is waiting
-  constant LO_ADDR : integer := log2_ceil(IO_ADDR_RANGE);
-  constant HI_ADDR : integer := LO_ADDR + (IO_MAX_NUM_DEVS - 1);
 end entity io_addr_decode;
 
 architecture behavioral of io_addr_decode is
+  constant LO_SEL_ADDR : integer := log2_ceil(IO_ADDR_RANGE);
+  constant HI_SEL_ADDR : integer := LO_SEL_ADDR + log2_ceil(IO_ADDR_BITS);
+
+  constant IO_RANGE : integer := IO_ADDR_RANGE * IO_MAX_NUM_DEVS;
+  constant LO_ADDR  : integer := log2_ceil(IO_BASE_ADDR);
+  constant HI_ADDR  : integer := log2_ceil(IO_BASE_ADDR + IO_RANGE - 1);
+  constant in_r : std_logic_vector(HI_ADDR downto LO_ADDR) := (others => '1');
+  constant ng_r : std_logic_vector(HI_ADDR downto LO_ADDR) := (others => '0');
+  constant oth  : std_logic_vector(HI_SEL_BITS downto HI_ADDR+1):=(others=>'1');
+  constant ng_o : std_logic_vector(HI_SEL_BITS downto HI_ADDR+1):=(others=>'0');
+  constant all_0  : std_logic_vector(31 downto 0)         := (others=>'0');
+
+  -- I/O constants
+  constant LO_DEV : natural := 0;
+  constant HI_DEV : natural := log2_ceil(IO_RANGE-1);
+
+  constant x_hi   : std_logic_vector(31 downto HI_DEV)  := (others=>'1');
+  constant x_lo   : std_logic_vector(HI_DEV-1 downto 0) := (others=>'0');
+  constant x_mask : std_logic_vector := x_hi & x_lo;  -- 1..10..0
+
+  signal in_range : boolean;
   signal aVal : std_logic;
+  signal dev  : integer;                    -- device index; selects U_decode case
 begin
 
-  aVal <= '0' when ( cpu_d_aVal = '0' and rst = '1' and not_waiting = '1'
-                     and (addr(HI_SEL_BITS downto LO_SEL_BITS)
-                          =
-                          x_IO_BASE_ADDR(HI_SEL_BITS downto LO_SEL_BITS)) )
-          else '1';
+  -- in_range <= ((addr and x_mask) = x_IO_BASE_ADDR);
+
+  -- in_range <= ((addr(HI_ADDR downto LO_ADDR) and in_r) /= ng_r) and
+  --             ((addr(HI_SEL_BITS downto HI_ADDR+1) and oth) = ng_o);
+
+  -- in_range <= (addr(HI_SEL_BITS downto LO_SEL_BITS)
+  --                =
+  --                x_IO_BASE_ADDR(HI_SEL_BITS downto LO_SEL_BITS));
+
+  in_range <= ( ((addr(HI_ADDR downto LO_ADDR) and in_r) /= ng_r) and
+                ((addr(HI_SEL_BITS downto HI_ADDR+1) and oth) = ng_o) );
+
+  dev <= to_integer(signed(addr(HI_SEL_ADDR downto LO_SEL_ADDR)));
   
-  U_decode: process(aVal, addr)
+  aVal <= '0' when ( cpu_d_aVal = '0' and rst = '1' and not_waiting = '1' and
+                     in_range ) else '1';
+
+  U_decode: process(aVal, addr, dev)
     variable dev_sel    : reg4;
     constant is_noise   : integer := 0;
     constant is_print   : integer := 2;
@@ -907,7 +1119,7 @@ begin
     keybd_sel   <= '1';
     lcd_sel     <= '1';
 
-    case to_integer(signed(addr(HI_ADDR downto LO_ADDR))) is
+    case dev is -- to_integer(signed(addr(HI_ADDR downto LO_ADDR))) is
       when  0 => dev_sel     := std_logic_vector(to_signed(is_print, 4));
                  print_sel   <= aVal;
       when  1 => dev_sel     := std_logic_vector(to_signed(is_stdout, 4));
diff --git a/cMIPS/bin/elf2mif.sh b/cMIPS/bin/elf2mif.sh
index 8a86c330b258e11d35ce5188a372d406f18d6438..8538abaa7d42e025c27027416fde3195e6296eaa 100755
--- a/cMIPS/bin/elf2mif.sh
+++ b/cMIPS/bin/elf2mif.sh
@@ -2,6 +2,7 @@
 
 # set -x
 
+
 if [ ! -v tree ] ; then
   # you must set the location of the cMIPS root directory in the variable tree
   # tree=${HOME}/cMIPS
@@ -54,17 +55,18 @@ x_ROM_SIZE=$(sed -n '/x_INST_MEM_SZ/s/.*:= x"\(.*\)".*$/\1/p' $tree/vhdl/package
 ROM_SZ=$((16#$x_ROM_SIZE))
 
 mif=ROM.mif
+tmp=ROM.tmp
 
 mips-objdump -z -D -EL --section .text $elf |\
-    sed -e '1,6d' -e '/^$/d' -e '/^ /!d' -e 's:\t: :g' -e 's#^ *\([a-f0-9]*\): *\(........\)  *\(.*\)$#\2;#' |\
-    awk 'BEGIN{c='$ROM_BASE';} //{ printf "%d : %s\n",c,$1 ; c=c+1; }' > xxxx
+    sed -e '1,6d' -e '/^$/d' -e '/^ /!d' -e 's:\t: :g' \
+        -e 's#^ *\([a-f0-9]*\): *\(........\)  *\(.*\)$#\2;#' |\
+    awk 'BEGIN{c='$ROM_BASE';} //{ printf "%d : %s\n",c,$1 ; c=c+1; }' > $tmp
 
 echo -e "\n-- cMIPS code\n\nDEPTH=${ROM_SZ};\nWIDTH=32;\n" > $mif
 echo -e "ADDRESS_RADIX=DEC;\nDATA_RADIX=HEX;\nCONTENT BEGIN" >> $mif 
-cat xxxx >> $mif
+cat $tmp >> $mif
 echo "END;" >> $mif
 
-rm -f xxxx
 
 
 
@@ -79,17 +81,21 @@ RAM_SZ=$((16#$x_RAM_SIZE))
 
 
 mif=RAM.mif
+tmp=RAM.tmp
 
 mips-objdump -z -D -EL --section .data --section .rodata --section rodata1 --section .data1 --section .sdata --section .lit8 --section .lit4 --section .sbss --section .bss   $elf |\
-    sed -e '1,6d' -e '/^$/d' -e '/^ /!d' -e 's:\t: :g' -e 's#^ *\([a-f0-9]*\): *\(........\)  *\(.*\)$#\2;#' |\
-    awk 'BEGIN{c='$RAM_BASE';} //{ printf "%d : %s\n",c,$1 ; c=c+1; }' > xxxx
+    sed -e '1,6d' -e '/^$/d' -e '/^ /!d' -e 's:\t: :g' \
+        -e 's#^ *\([a-f0-9]*\): *\(........\)  *\(.*\)$#\2;#' |\
+    awk 'BEGIN{c='$RAM_BASE';} //{ printf "%d : %s\n",c,$1 ; c=c+1; }' > $tmp
 
 echo -e "\n-- cMIPS data\n\nDEPTH=${RAM_SZ};\nWIDTH=32;\n" > $mif
 echo -e "ADDRESS_RADIX=DEC;\nDATA_RADIX=HEX;\nCONTENT BEGIN" >> $mif 
-cat xxxx >> $mif
+cat $tmp >> $mif
 echo "END;" >> $mif
 
-rm -f xxxx
+
+# 
+rm -f {ROM,RAM}.tmp
 
 
 exit 0
diff --git a/cMIPS/include/syn_start.s b/cMIPS/include/syn_start.s
index 34e35cdefaf4b32f30897918323c99adb2553a3a..34297fd358f5a9df59974875a639f5dbe10d018c 100644
--- a/cMIPS/include/syn_start.s
+++ b/cMIPS/include/syn_start.s
@@ -2,6 +2,7 @@
 	##== synthesis version of startup code ===========================
 	##
 	##   simple startup code for synthesis
+	##
 
 	.include "cMIPS.s"
 	.text
@@ -20,21 +21,75 @@
         ## reset leaves processor in kernel mode, all else disabled
         ##
 _start:	nop
-	li   $k0, 0x10000000
-        mtc0 $k0, cop0_STATUS
 
-        li   $k0, MMU_WIRED
+        # get physical page number for 2 pages at the bottom of RAM, for .data
+        #  needed so systems without a page table will not break
+        #  read TLB[4] and write it to TLB[2]
+        li    $k0, 4
+        mtc0  $k0, cop0_Index
+        ehb
+        tlbr
+        li    $k1, 2
+        mtc0  $k1, cop0_Index
+        ehb
+        tlbwi
+
+
+        #  then set another mapping onto TLB[4], to avoid replicated entries
+        li    $a0, ( (x_DATA_BASE_ADDR + 8*4096) >>12 )
+        sll   $a2, $a0, 12      # tag for RAM[8,9] double-page
+        mtc0  $a2, cop0_EntryHi
+
+        li    $a0, ((x_DATA_BASE_ADDR + 8*4096) >>12 )
+        sll   $a1, $a0, 6       # RAM[8] (even)
+        ori   $a1, $a1, 0b00000000000000000000000000000111 # ccc=0, d,v,g1
+        mtc0  $a1, cop0_EntryLo0
+
+        li    $a0, ( (x_DATA_BASE_ADDR + 9*4096) >>12 )
+        sll   $a1, $a0, 6       # RAM[9] (odd)
+        ori   $a1, $a1, 0b00000000000000000000000000000111 # ccc=0, d,v,g1
+        mtc0  $a1, cop0_EntryLo1
+
+        # and write it to TLB[4]; as with the Index writes above, an ehb
+        li    $k0, 4            #  separates the CP0 updates from tlbwi
+        mtc0  $k0, cop0_Index
+        ehb
+        tlbwi
+
+
+        # pin down first four TLB entries: ROM[0], RAM[0], stack and I/O
+        li   $k0, 4
         mtc0 $k0, cop0_Wired
 
+
+        # initialize SP at top of RAM: ramTop - 16
+        li   $sp, ((x_DATA_BASE_ADDR+x_DATA_MEM_SZ) - 16)
+
+        # set STATUS, cop0, hw interrupt IRQ7,IRQ6,IRQ5 enabled, user mode
+        li   $k0, 0x1000e011
+        mtc0 $k0, cop0_STATUS
+
+	
         j main
         nop
 
+	##
+	## signal normal program ending
+	##
 exit:	
-_exit:	j exit	  # wait forever
+_exit:	la   $k0, HW_dsp7seg_addr  	# 7 segment display
+	li   $k1, 0x0311		# display .1.1
+	sw   $k1, 0($k0)		# write to 7 segment display
+
+	j exit	  # wait forever
 	nop
 	.end _start
-	
 
+	
+        ##
+        ##================================================================
+        ## exception vector_0000 TLBrefill
+        ##
         .org x_EXCEPTION_0000,0
 _excp_0000:
 	la   $k0, HW_dsp7seg_addr  	# 7 segment display
@@ -42,15 +97,26 @@ _excp_0000:
 	sw   $k1, 0($k0)		# write to 7 segment display
 h0000:	j    h0000			# wait forever
 	nop
-	
-        .org x_EXCEPTION_0100,0
+
+
+        ##
+        ##================================================================
+        ## exception vector_0100 Cache Error (hw not implemented)
+        ##   show .8.8 on the 7-segment display and wait forever
+        ##
+         .org x_EXCEPTION_0100,0
 _excp_0100:
 	la   $k0, HW_dsp7seg_addr  	# 7 segment display
 	li   $k1, 0x0388		# display .8.8
 	sw   $k1, 0($k0)		# write to 7 segment display
 h0100:	j    h0100			# wait forever
 	nop
-	
+
+
+        ##
+        ##================================================================
+        ## handler for all exceptions except interrupts and TLBrefill
+        ##
         .org x_EXCEPTION_0180,0
 _excp_0180:
 	la   $k0, HW_dsp7seg_addr  	# 7 segment display
@@ -58,7 +124,12 @@ _excp_0180:
 	sw   $k1, 0($k0)		# write to 7 segment display
 h0180:	j    h0180			# wait forever
 	nop
-	
+
+
+        ##
+        ##===============================================================
+        ## interrupt handlers at exception vector 0200
+        ##
         .org x_EXCEPTION_0200,0
 _excp_0200:
 	la   $k0, HW_dsp7seg_addr  	# 7 segment display
@@ -67,7 +138,12 @@ _excp_0200:
 h0200:	j    h0200			# wait forever
 	nop
 	
-        .org x_EXCEPTION_BFC0,0
+
+        ##
+        ##================================================================
+        ## exception vector_BFC0 NMI or soft-reset
+        ##
+	.org x_EXCEPTION_BFC0,0
 _excp_BFC0:
 	la   $k0, HW_dsp7seg_addr  	# 7 segment display
 	li   $k1, 0x0355		# display .5.5
diff --git a/cMIPS/tests/mac_kbd_lcd.c b/cMIPS/tests/mac_kbd_lcd.c
index 5b6de11ed83701b6ab5fb330f38f3b46fb2d82cb..670765dfaf312f5f5c2851621063c752c67f26f3 100644
--- a/cMIPS/tests/mac_kbd_lcd.c
+++ b/cMIPS/tests/mac_kbd_lcd.c
@@ -1,60 +1,12 @@
 #include "cMIPS.h"
 
-typedef struct control { // control register fields (uses only ls byte)
-  int ign   : 24,        // ignore uppermost bits
-    rts     : 1,         // Request to Send
-    ign2    : 2,         // bits 6,5 ignored
-    intTX   : 1,         // interrupt on TX buffer empty (bit 4)
-    intRX   : 1,         // interrupt on RX buffer full (bit 3)
-    speed   : 3;         // 4,8,16..256 tx-rx clock data rates  (bits 0..2)
-} Tcontrol;
-
-typedef struct status { // status register fields (uses only ls byte)
-  int s;
-  // int ign   : 24,      // ignore uppermost bits
-  //  ign7    : 1,        // ignored (bit 7)
-  //  txEmpty : 1,        // TX register is empty (bit 6)
-  //  rxFull  : 1,        // octet available from RX register (bit 5)
-  //  int_TX_empt: 1,     // interrupt pending on TX empty (bit 4)
-  //  int_RX_full: 1,     // interrupt pending on RX full (bit 3)
-  //  ign2    : 1,        // ignored (bit 2)
-  //  framing : 1,        // framing error (bit 1)
-  //  overun  : 1;        // overun error (bit 0)
-} Tstatus;
-
-#define RXfull  0x00000020
-#define TXempty 0x00000040
-
-
-typedef union ctlStat { // control + status on same address
-  Tcontrol  ctl;        // write-only
-  Tstatus   stat;       // read-only
-} TctlStat;
-
-typedef union data {    // data registers on same address
-  int tx;               // write-only
-  int rx;               // read-only
-} Tdata;
-
-typedef struct serial {
-  TctlStat cs;
-  Tdata    d;
-} Tserial;
-
-
-#if 0
-char s[32]; // = "the quick brown fox jumps over the lazy dog";
-#else
-// char s[32]; // = "               ";
-#endif
-
-int main(void) { // receive a string through the UART serial interface
-                 // and write it to the LCD display
+//
+// read a key from keypad and write it to the LCD display
+//
+
+int main(void) {
   int i;
   volatile int state;
-  volatile Tserial *uart;  // tell GCC not to optimize away code
-  volatile Tstatus status;
-  Tcontrol ctrl;
   int c, k, s;
 
   LCDinit();
diff --git a/cMIPS/vhdl/aux.vhd b/cMIPS/vhdl/aux.vhd
index b1bcd499adf77fc0e466011a023fdd6b811c5724..a091c2689929e7e71aaf4aa192d2d7647284889c 100644
--- a/cMIPS/vhdl/aux.vhd
+++ b/cMIPS/vhdl/aux.vhd
@@ -33,7 +33,7 @@ end register32;
 
 architecture functional of register32 is
 begin
-  process(clk, rst, ld)
+  process(clk, rst)
     variable state: reg32;
   begin
     if rst = '0' then
@@ -67,7 +67,7 @@ end registerN;
 
 architecture functional of registerN is
 begin
-  process(clk, rst, ld)
+  process(clk, rst)
     variable state: std_logic_vector(NUM_BITS-1 downto 0);
   begin
     if rst = '0' then
@@ -138,27 +138,26 @@ entity countNup is
 end countNup;
 
 architecture functional of countNup is
+  signal count: std_logic_vector(NUM_BITS downto 0);
 begin
 
-  process(clk, rst, ld)
+  process(clk, rst)
     constant ZERO : std_logic_vector(NUM_BITS downto 0) := (others => '0');
-    variable count: std_logic_vector(NUM_BITS downto 0);
   begin
     if rst = '0' then
-      count := ZERO;
-    else
-      if rising_edge(clk) then
-        if ld = '1' then
-          count := '0' & D;
-        elsif en = '1' then
-          count := std_logic_vector(unsigned(count) + 1);
-        end if;
+      count <= ZERO;
+    elsif rising_edge(clk) then
+      if ld = '1' then
+        count <= '0' & D;
+      elsif en = '1' then
+        count <= std_logic_vector(unsigned(count) + 1);
       end if;
     end if;
-    Q  <= count((NUM_BITS - 1) downto 0);
-    co <= count(NUM_BITS);
   end process;
 
+  Q  <= count((NUM_BITS - 1) downto 0);
+  co <= count(NUM_BITS);
+  
 end functional;
 --++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 
@@ -166,7 +165,6 @@ end functional;
 
 
 
-
 -- +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 -- ring-counter, generates four-phase internal clock, on falling-edge
 -- +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
@@ -234,6 +232,35 @@ end functional;
 -- +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 
 
+-- +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+-- D-type flip-flop with asynchronous reset (active 0)
+-- +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+library IEEE; use IEEE.std_logic_1164.all;
+
+entity FFDsimple is
+  port(clk, rst : in std_logic;
+        D : in  std_logic;
+        Q : out std_logic);
+end FFDsimple;
+
+-- Q is cleared while rst = '0' (no clock edge required) and otherwise
+-- takes the value of D at every rising edge of clk.
+architecture functional of FFDsimple is
+begin
+
+  U_ffd: process(clk, rst)
+  begin
+    if rst = '0' then
+      Q <= '0';
+    elsif rising_edge(clk) then
+      Q <= D;
+    end if;
+  end process U_ffd;
+
+end functional;
+-- +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+
 -- +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 -- T-type flip-flop with reset (active 0)
 -- +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
diff --git a/cMIPS/vhdl/core.vhd b/cMIPS/vhdl/core.vhd
index b966cbc7ab2a08b7fef9737f6a06507f61d8103f..ffbc964a004e15c88e5ad20c03d09f6caad98eea 100644
--- a/cMIPS/vhdl/core.vhd
+++ b/cMIPS/vhdl/core.vhd
@@ -852,7 +852,8 @@ begin
 
       -- simulation aborted by instruction "wait N"
       assert not(exception = exWAIT and syscall_n /= x"80000")
-        report LF & "ABORTED at EPC="& SLV32HEX(EPC) &
+        report LF & " PC="& SLV32HEX(PC) &
+        " EPC="& SLV32HEX(EPC) &
         " bad="& SLV32HEX(BadVAddr) &
         " opc="& SLV2STR(opcode) & " wait=" & SLV2STR(syscall_n(7 downto 0)) &
         " instr=" & SLV32HEX(RF_instruction) &
diff --git a/cMIPS/vhdl/io.vhd b/cMIPS/vhdl/io.vhd
index 728fd1df909b53cf6d33ca82d87d430e3c06ed73..e285c5a022a29c6a558411c97e16c87eaa259142 100644
--- a/cMIPS/vhdl/io.vhd
+++ b/cMIPS/vhdl/io.vhd
@@ -66,9 +66,10 @@ end behavioral;
 -- ++ from_stdin +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 
 
+
 --++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
--- peripheral: to_stdout
---             print a signle character to stdout
+-- peripheral: print_data
+--             print an integer to stdout, 32bit hexadecimal
 --++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 library IEEE;
 use IEEE.std_logic_1164.all;
@@ -76,40 +77,36 @@ use IEEE.numeric_std.all;
 use std.textio.all;
 use work.p_wires.all;
 
-entity to_stdout is
+entity print_data is
   port (rst     : in  std_logic;
         clk     : in  std_logic;
         sel     : in  std_logic;
         wr      : in  std_logic;
-        data    : in  std_logic_vector);
-end to_stdout;
+        data    : in  reg32);
+end print_data;
+
+architecture behavioral of print_data is
 
-architecture behavioral of to_stdout is
-  
   file output : text open write_mode is "STD_OUTPUT";
 
 begin
 
-  U_WRITE_OUT: process(clk,sel)
+  U_WRITE_OUT: process(sel,clk)
     variable msg : line;
   begin
     if falling_edge(clk) and sel = '0' then
-      if (data(7 downto 0) = x"00") or (data(7 downto 0) = x"0a") then
-        writeline( output, msg );
-      else
-        write(msg, character'val(to_integer( unsigned(data(7 downto 0)))));
-      end if;
+      write ( msg, string'(SLV32HEX(data)) );
+      writeline( output, msg );
     end if;
   end process U_WRITE_OUT;
-  
-end behavioral;
--- ++ to_stdout +++++++++++++++++++++++++++++++++++++++++++++++++++++++++*
 
+end behavioral;
+-- ++ print_data +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 
 
 --++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
--- peripheral: print_data
---             print an integer to stdout, 32bit hexadecimal
+-- peripheral: to_stdout
+--             print a single character to stdout
 --++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 library IEEE;
 use IEEE.std_logic_1164.all;
@@ -117,31 +114,34 @@ use IEEE.numeric_std.all;
 use std.textio.all;
 use work.p_wires.all;
 
-entity print_data is
+entity to_stdout is
   port (rst     : in  std_logic;
         clk     : in  std_logic;
         sel     : in  std_logic;
         wr      : in  std_logic;
-        data    : in  reg32);
-end print_data;
-
-architecture behavioral of print_data is
+        data    : in  std_logic_vector);
+end to_stdout;
 
+architecture behavioral of to_stdout is
+  
   file output : text open write_mode is "STD_OUTPUT";
 
 begin
 
-  U_WRITE_OUT: process(sel,clk)
+  U_WRITE_OUT: process(clk,sel)
     variable msg : line;
   begin
     if falling_edge(clk) and sel = '0' then
-      write ( msg, string'(SLV32HEX(data)) );
-      writeline( output, msg );
+      if (data(7 downto 0) = x"00") or (data(7 downto 0) = x"0a") then
+        writeline( output, msg );
+      else
+        write(msg, character'val(to_integer( unsigned(data(7 downto 0)))));
+      end if;
     end if;
   end process U_WRITE_OUT;
-
+  
 end behavioral;
--- ++ print_data +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+-- ++ to_stdout +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 
 
 
@@ -206,9 +206,9 @@ end behavioral;                         -- write_file_data
 --++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 -- peripheral: read_data_from_file
 --             read one 32bit integer from file "input.data"
---  if not EOF then read data from file
---  else status = 1
---  on a read, return last status (EOF=1, otherwise=0)
+--  if not EOF then read data from file
+--  else status <= 1
+--  on a read, return last status (EOF=1 or otherwise=0)
 --++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 library IEEE;
 use IEEE.std_logic_1164.all;
@@ -286,6 +286,7 @@ end behavioral;
 --++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 library IEEE;
 use IEEE.std_logic_1164.all;
+use IEEE.numeric_std.all;
 use work.p_wires.all;
 
 entity do_interrupt is
@@ -318,11 +319,11 @@ architecture behavioral of do_interrupt is
          co:           out std_logic);
   end component countNup;
 
-  component FFD is
-    port(clk, rst, set : in std_logic;
+  component FFDsimple is
+    port(clk, rst : in std_logic;
          D : in  std_logic;
          Q : out std_logic);
-  end component FFD;
+  end component FFDsimple;
 
   signal Dlimit, Qlimit, Q: c_width;
   signal ld_cnt, ld_reg, en, cnt_en, int_en, equals : std_logic;
@@ -342,11 +343,11 @@ begin
   U_COUNTER: countNup generic map (NUM_BITS)
     port map (clk, rst, ld_cnt, en, START_COUNT, Q, open);
 
-  i_ena <= data_inp(31) when (sel='0' and wr='0') else int_en;
-  U_INTERR_EN: FFD port map (clk, rst, '1', i_ena, int_en);
-
   c_ena <= data_inp(30) when (sel='0' and wr='0') else cnt_en;
-  U_COUNT_EN:  FFD port map (clk, rst, '1', c_ena, cnt_en);
+  U_COUNT_EN:  FFDsimple port map (clk, rst, c_ena, cnt_en);
+
+  i_ena <= data_inp(31) when (sel='0' and wr='0') else int_en;
+  U_INTERR_EN: FFDsimple port map (clk, rst, i_ena, int_en);
 
   equals <= '1' when (Q = Qlimit(NUM_BITS-1 downto 0)) else '0';
   
@@ -763,6 +764,10 @@ architecture behavioral of LCD_display is
     port(clk, rst, set, D : in std_logic; Q : out std_logic);
   end component FFD;
 
+  component FFDsimple is
+    port(clk, rst, D : in std_logic; Q : out std_logic);
+  end component FFDsimple;
+
   type lcd_state is (st_init, st_idle, st_n, st_n1, st_n2, st_n3,
                      st_n4, st_n5, st_n6, st_n7, st_n8, st_n9, st_na, st_nb);
   attribute SYN_ENCODING of lcd_state : type is "safe";
@@ -780,12 +785,12 @@ begin
   U_WAIT_ON_READS: component wait_states generic map (1)
     port map (rst, clk, sel, wait1);
 
-  U_WAIT2: FFD port map (clk, rst, '1', wait1, wait2);
+  U_WAIT2: FFDsimple port map (clk, rst, wait1, wait2);
 
   rdy <= not(wait1 or wait2 or waiting);  -- wait for 260ns
 
   sel_rs <= addr when sel = '0' else RS;
-  U_INPUT_RS: FFD port map (clk, rst, '1', sel_rs, RS);
+  U_INPUT_RS: FFDsimple port map (clk, rst, sel_rs, RS);
 
   U_INPUT: registerN generic map (NUM_BITS, START_VALUE)
   port map (clk, rst, sel, data_inp(NUM_BITS-1 downto 0), inp_data);
@@ -795,12 +800,12 @@ begin
   data_out(31 downto NUM_BITS) <= (others => 'X');
 
   -- TESTING ONLY
-  out_data <= b"00000000" when RW = '1' else (others => 'X');
-  -- out_data <= LCD_DATA when RW = '1' else (others => 'Z');
+  -- out_data <= b"00000000" when RW = '1' else (others => 'X');
+  out_data <= LCD_DATA when RW = '1' else (others => 'Z');
   
   LCD_DATA <= inp_data when RW = '0' else (others => 'Z');
 
-  LCD_RS  <= RS;          -- LCD register select 0=ctrl, 1=data
+  LCD_RS   <= RS;         -- LCD register select 0=ctrl, 1=data
 
   sel_rw <= wr when sel = '0' else RW;
   U_INPUT_RW: FFD port map (clk, '1', rst, sel_rw, RW);
diff --git a/cMIPS/vhdl/packageMemory.vhd b/cMIPS/vhdl/packageMemory.vhd
index 8edf7bcc3d775edf7bb1ff433ecf95cc6b611cb2..f5e0a9fb7ffc2d0ca848f272170908c2f6a15c3b 100644
--- a/cMIPS/vhdl/packageMemory.vhd
+++ b/cMIPS/vhdl/packageMemory.vhd
@@ -41,9 +41,9 @@ package p_MEMORY is
   -- begin DO NOT change these names as several scripts depend on them --
   --  you may change the values, not names neither formatting          --
   constant x_INST_BASE_ADDR : reg32   := x"00000000";
-  constant x_INST_MEM_SZ    : reg32   := x"00004000";
+  constant x_INST_MEM_SZ    : reg32   := x"00002000";
   constant x_DATA_BASE_ADDR : reg32   := x"00010000";
-  constant x_DATA_MEM_SZ    : reg32   := x"00008000";
+  constant x_DATA_MEM_SZ    : reg32   := x"00002000";
   constant x_IO_BASE_ADDR   : reg32   := x"0F000000";
   constant x_IO_MEM_SZ      : reg32   := x"00002000";
   constant x_IO_ADDR_RANGE  : reg32   := x"00000020";
@@ -64,11 +64,10 @@ package p_MEMORY is
   constant IO_MEM_SZ       : integer := to_integer(signed(x_IO_MEM_SZ));
   constant IO_ADDR_RANGE   : integer := to_integer(signed(x_IO_ADDR_RANGE));
 
-  constant IO_ADDR_MASK    : integer := (0 - IO_ADDR_RANGE);
-  constant x_IO_ADDR_MASK  : reg32   := std_logic_vector(to_signed(0 - IO_ADDR_RANGE, 32));
-
   -- maximum number of IO devices, must be a power of two.
   constant IO_MAX_NUM_DEVS : integer := 16;
+
+  constant IO_ADDR_BITS : integer := log2_ceil(IO_MAX_NUM_DEVS * IO_ADDR_RANGE);
   
   -- I/O addresses are IO_ADDR_RANGE apart 
   constant IO_PRINT_ADDR   : integer := IO_BASE_ADDR;
diff --git a/cMIPS/vhdl/tb_cMIPS.vhd b/cMIPS/vhdl/tb_cMIPS.vhd
index 1fca33fb53c93ea8db48b766858484596ad5bc8e..982bacb25a4a871e5da62e99f2ff13090fb3649f 100644
--- a/cMIPS/vhdl/tb_cMIPS.vhd
+++ b/cMIPS/vhdl/tb_cMIPS.vhd
@@ -698,11 +698,10 @@ begin
 
   in_range <= (addr(HI_ADDR downto LO_ADDR) = PREFIX);
 
-  aVal <= '0' when ( cpu_i_aVal = '0' and rst = '1' and in_range ) else
+  aVal <= '0' when ( cpu_i_aVal = '0' and in_range ) else
           '1';
 
-  i_busError <= '0' when ( cpu_i_aVal = '0' and rst = '1'
-                           and not(in_range) ) else
+  i_busError <= '0' when ( cpu_i_aVal = '0' and not(in_range) ) else
                 '1';
   
 end architecture behavioral;
@@ -782,7 +781,7 @@ begin
                  RAM_ADDR_TOP)
               );
   
-  aVal <= '0' when (rst = '1' and cpu_d_aVal = '0' and in_range) else '1';
+  aVal <= '0' when (cpu_d_aVal = '0' and in_range) else '1';
 
   dev_select <= b"0001" when (cpu_d_aVal = '0' and in_range) else b"0000";
 
@@ -858,10 +857,10 @@ begin
                 ((addr and a_mask) = x_DATA_BASE_ADDR) and
                 ((addr and r_mask) = x_DATA_BASE_ADDR) );
 
-  io_in_range <= ( (rst = '1') and ((addr and x_mask) = x_IO_BASE_ADDR) );
+  io_in_range <= ( (addr and x_mask) = x_IO_BASE_ADDR );
 
   
-  d_busError <= '0' when ( (rst = '1') and (cpu_d_aVal = '0') and
+  d_busError <= '0' when ( (cpu_d_aVal = '0') and
                            (not(in_range) and not(io_in_range)) ) else '1';
 
   
@@ -926,7 +925,7 @@ end entity io_addr_decode;
 
 architecture behavioral of io_addr_decode is
   constant LO_SEL_ADDR : integer := log2_ceil(IO_ADDR_RANGE);
-  constant HI_SEL_ADDR : integer := LO_SEL_ADDR + (IO_MAX_NUM_DEVS - 1);
+  constant HI_SEL_ADDR : integer := LO_SEL_ADDR + log2_ceil(IO_ADDR_BITS);
 
   constant IO_RANGE : integer := IO_ADDR_RANGE * IO_MAX_NUM_DEVS;
   constant LO_ADDR  : integer := log2_ceil(IO_BASE_ADDR);
@@ -955,9 +954,9 @@ begin
   in_range <= ((addr(HI_ADDR downto LO_ADDR) and in_r) /= ng_r) and
               ((addr(HI_SEL_BITS downto HI_ADDR+1) and oth) = ng_o);
 
-  dev <= to_integer(signed(addr(HI_SEL_ADDR downto LO_SEL_ADDR)));
+  dev <= to_integer(signed(addr(IO_ADDR_BITS downto LO_SEL_ADDR)));
   
-  aVal <= '0' when ( cpu_d_aVal = '0' and rst = '1' and not_waiting = '1' and
+  aVal <= '0' when ( cpu_d_aVal = '0' and not_waiting = '1' and
                      in_range ) else '1';
   
   U_decode: process(clk, aVal, addr, dev)
diff --git a/cMIPS/vhdl/uart.vhd b/cMIPS/vhdl/uart.vhd
index bb3e3228eeca0c702edafb7483ea653759ec5019..afed4fccd7ad62a8b72134eebe92b42c74f694a9 100644
--- a/cMIPS/vhdl/uart.vhd
+++ b/cMIPS/vhdl/uart.vhd
@@ -86,9 +86,9 @@ architecture estrutural of uart_int is
          Q:            out std_logic_vector);
   end component ser_par10;
 
-  component FFD is
-    port(clk, rst, set, D : in std_logic; Q : out std_logic);
-  end component FFD;
+  component FFDsimple is
+    port(clk, rst, D : in std_logic; Q : out std_logic);
+  end component FFDsimple;
 
   -- state machine for transmission-CPU interface
   type txcpu_state is (st_idle, st_check, st_done);
@@ -147,7 +147,7 @@ begin
 
   interr <= interr_TX_empty or interr_RX_full;
 
-  U_delay: FFD port map (clk, rst, '1', s_stat, sel_delayed);
+  U_delay: FFDsimple port map (clk, rst, s_stat, sel_delayed);
   
   -- TRANSMISSION ===========================================================
   U_txreg: register8 port map (clk,rst, s_tx, d_inp(7 downto 0), txreg);
@@ -160,7 +160,7 @@ begin
 
   tx_int_set <= ctrl(4) and tx_ld;
   d_int_tx_empty <= (interr_TX_empty or tx_int_set) and not(sel_delayed);
-  U_tx_int: FFD port map (clk, rst, '1', d_int_tx_empty, interr_TX_empty);
+  U_tx_int: FFDsimple port map (clk, rst, d_int_tx_empty, interr_TX_empty);
 
   
   -- this state machine contols the CPU-transmission interface -------------
@@ -340,9 +340,9 @@ begin
   U_receive: ser_par10 port map (clk, rst, rx_next, rxdat, sta_recv_sto);
   rxreg <= sta_recv_sto(8 downto 1);
 
-  U_edgeDetect0: FFD port map (clk, rst, '1', rxdat, rxdat_new);
-  U_edgeDetect1: FFD port map (clk, rst, '1', rxdat_new, rxdat_int);
-  U_edgeDetect2: FFD port map (clk, rst, '1', rxdat_int, rxdat_old);
+  U_edgeDetect0: FFDsimple port map (clk, rst, rxdat, rxdat_new);
+  U_edgeDetect1: FFDsimple port map (clk, rst, rxdat_new, rxdat_int);
+  U_edgeDetect2: FFDsimple port map (clk, rst, rxdat_int, rxdat_old);
   rxdat_1to0 <= rxdat_old and not(rxdat_new);
   
   -- framing error: 10th bit not a STOP=1 or 1st bit not a START=0
@@ -351,14 +351,14 @@ begin
                else '0';
 
   d_err_framing <= (a_framing or err_framing) and not(sel_delayed);
-  U_framing: FFD port map (clk, rst, '1', d_err_framing, err_framing);
+  U_framing: FFDsimple port map (clk, rst, d_err_framing, err_framing);
 
   d_err_overrun <= (a_overrun or err_overrun) and not(sel_delayed);
-  U_overrun: FFD port map (clk, rst, '1', d_err_overrun, err_overrun);
+  U_overrun: FFDsimple port map (clk, rst, d_err_overrun, err_overrun);
   
   rx_int_set   <= ctrl(3) and rx_done;
   d_rx_int_set <= (rx_int_set or interr_RX_full) and not(sel_delayed);
-  U_rx_int: FFD port map (clk, rst, '1', d_rx_int_set, interr_RX_full);
+  U_rx_int: FFDsimple port map (clk, rst, d_rx_int_set, interr_RX_full);
 
   
   -- SM controls reception-CPU interface -------------------------------
@@ -665,19 +665,21 @@ entity register8 is
 end register8;
 
 architecture functional of register8 is
-  signal value: reg8;
 begin
 
-  process(clk, rst, ld)
+  process(clk, rst)
+    variable value: reg8;
   begin
     if rst = '0' then
-      value <= x"00";
-    elsif ld = '1' and rising_edge(clk) then
-      value <= D;
+      value := x"00";
+    elsif rising_edge(clk) then
+      if ld = '1' then
+        value := D;
+      end if;
     end if;
+    Q <= value;
   end process;
 
-  Q <= value;
 end functional;
 -- +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++