From 6e5d3b1266018c463eb540e30776a83f0bd6983c Mon Sep 17 00:00:00 2001
From: Roberto Hexsel <roberto@inf.ufpr.br>
Date: Fri, 15 May 2015 16:15:10 -0300
Subject: [PATCH] TLB operating

---
 cMIPS/include/cMIPS.h            |   2 +-
 cMIPS/include/cMIPS.ld           |   2 +-
 cMIPS/include/cMIPS.s            |   8 +-
 cMIPS/include/cMIPSio.c          |  70 ++++---
 cMIPS/include/handlers.s         |  23 +--
 cMIPS/include/start.s            | 245 +++++++++++++++--------
 cMIPS/tests/badVAddrMM.expected  |  10 +-
 cMIPS/tests/doTests.sh           |   2 +-
 cMIPS/tests/lwFWDsw2.expected    |  42 ++--
 cMIPS/tests/merge.c              |   9 +-
 cMIPS/tests/mmu_context.expected |   7 +
 cMIPS/tests/mmu_context.s        | 253 ++++++++++++++++++++++++
 cMIPS/tests/mmu_refill.expected  |   4 +
 cMIPS/tests/mmu_refill.s         | 232 ++++++++++++++++++++++
 cMIPS/tests/mmu_tlbp.expected    |  20 +-
 cMIPS/tests/mmu_tlbp.s           |  95 +++++----
 cMIPS/tests/mmu_tlbwi.expected   |  39 ++--
 cMIPS/tests/mmu_tlbwi.s          | 138 ++++++-------
 cMIPS/tests/mmu_tlbwr.expected   |  12 +-
 cMIPS/tests/mmu_tlbwr.s          | 137 +++++++------
 cMIPS/vhdl/core.vhd              | 325 ++++++++++++++++++++-----------
 cMIPS/vhdl/exception.vhd         |   3 -
 cMIPS/vhdl/packageExcp.vhd       |  19 +-
 cMIPS/vhdl/packageMemory.vhd     |  29 +--
 cMIPS/vhdl/pipestages.vhd        |   3 +
 cMIPS/vhdl/tb_cMIPS.vhd          |   4 +-
 26 files changed, 1209 insertions(+), 524 deletions(-)
 create mode 100644 cMIPS/tests/mmu_context.expected
 create mode 100644 cMIPS/tests/mmu_context.s
 create mode 100644 cMIPS/tests/mmu_refill.expected
 create mode 100644 cMIPS/tests/mmu_refill.s

diff --git a/cMIPS/include/cMIPS.h b/cMIPS/include/cMIPS.h
index b486e82..a48386f 100644
--- a/cMIPS/include/cMIPS.h
+++ b/cMIPS/include/cMIPS.h
@@ -1,6 +1,6 @@
 
 #define x_INST_BASE_ADDR 0x00000000
-#define x_DATA_BASE_ADDR 0x00400000
+#define x_DATA_BASE_ADDR 0x04000000
 #define x_IO_BASE_ADDR   0x0F000000
 #define x_IO_MEM_SZ      0x00002000
 #define x_IO_ADDR_RANGE  0x00000020
diff --git a/cMIPS/include/cMIPS.ld b/cMIPS/include/cMIPS.ld
index 7912b2b..d698f31 100644
--- a/cMIPS/include/cMIPS.ld
+++ b/cMIPS/include/cMIPS.ld
@@ -2,7 +2,7 @@ SECTIONS
 {
        . = 0x00000000; /* x_INST_BASE_ADDR */
        .text : { *(.text .text.*) }
-       . = 0x00400000; /* x_DATA_BASE_ADDR */
+       . = 0x04000000; /* x_DATA_BASE_ADDR */
        .data    : { *(.data .data.*) }
        .rodata  : { *(.rodata .rodata.*) }
        .rodata1 : { *(.rodata1) }
diff --git a/cMIPS/include/cMIPS.s b/cMIPS/include/cMIPS.s
index de3b6be..39c4b21 100644
--- a/cMIPS/include/cMIPS.s
+++ b/cMIPS/include/cMIPS.s
@@ -1,10 +1,10 @@
 
 	# see vhdl/packageMemory.vhd for addresses
         .set x_INST_BASE_ADDR,0x00000000
-        .set x_INST_MEM_SZ,0x00002000
+        .set x_INST_MEM_SZ,0x00004000
 
-        .set x_DATA_BASE_ADDR,0x00400000
-        .set x_DATA_MEM_SZ,0x00002000
+        .set x_DATA_BASE_ADDR,0x04000000
+        .set x_DATA_MEM_SZ,0x00004000
 	
         .set x_IO_BASE_ADDR,0x0F000000
         .set x_IO_MEM_SZ,0x00002000
@@ -21,7 +21,7 @@
 	.set x_EXCEPTION_0000,0x00000080
 	.set x_EXCEPTION_0100,0x000000A0
 	.set x_EXCEPTION_0180,0x000000C0
-	.set x_EXCEPTION_0200,0x00000140
+	.set x_EXCEPTION_0200,0x00000200
 	.set x_ENTRY_POINT,   0x00000300
 
 	.set cop0_Index,   $0
diff --git a/cMIPS/include/cMIPSio.c b/cMIPS/include/cMIPSio.c
index 0a2eae8..38c3e8b 100644
--- a/cMIPS/include/cMIPSio.c
+++ b/cMIPS/include/cMIPSio.c
@@ -167,41 +167,10 @@ char *memset(char *dst, const int val, int len) {
 
 
 
-#endif // FOR_SIMULATION
 
+#else  // compile FOR_SYNTHESIS
 
 
-//=======================================================================
-// external counter -- counts down to zero and stops or interrupts
-//=======================================================================
-// write an integer with number of pulses to count and start counter
-//  if interr not 0, then will interrupt when count reaches zero
-void startCounter(int n, int interr) {
-  int *IO = (int *)IO_COUNT_ADDR;
-  int interrupt;
-  // set bit 31 to cause an interrupt on count==n, reset for no interrupt
-  interrupt = (interr == 0 ? 0x00000000 : 0x80000000);
-
-  // set bit 30 to start counting, reset to stop
-  *IO = (interrupt | 0x40000000 | (0x3fffffff & n)); 
-}
-
-// stop the counter, keep current count & interrupt status
-void stopCounter(void) {
-  int *IO = (int *)IO_COUNT_ADDR;
-  int value;
-  
-  value = *IO;
-  *IO = value & 0xbfffffff; // reset bit 30 to stop counter
-}
-
-// read counter value and interrupt status
-int readCounter(void) {
-  int *IO = (int *)IO_COUNT_ADDR;
-
-  return *IO;
-}; //--------------------------------------------------------------------
-
 
 
 //=======================================================================
@@ -380,3 +349,40 @@ void DSP7SEGput(int MSD, int MSdot, int lsd, int lsdot) {
 //-----------------------------------------------------------------------
 
 
+#endif // FOR_SYNTHESIS
+
+
+
+//=======================================================================
+// external counter -- counts down to zero and stops or interrupts
+//=======================================================================
+// startCounter(n, interr): load the counter with n pulses (only the 30
+//  LS bits of n are used) and start it; interr != 0 requests an interrupt
+void startCounter(int n, int interr) {
+  volatile int *IO = (volatile int *)IO_COUNT_ADDR; // device register
+  int interrupt;
+  // set bit 31 to request an interrupt, reset for no interrupt
+  interrupt = (interr == 0 ? 0x00000000 : 0x80000000);
+
+  // set bit 30 to start counting, reset to stop; volatile keeps the store
+  *IO = (interrupt | 0x40000000 | (0x3fffffff & n));
+}
+
+// stop the counter, keep current count & interrupt status
+void stopCounter(void) {
+  volatile int *IO = (volatile int *)IO_COUNT_ADDR; // device register
+  int value;
+
+  value = *IO;              // volatile: the device read is never elided
+  *IO = value & 0xbfffffff; // reset bit 30 to stop counter
+}
+
+// read counter value and interrupt status
+int readCounter(void) {
+  volatile int *IO = (volatile int *)IO_COUNT_ADDR; // device register
+
+  return *IO;               // volatile: always fetch the live value
+} //---------------------------------------------------------------------
+
+
+
diff --git a/cMIPS/include/handlers.s b/cMIPS/include/handlers.s
index ad2f619..b063e41 100644
--- a/cMIPS/include/handlers.s
+++ b/cMIPS/include/handlers.s
@@ -7,8 +7,8 @@
 	.set M_StatusIEn,0x0000ff09     # STATUS.intEn=1, user mode
 	
 	#----------------------------------------------------------------
-	# interrupt handler for external counter attached to IP2=HW0
-	# Counter address -- see vhdl/packageMemory.vhd
+	# interrupt handler for external counter attached to IP5=HW3
+	# for extCounter address -- see vhdl/packageMemory.vhd
 
 	.bss
 	.align  2
@@ -65,7 +65,7 @@ extCounter:
 
 	
 	#----------------------------------------------------------------
-	# interrupt handler for UART attached to IP3=HW1
+	# interrupt handler for UART attached to IP6=HW4
 
 	.bss 
         .align  2
@@ -80,7 +80,7 @@ extCounter:
 	.global nrx,ntx
 	.comm   nrx 4                  # characters in RX_queue
 	.comm   ntx 4                  # spaces left in TX_queue
-        .comm   _uart_buff 16*4        # registers to be saved here
+        .comm   _uart_buff 16*4        # up to 16 registers to be saved here
 
 	.set UART_rx_irq,0x08
 	.set UART_tx_irq,0x10
@@ -90,7 +90,8 @@ extCounter:
 	.global UARTinterr
 	.ent    UARTinterr
 
-	# _uart_buff[0]=status, [1]=data_inp, [2]=new, [3]=$a0, [4]=$a1
+	# _uart_buff[0]=UARTstatus, [1]=UARTcontrol, [2]=data_inp, [3]=new,
+	#           [4]=$ra, [5]=$a0, [6]=$a1, [7]=$a2, [8]=$a3
 	
 UARTinterr:
 	lui   $k0, %hi(HW_uart_addr)
@@ -168,7 +169,7 @@ enableInterr:
 	ori   $v0, $v0, 1           #   and enable interrupts
 	mtc0  $v0, cop0_STATUS
 	nop
-	jr $ra                      # return updated STATUS
+	jr    $ra                   # return updated STATUS
 	nop
 	.end enableInterr
 
@@ -179,24 +180,24 @@ disableInterr:
 	and   $v0, $v0, $v1         # -2 = 0xffff.fffe
 	mtc0  $v0, cop0_STATUS
 	nop
-	jr $ra                      # return updated STATUS
+	jr    $ra                   # return updated STATUS
 	nop
 	.end disableInterr
 	#----------------------------------------------------------------
 
 
 	#----------------------------------------------------------------	
-	# delays processing by approx 4*$a4 processor cycles
+	# delays processing by approx 4*$a0 processor cycles
 	.text
 	.set    noreorder
 	.global cmips_delay
 	.ent    cmips_delay
 cmips_delay:
-	addiu $4, $4, -1
+	addiu $a0, $a0, -1
         nop
-        bne $4, $zero, cmips_delay
+        bne   $a0, $zero, cmips_delay
         nop
-        jr $ra
+        jr    $ra
         nop
 	.end    cmips_delay
 	#----------------------------------------------------------------
diff --git a/cMIPS/include/start.s b/cMIPS/include/start.s
index 52956d5..d15f84e 100644
--- a/cMIPS/include/start.s
+++ b/cMIPS/include/start.s
@@ -17,17 +17,10 @@
 	# initialize SP: ramTop-8
 _start: li   $sp,(x_DATA_BASE_ADDR+x_DATA_MEM_SZ-8)
 
-	# set STATUS, cop0, hw interrupt IRQ2,IRQ3 enabled
-        li   $k0, 0x10000c01
+	# set STATUS, cop0, hw interrupt IRQ7,IRQ6,IRQ5 enabled
+        li   $k0, 0x1000e001
         mtc0 $k0, cop0_STATUS
  
-	la   $k0, _go_main  # start main() in user mode
-        mtc0 $k0, cop0_EPC
-        nop
-        eret      # go into user mode, all else disabled
-	nop
-
-_go_main:
 	nop
 	jal main  # on returning from main(), MUST go into exit()
 	nop       #  to stop the simulation.
@@ -44,15 +37,17 @@ _exit:	nop	  # flush pipeline
 
 
 	#----------------------------------------------------------------
+	.global _excp_0000
+	.global excp_0000
+	.global _excp_0100
+	.global excp_0100
 	.global _excp_0180
 	.global excp_0180
 	.global _excp_0200
 	.global excp_0200
-	.global _excp_0000
-	.global excp_0000
-
-	
-	#----------------------------------------------------------------
+	##
+	##================================================================
+	##
 	.org x_EXCEPTION_0000,0 # exception vector_0000
 	.ent _excp_0000
 excp_0000:
@@ -75,116 +70,195 @@ nmi_reset_handler:
 	# j excp_0000ret       #  OR do something else!
 	.end _excp_0000
 
+	##
+	##================================================================
+	## exception vector_0100 TLBrefill, from See MIPS Run pg 145
+	## NOTE(review): tests/mmu_refill.s refills at vector_0000 -- confirm
+	.org x_EXCEPTION_0100,0
+	.ent _excp_0100
+	.set noreorder
+	.set noat
+
+excp_0100:
+_excp_0100:
+	mfc0 $k1, cop0_Context     # k1 <- Context, used as page-table address
+	lw   $k0, 0($k1)           # k0 <- TP[Context.lo]
+	lw   $k1, 8($k1)           # k1 <- TP[Context.hi]
+	mtc0 $k0, cop0_EntryLo0    # EntryLo0 <- k0 = even element
+	mtc0 $k1, cop0_EntryLo1    # EntryLo1 <- k1 = odd element
+	ehb                        # clear hazards before writing the TLB
+	tlbwr	                   # update TLB
+	eret	                   # return; only $k0,$k1 were used
+	.end _excp_0100
+
+
+	##
+	##================================================================
+	## handler for all exceptions except interrupts and TLBrefill
+	##
+        .bss
+        .align  2
+        .comm   _excp_saves 16*4       # area to save up to 16 registers
+        # _excp_saves[0]=CAUSE, [1]=STATUS, [2]=ASID,
+	#            [8]=$ra, [9]=$a0, [10]=$a1, [11]=$a2, [12]=$a3
+        .text
+        .set    noreorder
 
-	#----------------------------------------------------------------
-	# handler for various exceptional conditions and HW errors
 	.org x_EXCEPTION_0180,0  # exception vector_180
 	.ent _excp_0180
 excp_0180:
 _excp_0180:
+	mfc0 $k0, cop0_STATUS
+	lui  $k1, %hi(_excp_saves)
+	ori  $k1, $k1, %lo(_excp_saves)
+	sw   $k0, 1*4($k1)
         mfc0 $k0, cop0_CAUSE
-	andi $k0, $k0, 0x3f    # keep only ExceptionCode
-	slt  $k1, $k0, 0x20    # not an address/bus error -- Table 8-25
-	beq  $k1, $zero, excp_0180ret
-	nop
-	and  $k0, $k0, 0x1f    # keep type of address error
+	sw   $k0, 0*4($k1)
+	
+	andi $k0, $k0, 0x3f    # keep only the first 16 ExceptionCode & b"00"
+	sll  $k0, $k0, 1       # displacement in vector is 8 bytes
 	lui  $k1, %hi(excp_tbl)
         ori  $k1, $k1, %lo(excp_tbl)
 	add  $k1, $k1, $k0
 	jr   $k1
 	nop
-excp_tbl: j excp_0180ret       # interrupt, do nothing and return
-	wait 0x04  # addr error      -- abort simulation, from Table 8-25
-	wait 0x08  # addr error      -- abort simulation
-	wait 0x0c  # addr error      -- abort simulation
-	wait 0x10  # addr error LD   -- abort simulation
-	wait 0x14  # addr error ST   -- abort simulation
-	wait 0x18  # bus error IF    -- abort simulation
-	wait 0x1c  # bus error LD/ST -- abort simulation
-	wait 0xff  # any other -- should never arrive here, abort simulation
+
+excp_tbl: # 16 entries of 8 bytes {op ; nop} -- see Table 8-25, pg 95,96
+	wait 0x02  # 0 interrupt, should never arrive here, abort simulation
+	nop
+
+	j h_Mod  # 1 TLB modified
+	nop
+
+	j h_TLBL # 2 TLB exception on load or fetch
+	nop
+
+	j h_TLBS # 3 TLB exception on store
+	nop
+
+	wait 0x04  # 4 AdEL addr error      -- abort simulation
+	nop
+	wait 0x05  # 5 AdES addr error      -- abort simulation
+	nop
+	wait 0x06  # 6 IBE bus error IF     -- abort simulation
+	nop
+	wait 0x07  # 7 DBE bus error LD/ST  -- abort simulation
+	nop
+
+	j h_syscall # 8 syscall
+	nop
+
+	j h_breakpoint # 9 breakpoint
+	nop
+
+	j h_RI    # 10 reserved instruction
+	nop
+
+	j h_CpU   # 11 coprocessor unusable
+	nop
+
+	j h_Ov    # 12 overflow
+	nop
+
+	j h_trap  # 13 trap
+	nop
+	
+	wait 0x14 # 14 reserved, should never get here -- abort simulation
+	nop
+	
+	wait 0x15 # 15 presumably FPE (was "PF"), never here -- abort simulation
+	nop
 
+h_Mod:	
+h_TLBL:		
+h_TLBS:	
+h_syscall:
+h_breakpoint:	
+h_RI:	
+h_CpU:	
+h_Ov:	
+h_trap:	
+	
 excp_0180ret:
-	li   $k0, 0x1000ff09   	# enable interrupts, switch to user mode
-        mtc0 $k0, cop0_STATUS
-	eret
+	lui  $k1, %hi(_excp_saves) # Read previous contents of STATUS
+	ori  $k1, $k1, %lo(_excp_saves)
+	lw   $k0, 1*4($k1)
+	# mfc0 $k0, cop0_STATUS
+	
+	lui  $k1, 0xffff           #  and do not modify its contents
+	ori  $k1, $k1, 0xfff1      #  except for re-enabling interrupts
+	ori  $k0, $k0, M_StatusIEn #  and keeping user/kernel mode
+	and  $k0, $k1, $k0         #  as it was on exception entry 
+	mtc0 $k0, cop0_STATUS	
+	eret			   # Return from exception
+
 	.end _excp_0180
 	#----------------------------------------------------------------
 
-	
-	#----------------------------------------------------------------
+	##
+	##===============================================================
+	## interrupt handlers at exception vector 0200
+	##
 	# name all handlers here
-	.extern countCompare
-	.extern extCounter
-	.extern UARTinterr
-	.org x_EXCEPTION_0200,0   # exception vector_200, interrupt handlers
-	.ent _excp_0200
+	.extern countCompare  # IRQ7 = hwIRQ5, see vhdl/tb_cMIPS.vhd
+	.extern UARTinterr    # IRQ6 - hwIRQ4
+	.extern extCounter    # IRQ5 - hwIRQ3
+
 	.set M_CauseIM,0x0000ff00   # keep bits 15..8 -> IM = IP
 	.set M_StatusIEn,0x0000ff01 # user mode, enable all interrupts
+
+	.set noreorder
+	
+	.org x_EXCEPTION_0200,0     # exception vector_200, interrupt handlers
+	.ent _excp_0200
 excp_0200:
 _excp_0200:
 	mfc0 $k0, cop0_CAUSE
 	andi $k0, $k0, M_CauseIM  # Keep only IP bits from Cause
 	mfc0 $k1, cop0_STATUS
 	and  $k0, $k0, $k1        # and mask with IM bits 
-	beq  $k0, $zero, Dismiss  
-	nop
-	
-	# Find out which irq is active and dispatch to handler
-hand_7:	andi $k1, $k0, 0x8000	  # handle IP7=HW5
-	# beq  $k1, $zero, hand_6
-	beq  $k1, $zero, hand_3   # CHANGE THIS WHEN NEW HANDLERS ARE ADDED
-	nop
-	j countCompare
-	nop
-hand_6:	andi $k1, $k0, 0x4000	  # handle IP6=HW4
-	beq  $k1, $zero, hand_5
-	nop
-	j excp_0200ret            # add proper handler here
-	nop
-hand_5:	andi $k1, $k0, 0x2000	  # handle IP5=HW3
-	beq  $k1, $zero, hand_4
-	nop
-	j excp_0200ret            # add proper handler here
-	nop
-hand_4:	andi $k1, $k0, 0x1000	  # handle IP4=HW2
-	beq  $k1, $zero, hand_3
+	srl  $k0, $k0, 13	  # k0 <- IP7..IP5 only (irq7..5); drops IP4..IP0
+	sll  $k0, $k0, 3	  # index*8: each table entry is {j handler ; nop}
+	lui  $k1, %hi(handlers_tbl) # k1 <- base address of the jump table
+	ori  $k1, $k1, %lo(handlers_tbl)
+	add  $k1, $k1, $k0	  # k1 <- &handlers_tbl[IP7..IP5]
+	jr   $k1		  # dispatch to the highest-priority pending irq
+	nop
-	j excp_0200ret            # add proper handler here
+
+handlers_tbl:
+	j Dismiss		   # no request: 000
 	nop
-hand_3:	andi $k1, $k0, 0x0800	  # handle IP3=HW1
-	beq  $k1, $zero, hand_2
+
+	j extCounter		   # lowest priority, IRQ5: 001
+	nop	
+
+	j UARTinterr		   # mid priority, IRQ6: 01x
 	nop
 	j UARTinterr
 	nop
-hand_2:	andi $k1, $k0, 0x0400	  # handle IP2=HW0
-	beq  $k1, $zero, hand_1
-	nop
-	j extCounter
-	nop
-hand_1:	andi $k1, $k0, 0x0200	  # handle IP1=SW1
-	beq  $k1, $zero, hand_0
+
+	j countCompare             # highest priority, IRQ7: 1xx
 	nop
-	j excp_0200ret            # add proper handler here
+	j countCompare
 	nop
-hand_0:	andi $k1, $k0, 0x0100	  # handle IP0=SW0
-	beq  $k1, $zero, Dismiss
+	j countCompare
 	nop
-	j excp_0200ret		  # add proper handler here
+	j countCompare
 	nop
-	
-Dismiss:                # No pending request, must have been noise
-	nop             #  do nothing and return
+
+
+Dismiss: # No pending request, must have been noise
+	 #  do nothing and return
 
 excp_0200ret:
-	mfc0 $k0, cop0_STATUS	# Read STATUS register
-	lui  $k1, 0xffff           #  and do not modify its contents
+	mfc0 $k0, cop0_STATUS	   # Read STATUS register
+	addi $k1, $zero, -15       #  and do not modify its contents -15=fff1
 	ori  $k0, $k0, M_StatusIEn #  except for re-enabling interrupts
-	ori  $k1, $k1, 0xfff1      #  and going into user mode
-	and  $k0, $k1, $k0
-	mtc0 $k0, cop0_STATUS	
-	eret			# Return from interrupt
+	and  $k0, $k1, $k0         #  and keeping user/kernel mode
+	mtc0 $k0, cop0_STATUS      #  as it was on interrupt entry 	
+	eret			   # Return from interrupt
 	nop
+
 	.end _excp_0200
 	#----------------------------------------------------------------
 
@@ -193,3 +267,4 @@ excp_0200ret:
 	#----------------------------------------------------------------
 	# normal code starts here -- do not edit next line
 	.org x_ENTRY_POINT,0
+
diff --git a/cMIPS/tests/badVAddrMM.expected b/cMIPS/tests/badVAddrMM.expected
index 4afe29f..e806d1a 100644
--- a/cMIPS/tests/badVAddrMM.expected
+++ b/cMIPS/tests/badVAddrMM.expected
@@ -23,19 +23,19 @@
 [
 08800010
 00000358
-00400001
+04000001
 ]
 00000002
 [
 08800010
 00000358
-00400002
+04000002
 ]
 00000001
 [
 08800010
 00000358
-00400003
+04000003
 ]
 00000000
 
@@ -60,13 +60,13 @@
 [
 08800010
 000003f0
-00400001
+04000001
 ]
 00000002
 00000002
 [
 08800010
 00000410
-00400003
+04000003
 ]
 00000001
diff --git a/cMIPS/tests/doTests.sh b/cMIPS/tests/doTests.sh
index f13d21c..b105dab 100755
--- a/cMIPS/tests/doTests.sh
+++ b/cMIPS/tests/doTests.sh
@@ -72,7 +72,7 @@ a_BHW="lbsb lhsh lwsw lwswIncr swlw lwl_lwr"
 a_MEM="lwSweepRAM"
 a_CTR="teq_tne tlt_tlti tltu_tgeu eiDI ll_sc overflow"
 a_COP="mtc0CAUSE2 mtc0EPC syscall break mfc0CONFIG badVAddr badVAddrMM"
-a_MMU="mmu_index mmu_tlbwi mmu_tlbp mmu_tlbwr"
+a_MMU="mmu_index mmu_tlbwi mmu_tlbp mmu_tlbwr mmu_context mmu_refill"
 
 ## these tests MUST be run with FAKE CACHES
 # a_IOs="kbd7seg" 
diff --git a/cMIPS/tests/lwFWDsw2.expected b/cMIPS/tests/lwFWDsw2.expected
index 23b9d7c..058d8bb 100644
--- a/cMIPS/tests/lwFWDsw2.expected
+++ b/cMIPS/tests/lwFWDsw2.expected
@@ -1,41 +1,41 @@
-00400010
+04000010
 fffffff6
-00400014
+04000014
 fffffff7
-00400018
+04000018
 fffffff8
-0040001c
+0400001c
 fffffff9
-00400020
+04000020
 fffffffa
-00400024
+04000024
 fffffffb
-00400028
+04000028
 fffffffc
-0040002c
+0400002c
 fffffffd
-00400030
+04000030
 fffffffe
-00400034
+04000034
 ffffffff
-00400038
+04000038
 00000000
-0040003c
+0400003c
 00000001
-00400040
+04000040
 00000002
-00400044
+04000044
 00000003
-00400048
+04000048
 00000004
-0040004c
+0400004c
 00000005
-00400050
+04000050
 00000006
-00400054
+04000054
 00000007
-00400058
+04000058
 00000008
-0040005c
+0400005c
 00000009
-00400060
+04000060
diff --git a/cMIPS/tests/merge.c b/cMIPS/tests/merge.c
index c3a6dc1..96a0324 100644
--- a/cMIPS/tests/merge.c
+++ b/cMIPS/tests/merge.c
@@ -38,18 +38,13 @@ void myprint(int numbers[], int n);
 
 void main() {
 
-int temp[NUM_ITEMS];
-
-#ifdef cMIPS
-  int *buf = (int *)x_DATA_BASE_ADDR;
-#else
+  int temp[NUM_ITEMS];
   int buf[NUM_ITEMS];
-#endif
 
   int *ptr = buf;
   unsigned int i, m_w, m_z;
 
-  // from wikipedia
+  // generate random numbers to sort -- from wikipedia
   m_w = 17;    /* must not be zero, nor 0x464fffff */
   m_z = 31;    /* must not be zero, nor 0x9068ffff */
 
diff --git a/cMIPS/tests/mmu_context.expected b/cMIPS/tests/mmu_context.expected
new file mode 100644
index 0000000..10add15
--- /dev/null
+++ b/cMIPS/tests/mmu_context.expected
@@ -0,0 +1,7 @@
+aa000000
+ok
+55800000
+ok
+excp
+ok
+ok
diff --git a/cMIPS/tests/mmu_context.s b/cMIPS/tests/mmu_context.s
new file mode 100644
index 0000000..4d513f9
--- /dev/null
+++ b/cMIPS/tests/mmu_context.s
@@ -0,0 +1,253 @@
+	##
+	## Test the Context register.
+	##
+	## Write to the upper 9 bits (PTEbase) then read it back ;
+	## 
+	## Cause an exception by referencing an unmapped address and
+	##   then check BadVPN2
+	##
+
+	## EntryHi     : EntryLo0           : EntryLo1
+	## VPN2 g ASID : PPN0 ccc0 d0 v0 g0 : PPN1 ccc1 d1 v1 g1
+
+	.include "cMIPS.s"
+
+	.set MMU_CAPACITY, 8
+	.set MMU_WIRED,    2  ### do not change mapping for base of ROM, I/O
+
+	# New entries cannot overwrite tlb[0,1] which maps base of ROM + I/O
+	
+	# EntryHi cannot have an ASID different from zero, otw TLB misses
+	.set entryHi_1,  0x00012000 #                 pfn0  zzcc cdvg
+	.set entryLo0_1, 0x0000091b #  x0 x0 x0 x0 x0 1001  0001 1011 x91b
+	.set entryLo1_1, 0x00000c1b #  x0 x0 x0 x0 x0 1100  0001 1011 xc1b
+
+	.set entryHi_2,  0x00014000 #                 pfn0  zzcc cdvg
+	.set entryLo0_2, 0x00001016 #  x0 x0 x0 x0 x1 0000  0001 0110 x1016
+	.set entryLo1_2, 0x0000141e #  x0 x0 x0 x0 x1 0100  0001 1110 x141e
+
+	.set entryHi_3,  0x00016000 #                 pfn0  zzcc cdvg
+	.set entryLo0_3, 0x0000191f #  x0 x0 x0 x0 x1 1001  0001 1111 x191f
+	.set entryLo1_3, 0x00001d3f #  x0 x0 x0 x0 x1 1101  0011 1111 x1d3f
+
+	.set entryHi_4,  0x00018000 #                 pfn0  zzcc cdvg
+	.set entryLo0_4, 0x00000012 #  x0 x0 x0 x0 x0 0000  0001 0010 x12
+	.set entryLo1_4, 0x00000412 #  x0 x0 x0 x0 x0 0100  0001 0010 x412
+
+	.set MMU_ini_tag_RAM0, x_DATA_BASE_ADDR
+	.set MMU_ini_dat_RAM0, 0x0100005         # this mapping is INVALID
+	.set MMU_ini_dat_RAM1, 0x0100047
+	
+	.text
+	.align 2
+	.set noreorder
+	.set noat
+	.globl _start
+	.ent _start
+_start:	
+
+        li   $2, cop0_STATUS_reset
+	addi $2, $2, -2
+        mtc0 $2, cop0_STATUS ### make sure CPU is not at exception level
+
+        li   $2, MMU_WIRED
+        mtc0 $2, cop0_Wired  ### wire tlb[0,1]; all other TLB entries are usable
+
+	j main
+	nop
+	.end _start
+
+
+        ##
+        ##================================================================
+        ## exception vector_0180: general exception handler, prints "excp"
+        ##
+        .org x_EXCEPTION_0180,0
+        .ent _excp
+        .set noreorder
+        .set noat
+
+excp:
+_excp:	li   $30, 'e'
+        sw   $30, x_IO_ADDR_RANGE($31)
+        li   $30, 'x'
+        sw   $30, x_IO_ADDR_RANGE($31)
+        li   $30, 'c'
+        sw   $30, x_IO_ADDR_RANGE($31)
+        li   $30, 'p'
+        sw   $30, x_IO_ADDR_RANGE($31)
+        li   $30, '\n'
+        sw   $30, x_IO_ADDR_RANGE($31)
+        eret
+        .end _excp
+
+
+	##
+	##================================================================
+        ## normal code starts here
+	##
+        .org x_ENTRY_POINT,0
+
+main:	la   $31, x_IO_BASE_ADDR
+	
+	##
+	## write PTEbase, twice
+	##
+
+	la   $29, 0xaa000000
+	mtc0 $29, cop0_Context
+
+	ehb			     # clear hazards
+	
+	mfc0 $28, cop0_Context
+	sw   $28, 0($31)
+	bne  $28, $29, error1
+	nop
+
+	li $30, 'o'
+	sw $30, x_IO_ADDR_RANGE($31)
+	li $30, 'k'
+	j  next1
+	sw $30, x_IO_ADDR_RANGE($31)
+	
+error1:	li $30, 'e'
+	sw $30, x_IO_ADDR_RANGE($31)
+	li $30, 'r'
+	sw $30, x_IO_ADDR_RANGE($31)
+	sw $30, x_IO_ADDR_RANGE($31)
+
+next1:	li $30, '\n'
+	sw $30, x_IO_ADDR_RANGE($31)
+
+	##
+	## check only top 9 bits are written
+	##
+
+	move $28, $zero
+	la   $29, 0x55800000         # can write only 9 MS bits
+	mtc0 $29, cop0_Context
+
+	ehb			     # clear hazards
+	
+	mfc0 $28, cop0_Context
+	sw   $28, 0($31)
+	srl  $28, $28, 23	     # keep only 9 MS bits
+	li   $27, 0b010101011        # 9 MS bits of 0x55800000 = 0x0AB
+	bne  $28, $27, error2
+	nop
+	
+	li $30, 'o'
+	sw $30, x_IO_ADDR_RANGE($31)
+	li $30, 'k'
+	j  next2
+	sw $30, x_IO_ADDR_RANGE($31)
+	
+error2:	li $30, 'e'
+	sw $30, x_IO_ADDR_RANGE($31)
+	li $30, 'r'
+	sw $30, x_IO_ADDR_RANGE($31)
+	sw $30, x_IO_ADDR_RANGE($31)
+
+next2:	li $30, '\n'
+	sw $30, x_IO_ADDR_RANGE($31)
+
+
+	##
+	## cause a TLB exception and check only bottom 23 bits are written
+	##   mark first RAM VPN2 as invalid
+	##
+
+	li   $5, 4                   # tlb[4] maps first RAM entry
+	mtc0 $5, cop0_Index
+
+	la   $5, MMU_ini_tag_RAM0
+	mtc0 $5, cop0_EntryHi
+
+	la   $5, MMU_ini_dat_RAM0    # set mapping invalid: d,v,g=101
+	mtc0 $5, cop0_EntryLo0
+
+	la   $5, MMU_ini_dat_RAM1
+	mtc0 $5, cop0_EntryLo1
+
+	tlbwi                        # change mapping
+	
+	la   $29, 0xff800000         # can write only 9 MS bits
+	mtc0 $29, cop0_Context
+
+	ehb			     # clear hazards
+
+	nop
+	nop
+	
+	move $28, $zero
+	
+	la   $8, x_DATA_BASE_ADDR    # cause the exception: TLBinvalid
+	sw   $zero, 0($8)
+	
+	nop
+	nop      # instructions that follow offending store are nullified
+	nop	 #   so we prevent misbehaved tests by doing nothing for
+	nop      #   6 cycles to drain the pipeline
+	nop
+	nop
+	
+	mfc0 $28, cop0_Context
+	# sw   $28, 0($31)
+
+	la   $27, 0xff800000 | (x_DATA_BASE_ADDR >>9)
+	bne  $28, $27, error3
+	nop
+	
+	li $30, 'o'
+	sw $30, x_IO_ADDR_RANGE($31)
+	li $30, 'k'
+	j  next3
+	sw $30, x_IO_ADDR_RANGE($31)
+	
+error3:	li $30, 'e'
+	sw $30, x_IO_ADDR_RANGE($31)
+	li $30, 'r'
+	sw $30, x_IO_ADDR_RANGE($31)
+	sw $30, x_IO_ADDR_RANGE($31)
+
+next3:	li $30, '\n'
+	sw $30, x_IO_ADDR_RANGE($31)
+
+	##
+	## make sure BadVAddr was loaded correctly with offending address
+	##
+
+	mfc0 $28, cop0_BadVAddr
+	# sw   $28, 0($31)
+	bne  $28, $8, error4
+	nop
+	
+	li $30, 'o'
+	sw $30, x_IO_ADDR_RANGE($31)
+	li $30, 'k'
+	j  next4
+	sw $30, x_IO_ADDR_RANGE($31)
+
+error4:	li $30, 'e'
+	sw $30, x_IO_ADDR_RANGE($31)
+	li $30, 'r'
+	sw $30, x_IO_ADDR_RANGE($31)
+	sw $30, x_IO_ADDR_RANGE($31)
+
+next4:	li $30, '\n'
+	sw $30, x_IO_ADDR_RANGE($31)
+
+
+
+	nop
+	nop
+        nop
+	nop
+	nop
+        nop
+        wait
+	nop
+	nop
+
+
+	
diff --git a/cMIPS/tests/mmu_refill.expected b/cMIPS/tests/mmu_refill.expected
new file mode 100644
index 0000000..3be4a7e
--- /dev/null
+++ b/cMIPS/tests/mmu_refill.expected
@@ -0,0 +1,4 @@
+excp
+there
+and back again
+
diff --git a/cMIPS/tests/mmu_refill.s b/cMIPS/tests/mmu_refill.s
new file mode 100644
index 0000000..f5931d8
--- /dev/null
+++ b/cMIPS/tests/mmu_refill.s
@@ -0,0 +1,232 @@
+	##
+	## Test the TLB refill handler
+	## Build a small page table in RAM, remap a ROM TLB entry so that the
+	##   jump to "there" misses, then let the handler refill the TLB
+	##
+	##
+	## EntryHi     : EntryLo0           : EntryLo1
+	## VPN2 g ASID : PPN0 ccc0 d0 v0 g0 : PPN1 ccc1 d1 v1 g1
+
+	.include "cMIPS.s"
+
+	.set MMU_WIRED,    2  ### do not change mapping for base of ROM, I/O
+
+        # New entries cannot overwrite tlb[0,1] which map base of ROM, I/O
+
+        # EntryHi cannot have an ASID different from zero, otw TLB misses
+        .set entryHi_1,  0x00012000 #                 pfn0  zzcc cdvg
+        .set entryLo0_1, 0x0000091b #  x0 x0 x0 x0 x0 1001  0001 1011 x91b
+        .set entryLo1_1, 0x00000c1b #  x0 x0 x0 x0 x0 1100  0001 1011 xc1b
+
+        .set entryHi_2,  0x00014000 #                 pfn0  zzcc cdvg
+        .set entryLo0_2, 0x00001016 #  x0 x0 x0 x0 x1 0000  0001 0110 x1016
+        .set entryLo1_2, 0x0000141e #  x0 x0 x0 x0 x1 0100  0001 1110 x141e
+
+        .set entryHi_3,  0x00016000 #                 pfn0  zzcc cdvg
+        .set entryLo0_3, 0x0000191f #  x0 x0 x0 x0 x1 1001  0001 1111 x191f
+        .set entryLo1_3, 0x00001d3f #  x0 x0 x0 x0 x1 1101  0011 1111 x1d3f
+
+        .set entryHi_4,  0x00018000 #                 pfn0  zzcc cdvg
+        .set entryLo0_4, 0x00000012 #  x0 x0 x0 x0 x0 0000  0001 0010 x12
+        .set entryLo1_4, 0x00000412 #  x0 x0 x0 x0 x0 0100  0001 0010 x412
+
+	.text
+	.align 2
+	.set noreorder
+	.set noat
+	.org x_INST_BASE_ADDR,0
+	.globl _start
+	.ent _start
+
+	## set STATUS, cop0, no interrupts enabled
+_start:	li   $k0, 0x10000000
+        mtc0 $k0, cop0_STATUS
+
+	j main
+	nop
+	.end _start
+	
+	##
+        ##================================================================
+        ## exception vector_0000 TLBrefill, from See MIPS Run pg 145
+        ##
+        .org x_EXCEPTION_0000,0
+        .ent _excp
+        .set noreorder
+        .set noat
+
+excp:
+_excp:  mfc0 $k1, cop0_Context
+        lw   $k0, 0($k1)           # k0 <- TP[Context.lo]
+        lw   $k1, 8($k1)           # k1 <- TP[Context.hi]
+        mtc0 $k0, cop0_EntryLo0    # EntryLo0 <- k0 = even element
+        mtc0 $k1, cop0_EntryLo1    # EntryLo1 <- k1 = odd element
+        ehb
+        tlbwr                      # update TLB
+	li   $30, 'e'
+	sw   $30, x_IO_ADDR_RANGE($20)	
+	li   $30, 'x'
+	sw   $30, x_IO_ADDR_RANGE($20)	
+	li   $30, 'c'
+	sw   $30, x_IO_ADDR_RANGE($20)	
+	li   $30, 'p'
+	sw   $30, x_IO_ADDR_RANGE($20)	
+	li   $30, '\n'
+	sw   $30, x_IO_ADDR_RANGE($20)	
+	eret
+        .end _excp
+
+	##
+        ##================================================================
+        ## normal code starts here
+	##
+        .org x_ENTRY_POINT,0
+
+	
+	## dirty trick: there is not enough memory for a full PT, thus
+	##   we set the PT at the bottom of RAM addresses and trick
+	##   Context into accessing a low address range
+
+	.set PTbase, x_DATA_BASE_ADDR
+	.ent main
+main:	la   $20, x_IO_BASE_ADDR
+	
+	##
+	## setup a PageTable
+	##
+	## 16 bytes per entry:  
+	## EntryLo0           : EntryLo1
+	## PPN0 ccc0 d0 v0 g0 : PPN1 ccc1 d1 v1 g1
+	##
+
+	la  $4, PTbase
+
+	li   $5, 0            # 1st ROM mapping
+	mtc0 $5, cop0_Index
+	nop
+	tlbr
+
+	mfc0 $6, cop0_EntryLo0
+	# sw   $6, 0($20)
+	mfc0 $7, cop0_EntryLo1
+	# sw   $7, 0($20)
+
+	# 1st entry: PPN0 & PPN1 ROM
+	sw  $6, 0($4)
+	sw  $0, 4($4)
+	sw  $7, 8($4)
+	sw  $0, 12($4)
+
+	li $5, 7              # 2nd ROM mapping
+	mtc0 $5, cop0_Index
+	nop
+	tlbr
+
+	mfc0 $6, cop0_EntryLo0
+	# sw   $6, 0($20)
+	mfc0 $7, cop0_EntryLo1
+	# sw   $7, 0($20)
+
+	# 2nd entry: PPN2 & PPN3 ROM
+	sw  $6, 16($4)
+	sw  $0, 20($4)
+	sw  $7, 24($4)
+	sw  $0, 28($4)
+
+	# load Context with PTbase
+	mtc0 $4, cop0_Context
+	
+	## change mapping for 2nd ROM TLB entry, thus causing a miss
+
+	li   $9, 0x2000
+	sll  $9, $9, 8
+
+	mfc0 $8, cop0_EntryHi
+	
+	add  $8, $9, $8     # change tag
+
+	mtc0 $8, cop0_EntryHi
+
+	tlbwi		    # and write it back to TLB
+
+	## cause a TLB miss
+
+	jal  there
+	nop
+	
+	li   $30, 'a'
+	sw   $30, x_IO_ADDR_RANGE($20)
+	li   $30, 'n'
+	sw   $30, x_IO_ADDR_RANGE($20)
+	li   $30, 'd'
+	sw   $30, x_IO_ADDR_RANGE($20)
+	li   $30, ' '
+	sw   $30, x_IO_ADDR_RANGE($20)
+	li   $30, 'b'
+	sw   $30, x_IO_ADDR_RANGE($20)
+	li   $30, 'a'
+	sw   $30, x_IO_ADDR_RANGE($20)
+	li   $30, 'c'
+	sw   $30, x_IO_ADDR_RANGE($20)
+	li   $30, 'k'
+	sw   $30, x_IO_ADDR_RANGE($20)
+	li   $30, ' '
+	sw   $30, x_IO_ADDR_RANGE($20)
+	li   $30, 'a'
+	sw   $30, x_IO_ADDR_RANGE($20)
+	li   $30, 'g'
+	sw   $30, x_IO_ADDR_RANGE($20)
+	li   $30, 'a'
+	sw   $30, x_IO_ADDR_RANGE($20)
+	li   $30, 'i'
+	sw   $30, x_IO_ADDR_RANGE($20)
+	li   $30, 'n'
+	sw   $30, x_IO_ADDR_RANGE($20)
+	li   $30, '\n'
+	sw   $30, x_IO_ADDR_RANGE($20)
+	sw   $30, x_IO_ADDR_RANGE($20)
+
+	
+	nop
+	nop
+        nop
+	nop
+	nop
+        nop
+        wait
+	nop
+	nop
+
+	
+	.org (x_INST_BASE_ADDR + 2*4096), 0
+
+there:	li   $30, 't'
+	sw   $30, x_IO_ADDR_RANGE($20)
+	li   $30, 'h'
+	sw   $30, x_IO_ADDR_RANGE($20)
+	li   $30, 'e'
+	sw   $30, x_IO_ADDR_RANGE($20)
+	li   $30, 'r'
+	sw   $30, x_IO_ADDR_RANGE($20)
+	li   $30, 'e'
+	sw   $30, x_IO_ADDR_RANGE($20)
+	li   $30, '\n'
+	sw   $30, x_IO_ADDR_RANGE($20)
+
+	jr   $31
+	nop
+	
+	
+
+	
+	nop
+	nop
+        nop
+	nop
+	nop
+        nop
+        wait
+	nop
+	nop
+	.end main
+
diff --git a/cMIPS/tests/mmu_tlbp.expected b/cMIPS/tests/mmu_tlbp.expected
index 813b715..4c7b2ab 100644
--- a/cMIPS/tests/mmu_tlbp.expected
+++ b/cMIPS/tests/mmu_tlbp.expected
@@ -1,20 +1,14 @@
-00006001
+00016000
 
-00004077
+00014000
 
 
-0000e001
+0001e000
 80000000
 m
 
-000040ff
-80000000
-m
-
-000020ff
-00000001
-h=1
+00012000
+h
 
-00000000
-00000004
-h=4
+00018000
+h
diff --git a/cMIPS/tests/mmu_tlbp.s b/cMIPS/tests/mmu_tlbp.s
index eec1f8f..06482be 100644
--- a/cMIPS/tests/mmu_tlbp.s
+++ b/cMIPS/tests/mmu_tlbp.s
@@ -1,41 +1,35 @@
 	##
 	## Test the TLB as if it were just a memory array
 	## Perform a series of indexed writes, then a series of probes
-	##   first two fail, next two succeed
+	##   the first one fails, next two succeed (the ASID probe is disabled)
 	##
-        ## EntryLo1 is not implemented as of may2015
         ##
-
 	## EntryHi     : EntryLo0           : EntryLo1
 	## VPN2 g ASID : PPN0 ccc0 d0 v0 g0 : PPN1 ccc1 d1 v1 g1
 
-	## TLB(i): VPN2 g ASID : PFN0 ccc0 d0 v0 : PFN1 ccc1 d1 v1
-	## TLB(0): 0    0 00   : x00  010  0  1  : x11  010  0  1
-	## TLB(1): 1    1 ff   : x21  011  0  1  : x31  011  0  1
-	## TLB(2): 2    0 77   : x41  010  1  1  : x51  011  1  1
-	## TLB(3): 3    1 01   : x61  011  1  1  : x71  111  1  1
-
 	.include "cMIPS.s"
 
 	.set MMU_CAPACITY, 8
-	.set MMU_WIRED,    1  ### do not change mapping for base of ROM
+	.set MMU_WIRED,    2  ### do not change mapping for base of ROM, I/O
 
-        .set entryHi_0,  0x00000000 #                 pfn0  zzcc cdvg
-        .set entryLo0_0, 0x00000012 #  x0 x0 x0 x0 x0 0000  0001 0010 x12
-        .set entryLo1_0, 0x00000412 #  x0 x0 x0 x0 x0 0100  0001 0010 x412
+        # New entries cannot overwrite tlb[0,1] which map base of ROM, I/O
 
-        .set entryHi_1,  0x000020ff #                 pfn0  zzcc cdvg
+        # EntryHi cannot have an ASID different from zero, otw TLB misses
+        .set entryHi_1,  0x00012000 #                 pfn0  zzcc cdvg
         .set entryLo0_1, 0x0000091b #  x0 x0 x0 x0 x0 1001  0001 1011 x91b
         .set entryLo1_1, 0x00000c1b #  x0 x0 x0 x0 x0 1100  0001 1011 xc1b
 
-        .set entryHi_2,  0x00004077 #                 pfn0  zzcc cdvg
+        .set entryHi_2,  0x00014000 #                 pfn0  zzcc cdvg
         .set entryLo0_2, 0x00001016 #  x0 x0 x0 x0 x1 0000  0001 0110 x1016
         .set entryLo1_2, 0x0000141e #  x0 x0 x0 x0 x1 0100  0001 1110 x141e
 
-        .set entryHi_3,  0x00006001 #                 pfn0  zzcc cdvg
+        .set entryHi_3,  0x00016000 #                 pfn0  zzcc cdvg
         .set entryLo0_3, 0x0000191f #  x0 x0 x0 x0 x1 1001  0001 1111 x191f
         .set entryLo1_3, 0x00001d3f #  x0 x0 x0 x0 x1 1101  0011 1111 x1d3f
 
+        .set entryHi_4,  0x00018000 #                 pfn0  zzcc cdvg
+        .set entryLo0_4, 0x00000012 #  x0 x0 x0 x0 x0 0000  0001 0010 x12
+        .set entryLo1_4, 0x00000412 #  x0 x0 x0 x0 x0 0100  0001 0010 x412
 
 	.text
 	.align 2
@@ -45,6 +39,10 @@
 	.ent _start
 _start:	la   $31, x_IO_BASE_ADDR
 
+        li   $2, cop0_STATUS_reset
+        addi $2, $2, -2
+        mtc0 $2, cop0_STATUS ### make sure CPU is not at exception level
+
 	## load into MMU(3)
 	li   $1, 3
 	mtc0 $1, cop0_Index
@@ -88,8 +86,8 @@ _start:	la   $31, x_IO_BASE_ADDR
 	sw   $30, x_IO_ADDR_RANGE($31)
 
 	
-	## load into MMU(1)
-	addiu $1, $1, -1
+	## load into MMU(5)
+	li   $1, 5
 	mtc0 $1, cop0_Index
 	la   $8, entryHi_1
 	mtc0 $8, cop0_EntryHi
@@ -101,13 +99,13 @@ _start:	la   $31, x_IO_BASE_ADDR
 
 	
 	## load into MMU(4)
-	addiu $1, $zero, 4
+	li   $1, 4
 	mtc0 $1, cop0_Index
-	la   $11, entryHi_0
+	la   $11, entryHi_4
 	mtc0 $11, cop0_EntryHi
-	la   $12, entryLo0_0
+	la   $12, entryLo0_4
 	mtc0 $12, cop0_EntryLo0
-	la   $13, entryLo1_0
+	la   $13, entryLo1_4
 	mtc0 $13, cop0_EntryLo1
 	tlbwi
 
@@ -144,29 +142,34 @@ vpn:	la   $14, entryHi_3
 
 	
 	## make a copy of entryHi_2 and change ASID to force a miss
+	##
+	## cannot change ASID at EntryHi as this will always cause misses
+	##  and we do not care for TLB misses here
+	##
+
 asid:	la  $18, entryHi_2
 	ori $18, $18, 0x88      # change ASID w.r.t tlb(2)
 
-	mtc0 $18, cop0_EntryHi
-	sw   $18, 0($31)
+#	mtc0 $18, cop0_EntryHi
+#	sw   $18, 0($31)
 
-	ehb 	# clear all hazards
+#	ehb 	# clear all hazards
 	
-	tlbp    # and probe the tlb
+#	tlbp    # and probe the tlb
 
-	mfc0 $19, cop0_Index    # check for bit31=1
-	sw   $19, 0($31)
+#	mfc0 $19, cop0_Index    # check for bit31=1
+#	sw   $19, 0($31)
 
-	slt  $20, $19, $zero    # $20 <- (bit31 = 1)
-	beq  $20, $zero, hits
-	nop
+#	slt  $20, $19, $zero    # $20 <- (bit31 = 1)
+#	beq  $20, $zero, hits
+#	nop
 
-	li   $30, 'm'
-	sw   $30, x_IO_ADDR_RANGE($31)
-	li   $30, '\n'
-	sw   $30, x_IO_ADDR_RANGE($31)
-	nop
-	sw   $30, x_IO_ADDR_RANGE($31)
+#	li   $30, 'm'
+#	sw   $30, x_IO_ADDR_RANGE($31)
+#	li   $30, '\n'
+#	sw   $30, x_IO_ADDR_RANGE($31)
+#	nop
+#	sw   $30, x_IO_ADDR_RANGE($31)
 
 	##
 	## and now probe two entries that will surely hit
@@ -183,7 +186,7 @@ hits:	la  $18, entryHi_1
 	tlbp    # and probe the tlb
 
 	mfc0 $19, cop0_Index    # check for bit31=1
-	sw   $19, 0($31)
+	#sw   $19, 0($31)
 
 	slt  $20, $19, $zero    # $20 <- (bit31 = 1)
 	beq  $20, $zero, hit1
@@ -196,11 +199,6 @@ hits:	la  $18, entryHi_1
 
 
 hit1:	li   $30, 'h'
-	sw   $30, x_IO_ADDR_RANGE($31)
-	li   $30, '='
-	sw   $30, x_IO_ADDR_RANGE($31)
-	andi $30, $19, (MMU_CAPACITY - 1)
-	addi $30, $30, '0'
 	sw   $30, x_IO_ADDR_RANGE($31)
 	li   $30, '\n'
 	sw   $30, x_IO_ADDR_RANGE($31)
@@ -208,8 +206,8 @@ hit1:	li   $30, 'h'
 	sw   $30, x_IO_ADDR_RANGE($31)
 	
 
-	## make a copy of entryHi_0 to force a hit
-	la  $18, entryHi_0
+	## make a copy of entryHi_4 to force a hit
+	la  $18, entryHi_4
 
 	mtc0 $18, cop0_EntryHi
 	sw   $18, 0($31)
@@ -219,7 +217,7 @@ hit1:	li   $30, 'h'
 	tlbp    # and probe the tlb
 
 	mfc0 $19, cop0_Index    # check for bit31=1
-	sw   $19, 0($31)
+	#sw   $19, 0($31)
 
 	slt  $20, $19, $zero    # $20 <- (bit31 = 1)
 	beq  $20, $zero, hit0
@@ -231,11 +229,6 @@ hit1:	li   $30, 'h'
 	sw   $30, x_IO_ADDR_RANGE($31)
 
 hit0:	li   $30, 'h'
-	sw   $30, x_IO_ADDR_RANGE($31)
-	li   $30, '='
-	sw   $30, x_IO_ADDR_RANGE($31)
-	andi $30, $19, (MMU_CAPACITY - 1)
-	addi $30, $30, '0'
 	sw   $30, x_IO_ADDR_RANGE($31)
 	li   $30, '\n'
 	sw   $30, x_IO_ADDR_RANGE($31)
diff --git a/cMIPS/tests/mmu_tlbwi.expected b/cMIPS/tests/mmu_tlbwi.expected
index 98e6a3d..d445911 100644
--- a/cMIPS/tests/mmu_tlbwi.expected
+++ b/cMIPS/tests/mmu_tlbwi.expected
@@ -1,36 +1,37 @@
 3
-00006001
+00016000
 0000191f
 00001d3f
-4
+
+0
 00000000
-00000012
-00000412
+00000007
+00000047
 1
-000020ff
-0000091b
-00000c1b
+0f000000
+003c0007
+003c0047
 2
-00004077
+00014000
 00001016
 0000141e
 3
-00006001
+00016000
 0000191f
 00001d3f
 4
-00000000
+00018000
 00000012
 00000412
 5
-00402000
-00010087
-000100c7
+00012000
+0000091b
+00000c1b
 6
-00406000
-00010187
-000101c7
+04006000
+00100187
+001001c7
 7
-0f000000
-003c0007
-003c0047
+00002000
+00000087
+000000c7
diff --git a/cMIPS/tests/mmu_tlbwi.s b/cMIPS/tests/mmu_tlbwi.s
index 9105c4d..7f1d689 100644
--- a/cMIPS/tests/mmu_tlbwi.s
+++ b/cMIPS/tests/mmu_tlbwi.s
@@ -3,39 +3,35 @@
 	## Perform a series of indexed writes, then a series of reads
 	##   and compare values read to those written
 	##
-	## Entries 4..7 are only read, to show initialization values
+	## Entries 0,1,6,7 are only read, to show initialization values
 	##
 
 	## EntryHi     : EntryLo0           : EntryLo1
 	## VPN2 g ASID : PPN0 ccc0 d0 v0 g0 : PPN1 ccc1 d1 v1 g1
 
-	## TLB(i): VPN2 g ASID : PFN0 ccc0 d0 v0 : PFN1 ccc1 d1 v1
-	## TLB(0): 0    0 00   : x00  010  0  1  : x11  010  0  1
-	## TLB(1): 1    1 ff   : x21  011  0  1  : x31  011  0  1
-	## TLB(2): 2    0 77   : x41  010  1  1  : x51  011  1  1
-	## TLB(3): 3    1 01   : x61  011  1  1  : x71  111  1  1
-
 	.include "cMIPS.s"
 
-	.set MMU_CAPACITY, 8
-	.set MMU_WIRED,    1  ### do not change mapping for base of ROM
-
-	.set entryHi_0,  0x00000000 #                 pfn0  zzcc cdvg
-	.set entryLo0_0, 0x00000012 #  x0 x0 x0 x0 x0 0000  0001 0010 x12
-	.set entryLo1_0, 0x00000412 #  x0 x0 x0 x0 x0 0100  0001 0010 x412
+	.set MMU_WIRED,    2  ### do not change mapping for base of ROM, I/O
 
-	.set entryHi_1,  0x000020ff #                 pfn0  zzcc cdvg
+	# New entries cannot overwrite tlb[0,1] which map base of ROM + I/O
+	
+	# EntryHi cannot have an ASID different from zero, otw TLB misses
+	.set entryHi_1,  0x00012000 #                 pfn0  zzcc cdvg
 	.set entryLo0_1, 0x0000091b #  x0 x0 x0 x0 x0 1001  0001 1011 x91b
 	.set entryLo1_1, 0x00000c1b #  x0 x0 x0 x0 x0 1100  0001 1011 xc1b
 
-	.set entryHi_2,  0x00004077 #                 pfn0  zzcc cdvg
+	.set entryHi_2,  0x00014000 #                 pfn0  zzcc cdvg
 	.set entryLo0_2, 0x00001016 #  x0 x0 x0 x0 x1 0000  0001 0110 x1016
 	.set entryLo1_2, 0x0000141e #  x0 x0 x0 x0 x1 0100  0001 1110 x141e
 
-	.set entryHi_3,  0x00006001 #                 pfn0  zzcc cdvg
+	.set entryHi_3,  0x00016000 #                 pfn0  zzcc cdvg
 	.set entryLo0_3, 0x0000191f #  x0 x0 x0 x0 x1 1001  0001 1111 x191f
 	.set entryLo1_3, 0x00001d3f #  x0 x0 x0 x0 x1 1101  0011 1111 x1d3f
 
+	.set entryHi_4,  0x00018000 #                 pfn0  zzcc cdvg
+	.set entryLo0_4, 0x00000012 #  x0 x0 x0 x0 x0 0000  0001 0010 x12
+	.set entryLo1_4, 0x00000412 #  x0 x0 x0 x0 x0 0100  0001 0010 x412
+
 	.text
 	.align 2
 	.set noreorder
@@ -44,8 +40,13 @@
 	.ent _start
 _start:	la   $31, x_IO_BASE_ADDR
 
+        li   $2, cop0_STATUS_reset
+	addi $2, $2, -2
+        mtc0 $2, cop0_STATUS ### make sure CPU is not at exception level
+
         li   $2, MMU_WIRED
-        mtc0 $2, cop0_Wired  ### make sure all but 0'th TLB entries are usable
+        mtc0 $2, cop0_Wired  ### make sure all but 0,1 TLB entries are usable
+
 	
 	# load into MMU(3)
 	li   $1, 3
@@ -78,8 +79,23 @@ _start:	la   $31, x_IO_BASE_ADDR
 	mfc0 $25, cop0_EntryLo1
 	sw   $25, 0($31)
 
+	li $30, '\n'
+	sw $30, x_IO_ADDR_RANGE($31)
+
+
+	# load into MMU(4)
+	li   $1, 4
+	mtc0 $1, cop0_Index
+	la   $11, entryHi_4
+	mtc0 $11, cop0_EntryHi
+	la   $12, entryLo0_4
+	mtc0 $12, cop0_EntryLo0
+	la   $13, entryLo1_4
+	mtc0 $13, cop0_EntryLo1
+	tlbwi
+
 	# load into MMU(2)
-	addiu $1, $1, -1
+	li   $1, 2
 	mtc0 $1, cop0_Index
 	la   $5, entryHi_2
 	mtc0 $5, cop0_EntryHi
@@ -89,8 +105,8 @@ _start:	la   $31, x_IO_BASE_ADDR
 	mtc0 $7, cop0_EntryLo1
 	tlbwi
 
-	# load into MMU(1)
-	addiu $1, $1, -1
+	# load into MMU(5)
+	li   $1, 5
 	mtc0 $1, cop0_Index
 	la   $8, entryHi_1
 	mtc0 $8, cop0_EntryHi
@@ -100,26 +116,35 @@ _start:	la   $31, x_IO_BASE_ADDR
 	mtc0 $10, cop0_EntryLo1
 	tlbwi
 
-	# load into MMU(4)
-	addiu $1, $zero, 4
-	mtc0 $1, cop0_Index
-	la   $11, entryHi_0
-	mtc0 $11, cop0_EntryHi
-	la   $12, entryLo0_0
-	mtc0 $12, cop0_EntryLo0
-	la   $13, entryLo1_0
-	mtc0 $13, cop0_EntryLo1
-	tlbwi
 
-	# and now read values back, in reverse order
+	# and now read back all entries: 0..7
 
-	# read from MMU(4)
-	addi $30, $1, '0'
+	# read from MMU(0)
+	li    $1, 0
+	addi  $30, $1, '0'
 	sw $30, x_IO_ADDR_RANGE($31)
 	li $30, '\n'
 	sw $30, x_IO_ADDR_RANGE($31)
 
+	mtc0  $1, cop0_Index
 	tlbr 			# index = 0
+	mfc0 $23, cop0_EntryHi
+	sw   $23, 0($31)
+	mfc0 $24, cop0_EntryLo0
+	sw   $24, 0($31)
+	mfc0 $24, cop0_EntryLo1
+	sw   $24, 0($31)
+
+
+	# read from MMU(1)
+	li   $1, 1
+	addi $30, $1, '0'
+	sw $30, x_IO_ADDR_RANGE($31)
+	li $30, '\n'
+	sw $30, x_IO_ADDR_RANGE($31)
+
+	mtc0 $1, cop0_Index
+	tlbr 			# index = 1
 	mfc0 $14, cop0_EntryHi
 	sw   $14, 0($31)
 	mfc0 $15, cop0_EntryLo0
@@ -128,15 +153,15 @@ _start:	la   $31, x_IO_BASE_ADDR
 	sw   $16, 0($31)
 
 	
-	# read from MMU(1)
-	addiu $1, $1, -3
+	# read from MMU(2)
+	li    $1, 2
 	addi  $30, $1, '0'
 	sw $30, x_IO_ADDR_RANGE($31)
 	li $30, '\n'
 	sw $30, x_IO_ADDR_RANGE($31)
 
 	mtc0  $1, cop0_Index
-	tlbr 			# index = 1
+	tlbr 			# index = 2
 	mfc0 $17, cop0_EntryHi
 	sw   $17, 0($31)
 	mfc0 $18, cop0_EntryLo0
@@ -145,15 +170,15 @@ _start:	la   $31, x_IO_BASE_ADDR
 	sw   $19, 0($31)
 
 	
-	# read from MMU(2)
-	addiu $1, $1, 1
+	# read from MMU(3)
+	li    $1, 3
 	addi  $30, $1, '0'
 	sw $30, x_IO_ADDR_RANGE($31)
 	li $30, '\n'
 	sw $30, x_IO_ADDR_RANGE($31)
 
 	mtc0  $1, cop0_Index
-	tlbr 			# index = 2
+	tlbr 			# index = 3
 	mfc0 $20, cop0_EntryHi
 	sw   $20, 0($31)
 	mfc0 $21, cop0_EntryLo0
@@ -162,15 +187,15 @@ _start:	la   $31, x_IO_BASE_ADDR
 	sw   $22, 0($31)
 
 
-	# read from MMU(1)
-	addiu $1, $1, 1
+	# read from MMU(4)
+	li    $1, 4
 	addi  $30, $1, '0'
 	sw $30, x_IO_ADDR_RANGE($31)
 	li $30, '\n'
 	sw $30, x_IO_ADDR_RANGE($31)
 
 	mtc0  $1, cop0_Index
-	tlbr 			# index = 3
+	tlbr 			# index = 4
 	mfc0 $23, cop0_EntryHi
 	sw   $23, 0($31)
 	mfc0 $24, cop0_EntryLo0
@@ -178,36 +203,16 @@ _start:	la   $31, x_IO_BASE_ADDR
 	mfc0 $25, cop0_EntryLo1
 	sw   $25, 0($31)
 
-	##
-	## now read initialization values of remaining entries
-	##
 	
-	# read from MMU(4)
-	li  $1, 4
-	addi  $30, $1, '0'
-	sw $30, x_IO_ADDR_RANGE($31)
-	li $30, '\n'
-	sw $30, x_IO_ADDR_RANGE($31)
-
-	mtc0  $1, cop0_Index
-	tlbr 			# index = 4
-	mfc0 $23, cop0_EntryHi
-	sw   $23, 0($31)
-	mfc0 $24, cop0_EntryLo0
-	sw   $24, 0($31)
-	mfc0 $24, cop0_EntryLo1
-	sw   $24, 0($31)
-
-
 	# read from MMU(5)
-	addi $1, $1, 1
+	li    $1, 5
 	addi  $30, $1, '0'
 	sw $30, x_IO_ADDR_RANGE($31)
 	li $30, '\n'
 	sw $30, x_IO_ADDR_RANGE($31)
 
 	mtc0  $1, cop0_Index
-	tlbr 			# index = 4
+	tlbr 			# index = 5
 	mfc0 $23, cop0_EntryHi
 	sw   $23, 0($31)
 	mfc0 $24, cop0_EntryLo0
@@ -224,7 +229,7 @@ _start:	la   $31, x_IO_BASE_ADDR
 	sw $30, x_IO_ADDR_RANGE($31)
 
 	mtc0  $1, cop0_Index
-	tlbr 			# index = 4
+	tlbr 			# index = 6
 	mfc0 $23, cop0_EntryHi
 	sw   $23, 0($31)
 	mfc0 $24, cop0_EntryLo0
@@ -241,7 +246,7 @@ _start:	la   $31, x_IO_BASE_ADDR
 	sw $30, x_IO_ADDR_RANGE($31)
 
 	mtc0  $1, cop0_Index
-	tlbr 			# index = 4
+	tlbr 			# index = 7
 	mfc0 $23, cop0_EntryHi
 	sw   $23, 0($31)
 	mfc0 $24, cop0_EntryLo0
@@ -249,6 +254,7 @@ _start:	la   $31, x_IO_BASE_ADDR
 	mfc0 $24, cop0_EntryLo1
 	sw   $24, 0($31)
 
+
 	nop
 	nop
         nop
diff --git a/cMIPS/tests/mmu_tlbwr.expected b/cMIPS/tests/mmu_tlbwr.expected
index 630d9d3..a126ae5 100644
--- a/cMIPS/tests/mmu_tlbwr.expected
+++ b/cMIPS/tests/mmu_tlbwr.expected
@@ -1,17 +1,11 @@
-00000002
-00000007
-00000004
-00000002
-
+ok
 80000000
 m
 
 80000000
 m
 
-00000002
-h=2
+h
 
-00000004
-h=4
+h
 
diff --git a/cMIPS/tests/mmu_tlbwr.s b/cMIPS/tests/mmu_tlbwr.s
index 74dd1e3..6e3fc0c 100644
--- a/cMIPS/tests/mmu_tlbwr.s
+++ b/cMIPS/tests/mmu_tlbwr.s
@@ -1,7 +1,7 @@
 	##
 	## Test the TLB as if it were just a memory array
 	## Perform a random write, then probe for it
-	##   writes are such thar first two probes fail, next two succeed
+	##   writes are such that first two probes fail, next two succeed
 	## Because of timing, only one WRITE -> PROBE can be tested
 	##   "deterministically" as changes to core timing may break the test
 	##
@@ -9,35 +9,33 @@
 	## EntryHi     : EntryLo0           : EntryLo1
 	## VPN2 g ASID : PPN0 ccc0 d0 v0 g0 : PPN1 ccc1 d1 v1 g1
 
-	## TLB(i): VPN2 g ASID : PFN0 ccc0 d0 v0 : PFN1 ccc1 d1 v1
-	## TLB(0): 0    0 00   : x00  010  0  1  : x11  010  0  1
-	## TLB(1): 1    1 ff   : x21  011  0  1  : x31  011  0  1
-	## TLB(2): 2    0 77   : x41  010  1  1  : x51  011  1  1
-	## TLB(3): 3    1 01   : x61  011  1  1  : x71  111  1  1
-
 	.include "cMIPS.s"
 
 	.set MMU_CAPACITY, 8
-	.set MMU_WIRED,    1  ### do not change mapping for base of ROM
-
-	.set entryHi_0,  0x00010000 #                 pfn0  zzcc cdvg
-        .set entryLo0_0, 0x00000012 #  x0 x0 x0 x0 x0 0000  0001 0010 x12
-        .set entryLo1_0, 0x00000412 #  x0 x0 x0 x0 x0 0100  0001 0010 x412
-
-        .set entryHi_1,  0x000020ff #                 pfn0  zzcc cdvg
-        .set entryLo0_1, 0x0000091b #  x0 x0 x0 x0 x0 1001  0001 1011 x91b
-        .set entryLo1_1, 0x00000c1b #  x0 x0 x0 x0 x0 1100  0001 1011 xc1b
+	.set MMU_WIRED,    2  ### do not change mapping for base of ROM
+	                      ###   nor I/O addresses
 
-        .set entryHi_2,  0x00004077 #                 pfn0  zzcc cdvg
-        .set entryLo0_2, 0x00001016 #  x0 x0 x0 x0 x1 0000  0001 0110 x1016
-        .set entryLo1_2, 0x0000141e #  x0 x0 x0 x0 x1 0100  0001 1110 x141e
-
-        .set entryHi_3,  0x00006001 #                 pfn0  zzcc cdvg
-        .set entryLo0_3, 0x0000191f #  x0 x0 x0 x0 x1 1001  0001 1111 x191f
-        .set entryLo1_3, 0x00001d3f #  x0 x0 x0 x0 x1 1101  0011 1111 x1d3f
+	# New entries cannot overwrite tlb[0,1] which map base of ROM + I/O
+	
+	# EntryHi cannot have an ASID different from zero, otw TLB misses
+	.set entryHi_1,  0x00012000 #                 pfn0  zzcc cdvg
+	.set entryLo0_1, 0x0000091b #  x0 x0 x0 x0 x0 1001  0001 1011 x91b
+	.set entryLo1_1, 0x00000c1b #  x0 x0 x0 x0 x0 1100  0001 1011 xc1b
+
+	.set entryHi_2,  0x00014000 #                 pfn0  zzcc cdvg
+	.set entryLo0_2, 0x00001016 #  x0 x0 x0 x0 x1 0000  0001 0110 x1016
+	.set entryLo1_2, 0x0000141e #  x0 x0 x0 x0 x1 0100  0001 1110 x141e
+
+	.set entryHi_3,  0x00016000 #                 pfn0  zzcc cdvg
+	.set entryLo0_3, 0x0000191f #  x0 x0 x0 x0 x1 1001  0001 1111 x191f
+	.set entryLo1_3, 0x00001d3f #  x0 x0 x0 x0 x1 1101  0011 1111 x1d3f
+
+	.set entryHi_4,  0x00018000 #                 pfn0  zzcc cdvg
+	.set entryLo0_4, 0x00000012 #  x0 x0 x0 x0 x0 0000  0001 0010 x12
+	.set entryLo1_4, 0x00000412 #  x0 x0 x0 x0 x0 0100  0001 0010 x412
 	
 	# initialize TLB with these
-	.set entryHi_i,  0x00ffffff #                    pfn0  cc cdvg
+	.set entryHi_i,  0x00ffff00 #                    pfn0  cc cdvg
 	.set entryLo0_i, 0x0fff185f #  x0 x0 x0 xf xf 11 11 11 00 0000 xfffc0
 	.set entryLo1_i, 0x0fff1c7f #  x0 xf xf xf xf 11 11 11 00 0000 xfffc0
 
@@ -49,14 +47,16 @@
 	.ent _start
 _start:	la   $31, x_IO_BASE_ADDR
 
+        li   $2, cop0_STATUS_reset
+        addi $2, $2, -2
+        mtc0 $2, cop0_STATUS ### make sure CPU is not at exception level
+	
 	li   $2, MMU_WIRED
 	mtc0 $2, cop0_Wired  ### make sure all but 0'th TLB entries are usable
+	
 
 	##
 	## Initialize TLB with entries that will not match in tests below
-	## NOTE: this is strictly forbidden as all entries are equal
-	##       we only do this while testing the TLB
-	##       NEVER do this in normal usage
 	##
 	la   $2, entryHi_i
 	mtc0 $2, cop0_EntryHi
@@ -65,41 +65,69 @@ _start:	la   $31, x_IO_BASE_ADDR
 	la   $4, entryLo1_i
 	mtc0 $4, cop0_EntryLo1
 
-	### do not change mapping for base of ROM at tlb(0)
-	li   $5, 1
-	mtc0 $5, cop0_Index
-	tlbwi
+	### do not change mappings for base of ROM at tlb[0] nor I/O at tlb[1]
 	li   $5, 2
 	mtc0 $5, cop0_Index
 	tlbwi
+	addi $2, $2, 0x4000  # increase VPN2 so tlb[3] gets a tag of its own
+	mtc0 $2, cop0_EntryHi  # tlbwi takes the tag from EntryHi, not from $2
 	li   $5, 3
 	mtc0 $5, cop0_Index
 	tlbwi
+	addi $2, $2, 0x4000  # increase VPN2 so tlb[4] gets a tag of its own
+	mtc0 $2, cop0_EntryHi  # ASID bits (7..0) stay zero, only VPN2 changes
 	li   $5, 4
 	mtc0 $5, cop0_Index
 	tlbwi
+	addi $2, $2, 0x4000  # increase VPN2 so tlb[5] gets a tag of its own
+	mtc0 $2, cop0_EntryHi  # write the new tag back before the indexed write
 	li   $5, 5
 	mtc0 $5, cop0_Index
 	tlbwi
+	addi $2, $2, 0x4000  # increase VPN2 so tlb[6] gets a tag of its own
+	mtc0 $2, cop0_EntryHi  # write the new tag back before the indexed write
 	li   $5, 6
 	mtc0 $5, cop0_Index
 	tlbwi
+	addi $2, $2, 0x4000  # increase VPN2 so tlb[7] gets a tag of its own
+	mtc0 $2, cop0_EntryHi  # write the new tag back before the indexed write
 	li   $5, 7
 	mtc0 $5, cop0_Index
 	tlbwi
 	
 
 	mfc0 $19, cop0_Random    # check for randomness
-	sw   $19, 0($31)
-	mfc0 $19, cop0_Random    # check for randomness
-	sw   $19, 0($31)
+	mfc0 $20, cop0_Random    # check for randomness	
+	mfc0 $21, cop0_Random    # check for randomness
+	mfc0 $22, cop0_Random    # check for randomness
+	
+	beq $19, $20, error4
 	nop
-	mfc0 $19, cop0_Random    # check for randomness
-	sw   $19, 0($31)
-	mfc0 $19, cop0_Random    # check for randomness
-	sw   $19, 0($31)
-	li   $30, '\n'
-	sw   $30, x_IO_ADDR_RANGE($31)
+	beq $19, $21, error4
+	nop
+	beq $19, $22, error4
+	nop
+	beq $20, $21, error4
+	nop
+	beq $20, $22, error4
+	nop
+	beq $21, $22, error4
+	nop
+
+        li $30, 'o'
+        sw $30, x_IO_ADDR_RANGE($31)
+        li $30, 'k'
+        j  next4
+        sw $30, x_IO_ADDR_RANGE($31)
+
+error4: li $30, 'e'
+        sw $30, x_IO_ADDR_RANGE($31)
+        li $30, 'r'
+        sw $30, x_IO_ADDR_RANGE($31)
+        sw $30, x_IO_ADDR_RANGE($31)
+
+next4:  li $30, '\n'
+        sw $30, x_IO_ADDR_RANGE($31)
 
 
 	## write to a random location
@@ -121,11 +149,11 @@ _start:	la   $31, x_IO_BASE_ADDR
 	## check first record was written
 	## make sure it will miss by probing for Entry_0
 	##
-	la   $5, entryHi_0
+	la   $5, entryHi_4
 	mtc0 $5, cop0_EntryHi
-	la   $5, entryLo0_0
+	la   $5, entryLo0_4
 	mtc0 $5, cop0_EntryLo0
-	la   $5, entryLo1_0
+	la   $5, entryLo1_4
 	mtc0 $5, cop0_EntryLo1
 
 	nop
@@ -149,10 +177,6 @@ miss3:	li   $30, 'm'
 
 hit3:	li   $30, 'h'
 	sw   $30, x_IO_ADDR_RANGE($31)
-	li   $30, '='
-	sw   $30, x_IO_ADDR_RANGE($31)
-	andi $30, $19, (MMU_CAPACITY - 1)
-	addi $30, $30, '0'
 	sw   $30, x_IO_ADDR_RANGE($31)
 	li   $30, '\n'
 	sw   $30, x_IO_ADDR_RANGE($31)
@@ -211,11 +235,6 @@ miss2:	li   $30, 'm'
 	
 
 hit2:	li   $30, 'h'
-	sw   $30, x_IO_ADDR_RANGE($31)
-	li   $30, '='
-	sw   $30, x_IO_ADDR_RANGE($31)
-	andi $30, $19, (MMU_CAPACITY - 1)
-	addi $30, $30, '0'
 	sw   $30, x_IO_ADDR_RANGE($31)
 	li   $30, '\n'
 	sw   $30, x_IO_ADDR_RANGE($31)
@@ -239,7 +258,7 @@ next1:	li   $30, '\n'
 	tlbp
 	
 	mfc0 $19, cop0_Index    # check for bit31=1
-	sw   $19, 0($31)
+	# sw   $19, 0($31)
 
 	slt  $20, $19, $zero    # $20 <- (bit31 = 1)
 	beq  $20, $zero, hit1
@@ -252,11 +271,6 @@ miss1:	li   $30, 'm'
 	j    next0
 
 hit1:	li   $30, 'h'
-	sw   $30, x_IO_ADDR_RANGE($31)
-	li   $30, '='
-	sw   $30, x_IO_ADDR_RANGE($31)
-	andi $30, $19, (MMU_CAPACITY - 1)
-	addi $30, $30, '0'
 	sw   $30, x_IO_ADDR_RANGE($31)
 	li   $30, '\n'
 	sw   $30, x_IO_ADDR_RANGE($31)
@@ -287,7 +301,7 @@ next0:	li   $30, '\n'
 	tlbp
 	
 	mfc0 $19, cop0_Index    # check for bit31=1
-	sw   $19, 0($31)
+	# sw   $19, 0($31)
 
 	slt  $20, $19, $zero    # $20 <- (bit31 = 1)
 	beq  $20, $zero, hit0
@@ -300,11 +314,6 @@ miss0:	li   $30, 'm'
 	j    done
 
 hit0:	li   $30, 'h'
-	sw   $30, x_IO_ADDR_RANGE($31)
-	li   $30, '='
-	sw   $30, x_IO_ADDR_RANGE($31)
-	andi $30, $19, (MMU_CAPACITY - 1)
-	addi $30, $30, '0'
 	sw   $30, x_IO_ADDR_RANGE($31)
 	li   $30, '\n'
 	sw   $30, x_IO_ADDR_RANGE($31)
diff --git a/cMIPS/vhdl/core.vhd b/cMIPS/vhdl/core.vhd
index eb2a752..ef183f3 100644
--- a/cMIPS/vhdl/core.vhd
+++ b/cMIPS/vhdl/core.vhd
@@ -117,8 +117,6 @@ architecture rtl of core is
          WB_can_trap:   out std_logic_vector;
          MM_excp_type:  in  exception_type;
          WB_excp_type:  out exception_type;
-         MM_PC:         in  std_logic_vector;
-         WB_PC:         out std_logic_vector;
          MM_cop0_LLbit: in  std_logic;
          WB_cop0_LLbit: out std_logic;
          MM_abort:      in  boolean;
@@ -139,7 +137,7 @@ architecture rtl of core is
   signal IF_excp_type,RF_excp_type,EX_excp_type,WB_excp_type: exception_type := exNOP;
   signal MM_excp_type, MM_excp_type_i, TLB_excp_type : exception_type;
   signal trap_instr,EX_trap_instr: instr_type;
-  signal RF_PC,EX_PC,MM_PC,WB_PC, LLaddr: reg32;
+  signal RF_PC,EX_PC,MM_PC, LLaddr: reg32;
   signal EX_LLbit,MM_LLbit,WB_LLbit: std_logic;
   signal LL_update,LL_SC_abort,LL_SC_differ,EX_trapped,MM_ex_trapped: std_logic;
   signal int_req, EX_int_req: reg8;
@@ -159,7 +157,7 @@ architecture rtl of core is
   signal count_eq_compare,count_update,count_enable : std_logic;
   signal exception,EX_exception,is_exception : exception_type := exNOP;
   signal ExcCode : reg5 := cop0code_NULL;
-  signal exception_num, exception_dec : integer;       -- for debugging only
+  signal exception_num,exception_dec,TLB_excp_num : integer; -- for debugging only
   signal next_instr_in_delay_slot,EX_is_delayslot : std_logic;
   signal cop0_sel, EX_cop0_sel, epc_source : reg3;
   signal cop0_reg,EX_cop0_reg : reg5;
@@ -184,8 +182,8 @@ architecture rtl of core is
   signal hit0_pc, hit1_pc, hit2_pc, hit3_pc, hit_pc : boolean;
   signal hit4_pc, hit5_pc, hit6_pc, hit7_pc : boolean;
   signal hit0_mm, hit1_mm, hit2_mm, hit3_mm, hit_mm : boolean;
-  signal hit4_mm, hit5_mm, hit6_mm, hit7_mm : boolean;
-  signal tlb_miss, tlb_miss_IF, tlb_miss_MM, tlb_exception : boolean;
+  signal hit4_mm, hit5_mm, hit6_mm, hit7_mm: boolean;
+  signal tlb_miss, tlb_exception, tlb_stage_MM, addrErr_stage_mm : boolean;
   signal hit_mm_v, hit_mm_d, hit_pc_v : std_logic;
   signal tlb_adr_mm : MMU_idx_bits;
   signal tlb_probe, probe_hit, hit_mm_bit : std_logic;
@@ -195,7 +193,7 @@ architecture rtl of core is
   signal tlb_a0_mm,tlb_a1_mm,tlb_a2_mm : natural range 0 to (MMU_CAPACITY-1);
   signal tlb_ppn_pc0,tlb_ppn_pc1 : mmu_dat_reg;
   signal tlb_ppn_mm0,tlb_ppn_mm1 : mmu_dat_reg;
-  signal tlb_ppn_mm, tlb_ppn_pc, tlb_ppn : std_logic_vector(PPN_BITS - 1 downto 0);
+  signal tlb_ppn_mm, tlb_ppn_pc  : std_logic_vector(PPN_BITS - 1 downto 0);
   
   signal tlb_tag0, tlb_tag1, tlb_tag2, tlb_tag3, tlb_tag_inp : reg32;
   signal tlb_tag4, tlb_tag5, tlb_tag6, tlb_tag7, e_hi, e_hi_inp : reg32;
@@ -333,7 +331,7 @@ architecture rtl of core is
   signal EX_postn, shamt,EX_shamt: reg5;
   signal regs_A,EX_A,MM_A,WB_A, regs_B,EX_B,MM_B:reg32;
   signal displ32,EX_displ32: reg32;
-  signal result,MM_result,WB_result,WB_C: reg32;
+  signal result,MM_result,WB_result,WB_C, EX_addr,MM_addr: reg32;
   signal pc_p8,EX_pc_p8,MM_pc_p8,WB_pc_p8 : reg32;
   signal HI,MM_HI,WB_HI, LO,MM_LO,WB_LO : reg32;
 
@@ -406,6 +404,8 @@ architecture rtl of core is
          MM_B:       out std_logic_vector;
          EX_result:  in  std_logic_vector;
          MM_result:  out std_logic_vector;
+         EX_addr:    in  std_logic_vector;
+         MM_addr:    out std_logic_vector;
          HI:         in  std_logic_vector;
          MM_HI:      out std_logic_vector;
          LO:         in  std_logic_vector;
@@ -701,7 +701,7 @@ begin
 
   -- iaVal <= '1' when ((phi0 = '1' and if_stalled = '0')) else '0';
   
-  i_aVal <= '0'; -- interface signal/port, always fetch new instruction
+  i_aVal <= '0'; -- interface signal/port, always fetches a new instruction
   iaVal  <= '0'; -- internal signal
   
   rom_stall <= not(iaVal) and not(i_wait);
@@ -771,12 +771,10 @@ begin
 
 
   -- uncomment this when NOT making use of the TLB
-  i_addr <= PC_aligned;    -- fetch instruction from aligned address
+  -- i_addr <= PC_aligned;    -- fetch instruction from aligned address
 
   -- uncomment this when making use of the TLB
-  -- i_addr <= phy_i_addr;
-  
-  abort <= '1' when addrError else '0';
+  i_addr <= phy_i_addr;
   
   instr_fetched <= instr when (nullify = '0' and abort = '0'
                                and PC(1 downto 0) = b"00") else
@@ -844,7 +842,7 @@ begin
       assert not(exception = exWAIT and syscall_n /= x"80000")
         report LF & "INVALID REFERENCE at PC="& SLV32HEX(EPC) &
         " opc="& SLV2STR(opcode) & " fun=" & SLV2STR(func) &
-        " cause(6..2)=" & SLV2STR(RF_instruction(10 downto 6)) & 
+        " instr=" & SLV32HEX(RF_instruction) & 
         LF & "SIMULATION ABORTED AT EXCEPTION HANDLER;"
         severity failure;
 
@@ -940,10 +938,14 @@ begin
       if (is_branch = '1') then
         br_stall <= '1';
       end if;
-      eq_fwd_A <= regs_A;
     elsif ((MM_wreg = '0') and (MM_a_c = a_rs) and (MM_a_c /= b"00000")
            and (MM_aVal = '1')) then    -- non-LW
-      eq_fwd_A <= MM_result;
+      if MM_mfc0 /= '1' then
+        eq_fwd_A <= MM_result;
+      else
+        eq_fwd_A <= MM_cop0_val;
+      end if;
+      -- eq_fwd_A <= MM_result;
     else
       eq_fwd_A <= regs_A;
     end if;
@@ -960,7 +962,12 @@ begin
       eq_fwd_B <= regs_B;
     elsif ((MM_wreg = '0') and (MM_a_c = a_rt) and (MM_a_c /= b"00000")
            and (MM_aVal = '1')) then    -- non-LW
-      eq_fwd_B <= MM_result;
+      if MM_mfc0 /= '1' then
+        eq_fwd_B <= MM_result;
+      else
+        eq_fwd_B <= MM_cop0_val;
+      end if;
+      -- eq_fwd_B <= MM_result;
     else
       eq_fwd_B <= regs_B;
     end if;
@@ -1152,7 +1159,7 @@ begin
         case opcode is
           when b"110000" => i_exception := exLL;  -- not REALLY exceptions
           when b"111000" => i_exception := exSC;
-          when b"111111" =>
+         -- when b"111111" =>
          --    if addrError then
          --      i_exception := MM_excp_type;
          --    else
@@ -1284,13 +1291,17 @@ begin
                   or nullify_EX         -- abort ref if previous excep in EX
                   or abort;             -- abort ref if exception in MEM
 
+  abort <= '1' when (addrError or (tlb_exception and tlb_stage_mm)) else '0';
+  -- abort <= '1' when (addrError) else '0';
 
+  
   -- this adder performs address calculation so the TLB can be checked during
-  --   EX so we may signal an exception as early as possible
+  --   EX and thus signal an exception as early as possible
   U_VIR_ADDR_ADD: mf_alt_adder port map (alu_inp_A, EX_displ32, v_addr);
   
 
   U_EX_ADDR_ERR_EXCP: process(EX_mem_t,EX_aVal,EX_wrmem, v_addr)
+    variable i_stage_mm : boolean;
   begin
 
     case EX_mem_t(1 downto 0) is  -- xx,by,hf,wd
@@ -1302,10 +1313,12 @@ begin
           else
             MM_excp_type <= MMaddressErrorST;
           end if;
-          addrError <= TRUE;
+          addrError  <= TRUE;
+          i_stage_mm := TRUE;
         else
           MM_excp_type <= exNOP;
-          addrError     <= FALSE;
+          addrError    <= FALSE;
+          i_stage_mm   := FALSE;
         end if;
 
       when b"10" =>                        -- LH*, SH
@@ -1315,17 +1328,21 @@ begin
           else
             MM_excp_type <= MMaddressErrorST;
           end if;
-          addrError       <= TRUE;
+          addrError  <= TRUE;
+          i_stage_mm := TRUE;
         else
           MM_excp_type <= exNOP;
-          addrError     <= FALSE;
+          addrError    <= FALSE;
+          i_stage_mm   := FALSE;
         end if;
         
       when others =>                      -- LB*, SB
         MM_excp_type <= exNOP;
-        addrError     <= FALSE;
-                     
+        addrError    <= FALSE;
+        i_stage_mm   := FALSE;
     end case;
+
+    addrErr_stage_mm <= i_stage_mm;
     
     -- assert MM_excp_type = exNOP  -- DEBUG
     --   report "SIMULATION ERROR -- data addressing error: " &
@@ -1335,8 +1352,10 @@ begin
 
   end process U_EX_ADDR_ERR_EXCP; ----------------------------------
 
+  EX_addr <= phy_d_addr;                 -- with TLB  
 
-
+  assert true or ( (phy_d_addr = v_addr) and (EX_aVal = '0') )  -- DEBUG
+    report "mapping mismatch V:P "& SLV32HEX(v_addr) &":"& SLV32HEX(phy_d_addr);
   
   
   -- ----------------------------------------------------------------------
@@ -1346,7 +1365,8 @@ begin
               EX_muxC,MM_muxC, EX_aVal_cond,MM_aVal, EX_wrmem_cond,MM_wrmem,
               EX_mem_t,MM_mem_t,
               EX_A,MM_A, alu_fwd_B,MM_B,
-              result,MM_result, HI,MM_HI, LO,MM_LO,
+              result,MM_result, EX_addr,MM_addr,
+              HI,MM_HI, LO,MM_LO,
               alu_move_ok,MM_alu_move_ok, EX_move,MM_move,
               EX_pc_p8,MM_pc_p8);
 
@@ -1370,9 +1390,8 @@ begin
   
   d_addr <= d_addr_pre;  -- without TLB
 
-  -- d_addr <= phy_d_addr;                 -- with TLB
-  
-  MM_MEM_INTERFACE: process(MM_mem_t,MM_aVal,MM_wrmem, MM_result, rd_data_raw)
+
+  MM_MEM_INTERFACE: process(MM_mem_t,MM_aVal,MM_wrmem, MM_addr, rd_data_raw)
     variable i_d_addr : reg32;
     variable bytes_read : reg32;
     variable i_byte_sel : reg4;
@@ -1388,11 +1407,11 @@ begin
       when b"11" =>
         i_byte_sel := b"1111";              -- LW, SW, LWL, LWR
         bytes_read := rd_data_raw;
-        i_d_addr   := MM_result(31 downto 2) & b"00";   -- align reference
+        i_d_addr   := MM_addr(31 downto 2) & b"00";   -- align reference
         
       when b"10" =>
-        i_d_addr     := MM_result(31 downto 1) & '0' ;    -- align reference
-        if MM_result(1) = '0' then                      -- LH*, SH
+        i_d_addr     := MM_addr(31 downto 1) & '0' ;    -- align reference
+        if MM_addr(1) = '0' then                      -- LH*, SH
           i_byte_sel := b"0011";
           i_half     := rd_data_raw(15 downto 0);
         else
@@ -1406,8 +1425,8 @@ begin
         end if;
 
       when b"01" =>                                     -- LB*, SB
-        i_d_addr := MM_result;
-        case MM_result(1 downto 0) is
+        i_d_addr := MM_addr;
+        case MM_addr(1 downto 0) is
           when b"00"  => i_byte_sel := b"0001";
                          i_byte     := rd_data_raw(7  downto  0);
           when b"01"  => i_byte_sel := b"0010";
@@ -1424,7 +1443,7 @@ begin
         end if;
         
       when others =>
-        i_d_addr   := "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";  -- MM_result;
+        i_d_addr   := "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";  -- MM_addr;
         i_byte_sel := b"0000";
         bytes_read := (others => 'X');
 
@@ -1655,17 +1674,19 @@ begin
 
 
  
-  is_exception <=  -- TLB_excp_type when tlb_exception else
+  is_exception <=  TLB_excp_type when tlb_exception else
                    MM_excp_type   when addrError           else
                    exOvfl         when MM_ex_trapped = '1' else
                    IFaddressError when EX_PC_abort         else
                    EX_exception;
-  
+
+  exception_num <= exception_type'pos(is_exception); -- for debugging only
   
   COP0_DECODE_EXCEPTION_AND_UPDATE_STATUS:
   process (EX_a_rt, is_exception, EX_trap_instr, 
            EX_cop0_reg, EX_cop0_sel, EX_nmi, EX_interrupt,EX_int_req,
-           EX_is_delayslot, cop0_inp, EX_tr_is_equal, EX_tr_less_than,
+           next_instr_in_delay_slot, EX_is_delayslot,
+           cop0_inp, EX_tr_is_equal, EX_tr_less_than,
            INDEX, RANDOM, EntryLo0, EntryLo1, CONTEXT, PAGEMASK, WIRED,
            EntryHi, COUNT, COMPARE, STATUS, CAUSE, EPC, BadVAddr,
            rom_stall,ram_stall)
@@ -1678,8 +1699,6 @@ begin
 
   begin
 
-    exception_num <= exception_type'pos(is_exception); -- for debugging only
-
     newSTATUS    := STATUS;      
     i_epc_update := '1';
     i_epc_source := b"000";
@@ -1697,7 +1716,6 @@ begin
     interrupt_taken <= '0';
     trap_taken      <= '0';
     ExcCode         <= cop0code_NULL;
-    -- BadVAddr_update <= '1';
     EX_mfc0         <= '0';
 
     newSTATUS             := STATUS;    -- preserve as needed
@@ -1844,6 +1862,7 @@ begin
 
       when exOvfl =>                    -- OVERFLOW happened one cycle earlier
         newSTATUS(STATUS_EXL) := '1';   -- at exception level
+        newSTATUS(STATUS_IE)  := '0';   -- disable interrupts
         exception_taken <= '1';
         i_update        := '1';
         i_update_r      := cop0reg_STATUS;
@@ -1858,12 +1877,12 @@ begin
       when IFaddressError | MMaddressErrorLD | MMaddressErrorST =>
         -- fetch/load/store from UNALIGNED ADDRESS
         newSTATUS(STATUS_EXL) := '1';   -- at exception level
+        newSTATUS(STATUS_IE)  := '0';   -- disable interrupts
         exception_taken <= '1';
         i_update        := '1';
         i_update_r      := cop0reg_STATUS;
         i_epc_update    := '0';
         i_excp_PCsel    := PCsel_EXC_0180; -- PC <= exception_0180
-        -- BadVAddr_update <= '0';
         if is_exception = MMaddressErrorST then
           ExcCode <= cop0code_AdES;
         else
@@ -1871,11 +1890,9 @@ begin
         end if;
         if is_exception = IFaddressError then
           i_nullify       := '1';       -- nullify instructions in IF,RF
-          i_epc_source    := b"010";    -- bad address is in EXCP_EX_PC
-        else
-          i_epc_source    := b"010";    -- bad address is in EXCP_EX_PC
         end if;
-
+        i_epc_source    := b"010";      -- bad address is in EXCP_EX_PC
+        
       when exEHB =>                     -- stall processor to clear hazards
         i_stall    := '1';
 
@@ -1884,21 +1901,75 @@ begin
         i_stall := '1';                 -- stall the processor
         
 
-      when exTLBrefill | exTLBrefillWR =>
+      when exTLBrefillIF | exTLBrefillRD | exTLBrefillWR =>
 
         case is_exception is
-          when exTLBrefillWR => ExcCode <= cop0code_TLBS;
-          when exTLBrefill   => ExcCode <= cop0code_TLBL;
+          when exTLBrefillIF =>
+            ExcCode <= cop0code_TLBL;
+            if next_instr_in_delay_slot = '1' then   -- instr is in delay slot
+              i_epc_source := b"001";       -- RF_PC, re-execute branch/jump
+            else
+              i_epc_source := b"000";       -- PC
+            end if;
+          when exTLBrefillRD =>
+            ExcCode <= cop0code_TLBL;
+            if EX_is_delayslot = '1' then   -- instr is in delay slot
+              i_epc_source := b"010";       -- EX_PC, re-execute branch/jump
+            else
+              i_epc_source := b"001";       -- RF_PC
+            end if;
+          when exTLBrefillWR =>
+            ExcCode <= cop0code_TLBS;
+            if EX_is_delayslot = '1' then   -- instr is in delay slot
+              i_epc_source := b"010";       -- EX_PC, re-execute branch/jump
+            else
+              i_epc_source := b"001";       -- RF_PC
+            end if;
           when others => null;
         end case;
-        if EX_is_delayslot = '1' then -- instr is in delay slot
-          i_epc_source := b"010";     -- EX_PC, re-execute branch/jump
-        else
-          i_epc_source := b"001";     -- RF_PC
-        end if;
-        i_excp_PCsel := PCsel_EXC_0000; -- PC <= exception_0000
-        -- BadVAddr_update <= '0';        
+        newSTATUS(STATUS_EXL) := '1';       -- at exception level
+        newSTATUS(STATUS_IE)  := '0';       -- disable interrupts
+        i_excp_PCsel := PCsel_EXC_0000;     -- PC <= exception_0000
+        i_epc_update := '0';
+
 
+      when exTLBdblFaultIF | exTLBdblFaultRD | exTLBdblFaultWR | 
+           exTLBinvalIF | exTLBinvalRD | exTLBinvalWR | exTLBmod =>
+        case is_exception is
+          when exTLBinvalIF | exTLBdblFaultIF =>
+            ExcCode <= cop0code_TLBL;
+            if next_instr_in_delay_slot = '1' then   -- instr is in delay slot
+              i_epc_source := b"001";       -- RF_PC, re-execute branch/jump
+            else
+              i_epc_source := b"000";       -- PC
+            end if;
+          when exTLBinvalRD | exTLBdblFaultRD =>
+            ExcCode <= cop0code_TLBL;
+            if EX_is_delayslot = '1' then   -- instr is in delay slot
+              i_epc_source := b"010";       -- EX_PC, re-execute branch/jump
+            else
+              i_epc_source := b"001";       -- RF_PC
+            end if;
+          when exTLBinvalWR | exTLBdblFaultWR =>
+            ExcCode <= cop0code_TLBS;
+            if EX_is_delayslot = '1' then   -- instr is in delay slot
+              i_epc_source := b"010";       -- EX_PC, re-execute branch/jump
+            else
+              i_epc_source := b"001";       -- RF_PC
+            end if;
+          when exTLBmod =>
+            ExcCode <= cop0code_Mod;
+            if EX_is_delayslot = '1' then   -- instr is in delay slot
+              i_epc_source := b"010";       -- EX_PC, re-execute branch/jump
+            else
+              i_epc_source := b"001";       -- RF_PC
+            end if;
+          when others => null;
+        end case;
+        newSTATUS(STATUS_EXL) := '1';       -- at exception level
+        newSTATUS(STATUS_IE)  := '0';       -- disable interrupts
+        i_excp_PCsel := PCsel_EXC_0180;     -- PC <= exception_0180
+        i_epc_update := '0';
 
         
           
@@ -2109,10 +2180,11 @@ begin
 
   
   -- BadVAddr -- pg 74 ---------------------------
-
-  BadVAddr_inp <= v_addr when addrError or tlb_miss_mm else  -- D-TLB | misaligned
-                  EX_PC  when EX_PC_abort              else  -- fetch misaligned
-                  PC;                                        -- I-TLB
+                  -- Dtlb | misaligned
+  BadVAddr_inp <= v_addr when ( (addrError and addrErr_stage_mm) or
+                                (tlb_exception and tlb_stage_mm) ) else
+                  EX_PC  when EX_PC_abort                else -- fetch misaligned
+                  PC;                                         -- I-TLB
 
   BadVAddr_update <= '0' when tlb_exception or addrError else '1';
   
@@ -2240,13 +2312,12 @@ begin
   context_upd_pte <= '0' when (update = '1' and update_reg = cop0reg_Context)
                      else '1';
 
-  context_upd_bad <= '0' when tlb_exception else '1';
-  
-  tlb_context_inp <= tlb_excp_VA;
-  
-  MMU_ContextPTE: registerN generic map(9, b"000000000")
+  MMU_ContextPTE: registerN generic map(9, ContextPTE_init)
     port map (clk, rst, context_upd_pte,
               cop0_inp(31 downto 23), Context(31 downto 23));
+
+  context_upd_bad <= '0' when tlb_exception else '1';
+  tlb_context_inp <= tlb_excp_VA;
   
   MMU_ContextBAD: registerN generic map(19, b"0000000000000000000")
     port map (clk, rst, context_upd_bad, tlb_context_inp, Context(22 downto 4));
@@ -2275,8 +2346,8 @@ begin
                                or ( tlb_exception ) )
                   else not(tlb_read);
   
-  entryHi_inp <= cop0_inp when tlb_read = '0' else
-                 tlb_excp_VA & EHI_ZEROS & EntryHi(EHI_ASIDHI_BIT downto EHI_ASIDLO_BIT) when tlb_exception else
+  entryHi_inp <= tlb_excp_VA & EHI_ZEROS & EntryHi(EHI_G_BIT) & EntryHi(EHI_ASIDHI_BIT downto EHI_ASIDLO_BIT) when tlb_exception else
+                 cop0_inp when tlb_read = '0' else
                  tlb_entryhi;
   
   MMU_EntryHi: register32 generic map(x"00000000")
@@ -2284,37 +2355,73 @@ begin
 
  
   -- -- pg 41 ----------------------------------
-  MMU_exceptions: process(EX_wrmem, tlb_miss,
+  MMU_exceptions: process(EX_wrmem, EX_aVal, iaVal, tlb_miss, hit_mm, hit_pc,
                           hit_mm_v, hit_mm_d, hit_pc_v, STATUS)
+    variable i_stage_mm : boolean;  -- TRUE: fault is attributed to the data reference
   begin
 
-    TLB_excp_type <= exNOP;
+    i_stage_mm := FALSE;  -- default, so tlb_stage_MM never sees a stale value
+    -- check first for events down in the pipeline: LOADS and STORES
 
-    -- first check for events later in the pipeline: LOADS and STORES
-    
-    if tlb_miss then            -- miss, check for TLBrefill or TLBdoubleFault
-      if STATUS(STATUS_EXL) = '1' then
-        TLB_excp_type <= exTLBdblFault;
-      elsif EX_wrmem = '0' then
-        TLB_excp_type <= exTLBrefillWR;
+    if tlb_miss then      -- no match: refill, or double fault when EXL is set
+
+      if not(hit_mm) and EX_wrmem = '0' then          -- data miss on a store
+        if STATUS(STATUS_EXL) = '1' then
+          TLB_excp_type <= exTLBdblFaultWR;
+        else
+          TLB_excp_type <= exTLBrefillWR;
+        end if;
+      elsif not(hit_mm) then                          -- data miss, not a store
+        if STATUS(STATUS_EXL) = '1' then
+          TLB_excp_type <= exTLBdblFaultRD;
+        else
+          TLB_excp_type <= exTLBrefillRD;
+        end if;
+      elsif not(hit_pc) then                          -- instruction fetch miss
+        if STATUS(STATUS_EXL) = '1' then
+          TLB_excp_type <= exTLBdblFaultIF;
+        else
+          TLB_excp_type <= exTLBrefillIF;
+        end if;
+      else
+        TLB_excp_type <= exNOP;
+      end if;
+
+      if not(hit_mm) then       -- data-side miss takes priority over fetch
+        i_stage_mm := TRUE;
+      else
+        i_stage_mm := FALSE;
+      end if;
+      
+    elsif hit_mm then           -- data reference matched an entry
+      if (EX_aVal = '0' and hit_mm_v = '0') then      -- check for TLBinvalid
+        if EX_wrmem = '0' then  -- store => invalWR, otherwise invalRD
+          TLB_excp_type <= exTLBinvalWR;
+        else
+          TLB_excp_type <= exTLBinvalRD;
+        end if;
+      elsif (EX_wrmem = '0' and hit_mm_d = '0') then  -- check for TLBmodified
+        TLB_excp_type <= exTLBmod;
       else
-        TLB_excp_type <= exTLBrefill;
+        TLB_excp_type <= exNOP;
       end if;
-    elsif hit_mm_v = '0' then  -- hit; check for TLBinvalid
-      TLB_excp_type <= exTLBinval;
-    elsif EX_wrmem = '0' and hit_mm_d = '0' then  -- hit; check for TLBmodified
-      TLB_excp_type <= exTLBmod;
-    elsif hit_pc_v = '0' then  -- hit; check for TLBinvalid
-      TLB_excp_type <= exTLBinval;
+      i_stage_mm := TRUE;
+      
+    elsif (hit_pc and hit_pc_v = '0' and iaVal = '0') then -- check for TLBinvalid
+
+      TLB_excp_type <= exTLBinvalIF;
+      i_stage_mm := FALSE;
+
     else
       TLB_excp_type <= exNOP;
     end if;
 
-    tlb_exception <= FALSE ; -- (TLB_excp_type /= exNOP);
-
+    tlb_stage_MM <= i_stage_mm;  -- selects v_addr (TRUE) or PC (FALSE) as faulting VA
+    
   end process MMU_exceptions; -- -----------------------------------------
 
+  tlb_exception <= (TLB_excp_type /= exNOP);
 
+  TLB_excp_num  <= exception_type'pos(TLB_excp_type); -- for debugging only
   
   
   -- MMU TLB TAG-DATA array -- pg 17 ------------------------------------
@@ -2445,9 +2552,9 @@ begin
   tlb_entryLo1(ELO_G_BIT) <= e_lo1(DAT_G_BIT);
 
 
-  e_hi_inp <= EntryHi;
-  e_hi_inp(TAG_G_BIT) <= EntryLo0(ELO_G_BIT) and EntryLo1(ELO_G_BIT);  -- pg64
-  e_hi_inp(TAG_Z_BIT) <= '0';
+  e_hi_inp <= EntryHi(EHI_AHI_BIT downto EHI_ALO_BIT) & EHI_ZEROS &
+              (EntryLo0(ELO_G_BIT) and EntryLo1(ELO_G_BIT)) &
+              EntryHi(EHI_ASIDHI_BIT downto EHI_ASIDLO_BIT);  -- pg64
 
   tlb_tag_inp <= e_hi_inp;
 
@@ -2462,16 +2569,15 @@ begin
   mm <= entryHi(EHI_AHI_BIT downto EHI_ALO_BIT) when tlb_probe = '1' else
         v_addr(VA_HI_BIT downto VA_LO_BIT);
 
-  tlb_miss_MM <= not(hit_mm) and (EX_mem_t /= b"0000");
-  tlb_miss_IF <= not(hit_pc);
-
-  tlb_miss    <=  tlb_miss_IF or tlb_miss_MM;
+  tlb_miss <= ( (not(hit_pc) and (iAval = '0')) or
+                (not(hit_mm) and ((EX_mem_t /= b"0000") and (EX_aval = '0'))) );
   
-  tlb_excp_VA <= v_addr(VA_HI_BIT downto VA_LO_BIT) when tlb_miss_MM else
+  tlb_excp_VA <= v_addr(VA_HI_BIT downto VA_LO_BIT) when tlb_stage_MM else
                  PC(VA_HI_BIT downto VA_LO_BIT);
 
 
--- TLB entry 0 -- initialized to 1st page of ROM
+  -- TLB entry 0 -- initialized to 1st page of ROM
+  --   this mapping must be pinned down at all times (Wired >= 2, see next entry)
   
   MMU_TAG0: register32 generic map(MMU_ini_tag_ROM0)
     port map (clk, rst, tlb_tag0_updt, tlb_tag_inp, tlb_tag0);
@@ -2491,15 +2597,17 @@ begin
                               tlb_tag0(ASID_HI_BIT downto 0) = EntryHi(ASID_HI_BIT downto 0) ) )
              else FALSE;
 
+  
 
-  -- TLB entry 1 -- initialized to 1st page of ROM
+  -- TLB entry 1 -- initialized to page with I/O devices
+  --   this mapping must be pinned down at all times (Wired >= 2)
 
-  MMU_TAG1: register32 generic map(MMU_ini_tag_ROM2)
+  MMU_TAG1: register32 generic map(MMU_ini_tag_IO)
     port map (clk, rst, tlb_tag1_updt, tlb_tag_inp, tlb_tag1);
 
-  MMU_DAT1_0: registerN generic map(DAT_REG_BITS, MMU_ini_dat_ROM2)  -- d=1,v=1,g=1
+  MMU_DAT1_0: registerN generic map(DAT_REG_BITS, MMU_ini_dat_IO0)  -- d=1,v=1,g=1
     port map (clk, rst, tlb_dat1_updt, tlb_dat0_inp, tlb_dat1_0);
-  MMU_DAT1_1: registerN generic map(DAT_REG_BITS, MMU_ini_dat_ROM3)  -- d=1,v=1,g=1
+  MMU_DAT1_1: registerN generic map(DAT_REG_BITS, MMU_ini_dat_IO1)  -- d=1,v=1,g=1
     port map (clk, rst, tlb_dat1_updt, tlb_dat1_inp, tlb_dat1_1);
 
   hit1_pc <= TRUE when (tlb_tag1(VA_HI_BIT downto VA_LO_BIT) = PC(VA_HI_BIT downto VA_LO_BIT)
@@ -2623,15 +2731,14 @@ begin
              else FALSE;
 
 
+  -- TLB entry 7 -- initialized to 3rd page of ROM  
   
-  -- TLB entry 7 -- initialized to I/O page
-  
-  MMU_TAG7: register32 generic map(MMU_ini_tag_IO)
+  MMU_TAG7: register32 generic map(MMU_ini_tag_ROM2)
     port map (clk, rst, tlb_tag7_updt, tlb_tag_inp, tlb_tag7);
 
-  MMU_DAT7_0: registerN generic map(DAT_REG_BITS, MMU_ini_dat_IO0)  -- d=1,v=1,g=1
+  MMU_DAT7_0: registerN generic map(DAT_REG_BITS, MMU_ini_dat_ROM2)  -- d=1,v=1,g=1
     port map (clk, rst, tlb_dat7_updt, tlb_dat0_inp, tlb_dat7_0);
-  MMU_DAT7_1: registerN generic map(DAT_REG_BITS, MMU_ini_dat_IO1)  -- d=1,v=1,g=1
+  MMU_DAT7_1: registerN generic map(DAT_REG_BITS, MMU_ini_dat_ROM3)  -- d=1,v=1,g=1
     port map (clk, rst, tlb_dat7_updt, tlb_dat1_inp, tlb_dat7_1);
 
   hit7_pc <= TRUE when (tlb_tag7(VA_HI_BIT downto VA_LO_BIT) = PC(VA_HI_BIT downto VA_LO_BIT)
@@ -2720,17 +2827,17 @@ begin
                    tlb_dat6_1 when 6,
                    tlb_dat7_1 when others;
 
-  tlb_ppn_mm <= tlb_ppn_mm0(DAT_AHI_BIT downto DAT_ALO_BIT) when MM_result(PAGE_SZ_BITS) = '0' else
+  tlb_ppn_mm <= tlb_ppn_mm0(DAT_AHI_BIT downto DAT_ALO_BIT) when v_addr(PAGE_SZ_BITS) = '0' else
                 tlb_ppn_mm1(DAT_AHI_BIT downto DAT_ALO_BIT);
   
-  hit_mm_v   <= tlb_ppn_mm0(DAT_V_BIT) when PC(PAGE_SZ_BITS) = '0' else
+  hit_mm_v   <= tlb_ppn_mm0(DAT_V_BIT) when v_addr(PAGE_SZ_BITS) = '0' else
                 tlb_ppn_mm1(DAT_V_BIT);
 
-  hit_mm_d   <= tlb_ppn_mm0(DAT_D_BIT) when PC(PAGE_SZ_BITS) = '0' else
+  hit_mm_d   <= tlb_ppn_mm0(DAT_D_BIT) when v_addr(PAGE_SZ_BITS) = '0' else
                 tlb_ppn_mm1(DAT_D_BIT);
 
   
-  phy_d_addr <= tlb_ppn_mm(PPN_BITS-1 downto 0) & d_addr_pre(PAGE_SZ_BITS-1 downto 0);
+  phy_d_addr <= tlb_ppn_mm(PPN_BITS-1 downto 0) & v_addr(PAGE_SZ_BITS-1 downto 0);
 
   
   -- MMU-TLB == end =======================================================
@@ -2748,7 +2855,7 @@ begin
   -- ----------------------------------------------------------------------    
   PIPESTAGE_EXCP_MM_WB: reg_excp_MM_WB
     port map (clk, rst, excp_MM_WB_ld, MM_can_trap,WB_can_trap,   
-              MM_excp_type, WB_excp_type, MM_PC,WB_PC,
+              MM_excp_type, WB_excp_type,
               MM_LLbit,WB_LLbit, MM_abort,WB_abort,
               MM_cop0_a_c,WB_cop0_a_c, MM_cop0_val,WB_cop0_val);
 
diff --git a/cMIPS/vhdl/exception.vhd b/cMIPS/vhdl/exception.vhd
index e40fd79..86d78a7 100644
--- a/cMIPS/vhdl/exception.vhd
+++ b/cMIPS/vhdl/exception.vhd
@@ -188,8 +188,6 @@ entity reg_excp_MM_WB is
        WB_can_trap:   out reg2;
        MM_excp_type:  in  exception_type;
        WB_excp_type:  out exception_type;
-       MM_PC:         in  reg32;
-       WB_PC:         out reg32;
        MM_cop0_LLbit: in  std_logic;
        WB_cop0_LLbit: out std_logic;
        MM_abort:      in  boolean;
@@ -212,7 +210,6 @@ begin
       if ld = '0' then
         WB_excp_type  <= MM_excp_type  ;
         WB_can_trap   <= MM_can_trap   ;
-        WB_PC         <= MM_PC         ;
         WB_cop0_LLbit <= MM_cop0_LLbit ;
         WB_abort      <= MM_abort      ;
         WB_cop0_a_c   <= MM_cop0_a_c   ;
diff --git a/cMIPS/vhdl/packageExcp.vhd b/cMIPS/vhdl/packageExcp.vhd
index df6a253..85578fe 100644
--- a/cMIPS/vhdl/packageExcp.vhd
+++ b/cMIPS/vhdl/packageExcp.vhd
@@ -29,19 +29,20 @@ package p_EXCEPTION is
                           exBREAK, exTRAP, exSYSCALL,  -- 8
                           exRESV_INSTR, exWAIT,  -- 10
                           IFaddressError, MMaddressErrorLD, MMaddressErrorST, --13
-                          exTLBrefill, exTLBrefillWR,  -- 15
-                          exTLBdblFault, exTLBinval, exTLBmod, -- 18
-                          exOvfl,       -- 19
-                          exLL,exSC,    -- 20,21  these are handled by COP0
-                          exEHB,        -- 22
-                          exTLBP, exTLBR, exTLBWI, exTLBWR,  -- 26
-                          exDERET,  -- 27
+                          exTLBrefillIF, exTLBrefillRD, exTLBrefillWR,  -- 16
+                          exTLBdblFaultIF, exTLBdblFaultRD, exTLBdblFaultWR, -- 19
+                          exTLBinvalIF, exTLBinvalRD, exTLBinvalWR,  -- 22
+                          exTLBmod, exOvfl,  -- 24
+                          exLL,exSC,    -- 25,26  these are handled by COP0
+                          exEHB,        -- 27
+                          exTLBP, exTLBR, exTLBWI, exTLBWR,  -- 31
+                          exDERET,  -- 32
                           invalid_exception);
 
   attribute enum_encoding of exception_type : type is
-    "000000 000001 000010 000011 000100 000101 000110 000111 001000 001001 001010 001011 001100 001101 001110 001111 010000 010001 010010 010011 010100 010101 010110 010111 011000 011001 011010 011011 011100";
+    "000000 000001 000010 000011 000100 000101 000110 000111 001000 001001 001010 001011 001100 001101 001110 001111 010000 010001 010010 010011 010100 010101 010110 010111 011000 011001 011010 011011 011100 011101 011110 011111 100000 100001";
 
---  011101 011110 011111 100000 100001 100010";
+--   100010";
 
 
   
diff --git a/cMIPS/vhdl/packageMemory.vhd b/cMIPS/vhdl/packageMemory.vhd
index 135c8b5..9821eda 100644
--- a/cMIPS/vhdl/packageMemory.vhd
+++ b/cMIPS/vhdl/packageMemory.vhd
@@ -41,16 +41,16 @@ package p_MEMORY is
   -- begin DO NOT change these names as several scripts depend on them --
   --  you may change the values, not names nor formatting              --
   constant x_INST_BASE_ADDR : reg32   := x"00000000";
-  constant x_INST_MEM_SZ    : reg32   := x"00002000";  
-  constant x_DATA_BASE_ADDR : reg32   := x"00400000";  
-  constant x_DATA_MEM_SZ    : reg32   := x"00002000";
+  constant x_INST_MEM_SZ    : reg32   := x"00004000";  
+  constant x_DATA_BASE_ADDR : reg32   := x"04000000";  
+  constant x_DATA_MEM_SZ    : reg32   := x"00004000";
   constant x_IO_BASE_ADDR   : reg32   := x"0F000000";
   constant x_IO_MEM_SZ      : reg32   := x"00002000";
   constant x_IO_ADDR_RANGE  : reg32   := x"00000020";
   constant x_EXCEPTION_0000 : reg32   := x"00000080";
   constant x_EXCEPTION_0100 : reg32   := x"000000A0";
   constant x_EXCEPTION_0180 : reg32   := x"000000C0";
-  constant x_EXCEPTION_0200 : reg32   := x"00000140";
+  constant x_EXCEPTION_0200 : reg32   := x"00000200";
   constant x_ENTRY_POINT    : reg32   := x"00000300";
   -- end DO NOT change these names --
 
@@ -85,7 +85,7 @@ package p_MEMORY is
     IO_BASE_ADDR + (IO_MAX_NUM_DEVS - 1)*IO_ADDR_RANGE;
 
 
-  -- DATA CACHE parameters ----------------------------------------------
+  -- DATA CACHE parameters ================================================
   
   -- The combination of capacity, associativity and block/line size
   --  MUST be such that DC_INDEX_BITS >= 6 (64 sets/way)
@@ -110,7 +110,7 @@ package p_MEMORY is
     std_logic_vector(to_signed(DC_NUM_WAYS - 1, 3));
 
   
-  -- INSTRUCTION CACHE parameters ---------------------------------------
+  -- INSTRUCTION CACHE parameters =========================================
 
   -- The combination of capacity, associativity and block/line size
   --  MUST be such that IC_INDEX_BITS >= 6 (64 sets/via)
@@ -143,7 +143,7 @@ package p_MEMORY is
   constant icache_Stats_hit   : reg3 := "101";
 
   
-  -- MMU parameters -----------------------------------------------------
+  -- MMU parameters ========================================================
 
   -- constants for CONFIG1 cop0 register (Table 8-24 pg 103)
   constant MMU_CAPACITY : natural := 8;
@@ -165,11 +165,11 @@ package p_MEMORY is
   constant ASID_LO_BIT  : natural := 0;
 
   constant EHI_ASIDLO_BIT : natural := 0;
-  constant EHI_ASIDHI_BIT : natural := 7;  
+  constant EHI_ASIDHI_BIT : natural := 7;
+  constant EHI_G_BIT    : natural := 8;
   constant EHI_ALO_BIT  : natural := PAGE_SZ_BITS + 1;  -- maps 2 phy-pages
   constant EHI_AHI_BIT  : natural := 31;
-  constant EHI_ZEROS    : std_logic_vector(PAGE_SZ_BITS-EHI_AHI_BIT downto 0) :=
-    (others => '0');    
+  constant EHI_ZEROS    : std_logic_vector(PAGE_SZ_BITS-EHI_G_BIT-1 downto 0) := (others => '0');    
   
   constant TAG_ASIDLO_BIT : natural := 0;
   constant TAG_ASIDHI_BIT : natural := 7;
@@ -195,6 +195,8 @@ package p_MEMORY is
   constant DAT_AHI_BIT  : natural := DAT_ALO_BIT + PPN_BITS - 1;
   constant DAT_REG_BITS : natural := DAT_ALO_BIT + PPN_BITS;
 
+  constant ContextPTE_init : reg9 := b"000000000";
+  
   subtype mmu_dat_reg is std_logic_vector (DAT_AHI_BIT downto 0);
   
   subtype  MMU_idx_bits is std_logic_vector(MMU_CAPACITY_BITS-1 downto 0);
@@ -207,6 +209,8 @@ package p_MEMORY is
   constant tag_ones  : std_logic_vector(VABITS-1 downto PAGE_SZ_BITS+1) := (others => '1');
   constant tag_mask  : reg32 := tag_ones & tag_zeros;
 
+
+  -- physical addresses for 8 ROM pages
   
   constant x_ROM_PPN_0 : reg32 := std_logic_vector(to_signed(INST_BASE_ADDR + 0*PAGE_SZ, 32));
   constant x_ROM_PPN_1 : reg32 := std_logic_vector(to_signed(INST_BASE_ADDR + 1*PAGE_SZ, 32));
@@ -242,7 +246,8 @@ package p_MEMORY is
    x_ROM_PPN_7(PABITS-1 downto PAGE_SZ_BITS) & b"000111"; -- d,v,g=1
 
 
-
+  -- physical addresses for 8 RAM pages
+  
   constant x_RAM_PPN_0 : reg32 := std_logic_vector(to_signed(DATA_BASE_ADDR + 0*PAGE_SZ, 32));
   constant x_RAM_PPN_1 : reg32 := std_logic_vector(to_signed(DATA_BASE_ADDR + 1*PAGE_SZ, 32));
   constant x_RAM_PPN_2 : reg32 := std_logic_vector(to_signed(DATA_BASE_ADDR + 2*PAGE_SZ, 32));
@@ -277,6 +282,8 @@ package p_MEMORY is
    x_RAM_PPN_7(PABITS-1 downto PAGE_SZ_BITS) & b"000111"; -- d,v,g=1
 
 
+  -- physical addresses for 2 pages reserved for I/O devices
+  
   constant x_IO_PPN_0 : reg32 := std_logic_vector(to_signed(IO_BASE_ADDR + 0*PAGE_SZ, 32));
   constant x_IO_PPN_1 : reg32 := std_logic_vector(to_signed(IO_BASE_ADDR + 1*PAGE_SZ, 32));
 
diff --git a/cMIPS/vhdl/pipestages.vhd b/cMIPS/vhdl/pipestages.vhd
index bd4396f..a927519 100644
--- a/cMIPS/vhdl/pipestages.vhd
+++ b/cMIPS/vhdl/pipestages.vhd
@@ -164,6 +164,8 @@ entity reg_EX_MM is
        MM_B:       out reg32;
        EX_result:  in  reg32;
        MM_result:  out reg32;
+       EX_addr:    in  reg32;
+       MM_addr:    out reg32;
        HI:         in  reg32;
        MM_HI:      out reg32;
        LO:         in  reg32;
@@ -196,6 +198,7 @@ begin
         MM_A        <= EX_A      ;
         MM_B        <= EX_B      ;
         MM_result   <= EX_result ;
+        MM_addr     <= EX_addr   ;
         MM_HI       <= HI        ;
         MM_LO       <= LO        ;
         MM_alu_move_ok <= EX_alu_move_ok ;
diff --git a/cMIPS/vhdl/tb_cMIPS.vhd b/cMIPS/vhdl/tb_cMIPS.vhd
index 531cd59..e87a138 100644
--- a/cMIPS/vhdl/tb_cMIPS.vhd
+++ b/cMIPS/vhdl/tb_cMIPS.vhd
@@ -494,8 +494,8 @@ begin  -- TB
   not_waiting <= (inst_wait and data_wait); --  and io_wait);
 
   -- irq <= b"000000"; -- NO interrupt requests
-  irq <= b"0000" & uart_irq & counter_irq; -- uart+counter interrupts
-  -- irq <= b"00000" & counter_irq; -- counter interrupts
+  irq <= uart_irq & counter_irq & b"0000"; -- uart+counter interrupts
+  -- irq <= counter_irq & b"00000"; -- counter interrupts
   nmi <= '0'; -- input port to TB
 
   
-- 
GitLab