diff --git a/cMIPS/docs/cMIPS.pdf b/cMIPS/docs/cMIPS.pdf
index 86f57878f3b57c5420e6714db6763ca5b637cc4b..bc980295cb6293853cf80e63065c716fa2ca2aba 100644
Binary files a/cMIPS/docs/cMIPS.pdf and b/cMIPS/docs/cMIPS.pdf differ
diff --git a/cMIPS/tests/mmu_double.s b/cMIPS/tests/mmu_double.s
index 656fc7f9b9ce705254371927c75b2b6f596a775f..729906decd40bcd032b20823d33a34b12438844b 100644
--- a/cMIPS/tests/mmu_double.s
+++ b/cMIPS/tests/mmu_double.s
@@ -105,7 +105,7 @@ _excp_180: tlbp         # probe for the guilty entry
         li   $30, '\n'
         sw   $30, x_IO_ADDR_RANGE($20)
 
-        eret			# return to EPC saved on the first fault
+        eret			# return to the EPC saved on the first fault
         .end _excp_180		#   the second fault refills TLB
 
 	
@@ -118,7 +118,7 @@ _excp_180: tlbp         # probe for the guilty entry
 	
 	## dirty trick: there is not enough memory for a full PT, thus
 	##   we set the PT at the bottom of RAM addresses and have
-	##   Context pointing into that address range
+	##   Context pointing to that address range
 
 	.set PTbase, x_DATA_BASE_ADDR
 	.ent main
@@ -148,7 +148,7 @@ main:	la   $20, x_IO_BASE_ADDR
 	sw  $7, 8($4)
 	sw  $0, 0xc($4)
 
-	li $5, 7              # 2nd ROM mapping
+	li $5, 2              # 2nd ROM mapping
 	mtc0 $5, cop0_Index
 	nop
 	tlbr
@@ -199,8 +199,9 @@ main:	la   $20, x_IO_BASE_ADDR
 	nop
 	nop
 
+	##
 	## cause a TLB miss
-
+	##
 	jal  there
 	nop
 	
diff --git a/cMIPS/tests/mmu_refill.s b/cMIPS/tests/mmu_refill.s
index b087aac75985e3995e649f5f8c4459e32b4ec3b0..768772d30c6b6d0a720bb7c96d0ee7555cccf975 100644
--- a/cMIPS/tests/mmu_refill.s
+++ b/cMIPS/tests/mmu_refill.s
@@ -115,7 +115,8 @@ main:	la   $20, x_IO_BASE_ADDR
 	sw  $7, 8($4)
 	sw  $0, 12($4)
 
-	li $5, 7              # 2nd ROM mapping
+
+	li   $5, 2            # 2nd ROM mapping on 2nd PT element
 	mtc0 $5, cop0_Index
 	nop
 	tlbr
@@ -125,17 +126,39 @@ main:	la   $20, x_IO_BASE_ADDR
 	mfc0 $7, cop0_EntryLo1
 	# sw   $7, 0($20)
 
-	# 2nd entry: PPN2 & PPN3 ROM
+
+	# 2nd entry:  PPN2 & PPN3 I/O
 	sw  $6, 16($4)
 	sw  $0, 20($4)
 	sw  $7, 24($4)
 	sw  $0, 28($4)
 
+	
+	li $5, 3             # 3rd ROM mapping on 3rd PT element
+	mtc0 $5, cop0_Index
+	nop
+	tlbr
+
+	mfc0 $6, cop0_EntryLo0
+	# sw   $6, 0($20)
+	mfc0 $7, cop0_EntryLo1
+	# sw   $7, 0($20)
+
+	# 3rd entry: PPN4 & PPN5 ROM
+	sw  $6, 32($4)
+	sw  $0, 36($4)
+	sw  $7, 40($4)
+	sw  $0, 44($4)
+
 	# load Context with PTbase
 	mtc0 $4, cop0_Context
 	
+
 	## change mapping for 2nd ROM TLB entry, thus causing a miss
 
+	li   $5, 2          # 2nd ROM mapping
+	mtc0 $5, cop0_Index
+
 	li   $9, 0x2000
 	sll  $9, $9, 8
 
@@ -144,9 +167,13 @@ main:	la   $20, x_IO_BASE_ADDR
 	add  $8, $9, $8     # change tag
 
 	mtc0 $8, cop0_EntryHi
-
+	
 	tlbwi		    # and write it back to TLB
 
+	nop
+	nop
+	nop
+	
 	## cause a TLB miss
 
 	jal  there
diff --git a/cMIPS/tests/mmu_refill2.s b/cMIPS/tests/mmu_refill2.s
index 7fff9e9a1ee5008070e47ac36a6ff8925ccbc9f2..65ae103e15e7da340b5d549bf1d91751a58465d4 100644
--- a/cMIPS/tests/mmu_refill2.s
+++ b/cMIPS/tests/mmu_refill2.s
@@ -64,9 +64,9 @@ _excp:  mfc0 $k1, cop0_Context
         mtc0 $k0, cop0_EntryLo0    # EntryLo0 <- k0 = even element
         mtc0 $k1, cop0_EntryLo1    # EntryLo1 <- k1 = odd element
 	##
-	## cause another miss on 2nd ROM mapping
+	## cause, on purpose, another miss on 2nd ROM mapping
 	##
-	li   $k0, 7		   
+	li   $k0, 2
 	mtc0 $k0, cop0_Index
 	ehb
         tlbwi                      # update TLB
@@ -128,7 +128,7 @@ main:	la   $20, x_IO_BASE_ADDR
 
 	
 	# 2nd entry: PPN2 & PPN3 ROM
-	li $5, 7              # 2nd ROM mapping
+	li $5, 2              # 2nd ROM mapping
 	mtc0 $5, cop0_Index
 	nop
 	tlbr
@@ -145,8 +145,8 @@ main:	la   $20, x_IO_BASE_ADDR
 	sw  $0, 0x1c($4)
 
 
-	# 1024th entry: PPN4 & PPN5 RAM
-	li   $5, 6           # 3rd RAM mapping
+	# 1024th entry: PPN6 & PPN7 RAM
+	li   $5, 7           # 3rd RAM mapping
 	mtc0 $5, cop0_Index
 	nop
 	tlbr
@@ -168,6 +168,8 @@ main:	la   $20, x_IO_BASE_ADDR
 	
 	
 	## change mapping for 3rd RAM TLB entry, thus causing a miss
+	li   $5, 7           # 3rd RAM mapping
+	mtc0 $5, cop0_Index
 
 	li   $9, 0x8000
 	sll  $9, $9, 8
@@ -178,8 +180,14 @@ main:	la   $20, x_IO_BASE_ADDR
 
 	tlbwi		    # and write it back to TLB (Index = 6)
 
+	nop
+	nop
+	nop
+	
 	##
 	## cause miss on the load in the delay slot - miss on 6th RAM page
+	##   then a second miss since handler (purposefully) updates the
+	##   TLB entry for the 2nd ROM page
 	##
 	li  $15, (x_DATA_BASE_ADDR + 6*4096) # VPN2
 		
diff --git a/cMIPS/tests/mmu_tlbwi.expected b/cMIPS/tests/mmu_tlbwi.expected
index 58187bee666828589bf04f80e9efdb83f560b131..1a4d0eb4c0bf838af0b31ee9df606919f24a684b 100644
--- a/cMIPS/tests/mmu_tlbwi.expected
+++ b/cMIPS/tests/mmu_tlbwi.expected
@@ -28,10 +28,10 @@
 0000091b
 00000c1b
 6
+00044000
+00001107
+00001147
+7
 00046000
 00001187
 000011c7
-7
-00002000
-00000087
-000000c7
diff --git a/cMIPS/tests/uart_irx.c b/cMIPS/tests/uart_irx.c
index 1641f476af1fdf62272250161c84a08e167bb085..7e3e40c4695f4b9d4b4fd8a86ddc37306c04ee6f 100644
--- a/cMIPS/tests/uart_irx.c
+++ b/cMIPS/tests/uart_irx.c
@@ -8,16 +8,19 @@ typedef struct control { // control register fields (uses only ls byte)
 } Tcontrol;
 
 typedef struct status { // status register fields (uses only ls byte)
+#if 1
   int s;
-  // int ign   : 24,      // ignore uppermost bits
-  //  ign7    : 1,        // ignored (bit 7)
-  //  txEmpty : 1,        // TX register is empty (bit 6)
-  //  rxFull  : 1,        // octet available from RX register (bit 5)
-  //  int_TX_empt: 1,     // interrupt pending on TX empty (bit 4)
-  //  int_RX_full: 1,     // interrupt pending on RX full (bit 3)
-  //  ign2    : 1,        // ignored (bit 2)
-  //  framing : 1,        // framing error (bit 1)
-  //  overun  : 1;        // overun error (bit 0)
+#else
+  int ign : 24,       // ignore uppermost bits
+  ign7    : 1,        // ignored (bit 7)
+  txEmpty : 1,        // TX register is empty (bit 6)
+  rxFull  : 1,        // octet available from RX register (bit 5)
+  int_TX_empt: 1,     // interrupt pending on TX empty (bit 4)
+  int_RX_full: 1,     // interrupt pending on RX full (bit 3)
+  ign2    : 1,        // ignored (bit 2)
+  framing : 1,        // framing error (bit 1)
+  overun  : 1;        // overrun error (bit 0)
+#endif
 } Tstatus;
 
 #define RXfull  0x00000020
diff --git a/cMIPS/tests/uartrx.c b/cMIPS/tests/uartrx.c
index 5c7d1bbaddc4fccb599f11d47cdfb42691246727..968ae8ac759e318119ef9ef5ddd5b4de5d2f674f 100644
--- a/cMIPS/tests/uartrx.c
+++ b/cMIPS/tests/uartrx.c
@@ -2,7 +2,7 @@
 
 typedef struct control { // control register fields (uses only ls byte)
   int ign   : 24,        // ignore uppermost bits
-    rts     : 1,         // Request to Send
+    rts     : 1,         // Request to Send out (bit 7)
     ign2    : 2,         // bits 6,5 ignored
     intTX   : 1,         // interrupt on TX buffer empty (bit 4)
     intRX   : 1,         // interrupt on RX buffer full (bit 3)
@@ -10,16 +10,19 @@ typedef struct control { // control register fields (uses only ls byte)
 } Tcontrol;
 
 typedef struct status { // status register fields (uses only ls byte)
+#if 0
   int s;
-  // int ign   : 24,      // ignore uppermost bits
-  //  ign7    : 1,        // ignored (bit 7)
-  //  txEmpty : 1,        // TX register is empty (bit 6)
-  //  rxFull  : 1,        // octet available from RX register (bit 5)
-  //  int_TX_empt: 1,     // interrupt pending on TX empty (bit 4)
-  //  int_RX_full: 1,     // interrupt pending on RX full (bit 3)
-  //  ign2    : 1,        // ignored (bit 2)
-  //  framing : 1,        // framing error (bit 1)
-  //  overun  : 1;        // overun error (bit 0)
+#else
+  int ign : 24,       // ignore uppermost 3 bytes
+  cts     : 1,        // Clear To Send inp=1 (bit 7)
+  txEmpty : 1,        // TX register is empty (bit 6)
+  rxFull  : 1,        // octet available from RX register (bit 5)
+  int_TX_empt: 1,     // interrupt pending on TX empty (bit 4)
+  int_RX_full: 1,     // interrupt pending on RX full (bit 3)
+  ign2    : 1,        // ignored (bit 2)
+  framing : 1,        // framing error (bit 1)
+  overun  : 1;        // overrun error (bit 0)
+#endif
 } Tstatus;
 
 #define RXfull  0x00000020
@@ -70,9 +73,9 @@ int main(void) { // receive a string through the UART serial interface
   do {
     i = i+1;
 
-    // while ( (state = uart->cs.stat.txEmpty) != 1 )
-    while ( ! ( (state = uart->cs.stat.s) & RXfull ) )
-      ;
+    //  while ( ! ( (state = uart->cs.stat.s) & RXfull ) )
+    while ( (state = (int)uart->cs.stat.rxFull) == 0 )
+      if (state == 0) cmips_delay(1); // just do something with state
     s[i] = (char)uart->d.rx;
     to_stdout( s[i] );
 
diff --git a/cMIPS/tests/uarttx.c b/cMIPS/tests/uarttx.c
index ba59e25fd1b4b029b8db2c2ab452b49fce295f48..67c3af530fbdd80afd55434214497c52b445487e 100644
--- a/cMIPS/tests/uarttx.c
+++ b/cMIPS/tests/uarttx.c
@@ -3,7 +3,7 @@
 
 typedef struct control { // control register fields (uses only ls byte)
   int ign   : 24,        // ignore uppermost bits
-    rts     : 1,         // Request to Send
+    rts     : 1,         // Request to Send out (bit 7)
     ign2    : 2,         // bits 6,5 ignored
     intTX   : 1,         // interrupt on TX buffer empty (bit 4)
     intRX   : 1,         // interrupt on RX buffer full (bit 3)
@@ -11,16 +11,19 @@ typedef struct control { // control register fields (uses only ls byte)
 } Tcontrol;
 
 typedef struct status { // status register fields (uses only ls byte)
+#if 0
   int s;
-  // int ign   : 24,      // ignore uppermost bits
-  //  ign7    : 1,        // ignored (bit 7)
-  //  txEmpty : 1,        // TX register is empty (bit 6)
-  //  rxFull  : 1,        // octet available from RX register (bit 5)
-  //  int_TX_empt: 1,     // interrupt pending on TX empty (bit 4)
-  //  int_RX_full: 1,     // interrupt pending on RX full (bit 3)
-  //  ign2    : 1,        // ignored (bit 2)
-  //  framing : 1,        // framing error (bit 1)
-  //  overun  : 1;        // overun error (bit 0)
+#else
+  int ign : 24,       // ignore uppermost 3 bytes
+  cts     : 1,        // Clear To Send inp=1 (bit 7)
+  txEmpty : 1,        // TX register is empty (bit 6)
+  rxFull  : 1,        // octet available from RX register (bit 5)
+  int_TX_empt: 1,     // interrupt pending on TX empty (bit 4)
+  int_RX_full: 1,     // interrupt pending on RX full (bit 3)
+  ign2    : 1,        // ignored (bit 2)
+  framing : 1,        // framing error (bit 1)
+  overun  : 1;        // overrun error (bit 0)
+#endif
 } Tstatus;
 
 #define RXfull  0x00000020
@@ -96,9 +99,9 @@ int main(void) { // send a string through the UART serial interface
   do {
 
     i = i+1;
-    // while ( (state = uart->cs.stat.txEmpty) != 1 )
-    while ( ! ( (state = uart->cs.stat.s) & TXempty ) )
-      ;
+    // while ( ! ( (state = uart->cs.stat.s) & TXempty ) )
+    while ( (state = (int)uart->cs.stat.txEmpty) == 0 )
+      if (state == 1) cmips_delay(2); // just do something with state
     uart->d.tx = (int)s[i];
 
   } while (s[i] != '\0');  // '\0' is transmitted in previous line