diff --git a/cMIPS/bin/compile.sh b/cMIPS/bin/compile.sh
index dab96317068f179da3e5724ffe6631a4e1718a49..831eb43194eb4992fa77b76e1f14161205e39b2c 100755
--- a/cMIPS/bin/compile.sh
+++ b/cMIPS/bin/compile.sh
@@ -131,8 +131,8 @@ if [ $synth = true ];
 then S="-D FOR_SYNTHESIS" ; 
 else S="-U FOR_SYNTHESIS" ; fi
 
-(mips-gcc -O${level} $warn -DcMIPS -mno-gpopt -I"${include}" -S ${src} $S\
-          -o ${asm}  ||  exit 1) && \
+(mips-gcc -O${level} $warn -DcMIPS -mno-gpopt -I"${include}" \
+          -S ${src} $S -o ${asm}  ||  exit 1) && \
 mips-gcc -O1 -DcMIPS -mno-gpopt -I"${include}" -S ${c_io}.c -o ${c_io}.s $S &&\
 mips-as -O1 -EL -mips32 -I "${include}" -o ${obj} ${asm} && \
 mips-as -O1 -EL -mips32 -I "${include}" -o ${c_start}.o ${c_start}.s && \
diff --git a/cMIPS/include/cMIPS.s b/cMIPS/include/cMIPS.s
index 8a8e876143dbd6c7435c8cea4dc561848665a0c0..de3b6beb6a0b9dbfa5d4a6596d9fd8d107c20f92 100644
--- a/cMIPS/include/cMIPS.s
+++ b/cMIPS/include/cMIPS.s
@@ -11,6 +11,7 @@
 	.set x_IO_ADDR_RANGE,0x00000020
 
 	.set HW_counter_addr,(x_IO_BASE_ADDR +  5 * x_IO_ADDR_RANGE)
+	.set HW_FPU_addr,    (x_IO_BASE_ADDR +  6 * x_IO_ADDR_RANGE)
 	.set HW_uart_addr,   (x_IO_BASE_ADDR +  7 * x_IO_ADDR_RANGE)
 	.set HW_dsp7seg_addr,(x_IO_BASE_ADDR +  9 * x_IO_ADDR_RANGE)
 	.set HW_keybd_addr,  (x_IO_BASE_ADDR + 10 * x_IO_ADDR_RANGE)
@@ -18,20 +19,30 @@
 
 	# see vhdl/packageExcp.vhd for addresses
 	.set x_EXCEPTION_0000,0x00000080
+	.set x_EXCEPTION_0100,0x000000A0
 	.set x_EXCEPTION_0180,0x000000C0
 	.set x_EXCEPTION_0200,0x00000140
-	.set x_ENTRY_POINT,0x00000300
+	.set x_ENTRY_POINT,   0x00000300
 
-	.set cop0_COUNT,$9
-	.set cop0_COMPARE,$11
-	.set cop0_STATUS,$12
-	.set cop0_CAUSE,$13
-	.set cop0_EPC,$14
-	.set cop0_CONFIG,$16
+	.set cop0_Index,   $0    # CP0 reg  0: Index    (selects a TLB entry, per MIPS32)
+	.set cop0_Random,  $1    # CP0 reg  1: Random   (TLB index used by tlbwr, per MIPS32)
+	.set cop0_EntryLo0,$2    # CP0 reg  2: EntryLo0 (TLB data, even page, per MIPS32)
+	.set cop0_EntryLo1,$3    # CP0 reg  3: EntryLo1 (TLB data, odd page, per MIPS32)
+	.set cop0_Context ,$4    # CP0 reg  4: Context
+	.set cop0_PageMask,$5    # CP0 reg  5: PageMask
+	.set cop0_Wired,   $6    # CP0 reg  6: Wired
+	.set cop0_BadVAddr,$8    # CP0 reg  8: BadVAddr
+	.set cop0_COUNT   ,$9    # CP0 reg  9: Count
+	.set cop0_EntryHi ,$10   # CP0 reg 10: EntryHi  (TLB tag, per MIPS32)
+	.set cop0_COMPARE ,$11   # CP0 reg 11: Compare
+	.set cop0_STATUS  ,$12   # CP0 reg 12: Status
+	.set cop0_CAUSE   ,$13   # CP0 reg 13: Cause
+	.set cop0_EPC,     $14   # CP0 reg 14: EPC
+	.set cop0_CONFIG,  $16   # CP0 reg 16: Config
 	.set cop0_CONFIG_f0,0
 	.set cop0_CONFIG_f1,1
-	.set cop0_LLAddr,$17
-	.set cop0_ErrorPC,$30
+	.set cop0_LLAddr,  $17
+	.set cop0_ErrorPC, $30
 
 	
 	# reset: COP0 present, at exception level, all else disabled
diff --git a/cMIPS/include/cMIPSio.c b/cMIPS/include/cMIPSio.c
index fff66507d04fd68d5d8bb104602f5ec3c5eb5243..0a2eae8d0110ec4b7818b67f97f7a2759db2d657 100644
--- a/cMIPS/include/cMIPSio.c
+++ b/cMIPS/include/cMIPSio.c
@@ -92,16 +92,81 @@ void dumpRAM(void) {
 //=======================================================================
 void readStats(sStats *s) {
   int *IO = (int *)IO_STATS_ADDR;
-
+#if 0
   s->dc_ref    = *(IO+0);
   s->dc_rd_hit = *(IO+1);
   s->dc_wr_hit = *(IO+2);
   s->dc_flush  = *(IO+3);
   s->ic_ref    = *(IO+4);
   s->ic_hit    = *(IO+5);
+#endif
+}; //--------------------------------------------------------------------
+
+
+//=======================================================================
+// memcpy -- need this to fool GCC into believing this is libc
+//=======================================================================
+char *memcpy(char *dst, const char *src, int n) {
+  // Copies n bytes from src to dst and returns the original dst.
+  // Regions must not overlap; n <= 0 copies nothing.
+  char *ret = dst;
+  // bytes needed to bring src up to the next word boundary: 0..3
+  int cnt = (4 - ((unsigned int)src & 3)) & 3;
+  while( (cnt > 0) && (n > 0) ) {
+    *dst = *src;
+    cnt--; n--;
+    dst++; src++;
+  } // src is now word aligned (or n == 0)
+  while ( n >= 4) {
+    if ( ((unsigned int)dst & 3) == 0 ) { // dst aligned to word x00
+      *((int *)dst) = *((int *)src);
+    } else if ( ((unsigned int)dst & 1) == 0 ) { // dst aligned to short xx0
+      *((short *)dst) = *((short *)src);
+      *((short *)(dst+2)) = *((short *)(src+2));
+    } else { // dst odd: src+1 is odd as well, so halfword access is unsafe
+      *dst = *src;         *(dst+1) = *(src+1);
+      *(dst+2) = *(src+2); *(dst+3) = *(src+3);
+    }
+    n-=4; src+=4; dst+=4;
+  }
+  while(n > 0) { // copy the 0..3 trailing bytes
+    *dst = *src;
+    n--; dst++; src++;
+  }
+  return(ret);
+}; //--------------------------------------------------------------------
+
 
 
+//=======================================================================
+// memset -- need this to fool GCC into believing this is libc
+//=======================================================================
+char *memset(char *dst, const int val, int len) {
+  // Fills len bytes at dst with the low byte of val; returns dst.
+  unsigned char *ptr = (unsigned char*)dst;
+  unsigned int b = (unsigned int)val & 0xff;    // only the low byte is stored
+  int cnt = (4 - ((unsigned int)ptr & 3)) & 3;  // bytes up to word boundary
+  while( (cnt > 0) && (len > 0) ) {
+    *ptr = (unsigned char)b;
+    cnt--; len--;
+    ptr++;
+  } // ptr is now word aligned (or len == 0)
+  b |= b << 8;  b |= b << 16;   // replicate the byte into all four lanes
+  while (len >= 4) {
+    *((unsigned int *)ptr) = b;
+    len -= 4;
+    ptr += 4;
+  }
+  while(len > 0) { // 0..3 trailing bytes; low byte of b is still the fill
+    *ptr = (unsigned char)b;
+    len--;
+    ptr++;
+  }
+  return(dst);
+}; //--------------------------------------------------------------------
+
+
+
 #endif // FOR_SIMULATION
 
 
@@ -114,7 +179,7 @@ void readStats(sStats *s) {
 void startCounter(int n, int interr) {
   int *IO = (int *)IO_COUNT_ADDR;
   int interrupt;
-  // set bit 31 to cause an interrupt on count==0, reset for no interrupt
+  // set bit 31 to cause an interrupt on count==n, reset for no interrupt
   interrupt = (interr == 0 ? 0x00000000 : 0x80000000);
 
   // set bit 30 to start counting, reset to stop
@@ -313,3 +378,5 @@ void DSP7SEGput(int MSD, int MSdot, int lsd, int lsdot) {
   *IO = dot1 | dot0 | dig1 | dig0;
 }
 //-----------------------------------------------------------------------
+
+
diff --git a/cMIPS/include/start.s b/cMIPS/include/start.s
index b1d0b3df68c6c0d341d0ef81ab389c7ac3ba60fa..52956d5d9dff13ee9c74bed965b9696024d85907 100644
--- a/cMIPS/include/start.s
+++ b/cMIPS/include/start.s
@@ -9,23 +9,25 @@
 	.global exit
 	.org x_INST_BASE_ADDR,0
 	.ent _start
-_start: nop
 
-	# set STATUS, cop0, kernel mode, interrupts disabled
-        li   $k0, cop0_STATUS_reset
-        mtc0 $k0, cop0_STATUS
+        ##
+        ## reset leaves processor in kernel mode, all else disabled
+        ##
 
-	# set CAUSE, "no exceptions", interrVec separated from exceptVec
-	li   $k0, cop0_CAUSE_reset   # 0x0080007c
-	mtc0 $k0, cop0_CAUSE
-	
 	# initialize SP: ramTop-8
-	li   $sp,(x_DATA_BASE_ADDR+x_DATA_MEM_SZ-8)
+_start: li   $sp,(x_DATA_BASE_ADDR+x_DATA_MEM_SZ-8)
 
-	# set STATUS, cop0, user mode, hw interrupt IRQ2,IRQ3 enabled
-        li   $k0, 0x10000c09
+	# set STATUS, cop0, hw interrupt IRQ2,IRQ3 enabled
+        li   $k0, 0x10000c01
         mtc0 $k0, cop0_STATUS
-	
+ 
+	la   $k0, _go_main  # start main() in user mode
+        mtc0 $k0, cop0_EPC
+        nop
+        eret      # go into user mode, all else disabled
+	nop
+
+_go_main:
 	nop
 	jal main  # on returning from main(), MUST go into exit()
 	nop       #  to stop the simulation.
@@ -59,7 +61,7 @@ _excp_0000:
 	j nmi_reset_handler
 	nop
 	#excp_0000ret:
-	#	li   $k0, 0x1000ff09   # enable interrupts, user mode
+	#	li   $k0, 0x1000ff01   # enable interrupts, user mode
 	#       mtc0 $k0, cop0_STATUS
 	#	eret
 
@@ -117,8 +119,8 @@ excp_0180ret:
 	.extern UARTinterr
 	.org x_EXCEPTION_0200,0   # exception vector_200, interrupt handlers
 	.ent _excp_0200
-	.set M_CauseIM,0x0000ff00 # keep bits 15..8 -> IM = IP
-	.set M_StatusIEn,0x0000ff09 # user mode, enable all interrupts
+	.set M_CauseIM,0x0000ff00   # keep bits 15..8 -> IM = IP
+	.set M_StatusIEn,0x0000ff01 # user mode, enable all interrupts
 excp_0200:
 _excp_0200:
 	mfc0 $k0, cop0_CAUSE
@@ -175,13 +177,10 @@ Dismiss:                # No pending request, must have been noise
 	nop             #  do nothing and return
 
 excp_0200ret:
-	li   $k0, 0x08800000
-	mtc0 $k0, cop0_CAUSE
-
 	mfc0 $k0, cop0_STATUS	# Read STATUS register
 	lui  $k1, 0xffff           #  and do not modify its contents
 	ori  $k0, $k0, M_StatusIEn #  except for re-enabling interrupts
-	ori  $k1, $k1, 0xfff9      #  and going into user mode
+	ori  $k1, $k1, 0xfff1      #  and going into user mode
 	and  $k0, $k1, $k0
 	mtc0 $k0, cop0_STATUS	
 	eret			# Return from interrupt
@@ -194,5 +193,3 @@ excp_0200ret:
 	#----------------------------------------------------------------
 	# normal code starts here -- do not edit next line
 	.org x_ENTRY_POINT,0
-
-	
\ No newline at end of file
diff --git a/cMIPS/serial.inp b/cMIPS/serial.inp
index 6f4c06088d2190e37a184257b4ca30eaf3038091..99a7b7ea4231cb60d075d62decfc74843d4c2855 100644
--- a/cMIPS/serial.inp
+++ b/cMIPS/serial.inp
@@ -1,3 +1,6 @@
 
-01234
+abcdef
+012345
+pqrstu
+098765
 
diff --git a/cMIPS/tests/doTests.sh b/cMIPS/tests/doTests.sh
index 8e8a4bd92352f1de1b65aa61e1fc1aaa9d74b882..f13d21c1f1ae35529f116abff586725dfb5b53e0 100755
--- a/cMIPS/tests/doTests.sh
+++ b/cMIPS/tests/doTests.sh
@@ -37,12 +37,12 @@ usage:  $0 [options]
 OPTIONS:
    -h    Show this message
    -B    ignore blank space in comparing simulation to expected results
-   -c    simulate also programs that are timing-dependent -- use fake-caches
+   -c    simulate only programs that are timing independent: can use caches
 EOF
 }
 
 ignBLANKS=""
-withCache=true
+withCache=false
 
 while true ; do
 
@@ -51,7 +51,7 @@ while true ; do
             ;;
         -B) ignBLANKS="-B"
             ;;
-        -c) withCache=false
+        -c) withCache=true
             ;;
         "") break
             ;;
@@ -72,6 +72,7 @@ a_BHW="lbsb lhsh lwsw lwswIncr swlw lwl_lwr"
 a_MEM="lwSweepRAM"
 a_CTR="teq_tne tlt_tlti tltu_tgeu eiDI ll_sc overflow"
 a_COP="mtc0CAUSE2 mtc0EPC syscall break mfc0CONFIG badVAddr badVAddrMM"
+a_MMU="mmu_index mmu_tlbwi mmu_tlbp mmu_tlbwr"
 
 ## these tests MUST be run with FAKE CACHES
 # a_IOs="kbd7seg" 
@@ -86,7 +87,7 @@ rm -f *.simout *.elf
 stoptime=20ms
 
 if [ 0 = 0 ] ; then
-    for F in $(echo $a_FWD $a_CAC $a_BEQ $a_FUN $a_OTH $a_BHW $a_MEM $a_CTR $a_COP $a_IOs);
+    for F in $(echo $a_FWD $a_CAC $a_BEQ $a_FUN $a_OTH $a_BHW $a_MEM $a_CTR $a_COP $a_MMU $a_IOs);
     do
 	$bin/assemble.sh ${F}.s
 	${simulator} --ieee-asserts=disable --stop-time=$stoptime \
@@ -105,7 +106,7 @@ fi
 
 
 c_small="divmul fat fib sieve ccitt16 gcd matrix negcnt reduz rand"
-c_types="memcpy xram sort-byte sort-half sort-int"
+c_types="xram sort-byte sort-half sort-int memcpy"
 c_sorts="bubble insertion merge quick selection shell"
 
 ## the tests below MUST be run with FAKE CACHES
@@ -125,14 +126,14 @@ else
   SIMULATE="$c_small $c_types $c_sorts $c_timing $c_uart"
   echo -e "abcdef\n012345\n" >serial.inp
   # make sure all memory latencies are ZERO
-  pack=$srcVHDL/packageWires.vhd
-  sed -i -e "/ROM_WAIT_STATES/s/ := \([0-9][0-9]*\);/ := 0;/" \
-         -e "/RAM_WAIT_STATES/s/ := \([0-9][0-9]*\);/ := 0;/" \
-         -e "/IO_WAIT_STATES/s/ := \([0-9][0-9]*\);/ := 0;/" $pack
+  # pack=$srcVHDL/packageWires.vhd
+  # sed -i -e "/ROM_WAIT_STATES/s/ := \([0-9][0-9]*\);/ := 0;/" \
+  #        -e "/RAM_WAIT_STATES/s/ := \([0-9][0-9]*\);/ := 0;/" \
+  #        -e "/IO_WAIT_STATES/s/ := \([0-9][0-9]*\);/ := 0;/" $pack
 fi
 
 for F in $(echo "$SIMULATE" ) ; do 
-    $bin/compile.sh -O2 ${F}.c
+    $bin/compile.sh -O 3 ${F}.c
     ${simulator} --ieee-asserts=disable --stop-time=$stoptime \
           2>/dev/null >$F.simout
     diff $ignBLANKS -q $F.expected $F.simout
diff --git a/cMIPS/tests/extCounterInt.c b/cMIPS/tests/extCounterInt.c
index 61e1de2c5f97b27f84f7fd7de8914e24a9fd348f..af0dc93eaaf2ae64280f646e3e205f01da854f18 100644
--- a/cMIPS/tests/extCounterInt.c
+++ b/cMIPS/tests/extCounterInt.c
@@ -4,10 +4,10 @@
 #include "cMIPS.h"
 
 #define MAX   100
-#define FALSE 0
+#define FALSE (0==1)
 #define TRUE  ~FALSE
 
-extern _counter_buff;
+extern _counter_val;
 
 int p[MAX];
 
@@ -16,7 +16,10 @@ void main() {
   int i, k, iter;
   int num;
 
-  _counter_buff = 0;   // variable where num interrupts is accumulated
+  enableInterr();
+
+  _counter_val = 0;   // variable to accumulate number of interrupts
+
   startCounter(200,TRUE);   // counter will interrupt after N cycles
 
   p[0] = 0;
@@ -51,7 +54,16 @@ void main() {
   to_stdout('\n');
   print(num); // == x01a
   to_stdout('\n');
-  print( _counter_buff );
 
+  if (_counter_val > 10) {   // more than 10 interrupts ?
+    to_stdout('o');
+    to_stdout('k');
+  } else {
+    to_stdout('e');
+    to_stdout('r');
+    to_stdout('r');
+  }
+  to_stdout('\n');
+  to_stdout('\n');
 }
 
diff --git a/cMIPS/tests/fib.c b/cMIPS/tests/fib.c
index 70f500ec78557bb57ad5b5cad2d1010ed6bc42a4..21772e6165ded6f30dc9a10369ed26ba6a57b3d0 100644
--- a/cMIPS/tests/fib.c
+++ b/cMIPS/tests/fib.c
@@ -36,7 +36,7 @@ void myprint(unsigned int* buf, unsigned int n) {
 
   for(i=0; i<n; i++) {
 #ifdef cMIPS
-    *IO = buf[i];   // 0x001 001 002 003 005 008 00d 015 022 0x037 15.275ns
+    print(buf[i]);   // 0x001 001 002 003 005 008 00d 015 022 0x037 15.275ns
 #else
     printf("%03x ",buf[i]); // 0x001 001 002 003 005 008 00d 015 022 0x037
 #endif
diff --git a/cMIPS/tests/memcpy.c b/cMIPS/tests/memcpy.c
index 5bfe5649fb480daa8e4837d1e6cdc87a14e333ef..01dc5162e0a41f4e2dc5d8851384b3003c185b26 100644
--- a/cMIPS/tests/memcpy.c
+++ b/cMIPS/tests/memcpy.c
@@ -1,63 +1,10 @@
+
 # ifdef cMIPS
    #include "cMIPS.h"
 #else
    #include <stdio.h>
 #endif
 
-char *memcpy2(char *dst, const char *src, int n) {
-  int cnt;
-  char *ret;
-
-  ret = dst;
-  cnt = (int)src % 4;
-  while( (cnt > 0) && (n > 0) ) {
-    *dst = *src;
-    cnt--; n--;
-    dst++; src++;
-  } // src is now word aligned
-  while ( n >= 4) {
-    if ( ((int)dst % 4) == 0 ) { // dst aligned to word x00
-      *((int *)dst) = *((int *)src);
-    } else if ( ((int)dst % 2) == 0 ) { // dst aligned to short xx0
-      *((short *)dst) = *((short *)src);
-      *((short *)(dst+2)) = *((short *)(src+2));
-    } else { // dst aligned to char
-      *dst = *src;
-      *((short *)(dst+1)) = *((short *)(src+1));
-      *(dst+3) = *(src+3);
-    }
-    n-=4; src+=4; dst+=4;
-  }
-  while(n > 0) {
-    *dst = *src;
-    n--; dst++; src++;
-  }
-  return(ret);
-}
-
-void *memset(void *dst, const int val, int len) {
-  register unsigned char *ptr = (unsigned char*)dst;
-  int cnt;
-
-  cnt = (int)ptr % 4;
-  while( (cnt > 0) && (len > 0) ) {
-    *ptr = (char)val;
-    cnt--; len--;
-    ptr++;
-  } // ptr is now word aligned
-  cnt = val | (val<<8) | (val<<16) | (val<<24);
-  while (len >= 4) {
-    *((int *)ptr) = cnt;
-    len -= 4;
-    ptr += 4;
-  }
-  while(len > 0) {
-    *ptr = (char)val;
-    len--;
-    ptr++;
-  }
-  return(dst);
-}
 
 
 #define sSz 20
@@ -89,11 +36,11 @@ int main(void) {
     N=j; 
     s=src;
     d=dst;
-    vet = memcpy2(d, s, N);
+    vet = memcpy(d, s, N);
 #ifdef cMIPS
     for (i=0; i<N; i++) { to_stdout(vet[i]); } ; to_stdout('\n');
 #else
-    for (i=0; i<N; i++) { printf("%c", vet[i]); } ; printf("\n");
+    //  for (i=0; i<N; i++) { printf("%c", vet[i]); } ; printf("\n");
 #endif
 
   }
@@ -109,7 +56,7 @@ int main(void) {
 #ifdef cMIPS
     for (i=0; i<N; i++) { to_stdout(vet[i]); } ; to_stdout('\n');
 #else
-    for (i=0; i<N; i++) { printf("%c", vet[i]); } ; printf("\n");
+    // for (i=0; i<N; i++) { printf("%c", vet[i]); } ; printf("\n");
 #endif
 
 
diff --git a/cMIPS/tests/uartrx.c b/cMIPS/tests/uartrx.c
index ace7d54cd201f13e129f720ef86962988c521caa..5c7d1bbaddc4fccb599f11d47cdfb42691246727 100644
--- a/cMIPS/tests/uartrx.c
+++ b/cMIPS/tests/uartrx.c
@@ -62,8 +62,8 @@ int main(void) { // receive a string through the UART serial interface
   ctrl.ign2  = 0;
   ctrl.intTX = 0;
   ctrl.intRX = 0;
-  ctrl.speed = 0;
-  uart->cs.ctl = ctrl;  // operate at highest data rate
+  ctrl.speed = 1;   // operate at the second highest data rate
+  uart->cs.ctl = ctrl;
 
   i = -1;
 
diff --git a/cMIPS/tests/uarttx.c b/cMIPS/tests/uarttx.c
index 0fc929cbb3f45acfb52ddf0f71e302b639b4abb5..0b66a348cb7522ec5fa401e16228c14f69beb681 100644
--- a/cMIPS/tests/uarttx.c
+++ b/cMIPS/tests/uarttx.c
@@ -1,3 +1,4 @@
+
 #include "cMIPS.h"
 
 typedef struct control { // control register fields (uses only ls byte)
@@ -43,7 +44,7 @@ typedef struct serial {
 #define LONG_STRING 1
 
 #if LONG_STRING
-char *dog = "\n  the quick brown fox jumps over the lazy dog\n";
+char *dog = "\n\tthe quick brown fox jumps over the lazy dog\n";
 char s[32];
 #else
 char s[32]; //  = "123";
@@ -63,7 +64,7 @@ int strcopy(const char *y, char *x)
 
 int main(void) { // send a string through the UART serial interface
   int i;
-  volatile int state;
+  volatile int state, val;
   volatile Tserial *uart;  // tell GCC to not optimize away tests
   Tcontrol ctrl;
 
@@ -75,17 +76,17 @@ int main(void) { // send a string through the UART serial interface
   s[0] = '1';   s[1] = '2';   s[2] = '3';   s[3] = '\0';
 #endif 
 
-  uart = (void *)IO_UART_ADDR;  // bottom of UART address range
+  uart = (void *)IO_UART_ADDR;  // UART's address
 
-  counter = (int *)IO_COUNT_ADDR;
+  counter = (int *)IO_COUNT_ADDR; // counter's address
 
-  ctrl.speed = 0;
-  ctrl.intTX = 0; // 1;
+  ctrl.speed = 0;  // operate at the highest data rate
+  ctrl.intTX = 0;  // no interrupts
   ctrl.intRX = 0;
   ctrl.ign2  = 0;
   ctrl.ign   = 0;
   ctrl.rts   = 1;
-  uart->cs.ctl = ctrl;  // operate at highest data rate
+  uart->cs.ctl = ctrl;
 
   i = -1;
   do {
@@ -99,10 +100,10 @@ int main(void) { // send a string through the UART serial interface
   } while (s[i] != '\0');  // '\0' is transmitted in previous line
 
 
-  // then wait until last char is sent out of shift-register to return
+  // then wait until last char is sent out of the shift-register to return
   startCounter(COUNTING, 0);
-  while ( (i=(readCounter() & 0x3fffffff)) < COUNTING )
-    ; //print(i);
+  while ( (val=(readCounter() & 0x3fffffff)) < COUNTING )
+    {}; 
 
-  return i;  // so compiler won't optimize away the last loop
+  return val;  // so compiler won't optimize away the last loop
 }
diff --git a/cMIPS/vhdl/aux.vhd b/cMIPS/vhdl/aux.vhd
index 18a99deb7d769ac67975b065050d9411a137261f..b1bcd499adf77fc0e466011a023fdd6b811c5724 100644
--- a/cMIPS/vhdl/aux.vhd
+++ b/cMIPS/vhdl/aux.vhd
@@ -51,40 +51,6 @@ end functional;
 
 
 
--- +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
--- 28-bit register, synchronous load active in '0' (used only in TLB)
--- +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-library IEEE;
-use IEEE.std_logic_1164.all;
-use work.p_WIRES.all;
-
-entity register28 is
-  generic (INITIAL_VALUE: reg28 := x"0000000");
-  port(clk, rst, ld: in  std_logic;
-        D:           in  reg28;
-        Q:           out reg28);
-end register28;
-
-architecture functional of register28 is
-begin
-  process(clk, rst, ld)
-    variable state: reg28;
-  begin
-    if rst = '0' then
-      state := INITIAL_VALUE;
-    elsif rising_edge(clk) then
-      if ld = '0' then
-        state := D;
-      end if;
-    end if;
-    Q <= state;
-  end process;
-  
-end functional;
--- +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-
-
-
 -- +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 -- N-bit register, synchronous load active in '0', asynch reset
 -- +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
@@ -92,11 +58,11 @@ library IEEE;
 use IEEE.std_logic_1164.all;
 use work.p_WIRES.all;
 entity registerN is
-  generic (NUM_BITS: integer := 16);
+  generic (NUM_BITS: integer := 16;
+           INIT_VAL: std_logic_vector);
   port(clk, rst, ld: in  std_logic;
        D:            in  std_logic_vector(NUM_BITS-1 downto 0);
        Q:            out std_logic_vector(NUM_BITS-1 downto 0));
-  constant INIT_VAL: std_logic_vector(NUM_BITS-1 downto 0) := (others => '0');
 end registerN;
 
 architecture functional of registerN is
diff --git a/cMIPS/vhdl/core.vhd b/cMIPS/vhdl/core.vhd
index 54af5119218ef717bbabf11d05152ddceaf1dd63..8cd9e09838599a52cdb6d420b650c930ae0e668a 100644
--- a/cMIPS/vhdl/core.vhd
+++ b/cMIPS/vhdl/core.vhd
@@ -100,7 +100,9 @@ architecture rtl of core is
          EX_cop0_val:   in  std_logic_vector;
          MM_cop0_val:   out std_logic_vector;
          EX_trapped:    in  std_logic;
-         MM_ex_trapped: out std_logic);
+         MM_ex_trapped: out std_logic;
+         EX_mfc0:       in  std_logic;
+         MM_mfc0:       out std_logic);
   end component reg_excp_EX_MM;
 
   component reg_excp_MM_WB is
@@ -134,6 +136,7 @@ architecture rtl of core is
   signal LL_update,LL_SC_abort,LL_SC_differ,EX_trapped,MM_ex_trapped: std_logic;
   signal int_req, EX_int_req: reg8;
   signal RF_nmi,EX_nmi : std_logic;
+  signal EX_mfc0, MM_mfc0 : std_logic;
   signal can_trap,EX_can_trap,MM_can_trap,WB_can_trap: reg2;
   signal is_trap, tr_signed, tr_stall: std_logic;
   signal tr_is_equal,EX_tr_is_equal, tr_less_than,EX_tr_less_than: std_logic;
@@ -146,7 +149,7 @@ architecture rtl of core is
   signal STATUSinp,STATUS, CAUSEinp,CAUSE, EPCinp,EPC : reg32;
   signal COUNT,COMPARE : reg32;
   signal count_eq_compare,count_update,count_enable : std_logic;
-  signal exception,EX_exception,is_exception: exception_type := exNOP;
+  signal exception,EX_exception,is_exception : exception_type := exNOP;
   signal ExcCode : reg5 := cop0code_NULL;
   signal exception_num, exception_dec : integer;       -- for debugging only
   signal next_instr_in_delay_slot,EX_is_delayslot : std_logic;
@@ -160,27 +163,41 @@ architecture rtl of core is
   -- MMU signals --
   signal INDEX, index_inp, RANDOM, WIRED, wired_inp : reg32;
   signal index_update, wired_update : std_logic;
-  signal EntryLo0, EntryLo1, EntryLo0_inp, EntryLo1_inp : reg32;
+  signal EntryLo0, EntryLo1, EntryLo0_inp, EntryLo1_inp, v_addr : reg32;
   signal EntryHi, EntryHi_inp : reg32;
-  signal Context, PageMask, Context_inp, PageMask_inp : reg32;
+  signal Context, PageMask, PageMask_inp : reg32;
   signal entryLo0_update, entryLo1_update, entryHi_update : std_logic;
-  signal context_update, tlb_read : std_logic;
-  signal tlb_entrylo0_mm,tlb_entrylo1_mm,tlb_context_mm,tlb_entryhi : reg32;
+  signal context_upd_pte, context_upd_bad, tlb_read : std_logic;
+  signal tlb_entrylo0_mm, tlb_entrylo1_mm, tlb_entryhi : reg32;
   signal tlb_tag0_updt, tlb_tag1_updt, tlb_tag2_updt, tlb_tag3_updt : std_logic;
+  signal tlb_tag4_updt, tlb_tag5_updt, tlb_tag6_updt, tlb_tag7_updt : std_logic;
   signal tlb_dat0_updt, tlb_dat1_updt, tlb_dat2_updt, tlb_dat3_updt : std_logic;
-  signal hit_pc, hit0_pc, hit1_pc, hit2_pc, hit3_pc : std_logic;
-  signal tlb_a0_pc, tlb_a1_pc, tlb_a2_pc : std_logic;
-  signal hit_mm, hit0_mm, hit1_mm, hit2_mm, hit3_mm : std_logic;
-  signal tlb_a0_mm, tlb_a1_mm, tlb_a2_mm : std_logic;
-  signal tlb_adr_pc, tlb_adr_mm, probe_adr : MMU_idx_bits;
-  signal tlb_probe, probe_hit : std_logic;
+  signal tlb_dat4_updt, tlb_dat5_updt, tlb_dat6_updt, tlb_dat7_updt : std_logic;
+  signal hit0_pc, hit1_pc, hit2_pc, hit3_pc, hit_pc : boolean;
+  signal hit4_pc, hit5_pc, hit6_pc, hit7_pc : boolean;
+  signal hit0_mm, hit1_mm, hit2_mm, hit3_mm, hit_mm : boolean;
+  signal hit4_mm, hit5_mm, hit6_mm, hit7_mm : boolean;
+  signal hit_mm_v, hit_mm_d, hit_pc_v : std_logic;
+  signal tlb_adr_mm : MMU_idx_bits;
+  signal tlb_probe, probe_hit, hit_mm_bit : std_logic;
   signal mm : std_logic_vector(VA_HI_BIT downto VA_LO_BIT);
-  signal tlb_adr : natural range 0 to (MMU_CAPACITY - 1);
-  signal tlb_ppn : std_logic_vector(PPN_BITS - 1 downto 0);
-  
-  signal tlb_tag_inp, tlb_tag0, tlb_tag1, tlb_tag2, tlb_tag3 : reg32;
-  signal tlb_dat_inp, tlb_dat0, tlb_dat1, tlb_dat2, tlb_dat3 : reg28;
-  signal tlb_entryLo0, tlb_entryLo1, phy_i_addr : reg32;
+  signal tlb_adr,tlb_a0_pc,tlb_a1_pc,tlb_a2_pc : natural range 0 to (MMU_CAPACITY-1);
+  signal hit_pc_adr, hit_mm_adr : natural range 0 to (MMU_CAPACITY-1);
+  signal tlb_a0_mm,tlb_a1_mm,tlb_a2_mm : natural range 0 to (MMU_CAPACITY-1);
+  signal tlb_ppn_pc0,tlb_ppn_pc1 : mmu_dat_reg;
+  signal tlb_ppn_mm0,tlb_ppn_mm1 : mmu_dat_reg;
+  signal tlb_ppn_mm, tlb_ppn_pc, tlb_ppn : std_logic_vector(PPN_BITS - 1 downto 0);
+  
+  signal tlb_tag0, tlb_tag1, tlb_tag2, tlb_tag3, tlb_tag_inp : reg32;
+  signal tlb_tag4, tlb_tag5, tlb_tag6, tlb_tag7, e_hi, e_hi_inp : reg32;
+  signal tlb_dat0_inp, tlb_dat1_inp, e_lo0, e_lo1 : mmu_dat_reg;
+  signal tlb_dat0_0, tlb_dat1_0, tlb_dat2_0, tlb_dat3_0 : mmu_dat_reg;
+  signal tlb_dat0_1, tlb_dat1_1, tlb_dat2_1, tlb_dat3_1 : mmu_dat_reg;
+  signal tlb_dat4_0, tlb_dat5_0, tlb_dat6_0, tlb_dat7_0 : mmu_dat_reg;
+  signal tlb_dat4_1, tlb_dat5_1, tlb_dat6_1, tlb_dat7_1 : mmu_dat_reg;
+
+  signal tlb_entryLo0, tlb_entryLo1, phy_i_addr, phy_d_addr : reg32;
+  signal tlb_context_inp : std_logic_vector(VABITS-1 downto PAGE_SZ_BITS+1);
   
   -- other components ------------ 
   
@@ -225,12 +242,12 @@ architecture rtl of core is
          Q:            out std_logic_vector);
   end component register32;
 
-  component register28 is
-    generic (INITIAL_VALUE: std_logic_vector);
+  component registerN is
+    generic (NUM_BITS: integer; INIT_VAL: std_logic_vector);
     port(clk, rst, ld: in  std_logic;
          D:            in  std_logic_vector;
          Q:            out std_logic_vector);
-  end component register28;
+  end component registerN;
 
   component counter32 is
     generic (INITIAL_VALUE: std_logic_vector);
@@ -313,7 +330,7 @@ architecture rtl of core is
 
   -- data memory --
   signal rd_data_raw, rd_data, WB_rd_data, WB_mem_data: reg32;
-  signal MM_B_data, WB_B_data: reg32;
+  signal MM_B_data, WB_B_data, d_addr_pre: reg32;
   signal jr_stall, br_stall, fwd_lwlr, sw_stall : std_logic;
   signal fwd_mem, WB_addr2: reg2;
 
@@ -470,23 +487,23 @@ architecture rtl of core is
     ('1','1',NIL,  '1','0','0',opNOP,"001","00", '0', "00",cNOP,"00"),--29
     ('1','1',NIL,  '1','0','0',opNOP,"001","00", '0', "00",cNOP,"00"),--30
     ('1','1',SPEC3,'0','0','0',opSPC,"001","00", '0', "00",cNOP,"00"),--special3
-    ('0','1',LB,   '0','1','0',opADD,"000","01", '1', "00",cNOP,"00"),--lb=32
-    ('0','1',LH,   '0','1','0',opADD,"000","01", '1', "00",cNOP,"00"),--lh=33
-    ('0','1',LWL,  '0','1','0',opADD,"000","01", '1', "00",cNOP,"00"),--lwl=34
-    ('0','1',LW,   '0','1','0',opADD,"000","01", '1', "00",cNOP,"00"),--lw=35
-    ('0','1',LBU,  '0','1','0',opADD,"000","01", '1', "00",cNOP,"00"),--lbu=36
-    ('0','1',LHU,  '0','1','0',opADD,"000","01", '1', "00",cNOP,"00"),--lhu=37
-    ('0','1',LWR,  '0','1','0',opADD,"000","01", '1', "00",cNOP,"00"),--lwr=38
+    ('0','1',LB,   '0','1','0',opADD,"000","01", '1', "00",cNOP,"11"),--lb=32
+    ('0','1',LH,   '0','1','0',opADD,"000","01", '1', "00",cNOP,"11"),--lh=33
+    ('0','1',LWL,  '0','1','0',opADD,"000","01", '1', "00",cNOP,"11"),--lwl=34
+    ('0','1',LW,   '0','1','0',opADD,"000","01", '1', "00",cNOP,"11"),--lw=35
+    ('0','1',LBU,  '0','1','0',opADD,"000","01", '1', "00",cNOP,"11"),--lbu=36
+    ('0','1',LHU,  '0','1','0',opADD,"000","01", '1', "00",cNOP,"11"),--lhu=37
+    ('0','1',LWR,  '0','1','0',opADD,"000","01", '1', "00",cNOP,"11"),--lwr=38
     ('1','1',NIL,  '1','0','0',opNOP,"001","00", '0', "00",cNOP,"00"),--39
-    ('0','0',SB,   '1','1','0',opADD,"001","00", '1', "00",cNOP,"00"),--sb=40
-    ('0','0',SH,   '1','1','0',opADD,"001","00", '1', "00",cNOP,"00"),--sh=41
+    ('0','0',SB,   '1','1','0',opADD,"001","00", '1', "00",cNOP,"11"),--sb=40
+    ('0','0',SH,   '1','1','0',opADD,"001","00", '1', "00",cNOP,"11"),--sh=41
     ('1','1',NIL,  '1','1','0',opNOP,"001","00", '0', "00",cNOP,"00"),--swl=42
-    ('0','0',SW,   '1','1','0',opADD,"001","00", '1', "00",cNOP,"00"),--sw=43
+    ('0','0',SW,   '1','1','0',opADD,"001","00", '1', "00",cNOP,"11"),--sw=43
     ('1','1',NIL,  '1','0','0',opNOP,"001","00", '0', "00",cNOP,"00"),--44
     ('1','1',NIL,  '1','0','0',opNOP,"001","00", '0', "00",cNOP,"00"),--45
     ('1','1',NIL,  '1','1','0',opNOP,"001","00", '0', "00",cNOP,"00"),--swr=46
     ('1','1',NIL,  '1','1','0',opNOP,"001","00", '0', "00",cNOP,"00"),--cache=47
-    ('0','1',LL,   '0','1','0',opADD,"000","01", '1', "00",cNOP,"00"),--ll=48
+    ('0','1',LL,   '0','1','0',opADD,"000","01", '1', "00",cNOP,"11"),--ll=48
     ('1','1',NIL,  '1','0','0',opNOP,"001","00", '0', "00",cNOP,"00"),--lwc1=49
     ('1','1',NIL,  '1','0','0',opNOP,"001","00", '0', "00",cNOP,"00"),--lwc2=50
     ('1','1',NIL,  '1','0','0',opNOP,"001","00", '0', "00",cNOP,"00"),--pref=51
@@ -494,7 +511,7 @@ architecture rtl of core is
     ('1','1',NIL,  '1','0','0',opNOP,"001","00", '0', "00",cNOP,"00"),--ldc1=53
     ('1','1',NIL,  '1','0','0',opNOP,"001","00", '0', "00",cNOP,"00"),--ldc2=54
     ('1','1',NIL,  '1','0','0',opNOP,"001","00", '0', "00",cNOP,"00"),--55
-    ('0','0',SC,   '0','1','0',opADD,"111","01", '1', "00",cNOP,"00"),--sc=56
+    ('0','0',SC,   '0','1','0',opADD,"111","01", '1', "00",cNOP,"11"),--sc=56
     ('1','1',NIL,  '1','0','0',opNOP,"001","00", '0', "00",cNOP,"00"),--swc1=57
     ('1','1',NIL,  '1','0','0',opNOP,"001","00", '0', "00",cNOP,"00"),--swc2=58
     ('1','1',NIL,  '1','0','0',opNOP,"001","00", '0', "00",cNOP,"00"),--59
@@ -740,9 +757,14 @@ begin
   -- U_INCPC: adder32 port map (x"00000004", PC_aligned, PCincd);
   -- PCincd <= std_logic_vector( 4 + signed(PC_aligned) );
   U_INCPC: mf_alt_add_4 PORT MAP( datab => PC_aligned, result => PCincd );
-  
+
+
+  -- keep this assignment active when NOT making use of the TLB
   i_addr <= PC_aligned;    -- fetch instruction from aligned address
 
+  -- uncomment this when making use of the TLB
+  -- i_addr <= phy_i_addr;
+  
   abort <= MM_abort or WB_abort;
   
   instr_fetched <= instr when (nullify = '0' and abort = '0'
@@ -1202,7 +1224,11 @@ begin
   begin
     FORWARD_A:
     if ((MM_wreg = '0')and(MM_a_c /= b"00000")and(MM_a_c = EX_a_rs)) then
-      i_A := MM_result;
+      if MM_mfc0 /= '1' then
+        i_A := MM_result;
+      else 
+        i_A := MM_cop0_val;
+      end if;
     elsif ((WB_wreg = '0')and(WB_a_c /= b"00000")and(WB_a_c = EX_a_rs)) then
       i_A := WB_C;
     else
@@ -1214,7 +1240,11 @@ begin
 
     FORWARD_B:
     if ((MM_wreg = '0')and(MM_a_c /= b"00000")and(MM_a_c = EX_a_rt)) then
-      i_B := MM_result;
+      if MM_mfc0 /= '1' then
+        i_B := MM_result;
+      else 
+        i_B := MM_cop0_val;
+      end if;
     elsif ((WB_wreg = '0')and(WB_a_c /= b"00000")and(WB_a_c = EX_a_rt)) then
       i_B := WB_C;
     else
@@ -1243,8 +1273,14 @@ begin
   EX_aVal_cond <= EX_aVal
                   or nullify_EX         -- abort ref if previous excep in EX
                   or abort;             -- abort ref if exception in MEM
+
+
+  -- this adder performs address calculation so the TLB can be checked during
+  --   EX and signal any exception as early as possible
+  U_VIR_ADDR_ADD: mf_alt_adder port map (alu_inp_A, EX_displ32, v_addr);
   
 
+  
   -- ----------------------------------------------------------------------
   PIPESTAGE_EX_MM: reg_EX_MM
     port map (clk,rst, EX_MM_ld,
@@ -1274,7 +1310,12 @@ begin
   rd_data_raw <= data_inp when (MM_wrmem = '1' and MM_aVal = '0') else
                  (others => 'X');
   
+  d_addr <= d_addr_pre;  -- without TLB
+
+  -- d_addr <= phy_d_addr;                 -- with TLB
+  
   MM_MEM_INTERFACE: process(MM_mem_t,MM_aVal,MM_wrmem, MM_result, rd_data_raw)
+    variable i_d_addr : reg32;
     variable bytes_read : reg32;
     variable i_byte_sel : reg4;
     variable i_byte : reg8;
@@ -1291,7 +1332,7 @@ begin
       when b"11" =>
         i_byte_sel := b"1111";              -- LW, SW, LWL, LWR
         bytes_read := rd_data_raw;
-        d_addr     <= MM_result(31 downto 2) & b"00";   -- align reference
+        i_d_addr   := MM_result(31 downto 2) & b"00";   -- align reference
         
         if ( MM_mem_t(3) = '0' and          -- normal LOAD, not LWL,LWR
              MM_aVal = '0' and MM_result(1 downto 0) /= b"00" ) then
@@ -1305,7 +1346,7 @@ begin
         end if;
 
       when b"10" =>
-        d_addr     <= MM_result(31 downto 1) & '0' ;    -- align reference
+        i_d_addr     := MM_result(31 downto 1) & '0' ;    -- align reference
         if MM_result(1) = '0' then                      -- LH*, SH
           i_byte_sel := b"0011";
           i_half     := rd_data_raw(15 downto 0);
@@ -1330,7 +1371,7 @@ begin
         end if;
         
       when b"01" =>                                     -- LB*, SB
-        d_addr     <= MM_result;
+        i_d_addr := MM_result;
         case MM_result(1 downto 0) is
           when b"00"  => i_byte_sel := b"0001";
                          i_byte     := rd_data_raw(7  downto  0);
@@ -1349,13 +1390,15 @@ begin
         -- MM_addr_error <= exNOP;
         
       when others =>
-        d_addr     <= "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";  -- MM_result;
+        i_d_addr   := "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";  -- MM_result;
         i_byte_sel := b"0000";
         bytes_read := (others => 'X');
         -- MM_addr_error <= exNOP;
 
     end case;
 
+    d_addr_pre <= i_d_addr;
+
     b_sel    <= i_byte_sel;
     rd_data  <= bytes_read;
 
@@ -1569,7 +1612,7 @@ begin
 
   -- ----------------------------------------------------------------------    
   PIPESTAGE_EXCP_RF_EX: reg_excp_RF_EX
-    port map (clk, rst, excp_RF_EX_ld, can_trap,EX_can_trap,
+    port map (clk, rst, excp_RF_EX_ld, can_trap,EX_can_trap, 
               exception,EX_exception, trap_instr,EX_trap_instr, 
               cop0_reg,EX_cop0_reg, cop0_sel,EX_cop0_sel,
               next_instr_in_delay_slot,EX_is_delayslot,
@@ -1582,14 +1625,18 @@ begin
 
   -- check for overflow in EX, send it to MM for later processing
   ex_trapped <= '1' when (EX_can_trap = b"10" and ovfl = '1') else '0';
+
   
   is_exception <= exOvfl when MM_ex_trapped = '1' else EX_exception;
+
+  
   
   COP0_DECODE_EXCEPTION_AND_UPDATE_STATUS:
-  process (rst, EX_a_rt, EX_PC, is_exception, EX_trap_instr,
+  process (EX_a_rt, is_exception, EX_trap_instr, EX_wreg,
            EX_cop0_reg, EX_cop0_sel, EX_nmi, EX_interrupt,EX_int_req,
            EX_is_delayslot, cop0_inp, EX_tr_is_equal, EX_tr_less_than,
-           COUNT, COMPARE, STATUS, CAUSE, EPC, BadVAddr,
+           INDEX, RANDOM, EntryLo0, EntryLo1, CONTEXT, PAGEMASK, WIRED,
+           EntryHi, COUNT, COMPARE, STATUS, CAUSE, EPC, BadVAddr,
            rom_stall,ram_stall)
     
     variable newSTATUS, i_COP0_rd : reg32;
@@ -1621,7 +1668,9 @@ begin
     ExcCode         <= cop0code_NULL;
     BadVAddr_source <= '0';
     BadVAddr_update <= '1';
+    EX_mfc0         <= '0';
 
+    newSTATUS             := STATUS;    -- preserve as needed
     newSTATUS(STATUS_BEV) := '0';  -- interrupts at offset 0x200
     newSTATUS(STATUS_CU3) := '0';  -- COP-3 absent (always)
     newSTATUS(STATUS_CU2) := '0';  -- COP-2 absent (always)
@@ -1695,6 +1744,7 @@ begin
         end case;
         i_a_c   := EX_a_rt;
         i_stall := '0';
+        EX_mfc0 <= '1';
 
       when exERET =>            -- exception return
         i_update     := '1';
@@ -1920,7 +1970,7 @@ begin
     if STATUS(STATUS_EXL) = '0' then
       newCAUSE(CAUSE_BD)   := EX_is_delayslot;  -- instr is in delay slot
     else
-      newCAUSE(CAUSE_BD)   := CAUSE(CAUSE_BD);
+      newCAUSE(CAUSE_BD)   := CAUSE(CAUSE_BD);  -- hold it in a double fault
     end if;
     newCAUSE(CAUSE_TI)     := count_eq_compare;
     newCAUSE(CAUSE_CE1)    := '0';
@@ -2054,19 +2104,31 @@ begin
   
   -- MMU-TLB ===========================================================
 
-  -- MMU Index -----------------------------------
+  -- MMU Index -- cop0_0 -------------------------
 
   index_update <= '0' when (update = '1' and update_reg = cop0reg_Index)
                   else not(tlb_probe);
+
+  hit_mm_bit <= '0' when (hit_mm = TRUE) else '1';
   
-  index_inp <= not(hit_mm) & MMU_IDX_0s & tlb_adr_mm when tlb_probe = '1' else 
-               '0' & MMU_IDX_0s & cop0_inp(MMU_CAPACITY_BITS-1 downto 0);
+  with hit_mm_adr select tlb_adr_mm <= "000" when 0,
+                                       "001" when 1,
+                                       "010" when 2,
+                                       "011" when 3,
+                                       "100" when 4,
+                                       "101" when 5,
+                                       "110" when 6,
+                                       "111" when 7,
+                                       "XXX" when others;
+  
+  index_inp  <= hit_mm_bit & MMU_IDX_0s & tlb_adr_mm when tlb_probe = '1' else 
+                hit_mm_bit & MMU_IDX_0s & cop0_inp(MMU_CAPACITY_BITS-1 downto 0);
 
   MMU_Index: register32 generic map(x"00000000")
     port map (clk, rst, index_update, index_inp, INDEX);
 
 
-  -- MMU Wired -----------------------------------
+  -- MMU Wired -- cop0_6 -- pg 72 ----------------
 
   wired_update <= '0' when (update = '1' and update_reg = cop0reg_Wired)
                   else '1';
@@ -2076,7 +2138,8 @@ begin
   MMU_Wired: register32 generic map(MMU_WIRED_INIT)
     port map (clk, rst, wired_update, wired_inp, WIRED);
 
-  -- MMU Random ----------------------------------
+  
+  -- MMU Random -- cop0_1 ------------------------
 
   MMU_Random: process(clk, rst, WIRED, wired_update)
     variable count : integer range -1 to MMU_CAPACITY-1;
@@ -2092,7 +2155,8 @@ begin
     RANDOM <= std_logic_vector(to_signed(count, 32));
   end process MMU_Random;
 
-  -- MMU EntryLo0 -- pg 63 ----------------------
+  
+  -- MMU EntryLo0 -- pg 63 -- cop0_2 ------------
 
   entryLo0_update <= '0' when (update = '1' and update_reg = cop0reg_EntryLo0)
                   else not(tlb_read);
@@ -2102,7 +2166,8 @@ begin
   MMU_EntryLo0: register32 generic map(x"00000000")
     port map (clk, rst, entryLo0_update, entryLo0_inp, EntryLo0);
 
-  -- MMU EntryLo1 -- pg 63 ----------------------  
+
+  -- MMU EntryLo1 -- pg 63 -- cop0_3 ------------  
   
   entryLo1_update <= '0' when (update = '1' and update_reg = cop0reg_EntryLo1)
                   else not(tlb_read);
@@ -2112,17 +2177,37 @@ begin
   MMU_EntryLo1: register32 generic map(x"00000000")
     port map (clk, rst, entryLo1_update, entryLo1_inp, EntryLo1);
 
-  -- MMU Context -- pg 67 ----------------------  
+
+  -- MMU Context -- pg 67 -- cop0_4 ------------
+
+  context_upd_pte <= '0' when (update = '1' and update_reg = cop0reg_Context)
+                     else '1';
+
+  context_upd_bad <= '0' when (not(hit_mm) or not(hit_pc)) else '1';
+
+  assert true                          -- DEBUG
+    report "pgSz " & integer'image(PAGE_SZ_BITS) &
+           " va-1 " & integer'image(VABITS-1) &
+           " pg+1 " & integer'image(PAGE_SZ_BITS+1) &
+           " add " & integer'image(VABITS-1 - PAGE_SZ_BITS+1) &
+           " lef " & integer'image( PC(VABITS-1 downto PAGE_SZ_BITS+1)'left )&
+           " rig " & integer'image( PC(VABITS-1 downto PAGE_SZ_BITS+1)'right );
   
-  context_update <= '0' when (update = '1' and update_reg = cop0reg_Context)
-                  else '1';
+  tlb_context_inp <= v_addr(VABITS-1 downto PAGE_SZ_BITS+1) when not(hit_mm) else
+                     PC(VABITS-1 downto PAGE_SZ_BITS+1) when not(hit_pc) else
+                     (others => 'X');
   
-  context_inp <= cop0_inp when tlb_read = '0' else tlb_context_mm;
+  MMU_ContextPTE: registerN generic map(9, b"000000000")
+    port map (clk, rst, context_upd_pte,
+              cop0_inp(31 downto 23), Context(31 downto 23));
   
-  MMU_Context: register32 generic map(x"00000000")
-    port map (clk, rst, context_update, context_inp, Context);
+  MMU_ContextBAD: registerN generic map(19, b"0000000000000000000")
+    port map (clk, rst, context_upd_bad, tlb_context_inp, Context(22 downto 4));
+
+  Context(3 downto 0) <= b"0000";
 
-  -- MMU Pagemask -----------------------------  
+  
+  -- MMU Pagemask -- cop0_5 -- pg 68 ----------- 
   -- page size is fixed = 4k, thus PageMask is not register
   
   -- pageMask_update <= '0' when (update='1' and update_reg=cop0reg_PageMask)
@@ -2133,9 +2218,10 @@ begin
   -- MMU_PageMask: register32 generic map(x"00000000")
   --  port map (clk, rst, pageMask_update, pageMask_inp, PageMask);
 
-  PageMask <= x"00001800";              -- pg 68
+  PageMask <= mmu_PageMask;
 
-  -- MMU EntryHi -- pg 76 ----------------------  
+  
+  -- MMU EntryHi -- pg 76 -- cop0_10 -----------  
   -- EntryHi holds the ASID of the current process, to check for a match
   
   entryHi_update <= '0' when (update = '1' and update_reg = cop0reg_EntryHi)
@@ -2146,27 +2232,91 @@ begin
   MMU_EntryHi: register32 generic map(x"00000000")
     port map (clk, rst, entryHi_update, entryHi_inp, EntryHi);
 
+
+  MMU_exceptions: process(EX_mem_t, EX_wrmem, hit_mm, hit_pc,  -- pg 41 --
+                          hit_mm_v, hit_mm_d, hit_pc_v, STATUS)
+    variable i_exception : exception_type;
+  begin
+    -- Combinational selection of the TLB exception, driven on MM_excp_TLB.
+    -- A data reference (EX_mem_t /= "0000": a LOAD or STORE) is examined
+    --   first and, when present, its outcome is the only one reported;
+    --   the instruction fetch (hit_pc, hit_pc_v) is checked only when
+    --   there is no data reference.  exNOP means no TLB exception.
+    -- On a miss, STATUS(STATUS_EXL)='1' selects exTLBdblFault, else exTLBrefill.
+    if EX_mem_t /= b"0000" then
+
+      if hit_mm = FALSE then     -- miss, check for TLBrefill or TLBdoubleFault
+
+        if STATUS(STATUS_EXL) = '1' then
+          i_exception := exTLBdblFault;
+        else
+          i_exception := exTLBrefill;
+        end if;
+
+      elsif hit_mm_v = '0' then  -- hit; check for TLBinvalid
+        -- an entry matched but the V bit of the selected page is clear
+        i_exception := exTLBinval;
+
+      elsif EX_wrmem = '0' and hit_mm_d = '0' then
+        -- D bit clear; EX_wrmem='0' presumably marks a STORE -- TODO confirm
+        i_exception := exTLBmod;
+
+      else
+        -- data reference translated without any TLB exception
+        i_exception := exNOP;
+
+      end if;
+    -- no data reference: check the translation of the instruction fetch
+    elsif hit_pc = false then
+
+      if STATUS(STATUS_EXL) = '1' then
+        i_exception := exTLBdblFault;
+      else
+        i_exception := exTLBrefill;
+      end if;
+
+    elsif hit_pc_v = '0' then  -- hit; check for TLBinvalid
+
+      i_exception := exTLBinval;
+
+    else
+      -- fetch hit a valid entry (there is no D-bit check for fetches)
+      i_exception := exNOP;        
+
+    end if;
+
+    MM_excp_TLB <= i_exception;
+
+  end process MMU_exceptions; -- -----------------------------------------
+
+
   
-  -- MMU TLB DATA array ------------------------
+  
+  -- MMU TLB TAG-DATA array -- pg 17 ------------------------------------
 
-  -- TLB_tag: 31..10 = VPN, 9 = 0, 8 = G, 7..0 = ASID
-  -- TLB_dat: 27..6 = PPN, 5..3 = C, 2 = D, 1 = V, 0 = G
+  -- TLB_tag: 31..13 = VPN, 12..9 = 0, 8 = G, 7..0 = ASID
+  -- TLB_dat: 29..6 = PPN, 5..3 = C, 2 = D, 1 = V, 0 = G
   
-  MMU_CONTROL: process(rst, clk, EX_exception, hit_mm, tlb_adr_mm,
-                          INDEX, RANDOM, EntryHi, EntryLo0, -- EntryLo1,
-                          tlb_tag0, tlb_tag1, tlb_tag2, tlb_tag3)
-    variable e_hi : reg32;
-    variable e_lo : reg28;
+  MMU_CONTROL: process(EX_exception, INDEX, RANDOM)
+    variable i_tlb_adr : integer range MMU_CAPACITY-1 downto 0;
   begin
 
     tlb_tag0_updt <= '1';
     tlb_tag1_updt <= '1';
     tlb_tag2_updt <= '1';
     tlb_tag3_updt <= '1';
+    tlb_tag4_updt <= '1';
+    tlb_tag5_updt <= '1';
+    tlb_tag6_updt <= '1';
+    tlb_tag7_updt <= '1';
     tlb_dat0_updt <= '1';
     tlb_dat1_updt <= '1';
     tlb_dat2_updt <= '1';
     tlb_dat3_updt <= '1';
+    tlb_dat4_updt <= '1';
+    tlb_dat5_updt <= '1';
+    tlb_dat6_updt <= '1';
+    tlb_dat7_updt <= '1';
     tlb_read      <= '0';
     tlb_probe     <= '0';
 
@@ -2177,57 +2327,28 @@ begin
 
       when exTLBR => 
 
-        tlb_read <= '1';
-        tlb_adr  <= to_integer(unsigned(INDEX));
- 
-        case tlb_adr is
-          when 0 => e_hi := tlb_tag0; e_lo := tlb_dat0;
-          when 1 => e_hi := tlb_tag1; e_lo := tlb_dat1;
-          when 2 => e_hi := tlb_tag2; e_lo := tlb_dat2;
-          when 3 => e_hi := tlb_tag3; e_lo := tlb_dat3;
-          when others => null;
-        end case;
-
-        -- assert false
-        -- report "e_hi="&SLV32HEX(e_hi)&" adr="&natural'image(tlb_adr);--DEBUG
-
-        tlb_entryLo0(31 downto ELO_AHI_BIT+1) <= (others => '0');
-        tlb_entryLo0(ELO_AHI_BIT downto ELO_ALO_BIT)
-          <= e_lo(DAT_AHI_BIT downto DAT_ALO_BIT);
-        tlb_entryLo0(ELO_CHI_BIT  downto ELO_CLO_BIT)
-          <= e_lo(DAT_CHI_BIT  downto DAT_CLO_BIT);
-        tlb_entryLo0(ELO_D_BIT) <= e_lo(DAT_D_BIT);
-        tlb_entryLo0(ELO_V_BIT) <= e_lo(DAT_V_BIT);
-        tlb_entryLo0(ELO_G_BIT) <= e_lo(DAT_G_BIT);
-
-        tlb_entryhi(EHI_AHI_BIT downto EHI_ALO_BIT)
-          <= e_hi(TAG_AHI_BIT downto TAG_ALO_BIT);
-        tlb_entryhi(EHI_ALO_BIT-1 downto EHI_ASIDHI_BIT+1) <= (others => '0');
-        tlb_entryhi(EHI_ASIDHI_BIT downto EHI_ASIDLO_BIT)
-          <= e_hi(TAG_ASIDHI_BIT downto TAG_ASIDLO_BIT);
+        tlb_read  <= '1';  -- TLBR: slice Index by address width, not entry count
+        i_tlb_adr := to_integer(unsigned(INDEX(MMU_CAPACITY_BITS-1 downto 0)));
 
       when exTLBWI | exTLBWR => 
 
-        e_hi := EntryHi;
-        e_hi(TAG_G_BIT) := EntryLo0(ELO_G_BIT);
-        e_hi(TAG_Z_BIT) := '0';
-        tlb_tag_inp <= e_hi;
-
-        e_lo := EntryLo0(ELO_AHI_BIT downto ELO_G_BIT);
-        tlb_dat_inp <= e_lo;
-
-        
         case EX_exception is
-          when exTLBWI => tlb_adr <= to_integer(unsigned(INDEX));
-          when exTLBWR => tlb_adr <= to_integer(unsigned(RANDOM));
+          when exTLBWI =>   -- indexed write: address is the low bits of Index
+            i_tlb_adr := to_integer(unsigned(INDEX(MMU_CAPACITY_BITS-1 downto 0)));
+          when exTLBWR =>   -- random write: address comes from Random
+            i_tlb_adr := to_integer(unsigned(RANDOM));
           when others => null;
         end case;
 
-        case tlb_adr is
+        case i_tlb_adr is
           when 0 => tlb_tag0_updt <= '0'; tlb_dat0_updt <= '0';
           when 1 => tlb_tag1_updt <= '0'; tlb_dat1_updt <= '0';
           when 2 => tlb_tag2_updt <= '0'; tlb_dat2_updt <= '0';
           when 3 => tlb_tag3_updt <= '0'; tlb_dat3_updt <= '0';
+          when 4 => tlb_tag4_updt <= '0'; tlb_dat4_updt <= '0';
+          when 5 => tlb_tag5_updt <= '0'; tlb_dat5_updt <= '0';
+          when 6 => tlb_tag6_updt <= '0'; tlb_dat6_updt <= '0';
+          when 7 => tlb_tag7_updt <= '0'; tlb_dat7_updt <= '0';
           when others => null;
         end case;
           
@@ -2235,119 +2356,347 @@ begin
 
     end case;    
 
-  end process MMU_CONTROL;
+    tlb_adr <= i_tlb_adr;
+    
+  end process MMU_CONTROL;  ------------------------------------------------
+
+
+  with tlb_adr select
+    e_hi <= tlb_tag0 when 0,
+            tlb_tag1 when 1,
+            tlb_tag2 when 2,
+            tlb_tag3 when 3,
+            tlb_tag4 when 4,
+            tlb_tag5 when 5,
+            tlb_tag6 when 6,
+            tlb_tag7 when others;
+
+  with tlb_adr select
+    e_lo0 <= tlb_dat0_0 when 0,
+             tlb_dat1_0 when 1,
+             tlb_dat2_0 when 2,
+             tlb_dat3_0 when 3,
+             tlb_dat4_0 when 4,
+             tlb_dat5_0 when 5,
+             tlb_dat6_0 when 6,
+             tlb_dat7_0 when others;
+
+  with tlb_adr select
+    e_lo1 <= tlb_dat0_1 when 0,
+             tlb_dat1_1 when 1,
+             tlb_dat2_1 when 2,
+             tlb_dat3_1 when 3,
+             tlb_dat4_1 when 4,
+             tlb_dat5_1 when 5,
+             tlb_dat6_1 when 6,
+             tlb_dat7_1 when others;
+  
+  -- assert false
+  -- report "e_hi="&SLV32HEX(e_hi)&" adr="&natural'image(tlb_adr);--DEBUG
+  
+  -- tlb_entryhi(EHI_AHI_BIT downto EHI_ALO_BIT)
+  tlb_entryhi(31 downto PAGE_SZ_BITS + 1)
+    <= e_hi(TAG_AHI_BIT downto TAG_ALO_BIT);
+  tlb_entryhi(PAGE_SZ_BITS downto EHI_ASIDHI_BIT+1) <= (others => '0');
+  tlb_entryhi(EHI_ASIDHI_BIT downto EHI_ASIDLO_BIT)
+    <= e_hi(TAG_ASIDHI_BIT downto TAG_ASIDLO_BIT);
+
+  tlb_entryLo0(31 downto ELO_AHI_BIT+1) <= (others => '0');
+  tlb_entryLo0(ELO_AHI_BIT downto ELO_ALO_BIT)
+    <= e_lo0(DAT_AHI_BIT downto DAT_ALO_BIT);
+  tlb_entryLo0(ELO_CHI_BIT  downto ELO_CLO_BIT)
+    <= e_lo0(DAT_CHI_BIT  downto DAT_CLO_BIT);
+  tlb_entryLo0(ELO_D_BIT) <= e_lo0(DAT_D_BIT);
+  tlb_entryLo0(ELO_V_BIT) <= e_lo0(DAT_V_BIT);
+  tlb_entryLo0(ELO_G_BIT) <= e_lo0(DAT_G_BIT);
+  
+  tlb_entryLo1(31 downto ELO_AHI_BIT+1) <= (others => '0');
+  tlb_entryLo1(ELO_AHI_BIT downto ELO_ALO_BIT)
+    <= e_lo1(DAT_AHI_BIT downto DAT_ALO_BIT);
+  tlb_entryLo1(ELO_CHI_BIT  downto ELO_CLO_BIT)
+    <= e_lo1(DAT_CHI_BIT  downto DAT_CLO_BIT);
+  tlb_entryLo1(ELO_D_BIT) <= e_lo1(DAT_D_BIT);
+  tlb_entryLo1(ELO_V_BIT) <= e_lo1(DAT_V_BIT);
+  tlb_entryLo1(ELO_G_BIT) <= e_lo1(DAT_G_BIT);
+
+  -- One driver per bit of e_hi_inp: assigning the whole vector and then single
+  --   bits in separate concurrent statements double-drives G and Z ('X').
+  E_HI_INP_BITS: for i in e_hi_inp'range generate
+    e_hi_inp(i) <= '0' when i = TAG_Z_BIT else
+                   (EntryLo0(ELO_G_BIT) and EntryLo1(ELO_G_BIT)) when i = TAG_G_BIT
+                   else EntryHi(i);              -- G = G0 and G1, pg64
+  end generate E_HI_INP_BITS;
+
+  tlb_tag_inp  <= e_hi_inp;                      -- written to the selected TAG
+  tlb_dat0_inp <= EntryLo0(ELO_AHI_BIT downto ELO_G_BIT);  -- to the *_0 registers
+  tlb_dat1_inp <= EntryLo1(ELO_AHI_BIT downto ELO_G_BIT);  -- to the *_1 registers
 
   
-  -- MMU TLB TAG array -------------------------
+  -- MMU TLB TAG+DATA array -------------------------
+
+  mm <= entryHi(EHI_AHI_BIT downto EHI_ALO_BIT) when tlb_probe = '1' else
+        v_addr(VA_HI_BIT downto VA_LO_BIT);
 
-  mm <= entryHi(VABITS-1 downto PAGE_SZ_BITS) when tlb_probe = '1' else
-        MM_result(VABITS-1 downto PAGE_SZ_BITS);
 
-  MMU_TAG0: register32 generic map(x"00000000")
+  -- TLB entry 0 -- initialized to 1st page of ROM
+  
+  MMU_TAG0: register32 generic map(MMU_ini_tag_ROM0)
     port map (clk, rst, tlb_tag0_updt, tlb_tag_inp, tlb_tag0);
 
-  hit0_pc <= BOOL2SL(
-    tlb_tag0(VA_HI_BIT downto VA_LO_BIT) = PC(VA_HI_BIT downto VA_LO_BIT) AND
-    tlb_tag0(ASID_HI_BIT downto 0) = EntryHi(ASID_HI_BIT downto 0));
+  MMU_DAT0_0: registerN generic map(DAT_REG_BITS, MMU_ini_dat_ROM0)
+    port map (clk, rst, tlb_dat0_updt, tlb_dat0_inp, tlb_dat0_0);  -- d=1,v=1,g=1
+  MMU_DAT0_1: registerN generic map(DAT_REG_BITS, MMU_ini_dat_ROM1)
+    port map (clk, rst, tlb_dat0_updt, tlb_dat1_inp, tlb_dat0_1);  -- d=1,v=1,g=1
+
+  hit0_pc <= TRUE when (tlb_tag0(VA_HI_BIT downto VA_LO_BIT) = PC(VA_HI_BIT downto VA_LO_BIT)
+                        and ( (tlb_tag0(TAG_G_BIT) = '1') OR
+                              tlb_tag0(ASID_HI_BIT downto 0) = EntryHi(ASID_HI_BIT downto 0) ) )
+             else FALSE;
+
+  hit0_mm <= TRUE when (tlb_tag0(VA_HI_BIT downto VA_LO_BIT) = mm(VA_HI_BIT downto VA_LO_BIT)
+                        and ( (tlb_tag0(TAG_G_BIT) = '1') OR
+                              tlb_tag0(ASID_HI_BIT downto 0) = EntryHi(ASID_HI_BIT downto 0) ) )
+             else FALSE;
+
+
+  -- TLB entry 1 -- initialized to 2nd page of ROM
+
+  MMU_TAG1: register32 generic map(MMU_ini_tag_ROM2)
+    port map (clk, rst, tlb_tag1_updt, tlb_tag_inp, tlb_tag1);
+
+  MMU_DAT1_0: registerN generic map(DAT_REG_BITS, MMU_ini_dat_ROM2)  -- d=1,v=1,g=1
+    port map (clk, rst, tlb_dat1_updt, tlb_dat0_inp, tlb_dat1_0);
+  MMU_DAT1_1: registerN generic map(DAT_REG_BITS, MMU_ini_dat_ROM3)  -- d=1,v=1,g=1
+    port map (clk, rst, tlb_dat1_updt, tlb_dat1_inp, tlb_dat1_1);
+
+  hit1_pc <= TRUE when (tlb_tag1(VA_HI_BIT downto VA_LO_BIT) = PC(VA_HI_BIT downto VA_LO_BIT)
+                        and ( (tlb_tag1(TAG_G_BIT) = '1') OR
+                              tlb_tag1(ASID_HI_BIT downto 0) = EntryHi(ASID_HI_BIT downto 0) ) )
+             else FALSE;
+
+  hit1_mm <= TRUE when (tlb_tag1(VA_HI_BIT downto VA_LO_BIT) = mm(VA_HI_BIT downto VA_LO_BIT)
+                        and ( (tlb_tag1(TAG_G_BIT) = '1') OR
+                              tlb_tag1(ASID_HI_BIT downto 0) = EntryHi(ASID_HI_BIT downto 0) ) )
+             else FALSE;
 
-  hit0_mm <= BOOL2SL(
-    tlb_tag0(VA_HI_BIT downto VA_LO_BIT) = mm(VA_HI_BIT downto VA_LO_BIT) AND
-    tlb_tag0(ASID_HI_BIT downto 0) = EntryHi(ASID_HI_BIT downto 0));
 
-  MMU_DAT0: register28 generic map(x"0000012")  -- PPN=0, v=1
-    port map (clk, rst, tlb_dat0_updt, tlb_dat_inp, tlb_dat0);
   
+  -- TLB entry 2 -- initialized to 3rd page of ROM
   
+  MMU_TAG2: register32 generic map(MMU_ini_tag_ROM4)
+    port map (clk, rst, tlb_tag2_updt, tlb_tag_inp, tlb_tag2);
+
+  MMU_DAT2_0: registerN generic map(DAT_REG_BITS, MMU_ini_dat_ROM4)  -- d=1,v=1,g=1
+    port map (clk, rst, tlb_dat2_updt, tlb_dat0_inp, tlb_dat2_0);
+  MMU_DAT2_1: registerN generic map(DAT_REG_BITS, MMU_ini_dat_ROM5)  -- d=1,v=1,g=1
+    port map (clk, rst, tlb_dat2_updt, tlb_dat1_inp, tlb_dat2_1);
+
+  hit2_pc <= TRUE when (tlb_tag2(VA_HI_BIT downto VA_LO_BIT) = PC(VA_HI_BIT downto VA_LO_BIT)
+                        and ( (tlb_tag2(TAG_G_BIT) = '1') OR
+                              tlb_tag2(ASID_HI_BIT downto 0) = EntryHi(ASID_HI_BIT downto 0) ) )
+             else FALSE;
+
+  hit2_mm <= TRUE when (tlb_tag2(VA_HI_BIT downto VA_LO_BIT) = mm(VA_HI_BIT downto VA_LO_BIT)
+                        and ( (tlb_tag2(TAG_G_BIT) = '1') OR
+                              tlb_tag2(ASID_HI_BIT downto 0) = EntryHi(ASID_HI_BIT downto 0) ) )
+             else FALSE;
+
+
+
+  -- TLB entry 3 -- initialized to 4th page of ROM
   
-  MMU_TAG1: register32 generic map(x"00000400")
-    port map (clk, rst, tlb_tag1_updt, tlb_tag_inp, tlb_tag1);
+  MMU_TAG3: register32 generic map(MMU_ini_tag_ROM6)
+    port map (clk, rst, tlb_tag3_updt, tlb_tag_inp, tlb_tag3);
+
+  MMU_DAT3_0: registerN generic map(DAT_REG_BITS, MMU_ini_dat_ROM6)  -- d=1,v=1,g=1
+    port map (clk, rst, tlb_dat3_updt, tlb_dat0_inp, tlb_dat3_0);
+  MMU_DAT3_1: registerN generic map(DAT_REG_BITS, MMU_ini_dat_ROM7)  -- d=1,v=1,g=1
+    port map (clk, rst, tlb_dat3_updt, tlb_dat1_inp, tlb_dat3_1);
 
-  hit1_pc <= BOOL2SL(
-    tlb_tag1(VA_HI_BIT downto VA_LO_BIT) = PC(VA_HI_BIT downto VA_LO_BIT) AND
-    tlb_tag1(ASID_HI_BIT downto 0) = EntryHi(ASID_HI_BIT downto 0));
+  hit3_pc <= TRUE when (tlb_tag3(VA_HI_BIT downto VA_LO_BIT) = PC(VA_HI_BIT downto VA_LO_BIT)
+                        and ( (tlb_tag3(TAG_G_BIT) = '1') OR
+                              tlb_tag3(ASID_HI_BIT downto 0) = EntryHi(ASID_HI_BIT downto 0) ) )
+             else FALSE;
 
-  hit1_mm <= BOOL2SL(
-    tlb_tag1(VA_HI_BIT downto VA_LO_BIT) = mm(VA_HI_BIT downto VA_LO_BIT) AND
-    tlb_tag1(ASID_HI_BIT downto 0) = EntryHi(ASID_HI_BIT downto 0));
+  hit3_mm <= TRUE when (tlb_tag3(VA_HI_BIT downto VA_LO_BIT) = mm(VA_HI_BIT downto VA_LO_BIT)
+                        and ( (tlb_tag3(TAG_G_BIT) = '1') OR
+                              tlb_tag3(ASID_HI_BIT downto 0) = EntryHi(ASID_HI_BIT downto 0) ) )
+             else FALSE;
 
-  MMU_DAT1: register28 generic map(x"0000052")  -- PPN=1, v=1
-    port map (clk, rst, tlb_dat1_updt, tlb_dat_inp, tlb_dat1);
 
   
-  MMU_TAG2: register32 generic map(x"00000800")
-    port map (clk, rst, tlb_tag2_updt, tlb_tag_inp, tlb_tag2);
+  -- TLB entry 4 -- initialized to 1st page of RAM
+
+  MMU_TAG4: register32 generic map(MMU_ini_tag_RAM0)
+    port map (clk, rst, tlb_tag4_updt, tlb_tag_inp, tlb_tag4);
 
-  hit2_pc <= BOOL2SL(
-    tlb_tag2(VA_HI_BIT downto VA_LO_BIT) = PC(VA_HI_BIT downto VA_LO_BIT) AND
-    tlb_tag2(ASID_HI_BIT downto 0) = EntryHi(ASID_HI_BIT downto 0));
+  MMU_DAT4_0: registerN generic map(DAT_REG_BITS, MMU_ini_dat_RAM0)  -- d=1,v=1,g=1
+    port map (clk, rst, tlb_dat4_updt, tlb_dat0_inp, tlb_dat4_0);
+  MMU_DAT4_1: registerN generic map(DAT_REG_BITS, MMU_ini_dat_RAM1)  -- d=1,v=1,g=1
+    port map (clk, rst, tlb_dat4_updt, tlb_dat1_inp, tlb_dat4_1);
 
-  hit2_mm <= BOOL2SL(
-    tlb_tag2(VA_HI_BIT downto VA_LO_BIT) = mm(VA_HI_BIT downto VA_LO_BIT) AND
-    tlb_tag2(ASID_HI_BIT downto 0) = EntryHi(ASID_HI_BIT downto 0));
+  hit4_pc <= TRUE when (tlb_tag4(VA_HI_BIT downto VA_LO_BIT) = PC(VA_HI_BIT downto VA_LO_BIT)
+                        and ( (tlb_tag4(TAG_G_BIT) = '1') OR
+                              tlb_tag4(ASID_HI_BIT downto 0) = EntryHi(ASID_HI_BIT downto 0) ) )
+             else FALSE;
+
+  hit4_mm <= TRUE when (tlb_tag4(VA_HI_BIT downto VA_LO_BIT) = mm(VA_HI_BIT downto VA_LO_BIT)
+                        and ( (tlb_tag4(TAG_G_BIT) = '1') OR
+                              tlb_tag4(ASID_HI_BIT downto 0) = EntryHi(ASID_HI_BIT downto 0) ) )
+             else FALSE;
+
+
+  
+  -- TLB entry 5 -- initialized to 2nd page of RAM
   
-  MMU_DAT2: register28 generic map(x"0000092")  -- PPN=2, v=1
-    port map (clk, rst, tlb_dat2_updt, tlb_dat_inp, tlb_dat2);
+  MMU_TAG5: register32 generic map(MMU_ini_tag_RAM2)
+    port map (clk, rst, tlb_tag5_updt, tlb_tag_inp, tlb_tag5);
 
+  MMU_DAT5_0: registerN generic map(DAT_REG_BITS, MMU_ini_dat_RAM2)  -- d=1,v=1,g=1
+    port map (clk, rst, tlb_dat5_updt, tlb_dat0_inp, tlb_dat5_0);
+  MMU_DAT5_1: registerN generic map(DAT_REG_BITS, MMU_ini_dat_RAM3)  -- d=1,v=1,g=1
+    port map (clk, rst, tlb_dat5_updt, tlb_dat1_inp, tlb_dat5_1);
+
+  hit5_pc <= TRUE when (tlb_tag5(VA_HI_BIT downto VA_LO_BIT) = PC(VA_HI_BIT downto VA_LO_BIT)
+                        and ( (tlb_tag5(TAG_G_BIT) = '1') OR
+                              tlb_tag5(ASID_HI_BIT downto 0) = EntryHi(ASID_HI_BIT downto 0) ) )
+             else FALSE;
+
+  hit5_mm <= TRUE when (tlb_tag5(VA_HI_BIT downto VA_LO_BIT) = mm(VA_HI_BIT downto VA_LO_BIT)
+                        and ( (tlb_tag5(TAG_G_BIT) = '1') OR
+                              tlb_tag5(ASID_HI_BIT downto 0) = EntryHi(ASID_HI_BIT downto 0) ) )
+             else FALSE;
+
+
+
+  -- TLB entry 6 -- initialized to top of RAM =  stack
   
-  MMU_TAG3: register32 generic map(x"00000c00")
-    port map (clk, rst, tlb_tag3_updt, tlb_tag_inp, tlb_tag3);
+  MMU_TAG6: register32 generic map(MMU_ini_tag_RAM6)
+    port map (clk, rst, tlb_tag6_updt, tlb_tag_inp, tlb_tag6);
+
+  MMU_DAT6_0: registerN generic map(DAT_REG_BITS, MMU_ini_dat_RAM6)  -- d=1,v=1,g=1
+    port map (clk, rst, tlb_dat6_updt, tlb_dat0_inp, tlb_dat6_0);  -- _0 loads EntryLo0
+  MMU_DAT6_1: registerN generic map(DAT_REG_BITS, MMU_ini_dat_RAM7)  -- d=1,v=1,g=1
+    port map (clk, rst, tlb_dat6_updt, tlb_dat1_inp, tlb_dat6_1);  -- _1 loads EntryLo1
+
+  hit6_pc <= TRUE when (tlb_tag6(VA_HI_BIT downto VA_LO_BIT) = PC(VA_HI_BIT downto VA_LO_BIT)
+                        and ( (tlb_tag6(TAG_G_BIT) = '1') OR
+                              tlb_tag6(ASID_HI_BIT downto 0) = EntryHi(ASID_HI_BIT downto 0) ) )
+             else FALSE;
 
-  hit3_pc <= BOOL2SL(
-    tlb_tag3(VA_HI_BIT downto VA_LO_BIT) = PC(VA_HI_BIT downto VA_LO_BIT) AND
-    tlb_tag3(ASID_HI_BIT downto 0) = EntryHi(ASID_HI_BIT downto 0));
+  hit6_mm <= TRUE when (tlb_tag6(VA_HI_BIT downto VA_LO_BIT) = mm(VA_HI_BIT downto VA_LO_BIT)
+                        and ( (tlb_tag6(TAG_G_BIT) = '1') OR
+                              tlb_tag6(ASID_HI_BIT downto 0) = EntryHi(ASID_HI_BIT downto 0) ) )
+             else FALSE;
 
-  hit3_mm <= BOOL2SL(
-    tlb_tag3(VA_HI_BIT downto VA_LO_BIT) = mm(VA_HI_BIT downto VA_LO_BIT) AND
-    tlb_tag3(ASID_HI_BIT downto 0) = EntryHi(ASID_HI_BIT downto 0));
 
-  MMU_DAT3: register28 generic map(x"00000d2")  -- PPN=3, v=1
-    port map (clk, rst, tlb_dat3_updt, tlb_dat_inp, tlb_dat3);
+  
+  -- TLB entry 7 -- initialized to I/O page
+  
+  MMU_TAG7: register32 generic map(MMU_ini_tag_IO)
+    port map (clk, rst, tlb_tag7_updt, tlb_tag_inp, tlb_tag7);
+
+  MMU_DAT7_0: registerN generic map(DAT_REG_BITS, MMU_ini_dat_IO0)  -- d=1,v=1,g=1
+    port map (clk, rst, tlb_dat7_updt, tlb_dat0_inp, tlb_dat7_0);
+  MMU_DAT7_1: registerN generic map(DAT_REG_BITS, MMU_ini_dat_IO1)  -- d=1,v=1,g=1
+    port map (clk, rst, tlb_dat7_updt, tlb_dat1_inp, tlb_dat7_1);
+
+  hit7_pc <= TRUE when (tlb_tag7(VA_HI_BIT downto VA_LO_BIT) = PC(VA_HI_BIT downto VA_LO_BIT)
+                       and ( (tlb_tag7(TAG_G_BIT) = '1') OR
+                             tlb_tag7(ASID_HI_BIT downto 0) = EntryHi(ASID_HI_BIT downto 0) ) )
+             else FALSE;
 
+  hit7_mm <= TRUE when (tlb_tag7(VA_HI_BIT downto VA_LO_BIT) = mm(VA_HI_BIT downto VA_LO_BIT)
+                    and ( (tlb_tag7(TAG_G_BIT) = '1') OR
+                          tlb_tag7(ASID_HI_BIT downto 0) = EntryHi(ASID_HI_BIT downto 0) ) )
+             else FALSE;
+
+  -- end of TLB TAG+DATA ARRAY ----------------------------------------
+
+  -- select mapping for IF
   
+  tlb_a2_pc <= 4 when (hit4_pc or hit5_pc or hit6_pc or hit7_pc) else 0;
+  tlb_a1_pc <= 2 when (hit2_pc or hit3_pc or hit6_pc or hit7_pc) else 0;
+  tlb_a0_pc <= 1 when (hit1_pc or hit3_pc or hit5_pc or hit7_pc) else 0;
+  
+  hit_pc    <= hit0_pc or hit1_pc or hit2_pc or hit3_pc or
+               hit4_pc or hit5_pc or hit6_pc or hit7_pc;
+
+  hit_pc_adr <= (tlb_a2_pc + tlb_a1_pc + tlb_a0_pc);
 
-  tlb_a2_pc <= '0';
-  tlb_a1_pc <= hit2_pc or hit3_pc;
-  tlb_a0_pc <= hit1_pc or hit3_pc;
+  with hit_pc_adr select
+    tlb_ppn_pc0 <= tlb_dat0_0 when 0,
+                   tlb_dat1_0 when 1,
+                   tlb_dat2_0 when 2,
+                   tlb_dat3_0 when 3,
+                   tlb_dat4_0 when 4,
+                   tlb_dat5_0 when 5,
+                   tlb_dat6_0 when 6,
+                   tlb_dat7_0 when others;
 
-  hit_pc     <= hit0_pc or hit1_pc or hit2_pc or hit3_pc;
-  tlb_adr_pc <= tlb_a1_pc & tlb_a0_pc;  -- tlb_a2_pc & 
+  with hit_pc_adr select
+    tlb_ppn_pc1 <= tlb_dat0_1 when 0,
+                   tlb_dat1_1 when 1,
+                   tlb_dat2_1 when 2,
+                   tlb_dat3_1 when 3,
+                   tlb_dat4_1 when 4,
+                   tlb_dat5_1 when 5,
+                   tlb_dat6_1 when 6,
+                   tlb_dat7_1 when others;
 
-  tlb_ppn <= tlb_dat0(DAT_AHI_BIT downto DAT_ALO_BIT) when hit0_pc = '1'
-             else (others => 'Z');
-  tlb_ppn <= tlb_dat1(DAT_AHI_BIT downto DAT_ALO_BIT) when hit1_pc = '1'
-             else (others => 'Z');
-  tlb_ppn <= tlb_dat2(DAT_AHI_BIT downto DAT_ALO_BIT) when hit2_pc = '1'
-             else (others => 'Z');
-  tlb_ppn <= tlb_dat3(DAT_AHI_BIT downto DAT_ALO_BIT) when hit2_pc = '1'
-             else (others => 'Z');
+  tlb_ppn_pc <= tlb_ppn_pc0(DAT_AHI_BIT downto DAT_ALO_BIT)
+                     when PC(PAGE_SZ_BITS) = '0'
+                else tlb_ppn_pc1(DAT_AHI_BIT downto DAT_ALO_BIT);
 
+  hit_pc_v   <= tlb_ppn_pc0(DAT_V_BIT) when PC(PAGE_SZ_BITS) = '0' else
+                tlb_ppn_pc1(DAT_V_BIT);
   
---  with "00" select --  tlb_adr_pc select
---    tlb_ppn <= tlb_dat0(DAT_AHI_BIT downto DAT_ALO_BIT) when "00",
---                tlb_dat1(DAT_AHI_BIT downto DAT_ALO_BIT) when "01",
---                tlb_dat2(DAT_AHI_BIT downto DAT_ALO_BIT) when "10",
---                tlb_dat3(DAT_AHI_BIT downto DAT_ALO_BIT) when "11",
---                (others => 'X') when others;
+  phy_i_addr <= tlb_ppn_pc(PPN_BITS-1 downto 0) & PC(PAGE_SZ_BITS-1 downto 0);
 
-  phy_i_addr <= tlb_ppn(21 downto 0) & PC(VA_LO_BIT-1 downto 0);
+
+  -- select mapping for MM
+
+  tlb_a2_mm <= 4 when (hit4_mm or hit5_mm or hit6_mm or hit7_mm) else 0;
+  tlb_a1_mm <= 2 when (hit2_mm or hit3_mm or hit6_mm or hit7_mm) else 0;
+  tlb_a0_mm <= 1 when (hit1_mm or hit3_mm or hit5_mm or hit7_mm) else 0;
+  
+  hit_mm    <= hit0_mm or hit1_mm or hit2_mm or hit3_mm or
+               hit4_mm or hit5_mm or hit6_mm or hit7_mm;
+
+  hit_mm_adr <= (tlb_a2_mm + tlb_a1_mm + tlb_a0_mm);
   
-  assert false report LF& " dathi "&integer'image(DAT_AHI_BIT) &
-                          " datlo "&integer'image(DAT_ALO_BIT) &
-                          " valo " &integer'image(VA_LO_BIT) &
-                          " ppn_b "&integer'image(PPN_BITS) &
-                          " dathl "&integer'image(DAT_AHI_BIT - DAT_ALO_BIT) &
-    " ppn_bits " &integer'image(PPN_BITS)&
-    " 2bits " &integer'image(to_integer(signed(tlb_ppn(1 downto 0))))&
-    LF & " elohi "&integer'image(ELO_AHI_BIT)&
-    " elolo "&integer'image(ELO_ALO_BIT);
+  with hit_mm_adr select
+    tlb_ppn_mm0 <= tlb_dat0_0 when 0,
+                   tlb_dat1_0 when 1,
+                   tlb_dat2_0 when 2,
+                   tlb_dat3_0 when 3,
+                   tlb_dat4_0 when 4,
+                   tlb_dat5_0 when 5,
+                   tlb_dat6_0 when 6,
+                   tlb_dat7_0 when others;
+
+  with hit_mm_adr select
+    tlb_ppn_mm1 <= tlb_dat0_1 when 0,
+                   tlb_dat1_1 when 1,
+                   tlb_dat2_1 when 2,
+                   tlb_dat3_1 when 3,
+                   tlb_dat4_1 when 4,
+                   tlb_dat5_1 when 5,
+                   tlb_dat6_1 when 6,
+                   tlb_dat7_1 when others;
+
+  tlb_ppn_mm <= tlb_ppn_mm0(DAT_AHI_BIT downto DAT_ALO_BIT) when MM_result(PAGE_SZ_BITS) = '0' else
+                tlb_ppn_mm1(DAT_AHI_BIT downto DAT_ALO_BIT);
   
+  hit_mm_v   <= tlb_ppn_mm0(DAT_V_BIT) when v_addr(PAGE_SZ_BITS) = '0' else
+                tlb_ppn_mm1(DAT_V_BIT);
+  -- V and D of a data reference: pick the page of the pair by v_addr, not by PC
+  hit_mm_d   <= tlb_ppn_mm0(DAT_D_BIT) when v_addr(PAGE_SZ_BITS) = '0' else
+                tlb_ppn_mm1(DAT_D_BIT);
+
   
-  tlb_a2_mm <= '0';  
-  tlb_a1_mm <= hit2_mm or hit3_mm;
-  tlb_a0_mm <= hit1_mm or hit3_mm;
+  phy_d_addr <= tlb_ppn_mm(PPN_BITS-1 downto 0) & d_addr_pre(PAGE_SZ_BITS-1 downto 0);
 
-  hit_mm     <= hit0_mm or hit1_mm or hit2_mm or hit3_mm;
-  tlb_adr_mm <= tlb_a1_mm & tlb_a0_mm;  -- tlb_a2_mm & 
   
   -- MMU-TLB == end =======================================================
 
@@ -2358,7 +2707,7 @@ begin
               EX_excp_type,MM_excp_type_i, EX_PC,MM_PC,
               EX_LLbit,MM_LLbit,
               EX_cop0_a_c,MM_cop0_a_c, EX_cop0_val,MM_cop0_val,
-              EX_trapped, MM_ex_trapped);
+              EX_trapped, MM_ex_trapped, EX_mfc0,MM_mfc0);
 
     COP0_MM_EXCEPTIONS: process(MM_addr_error, MM_excp_type_i)
     begin
diff --git a/cMIPS/vhdl/exception.vhd b/cMIPS/vhdl/exception.vhd
index e0df19ec8d51e9a1a73c4acc87e6297f53866a33..e788078aca27524a888369d1bb4fb2cc8f8238a1 100644
--- a/cMIPS/vhdl/exception.vhd
+++ b/cMIPS/vhdl/exception.vhd
@@ -136,7 +136,9 @@ entity reg_excp_EX_MM is
        EX_cop0_val:   in  reg32;
        MM_cop0_val:   out reg32;
        EX_trapped:    in  std_logic;
-       MM_ex_trapped: out std_logic);
+       MM_ex_trapped: out std_logic;
+       EX_mfc0:       in  std_logic;
+       MM_mfc0:       out std_logic);
 end reg_excp_EX_MM;
 
 architecture funcional of reg_excp_EX_MM is
@@ -156,6 +158,7 @@ begin
         MM_cop0_a_c   <= EX_cop0_a_c   ;
         MM_cop0_val   <= EX_cop0_val   ;
         MM_ex_trapped <= EX_trapped    ;
+        MM_mfc0       <= EX_mfc0       ;
       end if;
     end if;
   end process;
diff --git a/cMIPS/vhdl/instrcache.vhd b/cMIPS/vhdl/instrcache.vhd
index ab39804067dc183d25a78fa5a3b6d86b22e2b09f..9a607c9279c4635be8dab3ddc235d4c2bdce3a4f 100644
--- a/cMIPS/vhdl/instrcache.vhd
+++ b/cMIPS/vhdl/instrcache.vhd
@@ -53,6 +53,8 @@ entity I_CACHE_fpga is
   constant IC_BOT_W_SEL : natural :=
     32 - (IC_TAG_BITS + IC_INDEX_BITS + IC_WORD_SEL_BITS);
 
+  constant TAG_IDX_REG_INI: std_logic_vector(IC_TAG_BITS + IC_INDEX_BITS - 1 downto 0) :=
+    (others => '0');
   subtype tag_address is integer range 0 to (IC_NUM_BLOCKS - 1);
   subtype ram_address is integer range 0 to (IC_NUM_WORDS - 1);
   subtype tag_sel_width  is std_logic_vector((IC_TAG_BITS - 1) downto 0);
@@ -84,7 +86,7 @@ architecture structural of I_CACHE_fpga is
   end component register32;
 
   component registerN is
-    generic (NUM_BITS: integer);
+    generic (NUM_BITS: integer; INIT_VAL: std_logic_vector);
     port(clk, rst, ld: in  std_logic;
          D:            in  std_logic_vector(NUM_BITS-1 downto 0);
          Q:            out std_logic_vector(NUM_BITS-1 downto 0));
@@ -189,7 +191,7 @@ begin
               d_str_rd, mem_rdy, cached_data);
 
   cpu_data <= hold_data when ( hit = '0' ) else cached_data;  
-  U_HOLD_INSTR: registerN  generic map ( 32 )
+  U_HOLD_INSTR: registerN  generic map ( 32, x"00000000" )
     port map (mem_rdy, rst, '0', mem_data, hold_data);
 
   
@@ -213,7 +215,7 @@ begin
   ld_addr <= not(fetching);
   
   U_TAG_INDEX_REGISTER: registerN            -- clk,rst,ld=0
-    generic map ( IC_TAG_BITS + IC_INDEX_BITS )
+    generic map ( IC_TAG_BITS + IC_INDEX_BITS, TAG_IDX_REG_INI)
     port map (fetching, rst, '0', cpu_tag_index, tag_index);
 
   miss_under_miss <= '1' when ( cpu_tag_index /= tag_index ) else '0';
diff --git a/cMIPS/vhdl/io.vhd b/cMIPS/vhdl/io.vhd
index 97aa9c37f5fa2e2ff0dbd2048231dd9332a8bba2..f316810e387538de5b81f388cd0da92750fc99a6 100644
--- a/cMIPS/vhdl/io.vhd
+++ b/cMIPS/vhdl/io.vhd
@@ -271,7 +271,7 @@ end do_interrupt;
 architecture behavioral of do_interrupt is
 
   component registerN is
-    generic (NUM_BITS: integer);
+    generic (NUM_BITS: integer; INIT_VAL: std_logic_vector);
     port(clk, rst, ld: in  std_logic;
          D:            in  std_logic_vector;
          Q:            out std_logic_vector);
@@ -301,7 +301,7 @@ begin
   
   Dlimit <= data_inp(NUM_BITS-1 downto 0);
 
-  U_LIMIT: registerN  generic map (NUM_BITS)
+  U_LIMIT: registerN  generic map (NUM_BITS, START_COUNT)
     port map (clk, rst, ld_reg, Dlimit, Qlimit);
 
   en <= cnt_en and (not equals);
@@ -478,12 +478,14 @@ entity to_7seg is
         display0 : out reg8;
         display1 : out reg8);
   constant NUM_BITS : integer := 10;    -- 2 decimal points, 2 hex digits
+  subtype c_width is std_logic_vector(NUM_BITS - 1 downto 0);
+  constant START_COUNT : c_width := (others => '0');
 end to_7seg;
 
 architecture behavioral of to_7seg is
 
   component registerN is
-    generic (NUM_BITS: integer);
+    generic (NUM_BITS: integer; INIT_VAL: std_logic_vector);
     port(clk, rst, ld: in  std_logic;
          D:            in  std_logic_vector;
          Q:            out std_logic_vector);
@@ -499,7 +501,7 @@ architecture behavioral of to_7seg is
   
 begin
   
-  U_HOLD_data: registerN generic map (NUM_BITS)
+  U_HOLD_data: registerN generic map (NUM_BITS, START_COUNT)
     port map (clk, rst, sel, data(NUM_BITS-1 downto 0), value);
 
   U_DSP1: display_7seg port map (value(7 downto 4), value(9), display1);
@@ -538,10 +540,13 @@ entity read_keys is
         data     : out reg32;
         kbd      : in  std_logic_vector (11 downto 0);
         sw       : in  std_logic_vector (3 downto 0));
-  constant DEB_BITS : integer := 16;  -- debounce counter width
+  constant DEB_BITS : integer := 16;    -- debounce counter width
   constant CNT_MAX : integer := (2**DEB_BITS - 1);
   constant x_DEB_CYCLES : std_logic_vector(DEB_BITS-1 downto 0)
     := std_logic_vector(to_signed((CNT_MAX - DEB_CYCLES),DEB_BITS));
+  constant NUM_BITS : integer := 4;     -- four bits to hold key number
+  subtype c_width is std_logic_vector(NUM_BITS - 1 downto 0);
+  constant NO_KEY : c_width := (others => '0');
 end read_keys;
 
 architecture behavioral of read_keys is
@@ -552,7 +557,7 @@ architecture behavioral of read_keys is
   end component FFD;
 
   component registerN is
-    generic (NUM_BITS: integer);
+    generic (NUM_BITS: integer; INIT_VAL: std_logic_vector);
     port(clk, rst, ld: in  std_logic;
          D:            in  std_logic_vector(NUM_BITS-1 downto 0);
          Q:            out std_logic_vector(NUM_BITS-1 downto 0));
@@ -592,7 +597,7 @@ begin
     port map (clk=>clk, rst=>rst, ld=>cnt_ld, en=>cnt_en,
               D=>x_DEB_CYCLES, Q=>open, co=>debounced); 
   
-  U_NEW_DATA: registerN  generic map (4)
+  U_NEW_DATA: registerN  generic map (NUM_BITS, NO_KEY)  -- width matches NO_KEY
     port map (clk, rst, new_ld, keys_data, cpu_data);
 
   d <= new_ld & sel;                    -- new_ld, sel active in '0'
@@ -715,6 +720,8 @@ entity LCD_display is
         LCD_EN   : out   std_logic; -- LCD enable=1
         LCD_BLON : out   std_logic);
   constant NUM_BITS : integer := 8;
+  subtype c_width is std_logic_vector(NUM_BITS - 1 downto 0);
+  constant START_VALUE : c_width := (others => '0');
 end LCD_display;
 
 architecture behavioral of LCD_display is
@@ -728,7 +735,7 @@ architecture behavioral of LCD_display is
   end component wait_states;
   
   component registerN is
-    generic (NUM_BITS: integer);
+    generic (NUM_BITS: integer; INIT_VAL: std_logic_vector);
     port(clk, rst, ld: in  std_logic;
          D:            in  std_logic_vector;
          Q:            out std_logic_vector);
@@ -762,10 +769,10 @@ begin
   sel_rs <= addr when sel = '0' else RS;
   U_INPUT_RS: FFD port map (clk, rst, '1', sel_rs, RS);
 
-  U_INPUT: registerN generic map (NUM_BITS)
+  U_INPUT: registerN generic map (NUM_BITS, START_VALUE)
   port map (clk, rst, sel, data_inp(NUM_BITS-1 downto 0), inp_data);
 
-  U_OUTPUT: registerN generic map (NUM_BITS)
+  U_OUTPUT: registerN generic map (NUM_BITS, START_VALUE)
   port map (clk, rst, lcd_read, out_data, data_out(NUM_BITS-1 downto 0));
   data_out(31 downto NUM_BITS) <= (others => '0');
 
diff --git a/cMIPS/vhdl/memory.vhd b/cMIPS/vhdl/memory.vhd
index f78e68c4e326a7f75fd8bebe1ba487ee7cdf5299..3dc0a2d28828cbac7b469b5fe0c9fbc7d12c9c93 100644
--- a/cMIPS/vhdl/memory.vhd
+++ b/cMIPS/vhdl/memory.vhd
@@ -104,8 +104,8 @@ begin  -- behavioral
       index  := to_integer(u_addr);     -- indexed by word, not by byte
 
       assert (index >= 0) and (index < INST_MEM_SZ/4)
-        report "romRDindex out of bounds: " & natural'image(index)
-        severity failure;
+        report "romRDindex out of bounds: " & SLV32HEX(addr) & " = " &
+               natural'image(index)  severity failure;
 
       if sel = '0' and rising_edge(strobe) then 
         latched := index;
diff --git a/cMIPS/vhdl/packageExcp.vhd b/cMIPS/vhdl/packageExcp.vhd
index 2b7c02cb10f909f7e0e10d1a1156816907752063..c88504f36b12f5dbc7c00aa005cb76e1bb8cef96 100644
--- a/cMIPS/vhdl/packageExcp.vhd
+++ b/cMIPS/vhdl/packageExcp.vhd
@@ -28,8 +28,8 @@ package p_EXCEPTION is
                           exEI,exDI,    -- 5
                           exBREAK, exTRAP, exSYSCALL,  -- 8
                           exRESV_INSTR, exWAIT,  -- 10
-                          IFaddressError, MMaddressErrorLD, MMaddressErrorST,
-                          exTLBrefill, exTLBld, exTLBst, exTLBmod,  -- 17
+                          IFaddressError, MMaddressErrorLD, MMaddressErrorST, --13
+                          exTLBrefill, exTLBdblFault, exTLBinval, exTLBmod, -- 17
                           exOvfl,       -- 18
                           exLL,exSC,    -- 19,20  these are handled by COP0
                           exEHB,        -- 21
diff --git a/cMIPS/vhdl/packageMemory.vhd b/cMIPS/vhdl/packageMemory.vhd
index a1ac9efbd4718cfff8fed2ba2ca3758386965f85..d44c63c9aef1305d07796bd2a27591c4ae0eaaf9 100644
--- a/cMIPS/vhdl/packageMemory.vhd
+++ b/cMIPS/vhdl/packageMemory.vhd
@@ -146,7 +146,7 @@ package p_MEMORY is
   -- MMU parameters -----------------------------------------------------
 
   -- constants for CONFIG1 cop0 register (Table 8-24 pg 103)
-  constant MMU_CAPACITY : natural := 4;
+  constant MMU_CAPACITY : natural := 8;
   constant MMU_CAPACITY_BITS : natural := log2_ceil( MMU_CAPACITY );
   constant MMU_SIZE: reg6 := 
     std_logic_vector(to_signed( (MMU_CAPACITY-1), 6) );
@@ -154,21 +154,28 @@ package p_MEMORY is
   
   constant VABITS       : natural := 32;
   constant PABITS       : natural := 32;
-  constant PAGE_SZ      : natural := 1024;   -- 1k pages
+  constant PAGE_SZ      : natural := 4096;   -- 4k pages
   constant PAGE_SZ_BITS : natural := log2_ceil( PAGE_SZ );
 
   constant PPN_BITS     : natural := PABITS - PAGE_SZ_BITS;
   constant VA_HI_BIT    : natural := 31; -- VAaddr in EntryHi 31..PG_size
-  constant VA_LO_BIT    : natural := PAGE_SZ_BITS;
+  constant VA_LO_BIT    : natural := PAGE_SZ_BITS + 1;  -- maps 2 phy-pages
 
   constant ASID_HI_BIT  : natural := 7;  -- ASID   in EntryHi 7..0
   constant ASID_LO_BIT  : natural := 0;
 
   constant EHI_ASIDLO_BIT : natural := 0;
   constant EHI_ASIDHI_BIT : natural := 7;  
-  constant EHI_ALO_BIT  : natural := PAGE_SZ_BITS;
-  constant EHI_AHI_BIT  : natural := EHI_ALO_BIT + PPN_BITS - 1;
+  constant EHI_ALO_BIT  : natural := PAGE_SZ_BITS + 1;  -- maps 2 phy-pages
+  constant EHI_AHI_BIT  : natural := 31;
 
+  constant TAG_ASIDLO_BIT : natural := 0;
+  constant TAG_ASIDHI_BIT : natural := 7;
+  constant TAG_G_BIT    : natural := 8;
+  constant TAG_Z_BIT    : natural := 9;
+  constant TAG_ALO_BIT  : natural := PAGE_SZ_BITS + 1;  -- maps 2 phy-pages
+  constant TAG_AHI_BIT  : natural := 31;
+  
   constant ELO_G_BIT    : natural := 0;
   constant ELO_V_BIT    : natural := 1;
   constant ELO_D_BIT    : natural := 2;
@@ -177,13 +184,6 @@ package p_MEMORY is
   constant ELO_ALO_BIT  : natural := 6;
   constant ELO_AHI_BIT  : natural := ELO_ALO_BIT + PPN_BITS - 1;
 
-  constant TAG_ASIDLO_BIT : natural := 0;
-  constant TAG_ASIDHI_BIT : natural := 7;
-  constant TAG_G_BIT    : natural := 8;
-  constant TAG_Z_BIT    : natural := 9;
-  constant TAG_AHI_BIT  : natural := 31;  
-  constant TAG_ALO_BIT  : natural := TAG_AHI_BIT - PPN_BITS + 1;
-
   constant DAT_G_BIT    : natural := 0;
   constant DAT_V_BIT    : natural := 1;
   constant DAT_D_BIT    : natural := 2;
@@ -191,15 +191,102 @@ package p_MEMORY is
   constant DAT_CHI_BIT  : natural := 5;  
   constant DAT_ALO_BIT  : natural := 6;
   constant DAT_AHI_BIT  : natural := DAT_ALO_BIT + PPN_BITS - 1;
+  constant DAT_REG_BITS : natural := DAT_ALO_BIT + PPN_BITS;
 
- 
+  subtype mmu_dat_reg is std_logic_vector (DAT_AHI_BIT downto 0);
+  
   subtype  MMU_idx_bits is std_logic_vector(MMU_CAPACITY_BITS-1 downto 0);
   constant MMU_idx_0s : std_logic_vector(30 downto MMU_CAPACITY_BITS) :=
     (others => '0');
   constant MMU_IDX_BIT : natural := 31;  -- probe hit=1, miss=0
+
+  -- VA tags map a pair of PHY pages, thus VAddr is 1 bit less than (VABITS-1..PAGE_SZ_BITS)
+  constant tag_zeros : std_logic_vector(PAGE_SZ_BITS downto 0) := (others => '0');
+  constant tag_ones  : std_logic_vector(VABITS-1 downto PAGE_SZ_BITS+1) := (others => '1');
+  constant tag_mask  : reg32 := tag_ones & tag_zeros;
+
+  
+  constant x_ROM_PPN_0 : reg32 := std_logic_vector(to_signed(INST_BASE_ADDR + 0*PAGE_SZ, 32));
+  constant x_ROM_PPN_1 : reg32 := std_logic_vector(to_signed(INST_BASE_ADDR + 1*PAGE_SZ, 32));
+  constant x_ROM_PPN_2 : reg32 := std_logic_vector(to_signed(INST_BASE_ADDR + 2*PAGE_SZ, 32));
+  constant x_ROM_PPN_3 : reg32 := std_logic_vector(to_signed(INST_BASE_ADDR + 3*PAGE_SZ, 32));
+  constant x_ROM_PPN_4 : reg32 := std_logic_vector(to_signed(INST_BASE_ADDR + 4*PAGE_SZ, 32));
+  constant x_ROM_PPN_5 : reg32 := std_logic_vector(to_signed(INST_BASE_ADDR + 5*PAGE_SZ, 32));
+  constant x_ROM_PPN_6 : reg32 := std_logic_vector(to_signed(INST_BASE_ADDR + 6*PAGE_SZ, 32));
+  constant x_ROM_PPN_7 : reg32 := std_logic_vector(to_signed(INST_BASE_ADDR + 7*PAGE_SZ, 32));
+
+  constant MMU_ini_tag_ROM0 : reg32 := x_ROM_PPN_0 and tag_mask;
+  constant MMU_ini_dat_ROM0 : mmu_dat_reg := 
+   x_ROM_PPN_0(PABITS-1 downto PAGE_SZ_BITS) & b"000111"; -- d,v,g=1
+  constant MMU_ini_dat_ROM1 : mmu_dat_reg := 
+   x_ROM_PPN_1(PABITS-1 downto PAGE_SZ_BITS) & b"000111"; -- d,v,g=1
+
+  constant MMU_ini_tag_ROM2 : reg32 := x_ROM_PPN_2 and tag_mask;
+  constant MMU_ini_dat_ROM2 : mmu_dat_reg := 
+   x_ROM_PPN_2(PABITS-1 downto PAGE_SZ_BITS) & b"000111"; -- d,v,g=1
+  constant MMU_ini_dat_ROM3 : mmu_dat_reg := 
+   x_ROM_PPN_3(PABITS-1 downto PAGE_SZ_BITS) & b"000111"; -- d,v,g=1
+
+  constant MMU_ini_tag_ROM4 : reg32 := x_ROM_PPN_4 and tag_mask;
+  constant MMU_ini_dat_ROM4 : mmu_dat_reg := 
+   x_ROM_PPN_4(PABITS-1 downto PAGE_SZ_BITS) & b"000111"; -- d,v,g=1
+  constant MMU_ini_dat_ROM5 : mmu_dat_reg := 
+   x_ROM_PPN_5(PABITS-1 downto PAGE_SZ_BITS) & b"000111"; -- d,v,g=1
+
+  constant MMU_ini_tag_ROM6 : reg32 := x_ROM_PPN_6 and tag_mask;
+  constant MMU_ini_dat_ROM6 : mmu_dat_reg := 
+   x_ROM_PPN_6(PABITS-1 downto PAGE_SZ_BITS) & b"000111"; -- d,v,g=1
+  constant MMU_ini_dat_ROM7 : mmu_dat_reg := 
+   x_ROM_PPN_7(PABITS-1 downto PAGE_SZ_BITS) & b"000111"; -- d,v,g=1
+
+
+
+  constant x_RAM_PPN_0 : reg32 := std_logic_vector(to_signed(DATA_BASE_ADDR + 0*PAGE_SZ, 32));
+  constant x_RAM_PPN_1 : reg32 := std_logic_vector(to_signed(DATA_BASE_ADDR + 1*PAGE_SZ, 32));
+  constant x_RAM_PPN_2 : reg32 := std_logic_vector(to_signed(DATA_BASE_ADDR + 2*PAGE_SZ, 32));
+  constant x_RAM_PPN_3 : reg32 := std_logic_vector(to_signed(DATA_BASE_ADDR + 3*PAGE_SZ, 32));
+  constant x_RAM_PPN_4 : reg32 := std_logic_vector(to_signed(DATA_BASE_ADDR + 4*PAGE_SZ, 32));
+  constant x_RAM_PPN_5 : reg32 := std_logic_vector(to_signed(DATA_BASE_ADDR + 5*PAGE_SZ, 32));
+  constant x_RAM_PPN_6 : reg32 := std_logic_vector(to_signed(DATA_BASE_ADDR + 6*PAGE_SZ, 32));
+  constant x_RAM_PPN_7 : reg32 := std_logic_vector(to_signed(DATA_BASE_ADDR + 7*PAGE_SZ, 32));
+  
+  constant MMU_ini_tag_RAM0 : reg32 := x_RAM_PPN_0 and tag_mask;
+  constant MMU_ini_dat_RAM0 : mmu_dat_reg := 
+   x_RAM_PPN_0(PABITS-1 downto PAGE_SZ_BITS) & b"000111"; -- d,v,g=1
+  constant MMU_ini_dat_RAM1 : mmu_dat_reg := 
+   x_RAM_PPN_1(PABITS-1 downto PAGE_SZ_BITS) & b"000111"; -- d,v,g=1
+
+  constant MMU_ini_tag_RAM2 : reg32 := x_RAM_PPN_2 and tag_mask;
+  constant MMU_ini_dat_RAM2 : mmu_dat_reg := 
+   x_RAM_PPN_2(PABITS-1 downto PAGE_SZ_BITS) & b"000111"; -- d,v,g=1
+  constant MMU_ini_dat_RAM3 : mmu_dat_reg := 
+   x_RAM_PPN_3(PABITS-1 downto PAGE_SZ_BITS) & b"000111"; -- d,v,g=1
+
+  constant MMU_ini_tag_RAM4 : reg32 := x_RAM_PPN_4 and tag_mask;
+  constant MMU_ini_dat_RAM4 : mmu_dat_reg := 
+   x_RAM_PPN_4(PABITS-1 downto PAGE_SZ_BITS) & b"000111"; -- d,v,g=1
+  constant MMU_ini_dat_RAM5 : mmu_dat_reg := 
+   x_RAM_PPN_5(PABITS-1 downto PAGE_SZ_BITS) & b"000111"; -- d,v,g=1
+
+  constant MMU_ini_tag_RAM6 : reg32 := x_RAM_PPN_6 and tag_mask;
+  constant MMU_ini_dat_RAM6 : mmu_dat_reg := 
+   x_RAM_PPN_6(PABITS-1 downto PAGE_SZ_BITS) & b"000111"; -- d,v,g=1
+  constant MMU_ini_dat_RAM7 : mmu_dat_reg := 
+   x_RAM_PPN_7(PABITS-1 downto PAGE_SZ_BITS) & b"000111"; -- d,v,g=1
+
+
+  constant x_IO_PPN_0 : reg32 := std_logic_vector(to_signed(IO_BASE_ADDR + 0*PAGE_SZ, 32));
+  constant x_IO_PPN_1 : reg32 := std_logic_vector(to_signed(IO_BASE_ADDR + 1*PAGE_SZ, 32));
+
+  constant MMU_ini_tag_IO : reg32 := x_IO_PPN_0 and tag_mask;  -- tag from first IO page, as for ROM/RAM
+  constant MMU_ini_dat_IO0 : mmu_dat_reg := 
+   x_IO_PPN_0(PABITS-1 downto PAGE_SZ_BITS) & b"000111"; -- d,v,g=1
+  constant MMU_ini_dat_IO1 : mmu_dat_reg := 
+   x_IO_PPN_1(PABITS-1 downto PAGE_SZ_BITS) & b"000111"; -- d,v,g=1
+
   
-  constant mmu_PageMask : reg32 := x"00000000";  -- pg 68, 1k pages only
-  -- constant mmu_PageMask : reg32 := x"00001800";  -- pg 68, 4k pages only
+  -- constant mmu_PageMask : reg32 := x"00000000";  -- pg 68, 1k pages only
+  constant mmu_PageMask : reg32 := x"00001800";  -- pg 68, 4k pages only
 
   
 end p_MEMORY;
diff --git a/cMIPS/vhdl/packageWires.vhd b/cMIPS/vhdl/packageWires.vhd
index 1eafe8470ce2efa7ebb4b545ab00e6f755a3f525..459f38e3217e232b319042c480727b7e7433eaaa 100644
--- a/cMIPS/vhdl/packageWires.vhd
+++ b/cMIPS/vhdl/packageWires.vhd
@@ -54,6 +54,7 @@ package p_WIRES is
   subtype reg18 is std_logic_vector(17 downto 0);
   subtype reg19 is std_logic_vector(18 downto 0);
   subtype reg20 is std_logic_vector(19 downto 0);
+  subtype reg21 is std_logic_vector(20 downto 0);
   subtype reg24 is std_logic_vector(23 downto 0);
   subtype reg28 is std_logic_vector(27 downto 0);
   subtype reg30 is std_logic_vector(29 downto 0);
@@ -63,18 +64,16 @@ package p_WIRES is
   subtype reg64 is std_logic_vector(63 downto 0);  
 
   constant CLOCK_PER   : time := 20 ns;
-  constant REG_LATENCY : time := 0 ns;
-  constant ALU_LATENCY : time := 0 ns;
 
   -- DO NOT change (textual) format of these four lines
   constant NUM_MAX_W_STS  : integer := 1;
   constant ROM_WAIT_STATES: integer := 0;  -- num additional wait states
   constant RAM_WAIT_STATES: integer := 0;  -- num additional wait states
-  -- constant IO_WAIT_STATES : integer := 0;  -- num additional wait states
+  constant IO_WAIT_STATES : integer := 0;  -- num additional wait states
 
+  
   subtype  max_wait_states is integer range 0 to NUM_MAX_W_STS;
 
-
   type t_alu_fun is (opNOP,
                      opSLL, opSLLV, opSRL, opSRA, opSRLV, opSRAV,
                      opMOVZ, opMOVN,
@@ -175,7 +174,11 @@ package p_WIRES is
   function SLV32HEX(w: in std_logic_vector(31 downto 0)) return string;
   function BOOL2SL(b: in boolean) return std_logic;
   function SLV2ASCII(s: std_logic_vector(7 downto 0)) return character;
-
+  function SH_LEFT (inp: std_logic_vector; num_bits: integer)   -- zero-filling shift left
+    return std_logic_vector;
+  function SH_RIGHT(inp : std_logic_vector; num_bits : integer)  -- zero-filling shift right
+    return std_logic_vector;
+  
 end p_WIRES;
 
 
@@ -193,7 +196,29 @@ package body p_WIRES is
     end if;
   end;
   -- ---------------------------------------------------------  
-   
+
+
+  -- ---------------------------------------------------------
+  -- shift LEFT by num_bits: top bits dropped, zeros enter at the bottom (inp sliced with downto)
+  function SH_LEFT(inp : std_logic_vector; num_bits : integer) 
+    return std_logic_vector is
+    constant fill : std_logic_vector(num_bits-1 downto 0) := (others => '0');
+  begin
+    return inp(inp'high-num_bits downto inp'low) & fill;
+  end function SH_LEFT;
+  -- ---------------------------------------------------------
+
+  -- ---------------------------------------------------------
+  -- shift RIGHT by num_bits: low bits dropped, zeros enter at the top (inp sliced with downto)
+  function SH_RIGHT(inp : std_logic_vector; num_bits : integer) 
+    return std_logic_vector is
+    constant fill : std_logic_vector(num_bits-1 downto 0) := (others => '0');
+  begin
+    return fill & inp(inp'high downto inp'low+num_bits);
+  end function SH_RIGHT;
+  -- ---------------------------------------------------------
+  
+  
   -- --------------------------------------------------------- 
   -- convert boolean to std_logic
   function CONVERT_BOOLEAN(b: in boolean) return std_logic is
diff --git a/cMIPS/vhdl/tb_cMIPS.vhd b/cMIPS/vhdl/tb_cMIPS.vhd
index 6fe91e80ba1bb691da9ac55d0e6f117512284ca8..531cd5918c5a35044db18e2e84cf4f5d9b4ef8a4 100644
--- a/cMIPS/vhdl/tb_cMIPS.vhd
+++ b/cMIPS/vhdl/tb_cMIPS.vhd
@@ -463,7 +463,8 @@ begin  -- TB
 
   mf_altclkctrl_inst_clk4x : mf_altclkctrl port map (
     inclk => clk4x180, outclk => clk4x);
-  
+
+  phi1 <= '0';
   mf_altclkctrl_inst_phi0 : mf_altclkctrl port map (
     inclk => phi0in, outclk => phi0);
   mf_altclkctrl_inst_phi2 : mf_altclkctrl port map (
@@ -518,7 +519,7 @@ begin  -- TB
     port map (rst, clk, mem_i_sel,rom_rdy, phi2, mem_i_addr,datrom);
 
   U_IO_ADDR_DEC: io_addr_decode
-    port map (clk,rst, cpu_d_aVal, d_addr, dev_select_io,
+    port map (phi3,rst, cpu_d_aVal, d_addr, dev_select_io,
               io_print_sel, io_stdout_sel, io_stdin_sel,io_read_sel, 
               io_write_sel, io_counter_sel, io_fpu_sel, io_uart_sel,
               io_sstats_sel, io_7seg_sel, io_keys_sel, io_lcd_sel,
@@ -602,7 +603,7 @@ begin  -- TB
 
   uart_cts <= '1';
   
-  start_remota <= '0', '1' after 200*CLOCK_PER;
+  start_remota <= '0', '1' after 400*CLOCK_PER;
   
   U_uart_remota: remota generic map ("serial.out","serial.inp")
     port map (rst, clk, start_remota, uart_txd, uart_rxd, bit_rt);
@@ -744,7 +745,7 @@ begin
                           x_IO_BASE_ADDR(HI_SEL_BITS downto LO_SEL_BITS)) )
           else '1';
   
-  U_decode: process(aVal,addr)
+  U_decode: process(clk, aVal, addr)
     variable dev_sel    : reg4;
     constant is_noise   : integer := 0;
     constant is_print   : integer := 2;
@@ -778,15 +779,15 @@ begin
     
     case dev is -- to_integer(signed(addr(HI_ADDR downto LO_ADDR))) is
       when  0 => dev_sel     := std_logic_vector(to_signed(is_print, 4));
-                 print_sel   <= aVal;
+                 print_sel   <= aVal or clk;
       when  1 => dev_sel     := std_logic_vector(to_signed(is_stdout, 4));
-                 stdout_sel  <= aVal;
+                 stdout_sel  <= aVal or clk;
       when  2 => dev_sel     := std_logic_vector(to_signed(is_stdin, 4));
-                 stdin_sel   <= aVal;
+                 stdin_sel   <= aVal or clk;
       when  3 => dev_sel     := std_logic_vector(to_signed(is_read, 4));
-                 read_sel    <= aVal;
+                 read_sel    <= aVal or clk;
       when  4 => dev_sel     := std_logic_vector(to_signed(is_write, 4));
-                 write_sel   <= aVal;
+                 write_sel   <= aVal or clk;
       when  5 => dev_sel     := std_logic_vector(to_signed(is_count, 4));
                  counter_sel <= aVal;
       when  6 => dev_sel     := std_logic_vector(to_signed(is_FPU, 4));
diff --git a/cMIPS/vhdl/uart.vhd b/cMIPS/vhdl/uart.vhd
index 7b6ce76f7bc4b4094e511834868b76d28b2e9efe..37aedafbe764aeed07c0362780934d4a683a2b7d 100644
--- a/cMIPS/vhdl/uart.vhd
+++ b/cMIPS/vhdl/uart.vhd
@@ -553,9 +553,9 @@ begin
 
   -- U_bit_rt_tx: counter8 port map (clk,rst,tx_ld,en_tx_clk,x"00",tx_bit_rt);
   with ctrl(2 downto 0) select
-    tx_baud_div <=      8/2 when b"000",
-                       16/2 when b"001",
-                       32/2 when b"010",
+    tx_baud_div <=      4/2 when b"000",
+                        8/2 when b"001",
+                       16/2 when b"010",
                       434/2 when b"011",
                       868/2 when b"100",
                      1302/2 when b"101",
@@ -605,9 +605,9 @@ begin
 
   -- U_bit_rt_rx:counter8 port map(clk,rst,reset_rxck,en_rx_clk,00,rx_bit_rt);
   with ctrl(2 downto 0) select
-    rx_baud_div <=      8/2 when b"000",
-                       16/2 when b"001",
-                       32/2 when b"010",
+    rx_baud_div <=      4/2 when b"000",
+                        8/2 when b"001",
+                       16/2 when b"010",
                       434/2 when b"011",
                       868/2 when b"100",
                      1302/2 when b"101",
@@ -1041,9 +1041,9 @@ begin
   -- baud rate generators ---------------------------------------------
 
   with bit_rt select
-    tx_baud_div <=      8/2 when b"000",
-                       16/2 when b"001",
-                       32/2 when b"010",
+    tx_baud_div <=      4/2 when b"000",
+                        8/2 when b"001",
+                       16/2 when b"010",
                       434/2 when b"011",
                       868/2 when b"100",
                      1302/2 when b"101",
@@ -1071,9 +1071,9 @@ begin
 
   -- RX clock daud rate
   with bit_rt select
-    rx_baud_div <=      8/2 when b"000",
-                       16/2 when b"001",
-                       32/2 when b"010",
+    rx_baud_div <=      4/2 when b"000",
+                        8/2 when b"001",
+                       16/2 when b"010",
                       434/2 when b"011",
                       868/2 when b"100",
                      1302/2 when b"101",