diff --git a/cMIPS/include/cMIPS.h b/cMIPS/include/cMIPS.h
index ea9ad50cbe8c61251abb2af4bc67848281483ba3..5d6b40059cb49ca347767797fbe347d176a6f127 100644
--- a/cMIPS/include/cMIPS.h
+++ b/cMIPS/include/cMIPS.h
@@ -36,6 +36,7 @@ extern void dumpRAM(void);
 
 extern void cmips_delay(int);
 
+// external counter (peripheral)
 extern void startCounter(int, int);
 extern void stopCounter(void);
 extern int  readCounter(void);
@@ -43,6 +44,11 @@ extern int  readCounter(void);
 extern void enableInterr(void);
 extern void disableInterr(void);
 
+// internal counter, CP0 register COUNT
+extern int startCount(void);  // enables COUNT, returns new CAUSE
+extern int stopCount(void);   // stops COUNT; called by tests/count.c
+extern int readCount(void);   // returns the value of COUNT
+
 extern char *memcpy(char*, const char*, int);
 extern char *memset(char*, const int, int);
 
diff --git a/cMIPS/include/cMIPS.ld b/cMIPS/include/cMIPS.ld
index fa13f2198607ac5fec345fdb95ee5b247207b658..dcb75e63914860dab69e5c2575844ddf5e337a1c 100644
--- a/cMIPS/include/cMIPS.ld
+++ b/cMIPS/include/cMIPS.ld
@@ -28,8 +28,6 @@ SECTIONS
           _edata = . ;         /* end of data constant  (from Xinu) */
        }                                   > ram
        .data1   : { *(.data1) }            > ram
-       .rodata  : { *(.rodata .rodata.*) } > ram
-       .rodata1 : { *(.rodata1) }          > ram
        .lit8    : { *(.lit8) }             > ram
        .lit4    : { *(.lit4) }             > ram
        .sdata   : { *(.sdata .sdata.*) }   > ram
@@ -37,9 +35,14 @@ SECTIONS
        .bss :
        {
           *(.bss .bss.*) *(COMMON) 
+       }                                   > ram
+       .rodata1 : { *(.rodata1) }          > ram  
+       .rodata : 
+       { 
+          *(.rodata .rodata.*) 
           _end = . ;           /* end of image constant  (from Xinu) */
        }                                   > ram
-
+  
        end_RAM = 0x00020000; /* x_DATA_MEM_SZ */
        half_RAM = (end_RAM / 2);
        base_PT = ( _bdata + half_RAM );
diff --git a/cMIPS/include/handlers.s b/cMIPS/include/handlers.s
index 8a0dde7468a2c6d7a8d5571c7efc94bd32fad739..38789a900e99478a3b72d38822c03706f63b2ae3 100644
--- a/cMIPS/include/handlers.s
+++ b/cMIPS/include/handlers.s
@@ -133,7 +133,7 @@ UARTret:
 
 	mfc0  $k0, c0_status	    # Read STATUS register
 	ori   $k0, $k0, M_StatusIEn #   but do not modify its contents
-	mtc0  $k0, c0_status	
+	mtc0  $k0, c0_status	    #   except for re-enabling interrupts
 	eret			    # Return from interrupt
 	.end UARTinterr
 	#----------------------------------------------------------------
@@ -158,6 +158,40 @@ countCompare:
 	.end countCompare
 	#----------------------------------------------------------------
 
+
+	#================================================================
+	# startCount enables the COUNT register, returns new CAUSE
+	#   CAUSE.dc <= 0 to enable counting; result in $v0, clobbers $v1
+	#----------------------------------------------------------------
+	.text
+	.set    noreorder
+	.global startCount
+	.ent    startCount
+startCount:
+	mfc0 $v0, c0_cause	    # $v0 <- current CAUSE
+	lui  $v1, 0xf7ff	    # $v1 <- 0xf7ff0000
+	ori  $v1, $v1, 0xffff	    # $v1 <- 0xf7ffffff, all ones except bit 27
+        and  $v0, $v0, $v1	    # clear bit 27 (CAUSE.dc, per header above)
+	jr   $ra		    # return; $v0 holds the updated CAUSE
+	mtc0 $v0, c0_cause	    # delay slot (noreorder): write CAUSE back
+	.end    startCount
+	#----------------------------------------------------------------
+
+
+	#================================================================
+	# readCount returns the value of the COUNT register (in $v0)
+	#----------------------------------------------------------------
+	.text
+	.set    noreorder
+	.global readCount
+	.ent    readCount
+readCount:
+        mfc0 $v0, c0_count	    # $v0 <- COUNT, the return value
+        jr   $ra		    # return to caller
+        nop			    # fills the delay slot (noreorder)
+	.end    readCount
+	#----------------------------------------------------------------
+
 	
 	#================================================================
 	# functions to enable and disable interrupts, both return STATUS
@@ -493,7 +527,7 @@ k_for:	lbu   $a0, 0($a1)
 	.equ kmsg_interr,0
 	.equ kmsg_excep,1
 
-	.data
+	.section .rodata
         .align  2
 _kmsg_interr:	.asciiz "\n\t00 - interrupt\n\n"
 _kmsg_excep:	.asciiz "\n\t01 - exception\n\n"
@@ -502,7 +536,7 @@ _kmsg_seg_fault: 	.asciiz "\n\t03 - segmentation fault\n\n"
 _kmsg_sec_mem: 		.asciiz "\n\t04 - in secondary memory\n\n"
 
 	.global _kmsg_list
-	.data
+	.section .rodata
         .align  2
 _kmsg_list:
 	.word _kmsg_interr,_kmsg_excep, _kmsg_prot_viol, _kmsg_seg_fault
diff --git a/cMIPS/include/start.s b/cMIPS/include/start.s
index ecf80cd352a43436bcef0f5300ed6466b97f55b9..0a6a7fb5dffc3503941f56099b0bca641bce9ba0 100644
--- a/cMIPS/include/start.s
+++ b/cMIPS/include/start.s
@@ -51,7 +51,7 @@ _start:
 	
 	#
 	# the page table is located at the middle of the RAM
-	#   bottom half is reserved for "RAM memory", top for page table
+	#   bottom half is reserved for "RAM memory", top half is for PTable
 	#
 	.set TOP_OF_RAM, (x_DATA_BASE_ADDR + x_DATA_MEM_SZ)
 	.set MIDDLE_RAM, (x_DATA_BASE_ADDR + (x_DATA_MEM_SZ/2))
diff --git a/cMIPS/tests/count.c b/cMIPS/tests/count.c
new file mode 100644
index 0000000000000000000000000000000000000000..9e27a8fac3087007b8801a7510b168a0f619b07d
--- /dev/null
+++ b/cMIPS/tests/count.c
@@ -0,0 +1,71 @@
+//-------------------------------------------------------------------------
+// test if COUNT register counts up monotonically
+// reports an error if COUNT does not advance while each 11th element of
+//    the Fibonacci sequence is computed, or if it advances after stopCount
+//-------------------------------------------------------------------------
+
+#include "cMIPS.h"
+
+#define TRUE  (1==1)
+#define FALSE (1==0)
+
+//---------------------------------------------------------------------
+int fibonacci(int n) {
+  int prev = 0;      // F(k-2)
+  int curr = 1;      // F(k-1)
+  int result = 0;    // value returned when n <= 0
+  int k;
+
+  // n == 1 is the only input the loop below would get wrong
+  if (n == 1) {
+    return 1;
+  }
+  // for n <= 0 the loop never runs and result is still 0
+  for (k = 2; k <= n; k++) {
+    result = prev + curr;
+    prev = curr;
+    curr = result;
+  }
+  return result;
+}
+
+//=====================================================================
+int main() {  // checks that COUNT advances while enabled and halts once stopped
+  int i, new, old, monotonic;
+
+  print( startCount() );         // start COUNT, print what startCount returns
+  monotonic = TRUE;
+
+  for (i=0; i < 44; i += 11) {   // i = 0, 11, 22, 33
+    old = readCount();           // COUNT before computing fib(i)
+    print( fibonacci(i) );
+    new = readCount();           // COUNT after  computing fib(i)
+    monotonic = monotonic && ( (new - old) > 0 );  // COUNT must have advanced
+    if ( monotonic == FALSE ) {  // COUNT did not advance: print "err" and quit
+      to_stdout('e'); to_stdout('r'); to_stdout('r'); to_stdout('\n');
+      print(new);
+      exit(new);
+    }
+    // print(new);
+  }
+  // print(new);
+  to_stdout('o'); to_stdout('k'); to_stdout('\n');
+
+  // NOTE(review): confirm stopCount() is declared in cMIPS.h and defined in handlers.s
+  // now disable COUNT and make sure that it has stopped
+  print( stopCount() );          // stop COUNT, print what stopCount returns
+  old = readCount();             // COUNT before computing fib(5)
+  print( fibonacci(5) );
+  new = readCount();             // COUNT after  computing fib(5)
+  monotonic = monotonic && ( (new - old) > 0 );  // TRUE only if COUNT still advanced
+  if ( monotonic == TRUE ) {     // COUNT kept counting after the stop: error
+    to_stdout('e'); to_stdout('r'); to_stdout('r'); to_stdout('\n');
+    print(new);
+    exit(new);
+  } else {
+    // print(new);
+    to_stdout('o'); to_stdout('k'); to_stdout('\n');
+  }
+  exit(new);                     // exit with the last COUNT value read
+}
+//=====================================================================
diff --git a/cMIPS/tests/count.expected b/cMIPS/tests/count.expected
new file mode 100644
index 0000000000000000000000000000000000000000..4da7552e918b672cd5affcf4e75dbb7a1dd67ae9
--- /dev/null
+++ b/cMIPS/tests/count.expected
@@ -0,0 +1,9 @@
+0080007c
+00000000
+00000059
+0000452f
+0035c7e2
+ok
+0880007c
+00000005
+ok
diff --git a/cMIPS/tests/doTests.sh b/cMIPS/tests/doTests.sh
index e0a8fc7ede910cd808bb1c87bf40a39cd433188d..c746be69914d927d01b94de20b69e26e1d4da142 100755
--- a/cMIPS/tests/doTests.sh
+++ b/cMIPS/tests/doTests.sh
@@ -104,7 +104,7 @@ if [ 0 = 0 ] ; then
 fi
 
 
-c_small="divmul fat fib sieve ccitt16 gcd matrix negcnt reduz rand"
+c_small="divmul fat fib count sieve ccitt16 gcd matrix negcnt reduz rand"
 c_types="xram sort-byte sort-half sort-int memcpy"
 c_sorts="bubble insertion merge quick selection shell"
 c_FPU="FPU_m"
@@ -133,7 +133,7 @@ else
 fi
 
 for F in $(echo "$SIMULATE" ) ; do 
-    $bin/compile.sh -O 3 ${F}.c  || exit 1
+    $bin/compile.sh -O3 ${F}.c  || exit 1
     ${simulator} --ieee-asserts=disable --stop-time=$stoptime \
           2>/dev/null >$F.simout
     diff $ignBLANKS -q $F.expected $F.simout
diff --git a/cMIPS/tests/uart_defs.c b/cMIPS/tests/uart_defs.c
new file mode 100644
index 0000000000000000000000000000000000000000..699e596f5bc1b954d1553770fe360382e785e168
--- /dev/null
+++ b/cMIPS/tests/uart_defs.c
@@ -0,0 +1,37 @@
+
+typedef struct control { // control register fields (uses only ls byte)
+  unsigned int ign : 24, // ignore uppermost 3 bytes
+    rts     : 1,         // Request to Send output (bit 7)
+    ign2    : 2,         // bits 6,5 ignored
+    intTX   : 1,         // interrupt on TX buffer empty (bit 4)
+    intRX   : 1,         // interrupt on RX buffer full (bit 3)
+    speed   : 3;         // 4,8,16... {tx,rx}clock data rates  (bits 0..2)
+} Tcontrol;              // 32 bits in all; NOTE(review): bit numbers assume MSB-first bit-field allocation -- confirm for the target ABI
+
+typedef struct status {  // status register fields (uses only ls byte)
+  unsigned int ign : 24, // ignore uppermost 3 bytes
+    cts     : 1,         // Clear To Send input=1 (bit 7)
+    txEmpty : 1,         // TX register is empty (bit 6)
+    rxFull  : 1,         // octet available from RX register (bit 5)
+    int_TX_empt: 1,      // interrupt pending on TX empty (bit 4)
+    int_RX_full: 1,      // interrupt pending on RX full (bit 3)
+    ign1    : 1,         // ignored (bit 2)
+    framing : 1,         // framing error (bit 1)
+    overun  : 1;         // overrun error (bit 0)
+} Tstatus;               // 32 bits in all; NOTE(review): bit numbers assume MSB-first bit-field allocation -- confirm for the target ABI
+
+typedef union ctlStat { // control + status on same address
+  Tcontrol  ctl;        // write-only: view used when storing to the register
+  Tstatus   stat;       // read-only: view used when loading from the register
+} TctlStat;
+
+typedef union data {    // data registers on same address
+  int tx;               // write-only: value to transmit
+  int rx;               // read-only: value received
+} Tdata;
+
+typedef struct serial { // UART register layout, overlaid on IO_UART_ADDR by the tests
+  TctlStat cs;          // address is (int *)IO_UART_ADDR
+  Tdata    d;           // address is (int *)(IO_UART_ADDR+1); follows cs in memory
+} Tserial;
+
diff --git a/cMIPS/tests/uart_irx.c b/cMIPS/tests/uart_irx.c
index a280a103cded4d977fcb934375737230b0b063d3..007ddb2633d46174d81f8e81b8364f7d109a4787 100644
--- a/cMIPS/tests/uart_irx.c
+++ b/cMIPS/tests/uart_irx.c
@@ -11,46 +11,13 @@
 
 #include "cMIPS.h"
 
-typedef struct control { // control register fields (uses only ls byte)
-  int ign   : 24,        // ignore uppermost bits
-    rts     : 1,         // Request to Send output (bit 7)
-    ign2    : 2,         // bits 6,5 ignored
-    intTX   : 1,         // interrupt on TX buffer empty (bit 4)
-    intRX   : 1,         // interrupt on RX buffer full (bit 3)
-    speed   : 3;         // 4,8,16..256 tx-rx clock data rates  (bits 0..2)
-} Tcontrol;
-
-typedef struct status { // status register fields (uses only ls byte)
-  int ign : 24,       // ignore uppermost bits
-  ign7    : 1,        // ignored (bit 7)
-  txEmpty : 1,        // TX register is empty (bit 6)
-  rxFull  : 1,        // octet available from RX register (bit 5)
-  int_TX_empt: 1,     // interrupt pending on TX empty (bit 4)
-  int_RX_full: 1,     // interrupt pending on RX full (bit 3)
-  ign2    : 1,        // ignored (bit 2)
-  framing : 1,        // framing error (bit 1)
-  overun  : 1;        // overun error (bit 0)
-} Tstatus;
-
-
-typedef union ctlStat { // control + status on same address
-  Tcontrol  ctl;        // write-only
-  Tstatus   stat;       // read-only
-} TctlStat;
-
-typedef union data {    // data registers on same address
-  int tx;               // write-only
-  int rx;               // read-only
-} Tdata;
-
-typedef struct serial {
-  TctlStat cs;          // @ (int *)IO_UART_ADDR
-  Tdata    d;           // @ (int *)(IO_UART_ADDR+1)
-} Tserial;
+#include "uart_defs.c"
 
 
 extern int _uart_buff[16]; // declared in include/handlers.s
 
+#define SPEED 2    // operate at 1/4 of the highest data rate
+
 
 int main(void) { // receive a string through the UART serial interface
   volatile Tserial *uart;  // tell GCC not to optimize away code
@@ -61,10 +28,10 @@ int main(void) { // receive a string through the UART serial interface
   uart = (void *)IO_UART_ADDR; // bottom of UART address range
 
   ctrl.ign   = 0;
-  ctrl.rts   = 0;  // make RTS=0 to hold remote unit
+  ctrl.rts   = 0;  // make RTS=0 to hold remote unit inactive
   ctrl.intTX = 0;
   ctrl.intRX = 0;
-  ctrl.speed = 2;  // operate at 1/4 of the highest data rate
+  ctrl.speed = SPEED;
   uart->cs.ctl = ctrl; // initizlize UART
 
   // handler sets flag=bfr[3] to 1 after new character is received;
@@ -72,10 +39,10 @@ int main(void) { // receive a string through the UART serial interface
   bfr[3] = 0;      //   reset flag  
 
   ctrl.ign   = 0;
-  ctrl.rts   = 1;  // make RTS=1 so RemoteUnit starts its transmission
+  ctrl.rts   = 1;  // make RTS=1 to activate remote unit
   ctrl.intTX = 0;
   ctrl.intRX = 1;  // do generate interrupts on RXbuffer full
-  ctrl.speed = 2;  // operate at 1/4 of the highest data rate
+  ctrl.speed = SPEED;  // operate at 1/4 of the highest data rate
   uart->cs.ctl = ctrl;
 
   do {
@@ -83,7 +50,7 @@ int main(void) { // receive a string through the UART serial interface
       {};                 // nothing new
     c = (char)bfr[2];     // get new character
     bfr[3] = 0;           //   and reset flag
-    to_stdout( (int)c );
+    to_stdout( (int)c );  //   and print new char
   } while (c != '\0');    // end of string?
 
   return c;
diff --git a/cMIPS/tests/uartrx.c b/cMIPS/tests/uartrx.c
index cc00516214d584c310d631ad1099cf933363071d..b8d0b630e2b15d1d35b3328e432cab1596d86af1 100644
--- a/cMIPS/tests/uartrx.c
+++ b/cMIPS/tests/uartrx.c
@@ -1,41 +1,14 @@
+//
+// Test UART's reception circuit.
+//
+// Remote unit reads string from file serial.inp and sends it over the
+//   serial line.  This program prints the string to simulator's stdout.
+
+
 #include "cMIPS.h"
 
-typedef struct control { // control register fields (uses only ls byte)
-  int ign   : 24,        // ignore uppermost bits
-    rts     : 1,         // Request to Send output (bit 7)
-    ign2    : 2,         // bits 6,5 ignored
-    intTX   : 1,         // interrupt on TX buffer empty (bit 4)
-    intRX   : 1,         // interrupt on RX buffer full (bit 3)
-    speed   : 3;         // 4,8,16..256 tx-rx clock data rates  (bits 0..2)
-} Tcontrol;
-
-typedef struct status {  // status register fields (uses only ls byte)
-  unsigned int ign : 24, // ignore uppermost 3 bytes
-  cts     : 1,           // Clear To Send input=1 (bit 7)
-  txEmpty : 1,           // TX register is empty (bit 6)
-  rxFull  : 1,           // octet available from RX register (bit 5)
-  int_TX_empt: 1,        // interrupt pending on TX empty (bit 4)
-  int_RX_full: 1,        // interrupt pending on RX full (bit 3)
-  ign1    : 1,           // ignored (bit 2)
-  framing : 1,           // framing error (bit 1)
-  overun  : 1;           // overun error (bit 0)
-} Tstatus;
-
-
-typedef union ctlStat { // control + status on same address
-  Tcontrol  ctl;        // write-only
-  Tstatus   stat;       // read-only
-} TctlStat;
-
-typedef union data {    // data registers on same address
-  int tx;               // write-only
-  int rx;               // read-only
-} Tdata;
-
-typedef struct serial {
-  TctlStat cs;
-  Tdata    d;
-} Tserial;
+#include "uart_defs.c"
+
 
 
 #if 0
@@ -44,6 +17,8 @@ char s[32]; // = "the quick brown fox jumps over the lazy dog";
 char s[32]; // = "               ";
 #endif
 
+#define SPEED 1
+
 int main(void) { // receive a string through the UART serial interface
   int i;
   volatile int state;
@@ -53,22 +28,23 @@ int main(void) { // receive a string through the UART serial interface
 
   uart = (void *)IO_UART_ADDR; // bottom of UART address range
 
+  // reset all UART's signals
   ctrl.ign   = 0;
-  ctrl.rts   = 0;   // make RTS=0 to hold RemoteUnit
+  ctrl.rts   = 0;      // make RTS=0 to keep RemoteUnit inactive
   ctrl.ign2  = 0;
   ctrl.intTX = 0;
   ctrl.intRX = 0;
-  ctrl.speed = 1;   // operate at the second highest data rate
+  ctrl.speed = SPEED;  // operate at the second highest data rate
   uart->cs.ctl = ctrl;
 
   i = -1;
 
   ctrl.ign   = 0;
-  ctrl.rts   = 1;   // make RTS=1 to activate RemoteUnit
+  ctrl.rts   = 1;      // make RTS=1 to activate RemoteUnit
   ctrl.ign2  = 0;
   ctrl.intTX = 0;
   ctrl.intRX = 0;
-  ctrl.speed = 1;   // operate at the second highest data rate
+  ctrl.speed = SPEED;  // operate at the second highest data rate
   uart->cs.ctl = ctrl;
 
   do {
diff --git a/cMIPS/tests/uarttx.c b/cMIPS/tests/uarttx.c
index a9b68c8d3cd19216a8efad220df70ca53cad7a97..7aace2b7c09655fbf90c72a010620ee04d836c66 100644
--- a/cMIPS/tests/uarttx.c
+++ b/cMIPS/tests/uarttx.c
@@ -1,42 +1,15 @@
+//
+// Test UART's transmission circuit.
+//
+// Remote unit receives a string over the serial line and prints it
+//   on the simulator's standard output.
+//
 
 #include "cMIPS.h"
 
-typedef struct control { // control register fields (uses only ls byte)
-  int ign   : 24,        // ignore uppermost bits
-    rts     : 1,         // Request to Send output (bit 7)
-    ign2    : 2,         // bits 6,5 ignored
-    intTX   : 1,         // interrupt on TX buffer empty (bit 4)
-    intRX   : 1,         // interrupt on RX buffer full (bit 3)
-    speed   : 3;         // 4,8,16..256 tx-rx clock data rates  (bits 0..2)
-} Tcontrol;
-
-typedef struct status {  // status register fields (uses only ls byte)
-  unsigned int ign : 24, // ignore uppermost 3 bytes
-  cts     : 1,           // Clear To Send input=1 (bit 7)
-  txEmpty : 1,           // TX register is empty (bit 6)
-  rxFull  : 1,           // octet available from RX register (bit 5)
-  int_TX_empt: 1,        // interrupt pending on TX empty (bit 4)
-  int_RX_full: 1,        // interrupt pending on RX full (bit 3)
-  ign1    : 1,           // ignored (bit 2)
-  framing : 1,           // framing error (bit 1)
-  overun  : 1;           // overun error (bit 0)
-} Tstatus;
-
-
-typedef union ctlStat { // control + status on same address
-  Tcontrol  ctl;        // write-only
-  Tstatus   stat;       // read-only
-} TctlStat;
-
-typedef union data {    // data registers on same address
-  int tx;               // write-only
-  int rx;               // read-only
-} Tdata;
-
-typedef struct serial {
-  TctlStat cs;
-  Tdata    d;
-} Tserial;
+#include "uart_defs.c"
+
+
 
 #define LONG_STRING 1
 
@@ -60,14 +33,15 @@ int strcopy(const char *y, char *x)
 
 #define SPEED 0       // operate at the highest data rate
 
-#define COUNTING ((SPEED+1)*100) // how long to wait for last bits to be sent out
+// how long to wait for last bits to be sent out before ending simulation
+#define COUNTING ((SPEED+1)*100) 
 
 
 int main(void) { // send a string through the UART serial interface
   int i;
   volatile unsigned int state, val;
 
-  volatile Tserial *uart;  // tell GCC to not optimize away tests
+  volatile Tserial *uart;  // tell GCC to not optimize away any code
   Tcontrol ctrl;
 
   volatile int *counter;        // address of counter
@@ -78,9 +52,9 @@ int main(void) { // send a string through the UART serial interface
   s[0] = '1';   s[1] = '2';   s[2] = '3';   s[3] = '\0';
 #endif 
 
-  uart = (void *)IO_UART_ADDR;  // UART's address
+  uart    = (void *)IO_UART_ADDR;  // UART's address
 
-  counter = (int *)IO_COUNT_ADDR; // counter's address
+  counter = (void *)IO_COUNT_ADDR; // counter's address
 
   ctrl.speed = SPEED;
   ctrl.intTX = 0;  // no interrupts
diff --git a/cMIPS/vhdl/core.vhd b/cMIPS/vhdl/core.vhd
index 9b8ea27c37562b71e5f797cb6282fae31f3995cc..b03d1a713dce1afe2b618e54211ffe58d7878560 100644
--- a/cMIPS/vhdl/core.vhd
+++ b/cMIPS/vhdl/core.vhd
@@ -1911,10 +1911,8 @@ begin
             i_update   := '1';
             i_stall    := '0';
           when cop0reg_COUNT    | cop0reg_COMPARE  | cop0reg_CAUSE   |
-               cop0reg_EntryLo0 | cop0reg_EntryLo1 | cop0reg_EntryHi =>
-            i_update   := '1';
-            i_stall    := '0';
-          when cop0reg_Index  | cop0reg_Context | cop0reg_Wired =>
+               cop0reg_EntryLo0 | cop0reg_EntryLo1 | cop0reg_EntryHi |
+               cop0reg_Index    | cop0reg_Context  | cop0reg_Wired   =>
             i_update   := '1';
             i_stall    := '0';
           when cop0reg_EPC =>