diff --git a/cMIPS/cop0.sav b/cMIPS/cop0.sav
index 53ae4a9054aedb452a1b83c6b6e4b6daa97abdd4..42540845a582be8ff5014432b22f71b46c283f5e 100644
--- a/cMIPS/cop0.sav
+++ b/cMIPS/cop0.sav
@@ -1,15 +1,15 @@
 [*]
 [*] GTKWave Analyzer v3.3.37 (w)1999-2012 BSI
-[*] Mon Apr 14 13:16:41 2014
+[*] Fri Mar 20 20:27:42 2015
 [*]
 [dumpfile] "/home/roberto/cMIPS/v_cMIPS.vcd"
-[dumpfile_mtime] "Fri Apr 11 21:14:44 2014"
-[dumpfile_size] 20098880
+[dumpfile_mtime] "Fri Mar 20 19:54:48 2015"
+[dumpfile_size] 21630696
 [savefile] "/home/roberto/cMIPS/cop0.sav"
-[timestart] 0
+[timestart] 5829800000
 [size] 1062 920
-[pos] 461 -1
-*-27.000000 127500000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
+[pos] 854 0
+*-26.000000 5940000000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 [treeopen] u_core.
 [treeopen] u_core.u_alu.
 [sst_width] 210
@@ -18,6 +18,8 @@
 [sst_vpaned_height] 291
 @28
 clk
+@200
+-    fetch
 @22
 u_core.pc[31:0]
 u_core.instr_fetched[31:0]
@@ -25,12 +27,14 @@ u_core.instr_fetched[31:0]
 u_core.pcsel[1:0]
 u_core.excp_pcsel[2:0]
 u_core.exception_stall
-@25
+@24
 u_core.nullify
+@29
+u_core.interrupt_taken
 @200
 -    decode, reg fetch
 @22
-u_core.instr_valid[31:0]
+u_core.rf_instruction[31:0]
 @24
 u_core.a_rs[4:0]
 u_core.a_rt[4:0]
@@ -59,28 +63,30 @@ u_core.epc[31:0]
 -  counter
 @28
 u_core.irq[5:0]
-@24
-u_interrupt_counter.q[15:0]
+@22
+u_interrupt_counter.q[29:0]
 @200
 -    memory
 @28
 u_core.b_sel[3:0]
 @22
+u_core.data_inp[31:0]
+u_core.data_out[31:0]
 d_addr[31:0]
-u_print_data.data[31:0]
 @28
 cpu_d_aval
 wr
 @200
 -    write-back
 @28
-u_core.wb_muxc[2:0]
 u_core.wb_wreg
 @24
 u_core.wb_a_c[4:0]
 @22
-u_core.wb_result[31:0]
 u_core.wb_cop0_val[31:0]
+@28
+u_core.wb_muxc[2:0]
+@22
 u_core.wb_c[31:0]
 [pattern_trace] 1
 [pattern_trace] 0
diff --git a/cMIPS/tests/badVAddr.s b/cMIPS/tests/badVAddr.s
index eb1b24e1f32869e51d38947639d727ebfd833dba..5f4ac62dbee169e2611671efd56f21ecbee835e6 100644
--- a/cMIPS/tests/badVAddr.s
+++ b/cMIPS/tests/badVAddr.s
@@ -8,17 +8,13 @@
 	.global exit
 	.ent    _start
 _start: nop
-        li   $k0, cop0_CAUSE_reset  # RESET, kernel mode, all else disabled
-        mtc0 $k0, cop0_STATUS
-
-        li   $k0, cop0_CAUSE_reset  # RESET, COUNTER stopped, no interrupts
-        mtc0 $k0, cop0_CAUSE
-
 	li   $sp,(x_DATA_BASE_ADDR+x_DATA_MEM_SZ-8) # initialize SP: ramTop-8
-
-	nop
-	jal main
-	nop
+        la   $k0, main          # k0 <- address of main
+        nop
+        mtc0 $k0, cop0_EPC      # EPC <- main; the eret below is meant to resume there
+        nop                     # keeps one instruction between the EPC write and eret
+        eret    # go into user mode, all else disabled
+        nop
 exit:	
 _exit:	nop	# flush pipeline
 	nop
@@ -37,19 +33,13 @@ _exit:	nop	# flush pipeline
 excp_180:	
 _excp_180:
         mfc0  $k0, cop0_CAUSE
-	sw    $k0,0($15)        # print CAUSE
-
-	addiu $7,$7,-1
-
-	li    $k0, 0x18000300   # disable interrupts
-        mtc0  $k0, cop0_STATUS
+	sw    $k0, 0($15)       # print CAUSE
 	mfc0  $k0, cop0_EPC     # fix return address
-	srl   $k0,$k0,2
-	sll   $k0,$k0,2
+	addiu $7, $7, -1
+	srl   $k0, $k0, 2
+	sll   $k0, $k0, 2
 	mtc0  $k0, cop0_EPC
-
-	li    $k0, cop0_CAUSE_reset # clear CAUSE
-	mtc0  $k0, cop0_CAUSE
+	nop
 	eret
 	.end _excp_180
 
diff --git a/cMIPS/tests/badVAddrMM.s b/cMIPS/tests/badVAddrMM.s
index d8bc0835f3369a76be3f01b7dd6d417c953ce678..112111015f8144235e0dddd054aa7b4b8eacd657 100644
--- a/cMIPS/tests/badVAddrMM.s
+++ b/cMIPS/tests/badVAddrMM.s
@@ -8,24 +8,21 @@
 	.global exit
 	.ent    _start
 _start: nop
-        li   $k0, cop0_STATUS_reset # RESET, kernel mode, all else disabled
-        mtc0 $k0, cop0_STATUS
-
-	li   $k0, cop0_CAUSE_reset # RESET, no exceptions 
-        mtc0 $k0, cop0_CAUSE
-
 	li   $sp,(x_DATA_BASE_ADDR+x_DATA_MEM_SZ-8) # initialize SP: ramTop-8
-
-	nop
-	jal main
+        la   $k0, main          # k0 <- address of main
+        nop
+        mtc0 $k0, cop0_EPC      # EPC <- main; the eret below is meant to resume there
+        nop                     # keeps one instruction between the EPC write and eret
+        eret    # go into user mode, all else disabled
+        nop
 	nop
 exit:	
-_exit:	nop	     # flush pipeline
+_exit:	nop	# flush pipeline
 	nop
 	nop
 	nop
 	nop
-	wait 0 # then stop VHDL simulation
+	wait    # then stop VHDL simulation
 	nop
 	nop
 	.end _start
@@ -46,44 +43,40 @@ _excp_180:
 	addiu $k1, $zero, -4	# -4 = 0xffff.fffc
 	and   $15,$15,$k1	# fix the invalid address
 
-	li    $k0, 0x18000300   # disable interrupts
-        mtc0  $k0, cop0_STATUS
-	#mfc0  $k0, cop0_EPC     # fix the return address, align to word
-	#and   $k0, $k0, $k1
 	eret
 	.end _excp_180
 
 
 	.org x_ENTRY_POINT,0    # normal code start
-main:	la $14,x_IO_BASE_ADDR
-	la $15,x_IO_BASE_ADDR
-	li $7,3                 # do 3 rounds
-	la $3,-1
+main:	la $14, x_IO_BASE_ADDR
+	la $15, x_IO_BASE_ADDR
+	li $7, 3                # do 3 rounds
+	la $3, -1
 	nop
 
-here:	addiu $3,$3,1
+here:	addiu $3, $3, 1
 	sw  $3, 0($15)          # exception handler decreases $7
-	beq $7,$zero, next      # there should be 3 exceptions: addr&{01,10,11}
+	beq $7, $zero, next     # there should be 3 exceptions: addr&{01,10,11}
 	nop			# of type AddrError store=x14
-	addu $15,$15,$3
+	addu $15, $15, $3
 	j here
 	nop
 	
 next:	li $29, '\n'           # to separate output
 	sw $29, x_IO_ADDR_RANGE($14)
-	la $15,x_DATA_BASE_ADDR
-	la $18,x_IO_BASE_ADDR
-	li $7,3
-	la $3,-1
-	sw  $7, 0($15)
+	la $15, x_DATA_BASE_ADDR
+	la $18, x_IO_BASE_ADDR
+	li $7, 3
+	la $3, -1
+	sw $7, 0($15)
 	nop
 
 there:	addiu $3,$3,1
 	lw  $3, 0($15)      	# there should be 3 exceptions: addr&{01,10,11}
 	sw  $7, 0($18)		# of type AddrError if/ld=x10
-	beq $7,$zero, after
+	beq $7, $zero, after
 	nop
-	addu $15,$15,$3
+	addu $15, $15, $3
 	j there
 	nop
 
@@ -92,15 +85,15 @@ after:	li $29, '\n'           	# to separate output
 	sw $29, x_IO_ADDR_RANGE($14)
 	la $14, x_IO_BASE_ADDR
 	la $15, x_IO_BASE_ADDR
-	li $7,3
-	la $3,-1
+	li $7, 3
+	la $3, -1
 	nop
 
-here2:	addiu $3,$3,1      	# there should be 3 exceptions: addr&{01,11}
+here2:	addiu $3, $3, 1      	# there should be 3 exceptions: addr&{01,11}
 	sh  $3, 0($15)		# of type AddrError store=x14
-	beq $7,$zero, next2
+	beq $7, $zero, next2
 	nop
-	addu $15,$15,$3
+	addu $15, $15, $3
 	j here2
 	nop
 
@@ -109,16 +102,16 @@ next2:	li $29, '\n'           # to separate output
 	sw $29, x_IO_ADDR_RANGE($14)
 	la $15, x_DATA_BASE_ADDR
 	la $18, x_IO_BASE_ADDR
-	li $7,3
-	la $3,-1
+	li $7, 3
+	la $3, -1
 	sw  $7, 0($15)
 	nop
 
 there2:	lh  $3, 0($15)      	# there should be 3 exceptions: addr&{01,11}
 	sw  $7, 0($18)		# of type AddrError if/ld=x10
-	beq $7,$zero, end
+	beq $7, $zero, end
 	nop
-	addu $15,$15,$3
+	addu $15, $15, $3
 	nop
 	j there2
 	nop
diff --git a/cMIPS/tests/break.s b/cMIPS/tests/break.s
index ffe3a95ab1672ccc712226bef0fb358889acc514..9492d9d61fb8657683707a75eafb4732eec97f04 100644
--- a/cMIPS/tests/break.s
+++ b/cMIPS/tests/break.s
@@ -7,25 +7,25 @@
 	.global exit
 	.set noreorder
 	.ent    _start
-_start: nop
-        li   $k0, cop0_STATUS_reset # RESET, kernel mode, all else disabled
-        mtc0 $k0, cop0_STATUS
-
-	li   $k0, cop0_CAUSE_reset # RESET, no exceptions
-        mtc0 $k0, cop0_CAUSE
 
+        ##
+        ## reset leaves processor in kernel mode, all else disabled
+        ##
+_start: nop
 	li   $sp,(x_DATA_BASE_ADDR+x_DATA_MEM_SZ-8) # initialize SP: ramTop-8
-	
-	nop
-	jal main
-	nop
+        la   $k0, main          # k0 <- address of main
+        nop
+        mtc0 $k0, cop0_EPC      # EPC <- main; the eret below is meant to resume there
+        nop                     # keeps one instruction between the EPC write and eret
+        eret     # go into user mode, all else disabled
+        nop
 exit:	
-_exit:	nop	     # flush pipeline
+_exit:	nop	 # flush pipeline
 	nop
 	nop
 	nop
 	nop
-	wait 0       # then stop VHDL simulation
+	wait     # then stop VHDL simulation
 	nop
 	nop
 	.end _start
@@ -41,10 +41,6 @@ _excp_180:
         li    $k0, '\n'
 	sw    $k0, x_IO_ADDR_RANGE($15)  # print new-line
 	addiu $7, $7, -1
-	li    $k0, 0x18000300   # disable interrupts
-        mtc0  $k0, cop0_STATUS
-	li    $k0, cop0_CAUSE_reset # RESET, no exceptions
-        mtc0  $k0, cop0_CAUSE
 	eret
 	.end _excp_180
 
@@ -54,13 +50,12 @@ _excp_180:
 	.ent _excp_200
 excp_200:
 _excp_200:
+        ##
+        ## this exception should not happen
+        ##
         mfc0  $k0, cop0_CAUSE
 	sw    $k0,0($15)        # print CAUSE
-	addiu $7,$7,-1
-	li    $k0, 0x18000300   # disable interrupts
-        mtc0  $k0, cop0_STATUS
-	li    $k0, cop0_CAUSE_reset # RESET, no exceptions
-        mtc0  $k0, cop0_CAUSE
+	addiu $7,$7,+1
 	eret
 	.end _excp_200
 
@@ -71,12 +66,10 @@ main:	la $15,x_IO_BASE_ADDR
 	li $5,0
 here:	sw $5, 0($15)
 
-	li    $6, 0x18000302       # kernel mode, disable interrupts
-	mtc0  $6, cop0_STATUS
 	addiu $5, $5,2
 	break 15
 	bne   $7, $zero, here
-
 	nop
+
 	j exit
 	nop
diff --git a/cMIPS/tests/doTests.sh b/cMIPS/tests/doTests.sh
index 63b755cd8ada23cc3a917812d0294808de4cafeb..8e8a4bd92352f1de1b65aa61e1fc1aaa9d74b882 100755
--- a/cMIPS/tests/doTests.sh
+++ b/cMIPS/tests/doTests.sh
@@ -63,7 +63,7 @@ done
 
 touch input.data serial.inp
 
-a_FWD="fwdAddAddAddSw fwd_SW lwFWDsw lwFWDsw2 slt32 reg0"
+a_FWD="fwdAddAddAddSw fwd_SW lwFWDsw lwFWDsw2 slt32 slt_u_32 slt_s_32 reg0"
 a_CAC="dCacheTst lhUshUCache lbUsbUCache lbsbCache dCacheTstH dCacheTstB"
 a_BEQ="lw-bne bXtz sltbeq beq_dlySlot jr_dlySlot"
 a_FUN="jaljr jr_2 jal_fun_jr jalr_jr bltzal_fun_jr"
diff --git a/cMIPS/tests/mfc0CONFIG.s b/cMIPS/tests/mfc0CONFIG.s
index c6791dcf81ee19166d3315355cf7c7b63a144936..85b96176c90bd4fb26456c99b7c0292cb3a0376c 100644
--- a/cMIPS/tests/mfc0CONFIG.s
+++ b/cMIPS/tests/mfc0CONFIG.s
@@ -7,12 +7,7 @@
 	.global exit
 	.ent    _start
 _start: nop
-        li   $k0, 0x10000002  # RESET_STATUS, kernel mode, all else disabled
-        mtc0 $k0, cop0_STATUS
 	li   $sp,(x_DATA_BASE_ADDR+x_DATA_MEM_SZ-8) # initialize SP: ramTop-8
-	li   $k0, 0x0000007c # CAUSE_STATUS, no exceptions 
-        mtc0 $k0, cop0_CAUSE # clear CAUSE
-
 	nop
 	jal main
 	nop
@@ -27,23 +22,20 @@ _exit:	nop	     # flush pipeline
 	nop
 	.end _start
 	
-	.org x_EXCEPTION_0180,0 # exception vector_180 at 0x00000060
+	.org x_EXCEPTION_0180,0 # exception vector_180
 	.global _excp_180
-	.global excp_180
 	.ent _excp_180
-excp_180:	
 _excp_180:
         mfc0  $k0, cop0_CAUSE
-	sw    $k0,0($15)        # print CAUSE
+	sw    $k0,0($15)         # print CAUSE
 	addiu $7,$7,-1
-	li    $k0, 0x10000300   # disable interrupts
+	li    $k0, 0x10000300    # disable interrupts
         mtc0  $k0, cop0_STATUS
-	mtc0  $zero, cop0_CAUSE # clear CAUSE
 	eret
 	.end _excp_180
 
 
-	.org x_ENTRY_POINT,0      # normal code starts at 0x0000.0100
+	.org x_ENTRY_POINT,0     # normal code starts at ENTRY_POINT
 main:	la $15,x_IO_BASE_ADDR
 	nop
 	mfc0 $6,cop0_STATUS
@@ -56,8 +48,8 @@ main:	la $15,x_IO_BASE_ADDR
 	sw   $6, 0($15)
 	nop
 	mfc0 $6,cop0_CONFIG,1
-	li   $7, 0xfe00003f      # mask off cache configuration
-	and  $6,$6,$7            #  so changes in caches won't break this
+	li   $7, 0x8000007f    # mask off TLB/cache configuration
+	and  $6,$6,$7          #  so changes in TLB/caches won't break this
 	sw   $6, 0($15)
 
 	j exit
diff --git a/cMIPS/tests/overflow.expected b/cMIPS/tests/overflow.expected
index b37fe4218bee6c1f3cddd7906d8a31ff94cf6dc8..3695f814613effc47b8db5f2a6ec372122ddfff6 100644
--- a/cMIPS/tests/overflow.expected
+++ b/cMIPS/tests/overflow.expected
@@ -1,11 +1,20 @@
-00000030
+08800030
 80000000
+
 ffffffff
+
 80000000
+
 ffffffff
+
 00000000
-00000030
+
+08800030
 7fffffff
+
+7fffffff
+
 00000000
-00000030
+
+08800030
 80000000
diff --git a/cMIPS/tests/overflow.s b/cMIPS/tests/overflow.s
index ba66b46d7cc81ac263ecd261dd4b0224eddefd92..9faab0a5e14d04482568d5df54788f90da05af25 100644
--- a/cMIPS/tests/overflow.s
+++ b/cMIPS/tests/overflow.s
@@ -6,23 +6,25 @@
 	.global _exit
 	.global exit
 	.ent    _start
+
+        ##
+        ## reset leaves processor in kernel mode, all else disabled
+        ##
 _start: nop
-        li   $k0, 0x18000002  # RESET_STATUS, kernel mode, all else disabled
-        mtc0 $k0, cop0_STATUS
 	li   $sp,(x_DATA_BASE_ADDR+x_DATA_MEM_SZ-8) # initialize SP: ramTop-8
-	li   $k0, 0x0000007c # CAUSE_STATUS, no exceptions 
-        mtc0 $k0, cop0_CAUSE # clear CAUSE
-
-	nop
-	jal main
-	nop
+        la   $k0, main          # k0 <- address of main
+        nop
+        mtc0 $k0, cop0_EPC      # EPC <- main; the eret below is meant to resume there
+        nop                     # keeps one instruction between the EPC write and eret
+        eret     # go into user mode, all else disabled
+        nop
 exit:	
-_exit:	nop	     # flush pipeline
+_exit:	nop	 # flush pipeline
 	nop
 	nop
 	nop
 	nop
-	wait 0 # then stop VHDL simulation
+	wait     # then stop VHDL simulation
 	nop
 	nop
 	.end _start
@@ -34,15 +36,20 @@ _exit:	nop	     # flush pipeline
 excp_180:	
 _excp_180:
         mfc0  $k0, cop0_CAUSE
+	#sw    $k0, 0($15)       # print CAUSE = 0000.0030
+	#sw    $k0, 0($15)       # print CAUSE = 0000.0030
 	sw    $k0, 0($15)       # print CAUSE = 0000.0030
-	li    $k0, 0x18000300   #   disable interrupts
+	li    $k0, 0x18000302   #   disable interrupts; bit 1 is now set too (presumably EXL -- confirm)
 	mtc0  $k0, cop0_STATUS  #   and return
+	nop
 	eret
 	.end _excp_180
 
 
 	.org x_ENTRY_POINT,0    # normal code starts at 0x0000.0100
 main:	la $15,x_IO_BASE_ADDR
+	la $16,x_IO_BASE_ADDR+x_IO_ADDR_RANGE	# $16 = output address used for the separator stores below
+	li $17, '\n'				# $17 = newline, stored to 0($16) after each test case
 
 	# signed overflow       
 	li  $3,0x7FFFFFFF	# positive +s positive -> positive
@@ -50,10 +57,16 @@ main:	la $15,x_IO_BASE_ADDR
 	add $5,$3,$4
 	sw  $5, 0($15)		# ===exception=== 0x8000.0000 == negative
 
+	nop
+	sw $17, 0($16)
+	
 	# no overflow
 	li   $6,0xFFFFFFFe      # negative + positive -> no overflow
 	addi $7,$6,1
 	sw   $7, 0($15)		# 0xffff.ffff == negative
+
+	nop
+	sw $17, 0($16)
 	
 	# add unsigned, no overflow
 	li   $3,0x7FFFFFFF      # positive +u positive -> positive
@@ -61,10 +74,16 @@ main:	la $15,x_IO_BASE_ADDR
 	addu $5,$3,$4
 	sw   $5, 0($15)		# 0x8000.0000 == unsigned positive
 
+	nop
+	sw $17, 0($16)
+	
 	# add unsigned, no overflow
 	li    $6,0xFFFFFFFe	# negative +u positive -> positive
 	addiu $7,$6,1
 	sw    $7, 0($15)	# 0xffff.ffff == unsigned positive
+
+	nop
+	sw $17, 0($16)
 	
 	# no overflow
 	li   $3,0xFFFFFFFF	# negative +s positive -> negative 
@@ -72,17 +91,34 @@ main:	la $15,x_IO_BASE_ADDR
 	add  $5,$3,$4
 	sw   $5, 0($15)		# 0x0000.0000
 
+	nop
+	sw $17, 0($16)
+	
 	# signed overflow
 	li   $6,0x80000000      # negative -s negative -> negative
 	addi $7,$6,-1
 	sw   $7, 0($15)		# ===exception=== 0x7fff.ffff == positive
 
+	nop
+	sw $17, 0($16)
+	
+	# unsigned overflow
+	li   $6,0x80000000      # positive -u negative -> positive
+	addiu $7,$6,-1
+	sw   $7, 0($15)		# 0x7fff.ffff == positive
+
+	nop
+	sw $17, 0($16)
+	
 	# no overflow, unsigned
 	li   $3,0xFFFFFFFF      # positive +u positive -> positive
 	li   $4,0x00000001
 	addu $5,$3,$4
 	sw   $5, 0($15)		# 0x0000.0000  ok since instr is an addU
 
+	nop
+	sw $17, 0($16)
+	
 	# signed overflow 
 	li    $6,0x7FFFFFFe	# positive +s positive -> positive
 	addi  $7,$6,2
diff --git a/cMIPS/tests/syscall.expected b/cMIPS/tests/syscall.expected
index f8aeb531aa390d9bb32eec9fb978db53ae0492fc..5b606746ff7700973befcbd0b4e7ff1d567b3b04 100644
--- a/cMIPS/tests/syscall.expected
+++ b/cMIPS/tests/syscall.expected
@@ -1,8 +1,8 @@
 00000000
-00000020
+08800020
 00000002
-00000020
+08800020
 00000004
-00000020
+08800020
 00000006
-00000020
+08800020
diff --git a/cMIPS/tests/syscall.s b/cMIPS/tests/syscall.s
index 562d28456f85cd2794354d6ddde4b619762b9f6e..72e025c2c957d1e3fb6f12bc4ac99d6ae1f826c1 100644
--- a/cMIPS/tests/syscall.s
+++ b/cMIPS/tests/syscall.s
@@ -6,23 +6,24 @@
 	.global _exit
 	.global exit
 	.ent    _start
+        ##
+        ## reset leaves processor in kernel mode, all else disabled
+        ##
 _start: nop
-        li   $k0, 0x18000002  # RESET_STATUS, kernel mode, all else disabled
-        mtc0 $k0, cop0_STATUS
 	li   $sp,(x_DATA_BASE_ADDR+x_DATA_MEM_SZ-8) # initialize SP: ramTop-8
-	li   $k0, 0x0000007c # CAUSE_STATUS, no exceptions 
-        mtc0 $k0, cop0_CAUSE # clear CAUSE
-
-	nop
-	jal main
-	nop
+        la   $k0, main          # k0 <- address of main
+        nop
+        mtc0 $k0, cop0_EPC      # EPC <- main; the eret below is meant to resume there
+        nop                     # keeps one instruction between the EPC write and eret
+        eret     # go into user mode, all else disabled
+        nop
 exit:	
-_exit:	nop	     # flush pipeline
+_exit:	nop	 # flush pipeline
 	nop
 	nop
 	nop
 	nop
-	wait  # then stop VHDL simulation
+	wait     # then stop VHDL simulation
 	nop
 	nop
 	.end _start
@@ -39,25 +40,26 @@ excp_200:
 _excp_200:
         mfc0  $k0, cop0_CAUSE
 	sw    $k0,0($15)        # print CAUSE
-	addiu $7,$7,-1
 	li    $k0, 0x18000300   # disable interrupts
         mtc0  $k0, cop0_STATUS
-	mtc0  $zero, cop0_CAUSE # clear CAUSE
+	addiu $7,$7,-1
 	eret
 	.end _excp_180
 
 
 	.org x_ENTRY_POINT,0      # normal code starts at 0x0000.0100
-main:	la $15,x_IO_BASE_ADDR
-	li $7,4
-	li $5,0
+main:	la $15, x_IO_BASE_ADDR
+	li $7, 4
+	li $5, 0
 here:	sw $5, 0($15)
 
-	li   $6, 0x18000302   # kernel mode, disable interrupts
-	mtc0 $6,cop0_STATUS
-	addiu $5,$5,2
+	#li   $6, 0x18000302       # kernel mode, disable interrupts
+	#mtc0 $6, cop0_STATUS
+	addiu $5, $5, 2
 	syscall
-	bne   $7,$zero, here
-
+	bne   $7, $zero, here
+	nop
+	
 	j exit
+	nop
 	
diff --git a/cMIPS/tests/teq_tne.expected b/cMIPS/tests/teq_tne.expected
index 66d189011bc342dfeda2645539f2cd049ba338ad..0b8d7af318c61f72b91c6ed28faa3fecbb02450c 100644
--- a/cMIPS/tests/teq_tne.expected
+++ b/cMIPS/tests/teq_tne.expected
@@ -9,36 +9,36 @@
 00000004
 00000006
 00000008
-00000034
+08800034
 00000000
 00000002
 00000004
 00000006
 00000008
-00000034
+08800034
 
 00000004
-00000034
+08800034
 00000003
-00000034
+08800034
 00000002
-00000034
+08800034
 00000001
-00000034
+08800034
 
 0000000a
 00000008
 00000006
 00000004
-00000034
+08800034
 
 00000005
-00000034
+08800034
 00000004
-00000034
+08800034
 00000003
-00000034
+08800034
 00000002
-00000034
+08800034
 00000001
-00000034
+08800034
diff --git a/cMIPS/tests/teq_tne.s b/cMIPS/tests/teq_tne.s
index caf9f4ef21e49478931b6102eef5dec175c68f62..fc58e3270efa53eb34c87c47faae00752caf0e20 100644
--- a/cMIPS/tests/teq_tne.s
+++ b/cMIPS/tests/teq_tne.s
@@ -7,12 +7,16 @@
 	.global _exit
 	.global exit
 	.ent    _start
+
+	##
+	## reset leaves processor in kernel mode, all else disabled
+	##
 _start: nop
-        li   $k0, 0x18000002  # RESET_STATUS, kernel mode, all else disabled
-        mtc0 $k0, cop0_STATUS
 	li   $sp,(x_DATA_BASE_ADDR+x_DATA_MEM_SZ-8) # initialize SP: ramTop-8
-	nop
-	jal main
+        la   $k0, main          # k0 <- address of main
+        mtc0 $k0, cop0_EPC      # EPC <- main; the eret below is meant to resume there
+	ehb                      # execution hazard barrier between the EPC write and eret
+        eret     # go into user mode, all else disabled
 	nop
 exit:	
 _exit:	nop	 # flush pipeline
@@ -30,28 +34,30 @@ _exit:	nop	 # flush pipeline
 	.ent _excp_180
 excp_180:	
 _excp_180:
+	##
+	## print CAUSE, reset counter, decrement iteration control
+	##
         mfc0  $k0, cop0_CAUSE
 	sw    $k0,0($15)       # print CAUSE
 	li    $5,0
 	addiu $7,$7,-1         # decrement iteration control
-	nop
 excp_180ret:
-	li   $k0, 0x18000000   # disable interrupts
-        mtc0 $k0, cop0_STATUS
-	mtc0 $zero, cop0_CAUSE # clear CAUSE
 	eret
 	.end _excp_180
 
 
 	.org x_EXCEPTION_0200,0
 	.ent _excp_200
-excp_200:	
-_excp_200:	
+excp_200:			
+_excp_200:
+	##
+	## this exception should not happen
+	##
 	li   $28,-1
 	sw   $28, 0($15)       # signal exception to std_out
 	sw   $28, 0($15)
         mfc0 $k0, cop0_CAUSE
-	sw   $k0,0($15)        # print CAUSE
+	sw   $k0, 0($15)       # print CAUSE
 	sw   $28, 0($15)
 	sw   $28, 0($15)
 	eret                   #   and return
@@ -62,29 +68,41 @@ _excp_200:
 	.org x_ENTRY_POINT,0
 main:	la    $15, x_IO_BASE_ADDR # print out address (simulator's stdout)
 	li    $7, 3
-	li    $6, 10
+	li    $6, 10		# limit = 10
 	li    $5, 0             # value to print
 
+	##
+	## print sequence 0,2,4,6,8,CAUSE (08800034), three times
+	##
 here:	sw    $5, 0($15)        # print out value: 3x(2,4,6,8,34)
 	addiu $5, $5, 2         # value += 2
-	teq   $5, $6            # trap if value = 10, $7--
+	teq   $5, $6            # trap if value = 10, handler does $7--
 	beq   $7, $zero, there  # if done 3 rounds, go on to next test
 	nop
 	b here
 	nop
-	
+
+	## print out '\n' to separate tests
 there:	li    $28, '\n'
-	sw    $28, x_IO_ADDR_RANGE($15)     # print out '\n' to separate tests
-	li   $7, 4              # will do 4 traps/exceptions
+	sw    $28, x_IO_ADDR_RANGE($15)     
+
+	##
+	## print sequence 4,cause,3,cause,2,cause,1,cause
+	##
+	li    $7, 4             # will do 4 traps/exceptions
 
 then:	sw   $7, 0($15)         # print out number of rounds to do: (4,3,2,1)
 	tne  $5, $7             # trap if value != 4, $7--
 	bnez $7, then
 	nop
 
+	## print out '\n' to separate tests	
 	li    $28, '\n'
-	sw    $28, x_IO_ADDR_RANGE($15)     # print out '\n' to separate tests
+	sw    $28, x_IO_ADDR_RANGE($15)
 
+	##
+	## print sequence a,8,6,4,cause=34
+	##
 	li    $7, 1
 	li    $6, 10
 	nop
@@ -95,9 +113,13 @@ here2:	sw    $6, 0($15)        # print out values: (a,8,6,4,34)
 	b     here2
 	nop
 
+	## print out '\n' to separate tests	
 there2:	li    $28, '\n'
 	sw    $28, x_IO_ADDR_RANGE($15)     # print out '\n' to separate tests
 	
+	##
+	## print sequence  5,cause,4,cause,3,cause,2,cause,1,cause=34
+	##
 	li   $7, 5		# will do 5 rounds
 then2:	sw   $7, 0($15)         # print out values: (5,34,4,34,3,34,2,34,1,34)
 	tnei $7, 0              # trap handler decreases $7
@@ -105,4 +127,4 @@ then2:	sw   $7, 0($15)         # print out values: (5,34,4,34,3,34,2,34,1,34)
 	nop
 	j    exit
 	nop
-	
+	
\ No newline at end of file
diff --git a/cMIPS/tests/tlt_tlti.expected b/cMIPS/tests/tlt_tlti.expected
index c12e87815a3922b98850ab9a806e63b65b3db2ba..f7f8131e40f14b8f221b642fd9c3078b5a27f3cb 100644
--- a/cMIPS/tests/tlt_tlti.expected
+++ b/cMIPS/tests/tlt_tlti.expected
@@ -1,40 +1,40 @@
 00000000
 08800034
 00000002
-00000034
+08800034
 00000004
-00000034
+08800034
 00000006
-00000034
+08800034
 00000000
 00000000
 00000000
-00000034
+08800034
 00000002
-00000034
+08800034
 00000004
-00000034
+08800034
 00000006
-00000034
+08800034
 00000000
 00000000
 00000000
 0000000a
-00000034
+08800034
 00000008
-00000034
+08800034
 00000006
-00000034
+08800034
 00000004
-00000034
+08800034
 00000000
 00000000
 0000000a
-00000034
+08800034
 00000008
-00000034
+08800034
 00000006
-00000034
+08800034
 00000004
-00000034
+08800034
 00000000
diff --git a/cMIPS/tests/tlt_tlti.s b/cMIPS/tests/tlt_tlti.s
index dddd520e461f9cfa504ca235b493e2a73c8806ee..bb50ef997ce79e22d40fa9630978d9dd9a4e3eea 100644
--- a/cMIPS/tests/tlt_tlti.s
+++ b/cMIPS/tests/tlt_tlti.s
@@ -1,4 +1,7 @@
-	# mips-as -O0 -EL -mips32r2
+        ##
+        ## this test is run in User Mode
+        ##
+	# mips-as -O0 -EL -mips32r2
 	.include "cMIPS.s"
 	.text
 	.align 2
@@ -6,12 +9,15 @@
 	.global _exit
 	.global exit
 	.ent    _start
+
+	##
+	## reset leaves processor in kernel mode, all else disabled
+	##
 _start: nop
-        li   $k0, 0x10000002  # RESET_STATUS, kernel mode, all else disabled
-        mtc0 $k0, cop0_STATUS
 	li   $sp,(x_DATA_BASE_ADDR+x_DATA_MEM_SZ-8) # initialize SP: ramTop-8
-	nop
-	jal main
+	la   $k0, main		# k0 <- address of main
+	mtc0 $k0, cop0_EPC	# EPC <- main; NOTE(review): no nop/ehb before eret, unlike teq_tne.s and tltu_tgeu.s -- confirm there is no CP0 hazard
+	eret # go into user mode, all else disabled
 	nop
 exit:	
 _exit:	nop	     # flush pipeline
@@ -34,13 +40,10 @@ excp_180:
 _excp_180:
 excp_200:	
 _excp_200:
-        mfc0 $k0, cop0_CAUSE
-	j excp_handler
-	nop
+        mfc0  $k0, cop0_CAUSE
+	sw    $k0,0($15)        # print CAUSE to stdout
+	addiu $7,$7,-1		# and decrement $7
 excp_180ret:
-	li   $k0, 0x10000000   # disable interrupts
-        mtc0 $k0, cop0_STATUS
-	mtc0 $zero, cop0_CAUSE # clear CAUSE
 	eret
 	.end _excp_180
 
@@ -52,12 +55,6 @@ excp_180ret:
 #	nop
 #	.end _excp_200
 
-excp_handler:
-	sw    $k0,0($15)        # print CAUSE to stdout
-	addiu $7,$7,-1		# and decrement $7
-	j excp_180ret
-
-	
 	.org x_ENTRY_POINT,0      # normal code starts at 0x0000.0100
 main:	la    $15,x_IO_BASE_ADDR
 	li    $7,4
diff --git a/cMIPS/tests/tltu_tgeu.s b/cMIPS/tests/tltu_tgeu.s
index e702ce619efb566bc8c66b553bfaebb59d9b454e..3fbde78534021eb51613f1e4bcf929919f6e1631 100644
--- a/cMIPS/tests/tltu_tgeu.s
+++ b/cMIPS/tests/tltu_tgeu.s
@@ -1,3 +1,6 @@
+        ##
+        ## this test is run in User Mode
+        ##
 	# mips-as -O0 -EL -mips32r2
 	.include "cMIPS.s"
 	.text
@@ -7,15 +10,20 @@
 	.global _exit
 	.global exit
 	.ent    _start
+
+        ##
+        ## reset leaves processor in kernel mode, all else disabled
+        ##
 _start: nop
-        li   $k0, cop0_STATUS_reset # RESET, kernel mode, all else disabled
-        mtc0 $k0, cop0_STATUS
-        li   $k0, cop0_CAUSE_reset  # RESET, COUNTER stopped, no interrupts
-        mtc0 $k0, cop0_CAUSE
-	li   $sp,(x_DATA_BASE_ADDR+x_DATA_MEM_SZ-8) # initialize SP: ramTop-8
-	nop
-	jal main
-	nop
+        li   $sp,(x_DATA_BASE_ADDR+x_DATA_MEM_SZ-8) # initialize SP: ramTop-8
+        la   $k0, main          # k0 <- address of main
+        mtc0 $k0, cop0_EPC      # EPC <- main; the eret below is meant to resume there
+        nop                     # three nops separate the EPC write from eret
+        nop                     #   (presumably covering the CP0 write hazard -- confirm)
+        nop
+        eret # go into user mode, all else disabled
+        nop
+
 exit:	
 _exit:	nop	# flush pipeline
 	nop
@@ -32,10 +40,10 @@ _exit:	nop	# flush pipeline
 excp_180:	
 _excp_180:
         mfc0  $k0, cop0_CAUSE
-        sw    $k0,0($15)       # print CAUSE
         addiu $7,$7,-1         # decrement iteration control
-	li    $k0, 0x18000000  # disable interrupts
-        mtc0  $k0, cop0_STATUS
+        sw    $k0,0($15)       # print CAUSE
+	#li    $k0, 0x18000000  # disable interrupts
+        #mtc0  $k0, cop0_STATUS
 	eret
 	.end _excp_180
 
@@ -44,6 +52,9 @@ _excp_180:
         .ent _excp_200
 excp_200:
 _excp_200:
+        ##
+        ## this exception should not happen
+        ##
         li   $28,'\n'
         sw   $28, x_IO_ADDR_RANGE($15)  # signal exception to std_out
         sw   $28, x_IO_ADDR_RANGE($15)  #  print two \n
diff --git a/cMIPS/v_irx.sav b/cMIPS/v_irx.sav
index 65d6caa828d65c00945a4c9026ddcb18e092f384..d270feb4c17675c87575c9164ac4b7065dbd4f05 100644
--- a/cMIPS/v_irx.sav
+++ b/cMIPS/v_irx.sav
@@ -1,31 +1,28 @@
 [*]
 [*] GTKWave Analyzer v3.3.37 (w)1999-2012 BSI
-[*] Tue Apr 29 21:22:49 2014
+[*] Mon Mar 23 00:21:20 2015
 [*]
 [dumpfile] "/home/roberto/cMIPS/v_cMIPS.vcd"
-[dumpfile_mtime] "Tue Apr 29 20:55:49 2014"
-[dumpfile_size] 6651472
-[savefile] "/home/roberto/cMIPS/v_irx.sav"
-[timestart] 8343000000
-[size] 1062 891
-[pos] -1 31
-*-30.000000 9833400000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
+[dumpfile_mtime] "Mon Mar 23 00:06:43 2015"
+[dumpfile_size] 21610740
+[savefile] "/home/roberto/cmips.git/cMIPS/v_irx.sav"
+[timestart] 6484000000
+[size] 1062 950
+[pos] 854 0
+*-28.000000 7780000000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 [treeopen] u_simple_uart.
 [sst_width] 210
 [signals_width] 249
 [sst_expanded] 1
-[sst_vpaned_height] 259
+[sst_vpaned_height] 280
 @28
 clk
 @22
 u_core.pc[31:0]
-u_core.instr_fetched[31:0]
-@28
-u_core.exception_stall
 @200
 -    decode, reg fetch
 @22
-u_core.instr_valid[31:0]
+u_core.rf_instruction[31:0]
 u_core.regs_a[31:0]
 u_core.regs_b[31:0]
 @200
@@ -50,11 +47,20 @@ u_core.status[31:0]
 u_core.b_sel[3:0]
 @22
 d_addr[31:0]
-u_print_data.data[31:0]
+u_core.data_inp[31:0]
+u_core.data_out[31:0]
 @28
 cpu_d_aval
 u_core.mm_wrmem
 @200
+-    write-back
+@28
+u_core.wb_muxc[2:0]
+u_core.wb_wreg
+@22
+u_core.wb_a_c[4:0]
+u_core.wb_c[31:0]
+@200
 -  UART
 @28
 u_simple_uart.u_uart.s_stat
@@ -87,13 +93,5 @@ u_simple_uart.u_uart.rxdat
 u_uart_remota.tx_dbg_st[31:0]
 @28
 u_uart_remota.outdat
-@200
--    write-back
-@28
-u_core.wb_muxc[2:0]
-u_core.wb_wreg
-@22
-u_core.wb_a_c[4:0]
-u_core.wb_c[31:0]
 [pattern_trace] 1
 [pattern_trace] 0
diff --git a/cMIPS/v_rx.sav b/cMIPS/v_rx.sav
index 34d2fccf5889b42ec407ab563ec7eb4a3ca90e9f..409d8e99a3f039333358bb92234aa8b6b050542b 100644
--- a/cMIPS/v_rx.sav
+++ b/cMIPS/v_rx.sav
@@ -1,15 +1,15 @@
 [*]
 [*] GTKWave Analyzer v3.3.37 (w)1999-2012 BSI
-[*] Tue Apr 29 12:58:31 2014
+[*] Fri Mar 20 22:47:56 2015
 [*]
 [dumpfile] "/home/roberto/cMIPS/v_cMIPS.vcd"
-[dumpfile_mtime] "Tue Apr 29 12:56:03 2014"
-[dumpfile_size] 3613672
-[savefile] "/home/roberto/cMIPS/v_rx.sav"
-[timestart] 19270000000
+[dumpfile_mtime] "Fri Mar 20 22:35:01 2015"
+[dumpfile_size] 21835141
+[savefile] "/home/roberto/cmips.git/cMIPS/v_rx.sav"
+[timestart] 57013700000
 [size] 1062 914
-[pos] -1 2
-*-31.000000 23100000000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
+[pos] 854 0
+*-27.000000 57200000000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 [treeopen] u_simple_uart.
 [sst_width] 210
 [signals_width] 249
@@ -23,7 +23,7 @@ u_core.instr_fetched[31:0]
 @200
 -    decode, reg fetch
 @22
-u_core.instr_valid[31:0]
+u_core.rf_instruction[31:0]
 u_core.regs_a[31:0]
 u_core.regs_b[31:0]
 @200
@@ -39,12 +39,21 @@ u_core.result[31:0]
 @28
 u_core.b_sel[3:0]
 @22
+u_core.data_inp[31:0]
+u_core.data_out[31:0]
 d_addr[31:0]
-u_print_data.data[31:0]
 @28
 cpu_d_aval
 u_core.mm_wrmem
 @200
+-    write-back
+@28
+u_core.wb_muxc[2:0]
+u_core.wb_wreg
+@22
+u_core.wb_a_c[4:0]
+u_core.wb_c[31:0]
+@200
 -  UART
 @28
 u_simple_uart.u_uart.s_stat
@@ -60,16 +69,14 @@ u_simple_uart.u_uart.s_rx
 u_simple_uart.u_uart.rxreg[7:0]
 @200
 -  reception
-@25
+@24
 u_simple_uart.u_uart.rxcpu_dbg_st[31:0]
 @28
-u_simple_uart.u_uart.car_rx
 u_simple_uart.u_uart.rx_bfr_full
 @24
 u_simple_uart.u_uart.rx_dbg_st[31:0]
 @28
 u_simple_uart.u_uart.sta_recv_sto[9:0]
-u_simple_uart.u_uart.reset_rxck
 u_simple_uart.u_uart.rxclk
 u_simple_uart.u_uart.rxdat
 @200
@@ -81,14 +88,6 @@ u_uart_remota.tx_dbg_st[31:0]
 @28
 u_uart_remota.outdat
 @200
--    write-back
-@28
-u_core.wb_muxc[2:0]
-u_core.wb_wreg
-@22
-u_core.wb_a_c[4:0]
-u_core.wb_c[31:0]
-@200
 -
 [pattern_trace] 1
 [pattern_trace] 0
diff --git a/cMIPS/v_tx.sav b/cMIPS/v_tx.sav
index 2835b63209ace157283a8a9a5579fa7ecb8cf4a3..740042cb21205adcd89312087678d4369c97269a 100644
--- a/cMIPS/v_tx.sav
+++ b/cMIPS/v_tx.sav
@@ -1,31 +1,33 @@
 [*]
 [*] GTKWave Analyzer v3.3.37 (w)1999-2012 BSI
-[*] Tue Apr 29 11:12:13 2014
+[*] Fri Mar 20 22:26:10 2015
 [*]
 [dumpfile] "/home/roberto/cMIPS/v_cMIPS.vcd"
-[dumpfile_mtime] "Tue Apr 29 10:40:31 2014"
-[dumpfile_size] 1744109
-[savefile] "/home/roberto/cMIPS/v_tx.sav"
-[timestart] 1526000000
-[size] 1062 917
-[pos] 20 -1
-*-30.000000 2000000000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
+[dumpfile_mtime] "Fri Mar 20 22:00:50 2015"
+[dumpfile_size] 32537287
+[savefile] "/home/roberto/cmips.git/cMIPS/v_tx.sav"
+[timestart] 83864500000
+[size] 1062 927
+[pos] -1 -1
+*-26.000000 83960000000 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 [treeopen] u_core.
 [treeopen] u_core.u_alu.
 [treeopen] u_simple_uart.
 [sst_width] 210
 [signals_width] 235
 [sst_expanded] 1
-[sst_vpaned_height] 268
+[sst_vpaned_height] 271
 @28
 clk
+@200
+-    fetch
 @22
 u_core.pc[31:0]
 u_core.instr_fetched[31:0]
 @200
 -    decode, reg fetch
 @22
-u_core.instr_valid[31:0]
+u_core.rf_instruction[31:0]
 u_core.regs_a[31:0]
 u_core.regs_b[31:0]
 @200
@@ -46,7 +48,16 @@ d_addr[31:0]
 cpu_d_aval
 u_core.mm_wrmem
 @22
-u_print_data.data[31:0]
+cpu_data_inp[31:0]
+cpu_data_out[31:0]
+@200
+-    write-back
+@28
+u_core.wb_muxc[2:0]
+u_core.wb_wreg
+@22
+u_core.wb_a_c[4:0]
+u_core.wb_c[31:0]
 @200
 -   UART
 @28
@@ -63,7 +74,6 @@ u_simple_uart.u_uart.txreg[7:0]
 u_simple_uart.u_uart.txcpu_dbg_st[31:0]
 @28
 u_simple_uart.u_uart.tx_bfr_empt
-u_simple_uart.u_uart.car_tx
 @24
 u_simple_uart.u_uart.tx_dbg_st[31:0]
 @28
@@ -71,7 +81,7 @@ u_simple_uart.u_uart.tx_shr_full
 u_simple_uart.u_uart.txclk
 u_simple_uart.u_uart.txdat
 @22
-u_interrupt_counter.q[15:0]
+u_interrupt_counter.q[29:0]
 @200
 -  REMOTE (fake) UART
 @24
@@ -80,13 +90,5 @@ u_uart_remota.rx_dbg_st[31:0]
 u_uart_remota.recv[7:0]
 @23
 u_uart_remota.recv[7:0]
-@200
--    write-back
-@28
-u_core.wb_muxc[2:0]
-u_core.wb_wreg
-@22
-u_core.wb_a_c[4:0]
-u_core.wb_c[31:0]
 [pattern_trace] 1
 [pattern_trace] 0
diff --git a/cMIPS/vhdl/altera.vhd b/cMIPS/vhdl/altera.vhd
index 4a25e7b0e145b5a80252bc9091585f551391a34d..f6f43698d2956a7894549eec312296259b829735 100644
--- a/cMIPS/vhdl/altera.vhd
+++ b/cMIPS/vhdl/altera.vhd
@@ -54,6 +54,8 @@ end architecture functional;
 -- -----------------------------------------------------------------------
 
 
+-- -----------------------------------------------------------------------
+-- add/subtract SIGNED numbers
 -- -----------------------------------------------------------------------
 library IEEE;
 use IEEE.std_logic_1164.all;
@@ -89,6 +91,34 @@ end architecture functional;
 -- -----------------------------------------------------------------------
 
 
+-- -----------------------------------------------------------------------
+-- 32-bit add/subtract for UN-SIGNED operands; wraps around, no overflow
+-- -----------------------------------------------------------------------
+library IEEE;
+use IEEE.std_logic_1164.all;
+use IEEE.numeric_std.all;
+entity mf_alt_add_sub_u is
+  port(add_sub         : IN STD_LOGIC;  -- add=1, sub=0
+       dataa           : IN STD_LOGIC_VECTOR (31 DOWNTO 0);
+       datab           : IN STD_LOGIC_VECTOR (31 DOWNTO 0);
+       result          : OUT STD_LOGIC_VECTOR (31 DOWNTO 0));
+end mf_alt_add_sub_u;
+
+architecture functional of mf_alt_add_sub_u is
+  signal add_C, sub_C : STD_LOGIC_VECTOR (31 DOWNTO 0);
+begin
+  -- sum and difference are both computed, truncated to 32 bits
+  add_C <= std_logic_vector(unsigned(dataa) + unsigned(datab));
+
+  -- dataa - datab equals dataa + (not datab) + 1, modulo 2**32
+  sub_C <= std_logic_vector(unsigned(dataa) - unsigned(datab));
+
+  result <= add_C when add_sub = '1' else sub_C;   -- add_sub picks one
+
+end architecture functional;
+-- -----------------------------------------------------------------------
+
+
 -- -----------------------------------------------------------------------
 library IEEE;
 use IEEE.std_logic_1164.all;
diff --git a/cMIPS/vhdl/remota.vhd b/cMIPS/vhdl/remota.vhd
index 6f012cc504c7ecdf07755e5c95687f5a82da33d4..2f12dd5c34c69c64d2c279511123642e45bab0a2 100644
--- a/cMIPS/vhdl/remota.vhd
+++ b/cMIPS/vhdl/remota.vhd
@@ -294,14 +294,16 @@ begin
   -- baud rate generators ---------------------------------------------
 
   with bit_rt select
-    tx_baud_div <=     4/2 when b"000",
-                     434/2  when b"001",
-                     868/2 when b"010",
-                    1302/2 when b"011",
-                    1736/2 when b"100",
-                    2604/2 when b"101",
-                    3472/2 when b"110",
-                    5208/2 when others;
+    tx_baud_div <=      8/2 when b"000",
+                       16/2 when b"001",
+                       32/2 when b"010",
+                      434/2 when b"011",
+                      868/2 when b"100",
+                     1302/2 when b"101",
+                     1736/2 when b"110",
+                     2604/2 when others;
+                     -- 3472/2 when b"110",
+                     -- 5208/2 when others;
 
   U_bit_rt_tx: process(clk, rst)
     variable baud_cnt : integer;
@@ -322,14 +324,16 @@ begin
 
   -- RX clock daud rate
   with bit_rt select
-    rx_baud_div <=     4/2 when b"000",
-                     434/2 when b"001",
-                     868/2 when b"010",
-                    1302/2 when b"011",
-                    1736/2 when b"100",
-                    2604/2 when b"101",
-                    3472/2 when b"110",
-                    5208/2 when others;
+    rx_baud_div <=      8/2 when b"000",
+                       16/2 when b"001",
+                       32/2 when b"010",
+                      434/2 when b"011",
+                      868/2 when b"100",
+                     1302/2 when b"101",
+                     1736/2 when b"110",
+                     2604/2 when others;
+                     -- 3472/2 when b"110",
+                     -- 5208/2 when others;
 
   U_bit_rt_rx: process(clk, rst, reset_rxck, rx_run)
     variable baud_cnt : integer;
diff --git a/cMIPS/vhdl/tb_cMIPS.vhd b/cMIPS/vhdl/tb_cMIPS.vhd
index fdddb7304e79dc894879d13e9aa0f77bea47ebb3..66e0ab3ea4ca90bbb96a9633e51f130cd5bbf44c 100644
--- a/cMIPS/vhdl/tb_cMIPS.vhd
+++ b/cMIPS/vhdl/tb_cMIPS.vhd
@@ -600,6 +600,9 @@ begin  -- TB
     port map (rst,clk, io_uart_sel, io_uart_wait, wr, d_addr(2),
               cpu_data, uart_d_out,
               uart_txd, uart_rxd, uart_rts, uart_cts, uart_irq, bit_rt);
+              -- uncomment the next line for loopback, and comment out the previous line
+              -- uart_txd, uart_txd, uart_rts, uart_cts, uart_irq, bit_rt);
+
   uart_cts <= '1';
   
   start_remota <= '0', '1' after 200*CLOCK_PER;
diff --git a/cMIPS/vhdl/uart.vhd b/cMIPS/vhdl/uart.vhd
index d08c8758f0f7eb612799614f0f794f4d2c35d6dd..7b6ce76f7bc4b4094e511834868b76d28b2e9efe 100644
--- a/cMIPS/vhdl/uart.vhd
+++ b/cMIPS/vhdl/uart.vhd
@@ -22,14 +22,14 @@
 
 -- control register, least significant byte only
 -- b0..b2: transmit/receive clock speed
---         000: 1/4 CPU clock rate -- for VHDL/C debugging only
---         001: 115.200 baud
---         010:  57.600 baud
---         011:  38.400 baud
---         100:  28.800 baud
---         101:  19.200 baud
---         110:  14.400 baud
---         111:   9.600 baud
+--         000: 1/8  CPU clock rate -- for VHDL/C debugging only
+--         001: 1/16 CPU clock rate -- for VHDL/C debugging only
+--         010: 1/32 CPU clock rate -- for VHDL/C debugging only
+--         011: 115.200 baud
+--         100:  57.600 baud
+--         101:  38.400 baud
+--         110:  28.800 baud
+--         111:  19.200 baud
 -- b3=1:   signal interrupt on RX buffer full, when a new octet is available
 -- b4=1:   signal interrupt on TX buffer empty, when TX space is available
 -- b5,b6:  ignored, not used
@@ -66,6 +66,8 @@ end uart_int;
 
 architecture estrutural of uart_int is
 
+  constant CLOCK_DIVIDER : integer := 50;
+  
   component register8 is
     port(rel, rst, ld: in  std_logic;
          D:            in  std_logic_vector;
@@ -126,7 +128,7 @@ architecture estrutural of uart_int is
   signal d_int_tx_empty, d_rx_int_set, d_err_framing, d_err_overrun : std_logic;
 
   signal tx_baud_div, rx_baud_div : integer := 0;
-  
+
 begin
 
   d_out <= x"000000" & received when s_rx = '1'   else
@@ -212,7 +214,7 @@ begin
     end case;
   end process U_TXCPU_outputs;   -------------------------------------------
 
-  
+
 
   -- state machine controls data transmission circuit ----------------------
   U_TX_st_reg: process(rst,clk)
@@ -238,8 +240,8 @@ begin
       when st_check =>
         tx_next_st <= st_start;
       when st_start =>
-        if txclk_rise = '1' then      -- synchronize CPUclock with TXclock
-          tx_next_st <= st_b0;
+        if txclk_rise = '1' then
+          tx_next_st <= st_b0;      -- synchronize CPUclock with TXclock
         else
           tx_next_st <= st_start;
         end if;
@@ -345,7 +347,7 @@ begin
   
   -- framing error: 10th bit not a STOP=1 or 1st bit not a START=0
   a_framing <= '1' when ( (rx_ld = '1') and
-                          (sta_recv_sto(9)/='1' or sta_recv_sto(0)/='0') )
+                          (sta_recv_sto(9) /= '1' or sta_recv_sto(0)/='0') )
                else '0';
 
   d_err_framing <= (a_framing or err_framing) and not(sel_delayed);
@@ -434,7 +436,7 @@ begin
 
   rx_dbg_st <= integer(rx_state'pos(rx_current_st));  -- debugging only
   
-  U_RX_st_transitions: process(rx_current_st, rxclk_fall, rxdat_1to0)
+  U_RX_st_transitions: process(rx_current_st, rxclk_fall, rxdat_1to0, rxdat)
   begin
     case rx_current_st is
       when st_idle =>
@@ -444,7 +446,11 @@ begin
           rx_next_st <= st_idle;
         end if;
       when st_check =>
-        rx_next_st <= st_start;
+        if rxdat = '0' then
+          rx_next_st <= st_start;
+        else
+          rx_next_st <= st_idle;
+        end if;
       when st_start =>
         if rxclk_fall = '1' then
           rx_next_st <= st_b0;
@@ -547,15 +553,28 @@ begin
 
   -- U_bit_rt_tx: counter8 port map (clk,rst,tx_ld,en_tx_clk,x"00",tx_bit_rt);
   with ctrl(2 downto 0) select
-    tx_baud_div <=      4/2 when b"000",
-                      434/2 when b"001",
-                      868/2 when b"010",
-                     1302/2 when b"011",
-                     1736/2 when b"100",
-                     2604/2 when b"101",
-                     3472/2 when b"110",
-                     5208/2 when others;
+    tx_baud_div <=      8/2 when b"000",
+                       16/2 when b"001",
+                       32/2 when b"010",
+                      434/2 when b"011",
+                      868/2 when b"100",
+                     1302/2 when b"101",
+                     1736/2 when b"110",
+                     2604/2 when others;
+                     -- 3472/2 when b"110",
+                     -- 5208/2 when others;
+
+
+--         000: 1/8  CPU clock rate -- for VHDL/C debugging only
+--         001: 1/16 CPU clock rate -- for VHDL/C debugging only
+--         010: 1/32 CPU clock rate -- for VHDL/C debugging only
+--         011: 115.200 baud
+--         100:  57.600 baud
+--         101:  38.400 baud
+--         110:  28.800 baud
+--         111:  19.200 baud
 
+  
   U_bit_rt_tx: process(clk, rst, tx_ld, en_tx_clk)
     variable baud_cnt : integer range 0 to 50000000;
   begin
@@ -586,34 +605,34 @@ begin
 
   -- U_bit_rt_rx:counter8 port map(clk,rst,reset_rxck,en_rx_clk,00,rx_bit_rt);
   with ctrl(2 downto 0) select
-    rx_baud_div <=     4/2 when b"000",
-                     434/2 when b"001",
-                     868/2 when b"010",
-                    1302/2 when b"011",
-                    1736/2 when b"100",
-                    2604/2 when b"101",
-                    3472/2 when b"110",
-                    5208/2 when others;
-
+    rx_baud_div <=      8/2 when b"000",
+                       16/2 when b"001",
+                       32/2 when b"010",
+                      434/2 when b"011",
+                      868/2 when b"100",
+                     1302/2 when b"101",
+                     1736/2 when b"110",
+                     2604/2 when others;
+                     -- 3472/2 when b"110",
+                     -- 5208/2 when others;
 
   U_bit_rt_rx: process(clk, rst, reset_rxck, en_rx_clk)
     variable baud_cnt : integer range 0 to 50000000;
   begin
-     if rst = '0' then
+    if rst = '0' then
       baud_cnt  := 0;
       rxclk <= '0';
       rxclk_fall <= '0';
       rxclk_rise <= '0';
     elsif reset_rxck = '1' and rising_edge(clk) then
-      baud_cnt  := 1;
-      rxclk <= '1';
+      baud_cnt  := (rx_baud_div / 2);
+      rxclk <= '0';
       rxclk_fall <= '0';
-      rxclk_rise <= '0';      
+      rxclk_rise <= '0';
     elsif en_rx_clk = '1' and rising_edge(clk) then
       if baud_cnt = rx_baud_div then
         if rxclk = '1' then
           rxclk_fall <= '1';
-          -- assert false report "rxclk falling_edge" severity note;
         else
           rxclk_fall <= '0';
           rxclk_rise <= '1';
@@ -628,14 +647,11 @@ begin
     end if;
   end process U_bit_rt_rx;
 
-  
 end estrutural;
 -- -------------------------------------------------------------------
 
 
 
-
-
 -- +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 -- 8 bit register, reset=0 asynchronous, load=1 synchronous
 -- +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
@@ -733,3 +749,359 @@ begin
 end functional;
 -- +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 
+
+
+-- +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+-- functional model for the "remote computer" -- for testing only
+-- +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+library IEEE; use IEEE.std_logic_1164.all; use IEEE.numeric_std.all;
+use std.textio.all;
+use work.p_WIRES.all;
+
+        
+entity remota is                      -- file-driven model of the remote end of the serial line
+  generic(OUTPUT_FILE_NAME : string := "serial.out";  -- NOTE(review): output_stream is opened on "STD_OUTPUT"; this name looks unused - confirm
+          INPUT_FILE_NAME  : string := "serial.inp"); -- text file whose lines are sent on outDat
+  port(rst, clk  : in  std_logic;    -- rst is tested against '0'; clk feeds both bit-rate dividers
+       start     : in  std_logic;    -- start operation =1
+       inpDat    : in  std_logic;    -- serial input
+       outDat    : out std_logic;    -- serial output
+       bit_rt    : in  reg3);        -- selects bit rate
+end remota;
+
+architecture behavior of remota is
+
+  component counter8 is
+    port(rel, rst, ld, en: in  std_logic;
+         D:            in  std_logic_vector;
+         Q:            out std_logic_vector);
+  end component counter8;
+
+
+  -- transmission signals & states -----------------------------------------
+  type tx_state is (st_init, st_idle, st_start,
+                    st_b0, st_b1, st_b2, st_b3, st_b4, st_b5, st_b6, st_b7,
+                    st_stop, st_wait, st_done);
+  signal tx_current_st, tx_next_st : tx_state;
+  signal tx_dbg_st : integer;  -- for debugging only
+  
+  signal tx_bit_rt : reg8;
+  signal tx_clk, tx_run : std_logic;
+
+  file input_stream : text open read_mode is INPUT_FILE_NAME;
+  -- file input_stream : text open read_mode is "STD_INPUT";
+  -- -----------------------------------------------------------------------
+  
+  -- reception signals & states --------------------------------------------
+  type rx_state is (st_idle, st_check, st_start,
+                    st_b0, st_b1, st_b2, st_b3, st_b4, st_b5, st_b6, st_b7,
+                    st_stop, st_done);
+  signal rx_current_st, rx_next_st : rx_state;
+  signal rx_dbg_st : integer;  -- for debugging only
+  
+  signal recv, rx_bit_rt : reg8;
+  signal rx_clk, rx_run, reset_rxck : std_logic;
+
+  signal tx_baud_div, rx_baud_div : integer := 0;
+  
+  -- file output_stream : text open write_mode is OUTPUT_FILE_NAME;
+  file output_stream : text open write_mode is "STD_OUTPUT";
+  -- -----------------------------------------------------------------------
+
+  
+begin
+
+  -- transmission control SM ----------------------------------------------
+  U_TX_st_reg: process(rst,tx_clk)    -- TX state register, clocked by the divided tx_clk
+  begin
+    if rst = '0' then                 -- asynchronous reset, active low
+      tx_current_st <= st_wait;       -- come out of reset waiting for start
+    elsif rising_edge(tx_clk) then    -- one state change per tx_clk period
+      tx_current_st <= tx_next_st;
+    end if;
+  end process U_TX_st_reg;
+
+  tx_dbg_st <= integer(tx_state'pos(tx_current_st));  -- debugging only
+
+  U_tx: process (tx_current_st, start)  -- TX next-state/output logic: sends input_stream text on outDat, bit 0 first
+    variable sentence : line;           -- line of text currently being sent
+    variable char : character;          -- character taken from sentence
+    variable good, send_null : boolean; -- send_null: terminating NUL was already queued
+    variable bfr  : reg8;               -- octet being shifted out
+    variable j : integer;               -- character counter, compared against sentence'right
+  begin
+
+    case tx_current_st is
+      when st_wait =>                   -- 12 wait for starting signal
+        outDat <= '1';                  -- line idles at '1'
+        tx_run <= '0';                  -- hold TX clock
+        send_null := FALSE;
+        if start = '0'  then
+          tx_next_st <= st_wait;
+        else
+          if not endfile(input_stream) then
+            readline( input_stream, sentence );  -- read first line of text
+            -- assert false report "fst line: "&integer'image(sentence'length);
+            j := 1;
+            tx_next_st <= st_init;
+          else
+            tx_next_st <= st_done;      -- no input, done!
+          end if;
+        end if;
+      when st_init =>                   -- 0
+        outDat <= '1';
+        tx_run <= '1';              -- start TX clock
+        tx_next_st <= st_idle;
+      when st_idle =>                   -- 1 pick the next octet to send
+        if not endfile(input_stream) then  -- NOTE(review): tested before sentence is drained; last line read may never be sent - confirm
+          if j > sentence'right then    -- read new line of input
+            readline( input_stream, sentence );
+            -- assert false report "new line: "&integer'image(sentence'length);
+            bfr := x"0a";               -- new line
+            j := 0;
+          elsif sentence'length = 0 then
+            bfr := x"0a";             -- send new line for empty line
+            -- assert false report "empty line: " & integer'image(j)&" " & LF;
+          else
+            read (sentence, char, good);
+            -- assert false report "read: " & integer'image(j) & " " &char;
+            bfr := std_logic_vector(to_unsigned( character'pos(char), 8));  -- 'pos is 0..255, so convert as unsigned
+          end if;
+          tx_next_st <= st_start;
+        else
+          tx_next_st <= st_done;        -- no more input, done!
+        end if;
+      when st_start =>                  -- 2
+        outDat <= '0';                  -- start bit
+        tx_next_st <= st_b0;
+      when st_b0 =>                     -- 3
+        outDat <= bfr(0);
+        tx_next_st <= st_b1;
+      when st_b1 =>                     -- 4
+        outDat <= bfr(1);
+        tx_next_st <= st_b2;
+      when st_b2 =>                     -- 5
+        outDat <= bfr(2);
+        tx_next_st <= st_b3;
+      when st_b3 =>                     -- 6
+        outDat <= bfr(3);
+        tx_next_st <= st_b4;
+      when st_b4 =>                     -- 7
+        outDat <= bfr(4);
+        tx_next_st <= st_b5;
+      when st_b5 =>                     -- 8
+        outDat <= bfr(5);
+        tx_next_st <= st_b6;
+      when st_b6 =>                     -- 9
+        outDat <= bfr(6);
+        tx_next_st <= st_b7;
+      when st_b7 =>                     -- 10
+        outDat <= bfr(7);
+        tx_next_st <= st_stop;
+      when st_stop =>                   -- 11
+        j := j + 1;                     -- one more character accounted for
+        outDat <= '1';                  -- stop bit
+        tx_next_st <= st_idle;
+      when st_done =>                   -- 13 wait forever
+        if send_null = FALSE then
+          bfr := x"00";               -- send out a NULL character
+          send_null := TRUE;
+          tx_next_st <= st_start;
+        else
+          tx_next_st <= st_done;        -- no more input, done!
+          outDat <= '1';
+        end if;
+        tx_run <= '0';                  -- stop clock
+      when others =>                    -- not expected: every tx_state value is handled above
+        assert false report "REMOTE TX stateMachine broken"
+          & integer'image(tx_state'pos(tx_current_st)) severity failure;
+      end case;
+
+  end process U_tx;
+  -- ======================================================================
+
+
+  
+  -- reception ============================================================
+
+  -- reception control SM -------------------------------------------------
+  U_RX_st_reg: process(rst,clk)       -- RX state register, clocked by clk (not by rx_clk)
+  begin
+    if rst = '0' then                 -- asynchronous reset, active low
+      rx_current_st <= st_idle;       -- come out of reset waiting for a start bit
+    elsif rising_edge(clk) then
+      rx_current_st <= rx_next_st;    -- next state is computed by U_rx
+    end if;
+  end process U_RX_st_reg;
+
+  rx_dbg_st <= integer(rx_state'pos(rx_current_st));  -- debugging only
+
+  U_rx: process(rx_current_st, rx_clk, inpDat)  -- RX next-state logic; samples inpDat into recv, bit 0 first
+    variable msg : line;                -- characters received since the last writeline
+  begin
+    case rx_current_st is
+      when st_idle =>                   -- RX clock stopped, recv marked uninitialized
+        reset_rxck <= '0';
+        rx_run     <= '0';
+        recv       <= (others => 'U');
+        if falling_edge(inpDat) then    -- start bit
+          rx_next_st <= st_check;
+        else
+          rx_next_st <= st_idle;
+        end if;
+      when st_check =>                  -- restart and enable the RX clock divider (U_bit_rt_rx)
+        reset_rxck <= '1';
+        rx_run     <= '1';
+        rx_next_st <= st_start;
+      when st_start =>                  -- release the divider reset, move on unconditionally
+        reset_rxck <= '0';
+        -- if rising_edge(rx_clk) then
+          rx_next_st <= st_b0;
+        -- else
+        --   rx_next_st <= st_start;
+        -- end if;
+      when st_b0 =>                     -- st_b0..st_b7: latch one bit per falling edge of rx_clk
+        if falling_edge(rx_clk) then
+          recv(0) <= inpDat;
+          rx_next_st <= st_b1;
+        else
+          rx_next_st <= st_b0;
+        end if;
+      when st_b1 =>
+        if falling_edge(rx_clk) then
+          recv(1) <= inpDat;
+          rx_next_st <= st_b2;
+        else
+          rx_next_st <= st_b1;
+        end if;
+      when st_b2 =>
+        if falling_edge(rx_clk) then
+          recv(2) <= inpDat;
+          rx_next_st <= st_b3;
+        else
+          rx_next_st <= st_b2;
+        end if;
+      when st_b3 =>
+        if falling_edge(rx_clk) then
+          recv(3) <= inpDat;
+          rx_next_st <= st_b4;
+        else
+          rx_next_st <= st_b3;
+        end if;
+      when st_b4 =>
+        if falling_edge(rx_clk) then
+          recv(4) <= inpDat;
+          rx_next_st <= st_b5;
+        else
+          rx_next_st <= st_b4;
+        end if;
+      when st_b5 =>
+        if falling_edge(rx_clk) then
+          recv(5) <= inpDat;
+          rx_next_st <= st_b6;
+        else
+          rx_next_st <= st_b5;
+        end if;
+      when st_b6 =>
+        if falling_edge(rx_clk) then
+          recv(6) <= inpDat;
+          rx_next_st <= st_b7;
+        else
+          rx_next_st <= st_b6;
+        end if;
+      when st_b7 =>
+        if falling_edge(rx_clk) then
+          recv(7) <= inpDat;
+          rx_next_st <= st_stop;
+        else
+          rx_next_st <= st_b7;
+        end if;
+      when st_stop =>                   -- wait one more falling edge; inpDat is not checked here
+        if falling_edge(rx_clk) then
+          rx_next_st <= st_done;
+        else
+          rx_next_st <= st_stop;
+        end if;
+      when st_done =>                   -- report the octet, then back to idle
+        rx_run     <= '0';              -- stop the RX clock divider
+        rx_next_st <= st_idle;
+
+        write ( msg, character'val(to_integer( unsigned(recv))) );  -- append received character to msg
+        if recv = x"00" or recv = x"0a"  then                       -- flush on NUL or line feed
+          writeline( output_stream, msg );      -- output_stream is opened on "STD_OUTPUT"
+        end if;
+
+      when others =>                    -- not expected: every rx_state value is handled above
+        assert false report "REMOTE RX stateMachine broken"
+          & integer'image(rx_state'pos(rx_current_st)) severity failure;
+    end case;
+  end process U_rx;
+
+
+  -- baud rate generators ---------------------------------------------
+
+  with bit_rt select                            -- tx_clk toggles once every tx_baud_div clk cycles
+    tx_baud_div <=      8/2 when b"000",        -- tx_clk period =    8 clk cycles
+                       16/2 when b"001",        -- tx_clk period =   16 clk cycles
+                       32/2 when b"010",        -- tx_clk period =   32 clk cycles
+                      434/2 when b"011",        -- tx_clk period =  434 clk cycles
+                      868/2 when b"100",        -- tx_clk period =  868 clk cycles
+                     1302/2 when b"101",        -- tx_clk period = 1302 clk cycles
+                     1736/2 when b"110",        -- tx_clk period = 1736 clk cycles
+                     2604/2 when others;        -- tx_clk period = 2604 clk cycles
+                     -- the former divisors 3472/2 and 5208/2 are no longer
+                     -- selectable: all eight bit_rt codes are taken above
+
+  U_bit_rt_tx: process(clk, rst)      -- TX bit clock: divides clk by 2*tx_baud_div
+    variable baud_cnt : integer;      -- clk cycles counted since the last toggle
+  begin
+     if rst = '0' then                -- asynchronous reset, active low
+      baud_cnt  := 0;
+      tx_clk <= '0';
+    elsif rising_edge(clk) then       -- NOTE(review): tx_run is not read here, so tx_clk runs freely - confirm intent
+      if baud_cnt = tx_baud_div then
+        tx_clk <= not(tx_clk);        -- half period elapsed
+        baud_cnt := 1;                -- restart at 1, so toggles are tx_baud_div cycles apart
+      else
+        baud_cnt := baud_cnt + 1;
+      end if;
+    end if;
+  end process U_bit_rt_tx;
+
+
+  -- RX clock baud rate
+  with bit_rt select                            -- rx_clk toggles once every rx_baud_div clk cycles
+    rx_baud_div <=      8/2 when b"000",        -- rx_clk period =    8 clk cycles
+                       16/2 when b"001",        -- rx_clk period =   16 clk cycles
+                       32/2 when b"010",        -- rx_clk period =   32 clk cycles
+                      434/2 when b"011",        -- rx_clk period =  434 clk cycles
+                      868/2 when b"100",        -- rx_clk period =  868 clk cycles
+                     1302/2 when b"101",        -- rx_clk period = 1302 clk cycles
+                     1736/2 when b"110",        -- rx_clk period = 1736 clk cycles
+                     2604/2 when others;        -- rx_clk period = 2604 clk cycles
+                     -- the former divisors 3472/2 and 5208/2 are no longer
+                     -- selectable: all eight bit_rt codes are taken above
+
+  U_bit_rt_rx: process(clk, rst, reset_rxck, rx_run)  -- RX bit clock: restartable divider, counts only while rx_run = '1'
+    variable baud_cnt : integer;      -- clk cycles counted since the last toggle or restart
+  begin
+     if rst = '0' then                -- asynchronous reset, active low
+      baud_cnt  := 0;
+      rx_clk <= '0';
+    elsif reset_rxck = '1' and rising_edge(clk) then  -- synchronous restart (U_rx drives reset_rxck in st_check)
+      baud_cnt  := 1;
+      rx_clk <= '0';                  -- restart with rx_clk low
+    elsif rx_run = '1' and rising_edge(clk) then      -- divider is frozen when rx_run = '0'
+      if baud_cnt = rx_baud_div then
+        rx_clk <= not(rx_clk);        -- half period elapsed
+        baud_cnt := 1;
+      else
+        baud_cnt := baud_cnt + 1;
+      end if;
+    end if;
+  end process U_bit_rt_rx;
+
+  
+end behavior;
+-- +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
diff --git a/cMIPS/vhdl/units.vhd b/cMIPS/vhdl/units.vhd
index 02f27c74bf6c18faff74a561f9b2b0ef6a8349bb..1da323d208a4ea65abdd80abb847183715f08040 100644
--- a/cMIPS/vhdl/units.vhd
+++ b/cMIPS/vhdl/units.vhd
@@ -212,6 +212,13 @@ architecture functional of alu is
          result          : OUT STD_LOGIC_VECTOR (31 DOWNTO 0));
   end component mf_alt_add_sub;
   
+  component mf_alt_add_sub_u is
+    port(add_sub         : IN STD_LOGIC;  -- add=1, sub=0
+         dataa           : IN STD_LOGIC_VECTOR (31 DOWNTO 0);
+         datab           : IN STD_LOGIC_VECTOR (31 DOWNTO 0);
+         result          : OUT STD_LOGIC_VECTOR (31 DOWNTO 0));
+  end component mf_alt_add_sub_u;
+  
   component mask_off_bits is
     port(B : in  std_logic_vector;
          X : out std_logic_vector);
@@ -232,8 +239,8 @@ architecture functional of alu is
 
   signal operation : integer;
   signal s_HI,s_LO, loc_HI,loc_LO, inp_HI,inp_LO, mask,mask_and : reg32;
-  signal sh_left, sh_right, sh_inp, sh_lft_ins, summ_diff : reg32;
-  signal addition, overflow, shift_arith,  wr_hi,wr_lo : std_logic;
+  signal sh_left, sh_right, sh_inp, sh_lft_ins, summ_diff, summ_diff_u : reg32;
+  signal addition, overflow, overflow_u, shift_arith,  wr_hi,wr_lo : std_logic;
   signal size,index, shift_amnt : reg5;
   
 begin
@@ -277,13 +284,13 @@ begin
                       i_C  := summ_diff;
                       ovfl <= overflow;
       when opADDU  => addition <= '1';
-                      i_C  := summ_diff;
+                      i_C  := summ_diff_u;
                       ovfl <= '0';
       when opSUB   => addition <= '0';
                       i_C  := summ_diff;
                       ovfl <= overflow;
       when opSUBU  => addition <= '0';
-                      i_C  := summ_diff;
+                      i_C  := summ_diff_u;
                       ovfl <= '0';
       when opAND   => i_C := A and B;
       when opOR    => i_C := A or  B;
@@ -297,7 +304,8 @@ begin
         end if;
         -- this instr cannot cause an exception
       when opSLTU  => addition <= '0';  -- ignore overflow/signal
-        i_C := x"0000000" & b"000" & summ_diff(31);
+                      i_C := x"0000000" & b"000" & summ_diff_u(31);
+                      ovfl <= '0';
       when opLUI   => i_C := B(15 downto 0) & x"0000";
       when opSWAP  =>                   -- word swap bytes within halfwords
         i_C := B(23 downto 16)&B(31 downto 24)&B(7 downto 0) &B(15 downto 8);
@@ -332,8 +340,13 @@ begin
     
   end process U_alu; -- -------------------------------------------
 
-  U_ADD_SUB: mf_alt_add_sub port map (add_sub => addition, overflow => overflow,
-                dataa  => A, datab => B, result => summ_diff);
+  U_ADD_SUB: mf_alt_add_sub             -- signed add/subtract
+    port map (add_sub => addition, overflow => overflow,
+              dataa  => A, datab => B, result => summ_diff);
+
+  U_ADD_SUB_U: mf_alt_add_sub_u         -- UNsigned add/subtract, no overflow
+    port map (add_sub => addition,
+              dataa  => A, datab => B, result => summ_diff_u);
 
   U_HILO: process (A,B, fun, loc_HI,loc_LO)
     variable i_hi,i_lo, i_quoc,i_rem: reg32;