diff --git a/cMIPS/bin/compile.sh b/cMIPS/bin/compile.sh
index 2a9e4595a8f6a9ea325129398751be0240dc6f41..9032d6900ef6eb1ae0f7cf3c20ab4ed0e979a36c 100755
--- a/cMIPS/bin/compile.sh
+++ b/cMIPS/bin/compile.sh
@@ -106,7 +106,7 @@ c_ld="${include}"/cMIPS.ld
 c_h="${include}"/cMIPS.h
 c_s="${include}"/cMIPS.s
 c_io="${include}"/cMIPSio
-c_start="${include}"/start
+# c_start="${include}"/start ## see below for synthesis version
 c_hndlrs="${include}"/handlers
 c_stop="${include}"/stop
 
@@ -128,9 +128,11 @@ dat=data.bin
 if [ $verbose = true ]; then  memory_map="-Map ${inp}.map" ; fi
 
 if [ $synth = true ]; then
-   S="-D FOR_SYNTHESIS" ; 
+   S="-D FOR_SYNTHESIS" ;
+   c_start="${include}"/syn_start
 else 
    S="-U FOR_SYNTHESIS" ;
+   c_start="${include}"/start
 fi
 
 (mips-gcc -O${level} $warn -DcMIPS -mno-gpopt -I"${include}" \
diff --git a/cMIPS/include/cMIPSio.c b/cMIPS/include/cMIPSio.c
index 38c3e8b3295b5eeea41f90f074b1fc21025b1512..5554749aa652ebc53cb54ddbb2752a9d8e9c3ced 100644
--- a/cMIPS/include/cMIPSio.c
+++ b/cMIPS/include/cMIPSio.c
@@ -3,7 +3,7 @@
 // -- cMIPS I/O functions -----------------------------------------------
 
 
-// do not generate extra code when programming the FPGA
+// do not generate extra (simulation only) code when programming the FPGA
 
 #ifdef FOR_SYNTHESIS
   #define FOR_SIMULATION 0
@@ -357,7 +357,7 @@ void DSP7SEGput(int MSD, int MSdot, int lsd, int lsdot) {
 // external counter -- counts down to zero and stops or interrupts
 //=======================================================================
 // write an integer with number of pulses to count and start counter
-//  if interr not 0, then will interrupt when count reaches zero
+//  if interr is not 0, then will interrupt when count reaches zero
 void startCounter(int n, int interr) {
   int *IO = (int *)IO_COUNT_ADDR;
   int interrupt;
diff --git a/cMIPS/include/start.s b/cMIPS/include/start.s
index 0c7931597b921de227f213c2ddee77629319b398..b69f4a2d3435f814b7b9a901f77ad6362968bd2f 100644
--- a/cMIPS/include/start.s
+++ b/cMIPS/include/start.s
@@ -1,19 +1,21 @@
-	# mips-as -O0 -EL -mips32 -o start.o start.s
+	##
+	##== simulation version of startup code ==========================
+	##
+
 	.include "cMIPS.s"
 	.text
 	.set noreorder
 	.align 2
 	.extern main
-	.global _start
-	.global _exit
-	.global exit
+	.global _start,_exit,exit
+	.global _excp_0000, _excp_0100, _excp_0180, _excp_0200, _excp_BFC0
+	
 	.org x_INST_BASE_ADDR,0
 	.ent _start
 
         ##
         ## reset leaves processor in kernel mode, all else disabled
         ##
-
 _start:
 	# get physical page number for 2 pages at the bottom of RAM, for .data
 	#  needed so simulations without a page table will not break
@@ -92,18 +94,9 @@ _exit:	nop	  # flush pipeline
 	nop
 	nop
 	.end _start
+	##----------------------------------------------------------------
 
 
-	##----------------------------------------------------------------
-	.global _excp_0000
-	.global _excp_0100
-	.global _excp_0180
-	.global _excp_0200
-	.global _excp_0200
-	.global _excp_BFC0
-	##
-	##================================================================
-	##
 
 	##
 	##================================================================
@@ -342,6 +335,7 @@ _excp_BFC0:
 	wait 0x38
 	nop
 	.end _excp_BFC0
+	##---------------------------------------------------------------
 
 	
 	##
diff --git a/cMIPS/include/syn_start.s b/cMIPS/include/syn_start.s
new file mode 100644
index 0000000000000000000000000000000000000000..34e35cdefaf4b32f30897918323c99adb2553a3a
--- /dev/null
+++ b/cMIPS/include/syn_start.s
@@ -0,0 +1,84 @@
+	##
+	##== synthesis version of startup code ===========================
+	##
+	##   simple startup code for synthesis
+
+	.include "cMIPS.s"
+	.text
+	.set noreorder
+	.align 2
+	.extern main
+	.global _start,_exit,exit
+	.global _excp_0000, _excp_0100, _excp_0180, _excp_0200, _excp_BFC0
+
+        .set MMU_WIRED,    2  ### do not change mapping for base of ROM, I/O
+	
+	.org x_INST_BASE_ADDR,0
+	.ent _start
+
+        ##
+        ## reset leaves processor in kernel mode, all else disabled
+        ##
+_start:	nop
+	li   $k0, 0x10000000	# only bit 28 set (CU0 in the MIPS32 Status layout); all other bits 0
+        mtc0 $k0, cop0_STATUS	# so no interrupt-enable bit is turned on here
+
+        li   $k0, MMU_WIRED	# MMU_WIRED = 2, defined above
+        mtc0 $k0, cop0_Wired	# presumably keeps TLB entries 0-1 (ROM base, I/O) out of replacement -- confirm
+	# NOTE(review): $sp is not written anywhere in this file before main is entered -- confirm it is valid
+        j main			# plain jump, no link: $ra is not set up for main
+        nop			# branch delay slot (.set noreorder)
+
+exit:				# exit and _exit label the same instruction
+_exit:	j exit	  # wait forever
+	nop			# branch delay slot
+	.end _start
+	
+
+        .org x_EXCEPTION_0000,0	# place handler at x_EXCEPTION_0000, zero-fill the gap
+_excp_0000:			# show code 99 on the 7-segment display, then hang; uses only $k0/$k1
+	la   $k0, HW_dsp7seg_addr  	# 7 segment display
+	li   $k1, 0x0399		# display .9.9
+	sw   $k1, 0($k0)		# write to 7 segment display
+h0000:	j    h0000			# wait forever
+	nop				# branch delay slot
+	
+        .org x_EXCEPTION_0100,0	# place handler at x_EXCEPTION_0100, zero-fill the gap
+_excp_0100:			# show code 88 on the 7-segment display, then hang; uses only $k0/$k1
+	la   $k0, HW_dsp7seg_addr  	# 7 segment display
+	li   $k1, 0x0388		# display .8.8
+	sw   $k1, 0($k0)		# write to 7 segment display
+h0100:	j    h0100			# wait forever
+	nop				# branch delay slot
+	
+        .org x_EXCEPTION_0180,0	# place handler at x_EXCEPTION_0180, zero-fill the gap
+_excp_0180:			# show code 77 on the 7-segment display, then hang; uses only $k0/$k1
+	la   $k0, HW_dsp7seg_addr  	# 7 segment display
+	li   $k1, 0x0377		# display .7.7
+	sw   $k1, 0($k0)		# write to 7 segment display
+h0180:	j    h0180			# wait forever
+	nop				# branch delay slot
+	
+        .org x_EXCEPTION_0200,0	# place handler at x_EXCEPTION_0200, zero-fill the gap
+_excp_0200:			# show code 66 on the 7-segment display, then hang; uses only $k0/$k1
+	la   $k0, HW_dsp7seg_addr  	# 7 segment display
+	li   $k1, 0x0366		# display .6.6
+	sw   $k1, 0($k0)		# write to 7 segment display
+h0200:	j    h0200			# wait forever
+	nop				# branch delay slot
+	
+        .org x_EXCEPTION_BFC0,0	# place handler at x_EXCEPTION_BFC0, zero-fill the gap
+_excp_BFC0:			# show code 55 on the 7-segment display, then hang; uses only $k0/$k1
+	la   $k0, HW_dsp7seg_addr  	# 7 segment display
+	li   $k1, 0x0355		# display .5.5
+	sw   $k1, 0($k0)		# write to 7 segment display
+hBFC0:	j    hBFC0			# wait forever
+	nop				# branch delay slot
+
+	##================================================================
+
+	##
+	##===============================================================
+	## main(), normal code starts below -- do not edit next line
+	.org x_ENTRY_POINT,0
+
diff --git a/cMIPS/tests/mac_7seg.s b/cMIPS/tests/mac_7seg.s
index 7cdcc34802d10819f2f0c5b036e0d1e1ebae1318..3ecee8f5334ea011cbed094d391ab25e660951a9 100644
--- a/cMIPS/tests/mac_7seg.s
+++ b/cMIPS/tests/mac_7seg.s
@@ -6,11 +6,68 @@
         .globl _start
         .ent _start
 
+        .set MMU_WIRED,  2  ### do not change mapping for ROM-0, I/O
+	
 	.set waitFor, 50000000/4     # wait for 1 second @ 50 MHz
 	# .set waitFor, 5            # this is for simulation only
+
+        .org x_INST_BASE_ADDR,0
 	
 _start: nop
-	la   $25, HW_dsp7seg_addr  # 7 segment display
+	li   $k0, 0x10000000
+        mtc0 $k0, cop0_STATUS
+
+        li   $k0, MMU_WIRED
+        mtc0 $k0, cop0_Wired
+
+	j main
+	nop
+
+        .org x_EXCEPTION_0000,0
+_excp_0000:
+	la   $k0, HW_dsp7seg_addr  	# 7 segment display
+	li   $k1, 0x0399		# display .9.9
+	sw   $k1, 0($k0)		# write to 7 segment display
+h0000:	j    h0000			# wait forever
+	nop
+	
+        .org x_EXCEPTION_0100,0
+_excp_0100:
+	la   $k0, HW_dsp7seg_addr  	# 7 segment display
+	li   $k1, 0x0388		# display .8.8
+	sw   $k1, 0($k0)		# write to 7 segment display
+h0100:	j    h0100			# wait forever
+	nop
+	
+        .org x_EXCEPTION_0180,0
+_excp_0180:
+	la   $k0, HW_dsp7seg_addr  	# 7 segment display
+	li   $k1, 0x0377		# display .7.7
+	sw   $k1, 0($k0)		# write to 7 segment display
+h0180:	j    h0180			# wait forever
+	nop
+	
+        .org x_EXCEPTION_0200,0
+_excp_0200:
+	la   $k0, HW_dsp7seg_addr  	# 7 segment display
+	li   $k1, 0x0366		# display .6.6
+	sw   $k1, 0($k0)		# write to 7 segment display
+h0200:	j    h0200			# wait forever
+	nop
+	
+        .org x_EXCEPTION_BFC0,0
+_excp_BFC0:
+	la   $k0, HW_dsp7seg_addr  	# 7 segment display
+	li   $k1, 0x0355		# display .5.5
+	sw   $k1, 0($k0)		# write to 7 segment display
+hBFC0:	j    hBFC0			# wait forever
+	nop
+
+
+	#
+	# main -----------------------------------------------------
+	#
+main:	la   $25, HW_dsp7seg_addr  # 7 segment display
 	la   $5,  waitFor
 	li   $3,  0
 	
diff --git a/cMIPS/tests/mac_intRam.s b/cMIPS/tests/mac_intRam.s
index 0f6e196f56e4c52dcb1b6fc1fbed2db3b314769d..f63197be9fd044fa6f21eec065ce1cac7c91ba0e 100644
--- a/cMIPS/tests/mac_intRam.s
+++ b/cMIPS/tests/mac_intRam.s
@@ -23,7 +23,65 @@
 	# .set LCD_oper_delay,   4   # 15us / 20ns
 	# .set LCD_write_delay,  2   # 15us / 20ns
 
+	.set MMU_WIRED,  2  ### do not change mapping for ROM-0, I/O
+
+        .org x_INST_BASE_ADDR,0
+
 _start: nop
+        li   $k0, 0x10000000
+        mtc0 $k0, cop0_STATUS
+
+        li   $k0, MMU_WIRED
+        mtc0 $k0, cop0_Wired
+
+        j main
+        nop
+
+
+        .org x_EXCEPTION_0000,0
+_excp_0000:
+        la   $k0, HW_dsp7seg_addr       # 7 segment display
+        li   $k1, 0x0399                # display .9.9
+        sw   $k1, 0($k0)                # write to 7 segment display
+h0000:  j    h0000                      # wait forever
+        nop
+
+        .org x_EXCEPTION_0100,0
+_excp_0100:
+        la   $k0, HW_dsp7seg_addr       # 7 segment display
+        li   $k1, 0x0388                # display .8.8
+        sw   $k1, 0($k0)                # write to 7 segment display
+h0100:  j    h0100                      # wait forever
+        nop
+
+        .org x_EXCEPTION_0180,0
+_excp_0180:
+        la   $k0, HW_dsp7seg_addr       # 7 segment display
+        li   $k1, 0x0377                # display .7.7
+        sw   $k1, 0($k0)                # write to 7 segment display
+h0180:  j    h0180                      # wait forever
+        nop
+
+        .org x_EXCEPTION_0200,0
+_excp_0200:
+        la   $k0, HW_dsp7seg_addr       # 7 segment display
+        li   $k1, 0x0366                # display .6.6
+        sw   $k1, 0($k0)                # write to 7 segment display
+h0200:  j    h0200                      # wait forever
+        nop
+
+        .org x_EXCEPTION_BFC0,0
+_excp_BFC0:
+        la   $k0, HW_dsp7seg_addr       # 7 segment display
+        li   $k1, 0x0355                # display .5.5
+        sw   $k1, 0($k0)                # write to 7 segment display
+hBFC0:  j    hBFC0                      # wait forever
+        nop
+
+
+        .org x_ENTRY_POINT,0
+
+main:	nop
 
 	### tell the world we are alive
 	la  $15, HW_dsp7seg_addr   # 7 segment display
@@ -83,11 +141,8 @@ w_ntry:	lw   $4, 0($26)
 	jal LCDclr
 	nop
 
-	# jal LCDhome1               # cursor at home, clear screen
-	# nop
-
 	la  $15, HW_dsp7seg_addr # 7 segment display
-	li  $16, 0x05
+	li  $16, 0x01
 	sw  $16, 0($15)            # write to 7 segment display
 	la $4, wait_1_sec          # wait ONE second
 	jal delay
@@ -137,24 +192,19 @@ w_ntry:	lw   $4, 0($26)
 	
 	### tell where we are
 	la  $15, HW_dsp7seg_addr # 7 segment display
-	li  $16, 0x06
+	li  $16, 0x02
 	sw  $16, 0($15)            # write to 7 segment display
-	la $4, wait_1_sec          # wait ONE second
-	jal delay
-	nop
-	la $4, wait_1_sec          # wait ONE second
-	jal delay
-	nop
-	la $4, wait_1_sec          # wait ONE second
-	jal delay
-	nop
+
 	la $4, wait_1_sec          # wait ONE second
 	jal delay
 	nop
 	
 
-	### test internal FPGA RAM ------------------------------------
-
+	##
+	## test internal FPGA RAM ------------------------------------
+	##
+	## write chars '0' to '9' to RAM, read them back then print
+	##
 	jal LCDclr
 	nop
 
@@ -162,15 +212,15 @@ w_ntry:	lw   $4, 0($26)
 	nop
 	
 	la $8,  x_DATA_BASE_ADDR
-	la $10, 0x30313233       # 
+	la $10, 0x33323130       # 
 	sw $10, 0($8)
-	la $11, 0x34353637       # 
+	la $11, 0x37363534       # 
 	sw $11, 4($8)
-	la $12, 0x003a3938       #
+	la $12, 0x00003938       #
 	sw $12, 8($8)
 
-
-loop:	lbu   $13, 0($8)
+loop1:	lbu   $13, 0($8)
+	nop
 	addiu $8, $8, 1
 	beq   $13, $zero, endT1
 	nop
@@ -178,25 +228,53 @@ loop:	lbu   $13, 0($8)
 	jal  LCDput
 	move  $4, $13            # print number
 
-	#jal  LCDput
-	#li   $4, 0x20            #   and a SPACE
-	
-	j    loop
+	j    loop1
 	nop
 
-	
-	la  $15, HW_dsp7seg_addr # 7 segment display
-	li  $16, 7
+endT1:	la  $15, HW_dsp7seg_addr # 7 segment display
+	li  $16, 0x03            # code 3 is shown once loop1 has finished
 	sw  $16, 0($15)          # write to 7 segment display
 	nop
 
+	jal delay                  # the argument in $4 is loaded by the next line
 	la $4, wait_1_sec          # wait ONE second
 	jal delay
+	la $4, wait_1_sec          # wait ONE second; NOTE(review): la is an assembler macro -- if this is a jal delay slot it must assemble to ONE instruction, confirm for wait_1_sec
+	
+	##
+	## 2nd test of internal FPGA RAM -----------------------------
+	##
+	## write chars 'a' to 'p' to RAM, read each one back, then print it
+	##
+	jal LCDhome2
 	nop
 	
-endT1:	j   endT1                # wait forever 
+	la $8,  x_DATA_BASE_ADDR	# $8 = address of the RAM byte under test
+	li $9,  'a'		# $9 = next char to store
+	li $10, 'p'	# 'a'..'p' = one full display line
+
+loop2:	sb    $9, 0($8)		# store char
+	addiu $9, $9, 1		# advance to the next char
+	lbu    $13, 0($8)	# read it back
+	addiu $8, $8, 1		# advance to the next RAM byte
+
+	jal   LCDput		# then print it
+	move  $4, $13		# argument = the char read back from RAM
+	
+	beq   $13, $10, endT2	# stop after 'p'; NOTE(review): relies on LCDput preserving $10 and $13 -- confirm
+	nop
+
+	j    loop2		# otherwise store/read/print the next char
+	nop
+
+endT2:	la  $15, HW_dsp7seg_addr # 7 segment display
+	li  $16, 0x04
+	sw  $16, 0($15)          # write to 7 segment display
 	nop
 	
+endAll:	j   endAll               # wait forever 
+	nop
+
 #----------------------------------------------------------------------
 	
 	
@@ -217,24 +295,19 @@ dlyput:	lw   $4, 0($6)
 
 
 ### put cursor at home, write do 1st position of 1st line -------------
-LCDhome1: la  $6, HW_lcd_addr   # LCD display
+LCDhome1:
+	la  $6, HW_lcd_addr   # LCD display
 	li  $4, 0b10000000      # x80 RAMaddrs=00, cursor at home
 	sw  $4, 0($6)
 
-	la  $4, LCD_clear_delay    # wait for CLEAR
-dlyhm1:	addiu $4, $4, -1
 	nop
-	bne $4, $zero, dlyhm1
+	nop			# give the controller time
+	nop	
+dlyhm1:	lw   $4, 0($6)
+	nop
+	andi $4, $4, 0x80
+	bne  $4, $zero, dlyhm1
 	nop
-
-#       nop
-#	nop			# give the controller time
-#	nop	
-#dlyhm1:	lw   $4, 0($6)
-#	nop
-#	andi $4, $4, 0x80
-#	bne  $4, $zero, dlyhm1
-#	nop
 
 	jr $ra
 	nop
@@ -242,23 +315,23 @@ dlyhm1:	addiu $4, $4, -1
 
 ### put cursor at home, write do 1st position of 2nd line -------------
 LCDhome2: la  $6, HW_lcd_addr   # LCD display
-	li  $4, 0b11000000      # x80 RAMaddrs=40, cursor at home
+	li  $4, 0b11000000      # xc0 RAMaddrs=40, cursor at home
 	sw  $4, 0($6)
-
-	la  $4, LCD_clear_delay    # wait for CLEAR
-dlyhm2:	addiu $4, $4, -1
-	nop
-	bne $4, $zero, dlyhm2
-	nop
-
-#	nop
-#	nop			# give the controller time
-#	nop	
-#dlyhm2:	lw   $4, 0($6)
+	
+#	la  $4, LCD_clear_delay    # wait for CLEAR
+#dlyhm2:	addiu $4, $4, -1
 #	nop
-#	andi $4, $4, 0x80
-#	bne  $4, $zero, dlyhm2
+#	bne $4, $zero, dlyhm2
 #	nop
+	
+	nop
+	nop			# give the controller time
+	nop	
+dlyhm2:	lw   $4, 0($6)
+	nop
+	andi $4, $4, 0x80
+	bne  $4, $zero, dlyhm2
+	nop
 
 	jr $ra
 	nop
@@ -292,8 +365,9 @@ dlyclr:	addiu $4, $4, -1
 	
 ### send 4 characters to LCD's RAM ------------------------------------
 send:	la  $26, HW_lcd_addr    # LCD display
-	
-	sw   $4, 4($26)		# write character to LCD's RAM
+
+	andi $6, $4, 0xff
+	sw   $6, 4($26)		# write character to LCD's RAM
 	srl  $4, $4, 8
 
 	la $5, LCD_write_delay
@@ -302,25 +376,28 @@ delay0:	addiu $5, $5, -1
 	bne $5, $zero, delay0
 	nop
 
-	sw   $4, 4($26)		# write character to LCD's RAM
-	srl  $4, $4, 8	
-
+	andi $6, $4, 0xff
+	sw   $6, 4($26)		# write character to LCD's RAM
+	srl  $4, $4, 8
+	
 	la $5, LCD_write_delay
 delay1:	addiu $5, $5, -1
 	nop
 	bne $5, $zero, delay1
 	nop
 
-	sw  $4, 4($26)		# write character to LCD's RAM
-	srl $4, $4, 8
-
+	andi $6, $4, 0xff
+	sw   $6, 4($26)		# write character to LCD's RAM
+	srl  $4, $4, 8
+	
 	la $5, LCD_write_delay
 delay2:	addiu $5, $5, -1
 	nop
 	bne $5, $zero, delay2
 	nop
 
-	sw  $4, 4($26)		# write character to LCD's RAM
+	andi $6, $4, 0xff
+	sw   $6, 4($26)		# write character to LCD's RAM
 
 	la $5, LCD_write_delay
 delay3:	addiu $5, $5, -1
@@ -340,10 +417,12 @@ delay:	addiu $4, $4, -1
 	nop
 	jr $ra
 	nop
-
 	
 	.end _start
 
+	.data
+vec:	.space 4,0xff		# reserve 4 bytes, each filled with 0xff (.space fill is a byte value)
+	
 	
 	### command table in initialized RAM, for when it works	;)
 # 	.data
@@ -360,7 +439,7 @@ delay:	addiu $4, $4, -1
         # .byte  0b00000001        # x01 clear display
         # .byte  0b10000000        # x80 RAMaddrs=0, cursor at home
         # .byte  0b10000000        # x80 RAMaddrs=0, cursor at home
-        # .byte  0b11000000        # x80 RAMaddrs=40, cursor at home
+        # .byte  0b11000000        # xc0 RAMaddrs=40, cursor at home
 	# .byte 0,0
 # 
 #string:	 .asciiz "Hello world! said cMIPS"	
diff --git a/cMIPS/tests/mac_kbd_lcd.s b/cMIPS/tests/mac_kbd_lcd.s
new file mode 100644
index 0000000000000000000000000000000000000000..efb26e6f456bb5d1cac9616868d015e8ce817c59
--- /dev/null
+++ b/cMIPS/tests/mac_kbd_lcd.s
@@ -0,0 +1,105 @@
+	.file	1 "mac_kbd_lcd.c"
+	.section .mdebug.abi32
+	.previous
+	.nan	legacy
+	.module	fp=32
+	.module	nooddspreg
+	.text
+	.align	2
+	.globl	main
+	.set	nomips16
+	.set	nomicromips
+	.ent	main
+	.type	main, @function
+main:					# compiler output for mac_kbd_lcd.c (see .file): print " Hello world!", then echo keys forever
+	.frame	$sp,32,$31		# vars= 0, regs= 4/0, args= 16, gp= 0
+	.mask	0x80070000,-4		# registers saved in the frame: $31, $18, $17, $16
+	.fmask	0x00000000,0
+	.set	noreorder
+	.set	nomacro
+	addiu	$sp,$sp,-32		# open a 32-byte stack frame
+	sw	$31,28($sp)		# save return address
+	sw	$18,24($sp)		# save $18 (holds the delay count below)
+	sw	$17,20($sp)		# save $17 (holds -1 below)
+	jal	LCDinit
+	sw	$16,16($sp)		# delay slot: save $16 (holds the key value below)
+
+	jal	LCDtopLine
+	li	$17,-1			# delay slot: $17 = -1, the value KBDget's result is compared with
+
+	jal	LCDput
+	li	$4,32			# 0x20 ' '
+
+	jal	LCDput
+	li	$4,72			# 0x48 'H'
+
+	jal	LCDput
+	li	$4,101			# 0x65 'e'
+
+	jal	LCDput
+	li	$4,108			# 0x6c 'l'
+
+	jal	LCDput
+	li	$4,108			# 0x6c 'l'
+
+	jal	LCDput
+	li	$4,111			# 0x6f 'o'
+
+	jal	LCDput
+	li	$4,32			# 0x20 ' '
+
+	jal	LCDput
+	li	$4,119			# 0x77 'w'
+
+	jal	LCDput
+	li	$4,111			# 0x6f 'o'
+
+	jal	LCDput
+	li	$4,114			# 0x72 'r'
+
+	jal	LCDput
+	li	$4,108			# 0x6c 'l'
+
+	jal	LCDput
+	li	$4,100			# 0x64 'd'
+
+	jal	LCDput
+	li	$4,33			# 0x21 '!'
+
+	jal	LCDbotLine
+	li	$18,12451840			# 0xbe0000, upper half of the delay count (delay slot)
+
+	ori	$18,$18,0xbc20		# $18 = 0xbebc20 = 12500000, argument for cmips_delay
+$L2:
+	jal	KBDget
+	nop
+
+	beq	$2,$17,$L2		# call KBDget again while it returns -1
+	move	$16,$2			# delay slot: keep the returned value in $16
+
+	move	$7,$0			# 4th argument = 0
+	move	$6,$0			# 3rd argument = 0
+	li	$5,1			# 0x1, 2nd argument
+	jal	DSP7SEGput
+	move	$4,$2			# delay slot: 1st argument = key value
+
+	jal	LCDput
+	addiu	$4,$16,48		# delay slot: key value + 48 ('0') is sent to the LCD
+
+	jal	cmips_delay
+	move	$4,$18			# delay slot: argument = 12500000
+
+	li	$7,1			# 0x1, 4th argument
+	move	$6,$16			# 3rd argument = key value
+	move	$5,$0			# 2nd argument = 0
+	jal	DSP7SEGput
+	move	$4,$0			# delay slot: 1st argument = 0
+
+	b	$L2			# loop forever: no epilogue, main never returns
+	nop				# delay slot
+
+	.set	macro
+	.set	reorder
+	.end	main
+	.size	main, .-main
+	.ident	"GCC: (GNU) 5.1.0"
diff --git a/cMIPS/tests/mac_lcd.s b/cMIPS/tests/mac_lcd.s
index 1b941b56742e08e851360975bcd247356543f631..63f35f12c3e60c78fdd0a72f1f7d1018d7172165 100644
--- a/cMIPS/tests/mac_lcd.s
+++ b/cMIPS/tests/mac_lcd.s
@@ -24,7 +24,64 @@
 	# .set LCD_oper_delay,   4   # 15us / 20ns
 	# .set LCD_write_delay,  3   # 15us / 20ns
 
+        .set MMU_WIRED,  2  ### do not change mapping for ROM-0, I/O
+	
+        .org x_INST_BASE_ADDR,0
+	
 _start: nop
+	li   $k0, 0x10000000
+        mtc0 $k0, cop0_STATUS
+
+        li   $k0, MMU_WIRED
+        mtc0 $k0, cop0_Wired
+
+	j main
+	nop
+
+        .org x_EXCEPTION_0000,0
+_excp_0000:
+	la   $k0, HW_dsp7seg_addr  	# 7 segment display
+	li   $k1, 0x0399		# display .9.9
+	sw   $k1, 0($k0)		# write to 7 segment display
+h0000:	j    h0000			# wait forever
+	nop
+	
+        .org x_EXCEPTION_0100,0
+_excp_0100:
+	la   $k0, HW_dsp7seg_addr  	# 7 segment display
+	li   $k1, 0x0388		# display .8.8
+	sw   $k1, 0($k0)		# write to 7 segment display
+h0100:	j    h0100			# wait forever
+	nop
+	
+        .org x_EXCEPTION_0180,0
+_excp_0180:
+	la   $k0, HW_dsp7seg_addr  	# 7 segment display
+	li   $k1, 0x0377		# display .7.7
+	sw   $k1, 0($k0)		# write to 7 segment display
+h0180:	j    h0180			# wait forever
+	nop
+	
+        .org x_EXCEPTION_0200,0
+_excp_0200:
+	la   $k0, HW_dsp7seg_addr  	# 7 segment display
+	li   $k1, 0x0366		# display .6.6
+	sw   $k1, 0($k0)		# write to 7 segment display
+h0200:	j    h0200			# wait forever
+	nop
+	
+        .org x_EXCEPTION_BFC0,0
+_excp_BFC0:
+	la   $k0, HW_dsp7seg_addr  	# 7 segment display
+	li   $k1, 0x0355		# display .5.5
+	sw   $k1, 0($k0)		# write to 7 segment display
+hBFC0:	j    hBFC0			# wait forever
+	nop
+
+
+        .org x_ENTRY_POINT,0
+	
+main:	nop
 
 	### tell the world we are alive
 	la  $15, HW_dsp7seg_addr   # 7 segment display
@@ -298,7 +355,6 @@ delay:	addiu $4, $4, -1
 	jr $ra
 	nop	
 
-	.org 0x400,0
 	.end _start
 
 	
diff --git a/cMIPS/vhdl/core.vhd b/cMIPS/vhdl/core.vhd
index 5ec67f0a5f61a95f08b68d72e059b898492323d9..6e9275f0646c9e98e385236ed4b0b7dd85342bf6 100644
--- a/cMIPS/vhdl/core.vhd
+++ b/cMIPS/vhdl/core.vhd
@@ -1260,7 +1260,7 @@ begin
   -- EXECUTION ---------------------------------------------
 
   EX_FORWARDING_ALU: process (EX_a_rs,EX_a_rt,EX_a_c, EX_A,EX_B,
-                              is_exception, MM_ll_sc_abort,
+                              is_exception, MM_ll_sc_abort, MM_is_SC,
                               MM_a_c,MM_wreg,WB_a_c,WB_wreg,
                               MM_is_MFC0,MM_cop0_val, MM_result,WB_C)
     variable i_A,i_B : reg32;
diff --git a/cMIPS/vhdl/io.vhd b/cMIPS/vhdl/io.vhd
index f316810e387538de5b81f388cd0da92750fc99a6..775410eaf62b7193ba1569f122fcee57ef4327f9 100644
--- a/cMIPS/vhdl/io.vhd
+++ b/cMIPS/vhdl/io.vhd
@@ -31,7 +31,6 @@ entity print_data is
   port (rst     : in  std_logic;
         clk     : in  std_logic;
         sel     : in  std_logic;
-        rdy     : out std_logic;
         wr      : in  std_logic;
         addr    : in  reg32;
         data    : in  reg32);
@@ -43,8 +42,6 @@ architecture behavioral of print_data is
 
 begin
 
-  rdy <= '1';
-
   U_WRITE_OUT: process(sel,clk)
     variable msg : line;
   begin
@@ -72,7 +69,6 @@ entity to_stdout is
   port (rst     : in  std_logic;
         clk     : in  std_logic;
         sel     : in  std_logic;
-        rdy     : out std_logic;
         wr      : in  std_logic;
         addr    : in  std_logic_vector;
         data    : in  std_logic_vector);
@@ -84,8 +80,6 @@ architecture behavioral of to_stdout is
 
 begin
 
-  rdy <= '1';
-
   U_WRITE_OUT: process(clk,sel)
     variable msg : line;
   begin
@@ -121,7 +115,6 @@ entity write_data_file is
   port (rst      : in  std_logic;
         clk      : in  std_logic;
         sel      : in  std_logic;
-        rdy      : out std_logic;
         wr       : in  std_logic;
         addr     : in  reg32;
         data     : in  reg32;
@@ -136,8 +129,6 @@ architecture behavioral of write_data_file is
 
 begin
 
-  rdy <= '1';
-
   U_write_uint: process (clk,sel)
   begin
 
@@ -182,7 +173,6 @@ entity read_data_file is
   port (rst      : in  std_logic;
         clk      : in  std_logic;
         sel      : in  std_logic;
-        rdy      : out std_logic;
         wr       : in  std_logic;
         addr     : in  reg32;
         data     : out reg32;
@@ -198,9 +188,6 @@ architecture behavioral of read_data_file is
 
 begin
 
-  rdy <= '1';
-
-
   U_read_uint: process(clk,sel)
     variable datum : integer := 0;
     variable value : reg32;                 -- for debugging only
@@ -257,7 +244,6 @@ entity do_interrupt is
   port (rst      : in    std_logic;
         clk      : in    std_logic;     -- clock pulses counted
         sel      : in    std_logic;
-        rdy      : out   std_logic;
         wr       : in    std_logic;
         addr     : in    std_logic_vector;
         data_inp : in    std_logic_vector;
@@ -321,8 +307,6 @@ begin
 
   data_out <= int_en & cnt_en & Q;
 
-  rdy <= '1';  -- never generates wait states
-
 end behavioral;
 -- ++ do_interrupt +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 
@@ -340,7 +324,6 @@ entity simple_uart is
   port (rst     : in    std_logic;
         clk     : in    std_logic;      -- processor clock
         sel     : in    std_logic;
-        rdy     : out   std_logic;
         wr      : in    std_logic;
         addr    : in    std_logic;
         data_inp : in   std_logic_vector;
@@ -373,8 +356,6 @@ architecture behavioral of simple_uart is
 
 begin
 
-  rdy <= '1';
-
   U_UART: uart_int port map (clk, rst, s_ctrl,s_stat, s_tx,s_rx,
                              d_inp,d_out, txdat,rxdat, rts,cts, irq, bit_rt);
   
@@ -416,7 +397,6 @@ entity sys_stats is
   port (rst     : in    std_logic;
         clk     : in    std_logic;
         sel     : in    std_logic;
-        rdy     : out   std_logic;
         wr      : in    std_logic;
         addr    : in    reg32;
         data    : out   reg32;
@@ -452,9 +432,6 @@ begin
 
   end process U_SYNC_OUTPUT;
 
-  
-  rdy <= '1';
-
 end behavioral;
 -- ++ system statistics ++++++++++++++++++++++++++++++++++++++++++++++++++
 
@@ -472,7 +449,6 @@ entity to_7seg is
   port (rst      : in  std_logic;
         clk      : in  std_logic;
         sel      : in  std_logic;
-        rdy      : out std_logic;
         wr       : in  std_logic;
         data     : in  std_logic_vector;
         display0 : out reg8;
@@ -508,12 +484,10 @@ begin
 
   U_DSP0: display_7seg port map (value(3 downto 0), value(8), display0);
 
-  rdy <= '1';
-  
   U_sim: process(sel,rst)
   begin
     if rst = '1' then
-      assert not(rising_edge(sel))
+      assert not(falling_edge(sel))
         report "dsp7seg: "&  SLV32HEX(data) severity NOTE;
     end if;
   end process;
@@ -536,7 +510,6 @@ entity read_keys is
   port (rst      : in  std_logic;
         clk      : in  std_logic;
         sel      : in  std_logic;
-        rdy      : out std_logic;
         data     : out reg32;
         kbd      : in  std_logic_vector (11 downto 0);
         sw       : in  std_logic_vector (3 downto 0));
@@ -591,8 +564,6 @@ begin
   data(4) <= sw(0);
   data(3 downto 0) <= cpu_data(3 downto 0);
   
-  rdy <= '1';
-
   U_DEBOUNCER: countNup generic map (DEB_BITS)
     port map (clk=>clk, rst=>rst, ld=>cnt_ld, en=>cnt_en,
               D=>x_DEB_CYCLES, Q=>open, co=>debounced); 
diff --git a/cMIPS/vhdl/packageMemory.vhd b/cMIPS/vhdl/packageMemory.vhd
index b67e3b2c8c2ad5aa8cfde1b8204f6550abe5c01b..5070aa9de9af788ff0d63245d83038135e8e7073 100644
--- a/cMIPS/vhdl/packageMemory.vhd
+++ b/cMIPS/vhdl/packageMemory.vhd
@@ -41,8 +41,8 @@ package p_MEMORY is
   -- begin DO NOT change these names as several scripts depend on them --
   --  you may change the values, not names neither formatting          --
   constant x_INST_BASE_ADDR : reg32   := x"00000000";
-  constant x_INST_MEM_SZ    : reg32   := x"00004000";  
-  constant x_DATA_BASE_ADDR : reg32   := x"00040000";  
+  constant x_INST_MEM_SZ    : reg32   := x"00004000";
+  constant x_DATA_BASE_ADDR : reg32   := x"00040000";
   constant x_DATA_MEM_SZ    : reg32   := x"00008000";
   constant x_IO_BASE_ADDR   : reg32   := x"0F000000";
   constant x_IO_MEM_SZ      : reg32   := x"00002000";
diff --git a/cMIPS/vhdl/tb_cMIPS.vhd b/cMIPS/vhdl/tb_cMIPS.vhd
index 01bf3a46a4e806c8ad89971008ceef46291adb68..953bdb291713419a4a81ccba38b4dfa5c4dc06d0 100644
--- a/cMIPS/vhdl/tb_cMIPS.vhd
+++ b/cMIPS/vhdl/tb_cMIPS.vhd
@@ -55,7 +55,6 @@ architecture TB of tb_cMIPS is
     port (rst      : in  std_logic;
           clk      : in  std_logic;
           sel      : in  std_logic;
-          rdy      : out std_logic;
           wr       : in  std_logic;
           data     : in  std_logic_vector;
           display0 : out std_logic_vector;
@@ -67,7 +66,6 @@ architecture TB of tb_cMIPS is
     port (rst      : in  std_logic;
           clk      : in  std_logic;
           sel      : in  std_logic;
-          rdy      : out std_logic;
           data     : out reg32;
           kbd      : in  std_logic_vector (11 downto 0);
           sw       : in  std_logic_vector (3 downto 0));
@@ -77,7 +75,6 @@ architecture TB of tb_cMIPS is
     port (rst     : in  std_logic;
           clk     : in  std_logic;
           sel     : in  std_logic;
-          rdy     : out std_logic;
           wr      : in  std_logic;
           addr    : in  std_logic_vector;
           data    : in  std_logic_vector);
@@ -87,7 +84,6 @@ architecture TB of tb_cMIPS is
     port (rst     : in  std_logic;
           clk     : in  std_logic;
           sel     : in  std_logic;
-          rdy     : out std_logic;
           wr      : in  std_logic;
           addr    : in  std_logic_vector;
           data    : in  std_logic_vector);
@@ -98,7 +94,6 @@ architecture TB of tb_cMIPS is
     port (rst      : in  std_logic;
           clk      : in  std_logic;
           sel      : in  std_logic;
-          rdy      : out std_logic;
           wr       : in  std_logic;
           addr     : in  std_logic_vector;
           data     : in  std_logic_vector;
@@ -111,7 +106,6 @@ architecture TB of tb_cMIPS is
     port (rst     : in  std_logic;
           clk     : in  std_logic;
           sel     : in  std_logic;
-          rdy     : out std_logic;
           wr      : in  std_logic;
           addr    : in  std_logic_vector;
           data    : out std_logic_vector;
@@ -122,7 +116,6 @@ architecture TB of tb_cMIPS is
     port (rst     : in    std_logic;
           clk     : in    std_logic;
           sel     : in    std_logic;
-          rdy     : out   std_logic;
           wr      : in    std_logic;
           addr    : in    std_logic_vector;
           data_inp : in   std_logic_vector;
@@ -134,7 +127,6 @@ architecture TB of tb_cMIPS is
     port (rst     : in    std_logic;
           clk     : in    std_logic;
           sel     : in    std_logic;
-          rdy     : out   std_logic;
           wr      : in    std_logic;
           addr    : in    std_logic;
           data_inp : in   std_logic_vector;
@@ -160,7 +152,6 @@ architecture TB of tb_cMIPS is
     port (rst     : in    std_logic;
           clk     : in    std_logic;
           sel     : in    std_logic;
-          rdy     : out   std_logic;
           wr      : in    std_logic;
           addr    : in    std_logic_vector;
           data    : out   std_logic_vector;
@@ -410,17 +401,17 @@ architecture TB of tb_cMIPS is
   signal inst_aVal, inst_wait, rom_rdy : std_logic := '1';
   signal data_aVal, data_wait, ram_rdy, mem_wr : std_logic;
   signal cpu_xfer, mem_xfer, dev_select, dev_select_ram, dev_select_io : reg4;
-  signal io_print_sel,   io_print_wait   : std_logic := '1';
-  signal io_stdout_sel,  io_stdout_wait  : std_logic := '1';
-  signal io_stdin_sel,   io_stdin_wait   : std_logic := '1';
-  signal io_write_sel,   io_write_wait   : std_logic := '1';
-  signal io_read_sel,    io_read_wait    : std_logic := '1';
-  signal io_counter_sel, io_counter_wait : std_logic := '1';
+  signal io_print_sel   : std_logic := '1';
+  signal io_stdout_sel  : std_logic := '1';
+  signal io_stdin_sel   : std_logic := '1';
+  signal io_write_sel   : std_logic := '1';
+  signal io_read_sel    : std_logic := '1';
+  signal io_counter_sel : std_logic := '1';
+  signal io_uart_sel    : std_logic := '1';
+  signal io_sstats_sel  : std_logic := '1';
+  signal io_7seg_sel    : std_logic := '1';
+  signal io_keys_sel    : std_logic := '1';
   signal io_fpu_sel,     io_fpu_wait     : std_logic := '1';
-  signal io_uart_sel,    io_uart_wait    : std_logic := '1';
-  signal io_sstats_sel,  io_sstats_wait  : std_logic := '1';
-  signal io_7seg_sel,    io_7seg_wait    : std_logic := '1';
-  signal io_keys_sel,    io_keys_wait    : std_logic := '1';
   signal io_lcd_sel,     io_lcd_wait     : std_logic := '1';
   signal d_cache_d_out, stdin_d_out, read_d_out, counter_d_out : reg32;
   signal fpu_d_out, uart_d_out, sstats_d_out, keybd_d_out : reg32;
@@ -487,12 +478,7 @@ begin  -- TB
   
   cpu_i_wait <= inst_wait;
   cpu_d_wait <= data_wait and io_wait;
-  io_wait    <= io_lcd_wait;
-                -- '1'; io_print_wait and io_stdout_wait and io_stdin_wait and
-                -- io_write_wait and io_read_wait and
-                -- io_counter_wait and -- io_uart_wait and
-                -- io_sstats_wait and --  io_fpu_wait 
-                -- io_7seg_wait and  io_keys_wait;
+  io_wait    <= io_lcd_wait;  -- and io_fpu_wait;
 
   not_waiting <= (inst_wait and data_wait); --  and io_wait);
 
@@ -561,32 +547,30 @@ begin  -- TB
               mem_addr, datram_out, datram_inp, mem_xfer, dump_ram);
   
   U_read_inp: read_data_file generic map ("input.data")
-    port map (rst,clk, io_read_sel,io_read_wait,  wr,d_addr,read_d_out,
-              cpu_xfer);
+    port map (rst,clk, io_read_sel,  wr, d_addr,read_d_out, cpu_xfer);
 
   U_write_out: write_data_file generic map ("output.data")
-    port map (rst,clk, io_write_sel,io_write_wait, wr,d_addr,cpu_data,
-              cpu_xfer, dump_ram);
+    port map (rst,clk, io_write_sel, wr, d_addr,cpu_data, cpu_xfer, dump_ram);
 
   U_print_data: print_data
-    port map (rst,clk, io_print_sel,io_print_wait, wr,d_addr,cpu_data);
+    port map (rst,clk, io_print_sel, wr, d_addr, cpu_data);
 
   U_to_stdout: to_stdout
-    port map (rst,clk,io_stdout_sel,io_stdout_wait,wr,d_addr,cpu_data);
+    port map (rst,clk, io_stdout_sel, wr, d_addr, cpu_data);
 
   U_interrupt_counter: do_interrupt     -- external counter+interrupt
-    port map (rst,clk, io_counter_sel, io_counter_wait,
-              wr, d_addr, cpu_data, counter_d_out, counter_irq);
+    port map (rst,clk, io_counter_sel, wr, d_addr, cpu_data,
+              counter_d_out, counter_irq);
 
   U_to_7seg: to_7seg
-    port map (rst,clk,io_7seg_sel,io_7seg_wait,wr,cpu_data,disp0,disp1);
+    port map (rst,clk,io_7seg_sel, wr, cpu_data, disp0, disp1);
 
   keys <= b"000000000000", b"000000000100" after 1 us, b"000000000000" after 2 us,  b"001000000000" after 3 us, b"000000000000" after 4 us, b"000001000000" after 5 us, b"000000000000" after 6 us; 
   switches <= b"0000";
           
   U_read_keys: read_keys
     generic map (6)        -- debouncing interval, in clock cycles
-    port map (rst,clk, io_keys_sel,io_keys_wait,keybd_d_out,keys,switches);
+    port map (rst,clk, io_keys_sel, keybd_d_out, keys, switches);
 
   led_r <= keybd_d_out(0);
   led_g <= keybd_d_out(1);
@@ -598,8 +582,7 @@ begin  -- TB
               lcd_data, lcd_rs, lcd_rw, lcd_en, lcd_blon);
   
   U_simple_uart: simple_uart
-    port map (rst,clk, io_uart_sel, io_uart_wait, wr, d_addr(2),
-              cpu_data, uart_d_out,
+    port map (rst,clk, io_uart_sel, wr, d_addr(2), cpu_data, uart_d_out,
               uart_txd, uart_rxd, uart_rts, uart_cts, uart_irq, bit_rt);
               -- uncoment next line for loop back, comment out previous line
               -- uart_txd, uart_txd, uart_rts, uart_cts, uart_irq, bit_rt);
@@ -615,8 +598,7 @@ begin  -- TB
   -- port map (rst,clk, io_FPU_sel, io_FPU_wait, wr, d_addr, cpu_data);
 
   -- U_sys_stats: sys_stats                -- CPU reads system counters
-  --   port map (cpu_reset,clk, io_sstats_sel, io_sstats_wait,
-  --             wr, d_addr, sstats_d_out,
+  --   port map (cpu_reset,clk, io_sstats_sel, wr, d_addr, sstats_d_out,
   --             cnt_d_ref,cnt_d_rd_hit,cnt_d_wr_hit,cnt_d_flush,
   --             cnt_i_ref,cnt_i_hit);
   
@@ -689,8 +671,8 @@ architecture behavioral of ram_addr_decode is
   constant HI_ADDR  : integer := log2_ceil(DATA_BASE_ADDR + DATA_MEM_SZ - 1);
   constant in_r : std_logic_vector(HI_ADDR downto LO_ADDR) := (others => '1');
   constant ng_r : std_logic_vector(HI_ADDR downto LO_ADDR) := (others => '0');
-  constant oth  : std_logic_vector(HI_SEL_BITS downto HI_ADDR+1):=(others => '1');
-  constant ng_o : std_logic_vector(HI_SEL_BITS downto HI_ADDR+1):=(others => '0');
+  constant oth  : std_logic_vector(HI_SEL_BITS downto HI_ADDR+1):=(others=>'1');
+  constant ng_o : std_logic_vector(HI_SEL_BITS downto HI_ADDR+1):=(others=>'0');
 begin
 
   aVal <= '0' when ( cpu_d_aVal = '0' and rst = '1' and
@@ -761,7 +743,10 @@ begin
                    ) else
           '1';
   
-  U_decode: process(clk, aVal, addr)
+
+  dev <= to_integer(signed(addr(HI_SEL_ADDR downto LO_SEL_ADDR)));
+
+  U_decode: process(clk, aVal, addr, dev)
     variable dev_sel    : reg4;
     constant is_noise   : integer := 0;
     constant is_print   : integer := 2;
@@ -791,8 +776,6 @@ begin
     keybd_sel   <= '1';
     lcd_sel     <= '1';
 
-    dev <= to_integer(signed(addr(HI_SEL_ADDR downto LO_SEL_ADDR)));
-    
     case dev is -- to_integer(signed(addr(HI_ADDR downto LO_ADDR))) is
       when  0 => dev_sel     := std_logic_vector(to_signed(is_print, 4));
                  print_sel   <= aVal or clk;