diff --git a/cMIPS/docs/cMIPS.pdf b/cMIPS/docs/cMIPS.pdf index 8092a007ecab134b226874eee489e3cdfc1d8bf9..132dcc6716f71839a57c6520fa453512337bedb6 100644 Binary files a/cMIPS/docs/cMIPS.pdf and b/cMIPS/docs/cMIPS.pdf differ diff --git a/cMIPS/include/cMIPSio.c b/cMIPS/include/cMIPSio.c index 5554749aa652ebc53cb54ddbb2752a9d8e9c3ced..c1f74aac4eee700bc3ba85524a948bdb49e877d7 100644 --- a/cMIPS/include/cMIPSio.c +++ b/cMIPS/include/cMIPSio.c @@ -26,8 +26,9 @@ void print(int n) { // write a character to VHDL simulator's standard output void to_stdout(char c) { int *IO = (int *)IO_STDOUT_ADDR; - - *IO = c; // prints only after receiving a '\0' or a '\n' (line-feed, 0x0a) + + // prints line only after receiving a '\0' or a '\n' (line-feed, 0x0a) + *IO = (unsigned char)c; } // read a character from VHDL simulator's standard input @@ -91,8 +92,9 @@ void dumpRAM(void) { // system statistics -- read system counters //======================================================================= void readStats(sStats *s) { - int *IO = (int *)IO_STATS_ADDR; #if 0 + int *IO = (int *)IO_STATS_ADDR; + s->dc_ref = *(IO+0); s->dc_rd_hit = *(IO+1); s->dc_wr_hit = *(IO+2); diff --git a/cMIPS/tests/jr_2.expected b/cMIPS/tests/jr_2.expected index 1c70bb742e984c02ccb0a4f688f19ddba74474f2..09972f8846e0b03bff8ff1f4fe69a02aa3bf4fba 100644 --- a/cMIPS/tests/jr_2.expected +++ b/cMIPS/tests/jr_2.expected @@ -7,3 +7,12 @@ 00000054 00000060 0000006c +0000000c +00000018 +00000024 +00000030 +0000003c +00000048 +00000054 +00000060 +0000006c diff --git a/cMIPS/tests/jr_2.s b/cMIPS/tests/jr_2.s index 4a853a64b82d8f8d14cfc33b9033fe6f070233da..b5f9208e279f8090eaf569c7935e6bc19c6ebbb7 100644 --- a/cMIPS/tests/jr_2.s +++ b/cMIPS/tests/jr_2.s @@ -6,25 +6,60 @@ .ent _start _start: la $16, x_IO_BASE_ADDR la $15,(x_DATA_BASE_ADDR+0x10) + + ## + ## let's check stalls for add r1 ; jr r1 + ## + la $5, snd - li $3,1 - li $4,5 - addi $29,$0,100 - move $8,$zero -snd: #sw $31, 0($16) # $31 <- 0,snd+4 - add $8,$8,$3 # $8 
<- 1, 7,13,19,25,31, - add $8,$8,$4 # $8 <- 6,12,18,24,30,36, - add $9,$8,$8 # $9 <- 12,24,36,48,60,72, - sw $9, 4($16) - slt $28,$9,$29 - beq $28,$0,.L1 + li $3, 1 + li $4, 5 + addi $29, $0, 100 + move $8, $zero +snd: #sw $31, 0($16) # $31 <- 0,snd+4 + add $8, $8, $3 # $8 <- 1, 7,13,19,25,31, + add $8, $8, $4 # $8 <- 6,12,18,24,30,36, + add $9, $8, $8 # $9 <- 12,24,36,48,60,72, + sw $9, 0($16) + slt $28, $9, $29 + beq $28, $0, trd nop - add $9,$0,$5 + add $9, $0, $5 jr $9 nop + + ## + ## now let's check stalls for lw r1 ; jr r1 + ## + +trd: la $10, loop # start of loop address + la $11, addr # keep it in memory + sw $10, 0($11) + move $8, $zero + li $3, 1 + li $4, 5 + +loop: + add $8, $8, $3 # $8 <- 1, 7,13,19,25,31, + add $8, $8, $4 # $8 <- 6,12,18,24,30,36, + add $9, $8, $8 # $9 <- 12,24,36,48,60,72, + sw $9, 0($16) + slt $28, $9, $29 + beq $28, $0, .L1 + nop + la $11, addr # keep it in memory + lw $9, 0($11) + jr $9 + nop + .L1: end: nop nop wait nop .end _start + + .data + .align 4 + .space 128 +addr: .word 0 diff --git a/cMIPS/tests/lwFWDsw.s b/cMIPS/tests/lwFWDsw.s index d6745c3ced471a26f50d784b8870bbfd89205cc7..242839113f5d8277a3724b135836a354fdcc709b 100644 --- a/cMIPS/tests/lwFWDsw.s +++ b/cMIPS/tests/lwFWDsw.s @@ -4,20 +4,22 @@ .set noreorder .globl _start .ent _start + _start: nop - la $15, x_DATA_BASE_ADDR + 0x10 - la $16, x_IO_BASE_ADDR - addi $3,$0,-10 - ori $5,$0,4 - addi $9,$0,10 + la $15, x_DATA_BASE_ADDR + la $16, x_IO_BASE_ADDR + addi $3, $0, -10 + ori $5, $0, 4 + addi $9, $0, 10 nop -snd: sw $3, 4($15) - addi $3,$3,1 - lw $4, 4($15) - sw $4, 0($16) - add $15,$15,$5 - slt $8,$3,$9 - bne $8,$0,snd + +snd: sw $3, 4($15) # mem[i+1] <= count + addi $3, $3, 1 # count ++ + lw $4, 4($15) # $4 <= mem[i+1] + sw $4, 0($16) # print $4 + add $15, $15, $5 # i++ + slt $8, $3, $9 # count reached +10? 
+ bne $8, $0, snd # no, continue nop wait nop diff --git a/cMIPS/tests/lwFWDsw2.s b/cMIPS/tests/lwFWDsw2.s index 2c3d3784bfd2b06160d13c2ce20562192493bbdc..72119a0bceccf200ad0331c4b8b189956eee853b 100644 --- a/cMIPS/tests/lwFWDsw2.s +++ b/cMIPS/tests/lwFWDsw2.s @@ -5,12 +5,12 @@ .globl _start .ent _start _start: nop - la $17, (x_DATA_BASE_ADDR) # base address of RAM + la $17, x_DATA_BASE_ADDR # base address of RAM addiu $15, $17, 4*4 # $15 <- &RAM[4] la $16, x_IO_BASE_ADDR # address to print out results - addi $3,$0,-10 # value to print = -10 - addi $5,$0,4 # scan from RAM[4]..RAM[24] - addi $9,$0,10 # stop when done 20 loops = +10 + addi $3, $0, -10 # value to print = -10 + addi $5, $0, 4 # scan from RAM[4]..RAM[24] + addi $9, $0, 10 # stop when done 20 loops = +10 sw $15, 0($17) # save pointer to RAM[0] sw $15, 0($16) # and print it out nop diff --git a/cMIPS/tests/lwsw.s b/cMIPS/tests/lwsw.s index fa7cd5181a17cb578a92f59c47bf3eed9a25096b..b6040230defe241ce8b3d0c7cad30a5655869dd3 100644 --- a/cMIPS/tests/lwsw.s +++ b/cMIPS/tests/lwsw.s @@ -5,17 +5,21 @@ .ent _start _start: la $15, (x_DATA_BASE_ADDR+0x10) la $16, x_IO_BASE_ADDR - addi $3,$0,-10 - ori $5,$0,4 + addi $3, $0, -10 + ori $5, $0, 4 nop + snd: sw $3, 4($15) - addi $3,$3,1 + addi $3, $3, 1 lw $4, 4($15) - add $15,$15,$5 + add $15, $15, $5 sw $4, 0($16) - bne $3,$0,snd + bne $3, $0, snd nop wait nop nop .end _start + + # fffffff6 fffffff7 fffffff8 fffffff9 fffffffa fffffffb fffffffc fffffffd fffffffe ffffffff + diff --git a/cMIPS/tests/lwswIncr.s b/cMIPS/tests/lwswIncr.s index 229577bfac4d22f7b30b525a14a11ef26159aa50..d97431733f8e29ca1f77216214a6338473b91c1c 100644 --- a/cMIPS/tests/lwswIncr.s +++ b/cMIPS/tests/lwswIncr.s @@ -3,22 +3,22 @@ .align 2 .globl _start .ent _start -_start: la $15, x_IO_BASE_ADDR - la $16, x_IO_BASE_ADDR - la $14, x_DATA_BASE_ADDR - addi $3,$0,-16 - ori $5,$0,2 - la $29,(x_IO_BASE_ADDR+0x40) +_start: la $15, 0 # start + la $29, 0x40 # end + la $16, x_IO_BASE_ADDR + la $14, 
x_DATA_BASE_ADDR + addi $3, $0, -16 + addi $5, $0, 2 nop - nop -snd: add $3,$5,$3 - sw $3, 0($14) - addi $14,$14,4 - lw $3, -4($14) - addi $15,$15,4 - sw $3, ($16) - slt $30,$15,$29 - bne $30,$0,snd + +snd: add $3, $5, $3 + sw $3, 0($14) # mem[i] <= count + addi $14, $14, 4 # i++ + lw $3, -4($14) # $3 <= mem[i-1] + addi $15, $15, 4 # limit += 4 + sw $3, 0($16) # print count + slt $30, $15, $29 # limit = 0x40 ? + bne $30, $0, snd # no, continue nop wait nop @@ -26,5 +26,3 @@ snd: add $3,$5,$3 # fffffff2 fffffff4 fffffff6 fffffff8 fffffffa fffffffc fffffffe 00000000 00000002 00000004 00000006 00000008 0000000a 0000000c 0000000e 00000010 - - \ No newline at end of file diff --git a/cMIPS/tests/swlw.s b/cMIPS/tests/swlw.s index 081056452fdc423c7553449d9e13c1c4374a7c6d..fa9428c4a7f402767e5b0d12fe0e76a8d5af9d6d 100644 --- a/cMIPS/tests/swlw.s +++ b/cMIPS/tests/swlw.s @@ -6,19 +6,19 @@ .ent _start _start: la $15, (x_DATA_BASE_ADDR+0x10) la $16, x_IO_BASE_ADDR - addi $3,$0,10 - ori $5,$0,2 - addi $29,$0,800 - sw $5, -4($15) + addi $3, $0, 10 + ori $5, $0, 2 # count = 2 + addi $29, $0, 800 + sw $5, -4($15) # mem[i-1] <= count nop -snd: add $3,$5,$3 - sw $3, 4($15) - lw $4, -4($15) - lw $9, 4($15) - add $5,$5,$5 # 2, 4, 8,16,32,64,128,256,512,1024 - sw $9, 0($16) # 10,12,16,24,40,72,136,264,520,1032 - slt $28,$9,$29 - bne $28,$0,snd +snd: add $3, $5, $3 # $3 <= $3 + count + sw $3, 4($15) # mem[i+1] <= $3 + lw $4, -4($15) # $4 <= mem[i-1] + lw $9, 4($15) # $9 <= mem[i+1] + add $5, $5, $5 # count *= 2 : 2,4,8,16,32,64,128,256,512,1024 + sw $9, 0($16) # print: 10,12,16,24,40,72,136,264,520,1032 + slt $28, $9, $29 # less than 800? 
+ bne $28, $0, snd # yes, continue nop nop nop diff --git a/cMIPS/vhdl/core.vhd b/cMIPS/vhdl/core.vhd index 7aafb4df5241ebb32c01841e464028e3a80ee607..4b1ca9481fc59287dd97020bb57ddf7666c795f7 100644 --- a/cMIPS/vhdl/core.vhd +++ b/cMIPS/vhdl/core.vhd @@ -304,6 +304,7 @@ architecture rtl of core is signal br_target, br_addend, br_tgt_pl4, br_tgt_displ, j_target : reg32; signal RF_PCincd, RF_instruction : reg32; signal eq_fwd_A,eq_fwd_B : reg32; + signal dbg_jr_stall: integer; -- debugging only -- register fetch/read and instruction decode -- component reg_IF_RF is @@ -523,12 +524,12 @@ architecture rtl of core is ('1','1',NIL, '1','0','0',opNOP,"001","00", '0', "00",cNOP,"00"),--39 ('0','0',SB, '1','1','0',opADD,"001","00", '1', "00",cNOP,"11"),--sb=40 ('0','0',SH, '1','1','0',opADD,"001","00", '1', "00",cNOP,"11"),--sh=41 - ('1','1',NIL, '1','1','0',opNOP,"001","00", '0', "00",cNOP,"00"),--swl=42 + ('1','1',NIL, '1','0','0',opNOP,"001","00", '0', "00",cNOP,"00"),--swl=42 ('0','0',SW, '1','1','0',opADD,"001","00", '1', "00",cNOP,"11"),--sw=43 ('1','1',NIL, '1','0','0',opNOP,"001","00", '0', "00",cNOP,"00"),--44 ('1','1',NIL, '1','0','0',opNOP,"001","00", '0', "00",cNOP,"00"),--45 - ('1','1',NIL, '1','1','0',opNOP,"001","00", '0', "00",cNOP,"00"),--swr=46 - ('1','1',NIL, '1','1','0',opNOP,"001","00", '0', "00",cNOP,"00"),--cache=47 + ('1','1',NIL, '1','0','0',opNOP,"001","00", '0', "00",cNOP,"00"),--swr=46 + ('1','1',NIL, '1','0','0',opNOP,"001","00", '0', "00",cNOP,"00"),--cache=47 ('0','1',LL, '0','1','0',opADD,"000","01", '1', "00",cNOP,"11"),--ll=48 ('1','1',NIL, '1','0','0',opNOP,"001","00", '0', "00",cNOP,"00"),--lwc1=49 ('1','1',NIL, '1','0','0',opNOP,"001","00", '0', "00",cNOP,"00"),--lwc2=50 @@ -907,19 +908,29 @@ begin j_target <= RF_PCincd(31 downto 28) & RF_instruction(25 downto 0) & b"00"; - RF_JR_STALL: process (funct_word,a_rs,EX_a_c,MM_a_c,EX_wreg,MM_wreg) + RF_JR_STALL: process (funct_word,a_rs,EX_a_c,MM_a_c,EX_wreg,MM_wreg,MM_aVal) -- MM_aVal is read by the 2nd load-delay test, so it must wake this process + variable i_dbg_jr_stall : integer := 0; -- debug only begin if ( (funct_word.PCsel = b"11")and -- 
load-delay slot (EX_a_c /= a_rs)and(EX_wreg = '0')and (MM_a_c = a_rs)and(MM_wreg = '0')and(MM_a_c /= b"00000") ) then jr_stall <= '1'; + i_dbg_jr_stall := 1; elsif ( (funct_word.PCsel = b"11")and -- ALU hazard (EX_a_c = a_rs)and(EX_wreg = '0')and(EX_a_c /= b"00000") ) then jr_stall <= '1'; + i_dbg_jr_stall := 2; + elsif ( (funct_word.PCsel = b"11")and -- 2nd load-delay slot + (MM_a_c = a_rs)and(MM_wreg = '0')and(MM_a_c /= b"00000") and + (MM_aVal = '0') ) then + jr_stall <= '1'; + i_dbg_jr_stall := 3; else jr_stall <= '0'; - end if; + i_dbg_jr_stall := 0; + end if; + dbg_jr_stall <= i_dbg_jr_stall; end process RF_JR_STALL; - + RF_SW_STALL: process (ctrl_word,a_rs,EX_a_c,EX_wreg,EX_is_load) variable is_store : boolean := false; diff --git a/cMIPS/vhdl/tb_cMIPS.vhd b/cMIPS/vhdl/tb_cMIPS.vhd index bbb4f6ed973b37c78f3118fabfd2c2ad7177eda9..93ff89519885a721aa51d719114c61f57e909681 100644 --- a/cMIPS/vhdl/tb_cMIPS.vhd +++ b/cMIPS/vhdl/tb_cMIPS.vhd @@ -632,17 +632,16 @@ entity inst_addr_decode is -- CPU side triggers access cpu_i_aVal : in std_logic; -- CPU instr addr valid (act=0) addr : in reg32; -- CPU address aVal : out std_logic); -- decoded address in range (act=0) - constant LO_ADDR : integer := 0; - constant HI_ADDR : integer := log2_ceil(INST_MEM_SZ); end entity inst_addr_decode; architecture behavioral of inst_addr_decode is + constant HI_ADDR : integer := HI_SEL_BITS; + constant LO_ADDR : integer := log2_ceil(INST_BASE_ADDR + INST_MEM_SZ); + constant PREFIX : std_logic_vector(HI_ADDR downto LO_ADDR) := (others=>'0'); begin aVal <= '0' when ( cpu_i_aVal = '0' and rst = '1' - and (addr(HI_SEL_BITS downto LO_SEL_BITS) - = - x_INST_BASE_ADDR(HI_SEL_BITS downto LO_SEL_BITS)) ) + and (addr(HI_ADDR downto LO_ADDR) = PREFIX) ) else '1'; end architecture behavioral;