diff --git a/cMIPS/bin/build.sh b/cMIPS/bin/build.sh
index 8f4d8719af968d75d3905b8df624dbac953161b2..11bbffd2bc95de59413f389a422fb235aef95d16 100755
--- a/cMIPS/bin/build.sh
+++ b/cMIPS/bin/build.sh
@@ -66,7 +66,7 @@ simulator=tb_cmips
 
 pkg="packageWires.vhd packageMemory.vhd packageExcp.vhd"
 
-src="altera.vhd macnica.vhd aux.vhd memory.vhd cache.vhd instrcache.vhd ram.vhd rom.vhd units.vhd io.vhd uart.vhd pipestages.vhd exception.vhd core.vhd tb_cMIPS.vhd"
+src="altera.vhd macnica.vhd aux.vhd memory.vhd cache.vhd instrcache.vhd ram.vhd rom.vhd units.vhd io.vhd uart.vhd fpu.vhd pipestages.vhd exception.vhd core.vhd tb_cMIPS.vhd"
 
 # build simulator
 #ghdl --clean
diff --git a/cMIPS/docs/installCrosscompiler b/cMIPS/docs/installCrosscompiler
index 88c1e8c87d6025e54b11e2dec4784585134b889e..a816ed922ca46d3cde0e8f52c132f9fcaf193108 100644
--- a/cMIPS/docs/installCrosscompiler
+++ b/cMIPS/docs/installCrosscompiler
@@ -1,4 +1,7 @@
-#!/bin/bash
+###
+### this file is NOT a shell script,
+### yet it was written to make it easy for cutting-n-pasting with the pointer
+###
 
 # to compile GCC, these three libraries may have to be fetched:
 
@@ -12,11 +15,13 @@ wget ftp://ftp.gmplib.org/pub/gmp/gmp-6.0.0.tar.bz2
 # directory names to mpfr mpc gmp (removing the version suffixes);
 # these libraries are then compiled along with GCC
 
-# OTOH, if you are lucky, the libraries installed by aptitude will do...
+# OTOH, if you are very lucky, the libraries installed by aptitude will do...
 
 # fetch all the auxiliary programs -- this assumes a Debian installation
 aptitude install make flex bison libgmp-dev libmpfr-dev libmpc-dev g++
 unset ls
+
+# make sure the installed files are read-exec by all
 umask 022
 
 # check the latest version of GCC in http://ftp.gnu.org/gnu/gcc/
@@ -38,17 +43,22 @@ tar -xvjf ${BINUTILS}.tar.bz2
 cd ${BINUTILS}
 ./configure --target=$TARGET --prefix=$PREFIX --disable-nls
 make
+
 # do the next one as root?  su ; umask 022 ; make install ; exit
+
 make install
 cd ..
 
 tar -xvzf ${COMPILER}.tar.gz
 cd ${COMPILER}
+
 # you may want/need to expand the libraries' tarballs at this point
+
 export PATH=$PATH:$PREFIX/bin
 ./configure --target=$TARGET --prefix=$PREFIX --disable-nls \
      --enable-languages=c,c++ --without-headers
 make all-gcc
+
 # do the next one as root?  su ; umask 022 ; make install-gcc ; exit
 make install-gcc
 cd ..
@@ -69,8 +79,13 @@ export MANPATH=${MANPATH}:${PREFIX}/man
 #    https://sourceforge.net/p/ghdl-updates/wiki/Debian%20Instructions/
 #  and pick the appropriate version for your computer (32 or 64 bit).
 #
-# When doing  dpkg -i ghdl*.deb  it will complain about versions.
-#  What I have done is use the following DANGEROUS and RISKY command:
-#  dpkg --ignore-depends=ghdl*.deb  -i ghdl*.deb
-#  this forces dpkg to ignore the dependencies for GHDL and install it
-#  with whatever version your machine has for gnat.  The risk is yours.
+# You also need to fetch gnat and libgnat from somewhere; I find it easier
+#  to download the .deb files onto the same directory as ghdl*.deb
+#
+# When doing  dpkg -i ghdl*.deb  it will whine and complain about versions.
+#  What I have done is to use the following DANGEROUS and RISKY command:
+#
+#  dpkg --ignore-depends=ghdl*.deb --ignore-depends=libgnat-4.6 -i ghdl*.deb
+#
+#  it forces dpkg to ignore the dependencies for GHDL and install it
+#  with whatever version your machine has for gnat/libgnat.  Caveat emptor.
diff --git a/cMIPS/tests/FPU_m.c b/cMIPS/tests/FPU_m.c
new file mode 100644
index 0000000000000000000000000000000000000000..45a5c961c6c89562cb0a5354c080214fe003415f
--- /dev/null
+++ b/cMIPS/tests/FPU_m.c
@@ -0,0 +1,162 @@
+#include "cMIPS.h"
+
+//  ALL NaN cases :  set  i < 11 stop case
+//  int A[] = {(int)0x7fffffff,(int)0x7fffffff,(int)0x7fffffff,(int)0x7fffffff,(int)0x7fffffff,(int)0x7fffffff,(int)0x7fffffff,(int)0x7f800000,(int)0x7f800000,(int)0xff800000,(int)0xff800000};
+//  int B[] = {(int)0x7fffffff,(int)0x7f800000,(int)0xff800000,(int)0x00000000,(int)0x80000000,(int)0x1c038000,(int)0x9c038000,(int)0x00000000,(int)0x80000000,(int)0x00000000,(int)0x80000000};
+//  int C[] = {(int)0x7fffffff,(int)0x7fffffff,(int)0x7fffffff,(int)0x7fffffff,(int)0x7fffffff,(int)0x7fffffff,(int)0x7fffffff,(int)0x7fffffff,(int)0x7fffffff,(int)0x7fffffff,(int)0x7fffffff};
+//  ALL inf cases :  set  i < 12 stop case
+//  int A[] = {(int)0x7f800000,(int)0xff800000,(int)0x7f800000,(int)0xff800000,(int)0x7f000000,(int)0xff000000,(int)0xff800000,(int)0xff800000,(int)0x7f800000,(int)0xff800000,(int)0x7f000000,(int)0xff000000};
+//  int B[] = {(int)0x7f800000,(int)0xff800000,(int)0x1c038000,(int)0x9c038000,(int)0x7f000000,(int)0xff000000,(int)0x7f800000,(int)0x1c038000,(int)0x9c038000,(int)0x1c038000,(int)0xff000000,(int)0x7f000000};
+//  int C[] = {(int)0x7f800000,(int)0x7f800000,(int)0x7f800000,(int)0x7f800000,(int)0x7f800000,(int)0x7f800000,(int)0xff800000,(int)0xff800000,(int)0xff800000,(int)0xff800000,(int)0xff800000,(int)0xff800000};
+//  ALL 0 cases :  set  i < 12 stop case
+//  int A[] = {(int)0x00000000,(int)0x80000000,(int)0x00000000,(int)0x80000000,(int)0x00800000,(int)0x80800000,(int)0x00000000,(int)0x00000000,(int)0x80000000,(int)0x80000000,(int)0x00800000,(int)0x80800000};
+//  int B[] = {(int)0x00000000,(int)0x80000000,(int)0x1c038000,(int)0x9c038000,(int)0x00800000,(int)0x80800000,(int)0x9c038000,(int)0x80000000,(int)0x00000000,(int)0x1c038000,(int)0x80800000,(int)0x00800000};
+//  int C[] = {(int)0x00000000,(int)0x00000000,(int)0x00000000,(int)0x00000000,(int)0x00000000,(int)0x00000000,(int)0x80800000,(int)0x80800000,(int)0x80800000,(int)0x80800000,(int)0x80800000,(int)0x80800000};
+//  denorm cases :  set  i < 3 stop case
+//  int A[] = {(int)0x3c000000,(int)0x3c000000,(int)0x3f800000};
+//  int B[] = {(int)0x03800000,(int)0x03000000,(int)0x00200000};
+//  int C[] = {(int)0x00400000,(int)0x00200000,(int)0x00200000};
+
+
+  int A[] = {
+    // first operands, written to the FPU before B[i]; [0..3] normal numbers
+    (int)0x9c038000,(int)0x9c038000,(int)0x9c038000,(int)0x9c038000,
+    // [4..6] cases involving denormalized numbers (3 entries)
+    (int)0x3c000000,(int)0x3c000000,(int)0x3f800000,
+    // [7..18] zero cases (12 entries)
+    (int)0x00000000,(int)0x80000000,(int)0x00000000,(int)0x80000000,
+    (int)0x00800000,(int)0x80800000,(int)0x00000000,(int)0x00000000,
+    (int)0x80000000,(int)0x80000000,(int)0x00800000,(int)0x80800000,
+    // [19..30] infinity cases (12 entries)
+    (int)0x7f800000,(int)0xff800000,(int)0x7f800000,(int)0xff800000,
+    (int)0x7f000000,(int)0xff000000,(int)0xff800000,(int)0xff800000,
+    (int)0x7f800000,(int)0xff800000,(int)0x7f000000,(int)0xff000000,
+    // [31..41] NaN cases (11 entries)
+    (int)0x7fffffff,(int)0x7fffffff,(int)0x7fffffff,(int)0x7fffffff,
+    (int)0x7fffffff,(int)0x7fffffff,(int)0x7fffffff,(int)0x7f800000,
+    (int)0x7f800000,(int)0xff800000,(int)0xff800000
+  };
+
+  int B[] = {
+    // second operands, written to the word after A[i]; [0..3] normal numbers
+    (int)0x3f800000,(int)0x3f800000,(int)0x3f800000,(int)0x3f800000,
+    // [4..6] cases involving denormalized numbers (3 entries)
+    (int)0x03800000,(int)0x03000000,(int)0x00200000,
+    // [7..18] zero cases (12 entries)
+    (int)0x00000000,(int)0x80000000,(int)0x1c038000,(int)0x9c038000,
+    (int)0x00800000,(int)0x80800000,(int)0x9c038000,(int)0x80000000,
+    (int)0x00000000,(int)0x1c038000,(int)0x80800000,(int)0x00800000,
+    // [19..30] infinity cases (12 entries)
+    (int)0x7f800000,(int)0xff800000,(int)0x1c038000,(int)0x9c038000,
+    (int)0x7f000000,(int)0xff000000,(int)0x7f800000,(int)0x1c038000,
+    (int)0x9c038000,(int)0x1c038000,(int)0xff000000,(int)0x7f000000,
+    // [31..41] NaN cases (11 entries)
+    (int)0x7fffffff,(int)0x7f800000,(int)0xff800000,(int)0x00000000,
+    (int)0x80000000,(int)0x1c038000,(int)0x9c038000,(int)0x00000000,
+    (int)0x80000000,(int)0x00000000,(int)0x80000000
+  };
+
+  int C[] = {
+    // expected products A[i]*B[i], compared with the value read back; [0..3] normal numbers
+    (int)0x9c038000,(int)0x9c038000,(int)0x9c038000,(int)0x9c038000,
+    // [4..6] denormalized results (3 entries)
+    (int)0x00400000,(int)0x00200000,(int)0x00200000,
+    // [7..18] zero results: +0 six times, then -0 six times (0x80000000; was 0x80800000)
+    (int)0x00000000,(int)0x00000000,(int)0x00000000,(int)0x00000000,
+    (int)0x00000000,(int)0x00000000,(int)0x80000000,(int)0x80000000,
+    (int)0x80000000,(int)0x80000000,(int)0x80000000,(int)0x80000000,
+    // [19..30] infinite results: +inf six times, then -inf six times
+    (int)0x7f800000,(int)0x7f800000,(int)0x7f800000,(int)0x7f800000,
+    (int)0x7f800000,(int)0x7f800000,(int)0xff800000,(int)0xff800000,
+    (int)0xff800000,(int)0xff800000,(int)0xff800000,(int)0xff800000,
+    // [31..41] NaN results (11 entries)
+    (int)0x7fffffff,(int)0x7fffffff,(int)0x7fffffff,(int)0x7fffffff,
+    (int)0x7fffffff,(int)0x7fffffff,(int)0x7fffffff,(int)0x7fffffff,
+    (int)0x7fffffff,(int)0x7fffffff,(int)0x7fffffff
+};
+
+// Self-checking test of the FPU multiplier: prints a tag per group of cases, or ERR.
+void main(void) {
+  int i, acc;                   // i walks A/B/C; acc becomes 1 on a mismatch
+  volatile int *fpu,res;        // fpu: FPU registers; res: value read back
+
+  fpu = (int *)IO_FPU_ADDR;        // 0x0f0000c0; // MUL
+  // fpu = (int *)IO_FPU_ADDR + 4; // 0x0f0000c8; // ADD
+  // fpu = (int *)IO_FPU_ADDR + 8; // 0x0f0000cc; // DIV
+  // NOTE(review): "+ 4" on an int* adds 16 bytes, not the 8 implied above -- confirm
+  acc = 0;
+  for (i = 0; i < 4; i++) { // ordinary cases: entries 0..3
+      *fpu = A[i];          // first operand
+      *(fpu+1) = B[i];      // second operand goes to the next word
+      asm("nop");
+      res = *fpu;           // the result is read back from the first word
+      if (res != C[i])
+        acc = 1;
+  }
+  if (acc == 0) {
+    to_stdout('o'); to_stdout('r'); to_stdout('d'); to_stdout('\n');
+  } else {
+    to_stdout('E'); to_stdout('R'); to_stdout('R'); to_stdout('\n');
+  }
+
+  acc = 0;
+  for ( ; i < 4+3; i++) { // denormalized: entries 4..6
+      *fpu = A[i];
+      *(fpu+1) = B[i];
+      asm("nop");
+      res = *fpu;
+      if (res != C[i])
+        acc = 1;
+  }
+  if (acc == 0) {
+    to_stdout('d'); to_stdout('e'); to_stdout('n'); to_stdout('\n');
+  } else {
+    to_stdout('E'); to_stdout('R'); to_stdout('R'); to_stdout('\n');
+  }
+
+  acc = 0;
+  for ( ; i < 7+12; i++) { // zeroes: entries 7..18
+      *fpu = A[i];
+      *(fpu+1) = B[i];
+      asm("nop");
+      res = *fpu;
+      if (res != C[i])
+        acc = 1;
+  }
+  if (acc == 0) {
+    to_stdout('z'); to_stdout('e'); to_stdout('r'); to_stdout('\n');
+  } else {
+    to_stdout('E'); to_stdout('R'); to_stdout('R'); to_stdout('\n');
+  }
+
+  acc = 0;
+  for ( ; i < 19+12; i++) { // infinities: entries 19..30
+      *fpu = A[i];
+      *(fpu+1) = B[i];
+      asm("nop");
+      res = *fpu;
+      if (res != C[i])
+        acc = 1;
+  }
+  if (acc == 0) {
+    to_stdout('i'); to_stdout('n'); to_stdout('f'); to_stdout('\n');
+  } else {
+    to_stdout('E'); to_stdout('R'); to_stdout('R'); to_stdout('\n');
+  }
+
+  acc = 0;
+  for ( ; i < 31+11; i++) { // NaNs: entries 31..41 (bound was 21+11, which ran entry 31 only)
+      *fpu = A[i];
+      *(fpu+1) = B[i];
+      asm("nop");
+      res = *fpu;
+      if (res != C[i])
+        acc = 1;
+  }
+  if (acc == 0) {
+    to_stdout('N'); to_stdout('a'); to_stdout('N'); to_stdout('\n');
+  } else {
+    to_stdout('E'); to_stdout('R'); to_stdout('R'); to_stdout('\n');
+  }
+
+}
diff --git a/cMIPS/tests/mmu_tlbwi.s b/cMIPS/tests/mmu_tlbwi.s
index e59e8043c4a778b7259c273577c07e622746325a..fa2620d7505143dde9a54f8634dad4d130f07747 100644
--- a/cMIPS/tests/mmu_tlbwi.s
+++ b/cMIPS/tests/mmu_tlbwi.s
@@ -231,14 +231,6 @@ read3:	tlbr 			# read TLB at index = 3
 	.set TAG_MASK, 0xfffff000 	# 4Kbyte pages
 	.set TAG_G,     0x00000000	# mark pages as global
 
-#	.set MMU_ini_tag_RAM6, ((x_RAM_PPN_6 & TAG_MASK) | TAG_G)
-	
-#	.set x_RAM_PPN_6, (x_DATA_BASE_ADDR + 6*PAGE_SZ)
-#	.set MMU_ini_dat_RAM6, (((x_RAM_PPN_6 >>12) <<6) | 0b000111) # d,v,g=1
-	
-#	.set x_RAM_PPN_7, (x_DATA_BASE_ADDR + 7*PAGE_SZ)
-#	.set MMU_ini_dat_RAM7, (((x_RAM_PPN_7 >>12) <<6) | 0b000111) # d,v,g=1
-
 	
 	# read from MMU(6)
 	addi $1, $1, 1
diff --git a/cMIPS/vhdl/fpu.vhd b/cMIPS/vhdl/fpu.vhd
new file mode 100644
index 0000000000000000000000000000000000000000..c848afca54a7f87be648b7386548682616f20a96
--- /dev/null
+++ b/cMIPS/vhdl/fpu.vhd
@@ -0,0 +1,919 @@
+-- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+--  cMIPS, a VHDL model of the classical five stage MIPS pipeline.
+--  Copyright (C) 2015  Joao Manoel Pampanini Filho & Roberto Andre Hexsel
+--
+--  This program is free software: you can redistribute it and/or modify
+--  it under the terms of the GNU General Public License as published by
+--  the Free Software Foundation, version 3.
+--
+--  This program is distributed in the hope that it will be useful,
+--  but WITHOUT ANY WARRANTY; without even the implied warranty of
+--  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+--  GNU General Public License for more details.
+--
+--  You should have received a copy of the GNU General Public License
+--  along with this program.  If not, see <http://www.gnu.org/licenses/>.
+-- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+library IEEE;
+use IEEE.std_logic_1164.all;
+use IEEE.numeric_std.all;
+use work.p_wires.all;
+
+entity special_values is
+  port (in_a,in_b         : in  std_logic_vector(30 downto 0);
+        type_A,type_b     : out FP_type;
+        denormA,denormB   : out std_logic);
+end special_values;
+-- Classifies two operands given without sign: bits 30..23 exponent, bits 22..0 fraction.
+architecture estrutural of special_values is
+  -- Former 2-bit encoding of the operand class (the ports now carry FP_type):
+  --   11    zero
+  --   01    infinity
+  --   10    NaN
+  --   00    ordinary ("good") number
+  -- type FP_type is fp_is_good, fp_is_inf, fp_is_NaN, fp_is_zero;  -- presumably from work.p_wires
+
+  constant mant_all_zeroes : reg23 := (others => '0');
+  constant exp_all_zeroes  : reg8  := (others => '0');
+  constant exp_all_ones    : reg8  := (others => '1');
+  
+begin
+  -- Operand A: derive its class and its denormA flag.
+  U_check_A: process(in_a)
+    variable mant_is_zero, exp_is_zero, exp_is_255 : boolean;
+  begin
+    mant_is_zero := (in_a(22 downto  0) = mant_all_zeroes);
+    exp_is_zero  := (in_a(30 downto 23) = exp_all_zeroes);
+    exp_is_255   := (in_a(30 downto 23) = exp_all_ones);
+    -- a zero exponent with a non-zero fraction falls through to fp_is_good
+    if exp_is_zero and mant_is_zero then
+      type_A <= fp_is_zero;
+    elsif exp_is_255 and mant_is_zero then
+      type_A <= fp_is_inf;
+    elsif exp_is_255 and not(mant_is_zero) then
+      type_A <= fp_is_nan;
+    else
+      type_A <= fp_is_good;
+    end if;
+    -- despite its name, denormA is '0' for a zero exponent field and '1' otherwise
+    if exp_is_zero then
+      denormA <= '0';
+    else
+      denormA <= '1';
+    end if;
+
+  end process U_check_A;
+
+  -- Operand B: same classification as operand A.
+  U_check_B: process(in_b)
+    variable mant_is_zero, exp_is_zero, exp_is_255 : boolean;
+  begin
+    mant_is_zero := (in_b(22 downto  0) = mant_all_zeroes);
+    exp_is_zero  := (in_b(30 downto 23) = exp_all_zeroes);
+    exp_is_255   := (in_b(30 downto 23) = exp_all_ones);
+
+    if exp_is_zero and mant_is_zero then
+      type_B <= fp_is_zero;
+    elsif exp_is_255 and mant_is_zero then
+      type_B <= fp_is_inf;
+    elsif exp_is_255 and not(mant_is_zero) then
+      type_B <= fp_is_nan;
+    else
+      type_B <= fp_is_good;
+    end if;
+
+    if exp_is_zero then
+      denormB <= '0';
+    else
+      denormB <= '1';
+    end if;
+
+  end process U_check_B;
+  -- Earlier concurrent version using the 2-bit encoding, kept for reference:
+    -- type_A <= b"11" when to_integer(unsigned(in_a(30 downto 23))) = 0 AND to_integer(unsigned(in_a(22 downto 0))) = 0
+    -- else b"01" when to_integer(unsigned(in_a(30 downto 23))) = 255 AND to_integer(unsigned(in_a(22 downto 0))) = 0
+    -- else b"10" when to_integer(unsigned(in_a(30 downto 23))) = 255 AND to_integer(unsigned(in_a(22 downto 0))) /= 0
+    -- else b"00";
+
+    -- type_b <= b"11" when to_integer(unsigned(in_b(30 downto 23))) = 0 AND to_integer(unsigned(in_b(22 downto 0))) = 0
+    -- else b"01" when to_integer(unsigned(in_b(30 downto 23))) = 255 AND to_integer(unsigned(in_b(22 downto 0))) = 0
+    -- else b"10" when to_integer(unsigned(in_b(30 downto 23))) = 255 AND to_integer(unsigned(in_b(22 downto 0))) /= 0
+    -- else b"00";
+
+  -- denormA <= '0' when to_integer(unsigned(in_a(30 downto 23))) = 0 else '1';
+  -- denormB <= '0' when to_integer(unsigned(in_b(30 downto 23))) = 0 else '1';
+
+end estrutural;
+
+--+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+library IEEE;
+use IEEE.std_logic_1164.all;
+use IEEE.numeric_std.all;
+use work.p_wires.all;
+
+entity data_check_mult is
+    port(type_A,type_B : in  FP_type;
+         sig_A,sig_B   : in  std_logic;
+         sig_out       : out std_logic;
+         exp_in        : in  std_logic_vector ( 7 downto 0);
+         fra_in        : in  std_logic_vector (22 downto 0);
+         exp_out       : out std_logic_vector ( 7 downto 0);
+         fra_out       : out std_logic_vector (22 downto 0));
+end data_check_mult;
+
+architecture estrutural of data_check_mult is
+begin
+  -- Final fix-up of a product: when an operand is NaN, infinity or zero, the
+  -- exponent, fraction and sign computed by the datapath are overridden.
+  check : process(type_A,type_B, exp_in,fra_in)
+    variable exp_p : std_logic_vector ( 7 downto 0);
+    variable fra_p : std_logic_vector (22 downto 0);
+  begin
+
+    -- Special operands override the datapath result (exp_in, fra_in):
+    --   NaN  * anything      -> NaN   (exponent and fraction all ones)
+    --   inf  * zero          -> NaN
+    --   inf  * inf or good   -> inf   (exponent all ones, fraction zero)
+    --   zero * zero or good  -> zero  (exponent and fraction zero)
+    --   good * good          -> exp_in, fra_in are passed through
+    -- "good" is any class other than NaN, inf and zero.  Every rule must
+    -- hold with the operands swapped, so each arm for type_A also tests
+    -- type_B.  The arms for (inf or zero) * NaN and for good * inf were
+    -- missing; those products used to come out as inf, zero or raw data.
+
+    case type_A is
+      when fp_is_NaN =>                 -- NaN * anything
+        exp_p := (OTHERS => '1');
+        fra_p := (OTHERS => '1');
+      when fp_is_inf =>
+        exp_p := (OTHERS => '1');
+        if type_B = fp_is_zero or type_B = fp_is_NaN then
+          fra_p := (OTHERS => '1');     -- inf * zero, inf * NaN -> NaN
+        else
+          fra_p := (OTHERS => '0');     -- inf * inf, inf * good -> inf
+        end if;
+      when fp_is_zero =>
+        if type_B = fp_is_inf or type_B = fp_is_NaN then
+          exp_p := (OTHERS => '1');     -- zero * inf, zero * NaN -> NaN
+          fra_p := (OTHERS => '1');
+        else
+          exp_p := (OTHERS => '0');     -- zero * zero, zero * good -> zero
+          fra_p := (OTHERS => '0');
+        end if;
+      when others =>                    -- A is an ordinary number
+        case type_B is
+          when fp_is_NaN =>             -- good * NaN -> NaN
+            exp_p := (OTHERS => '1');
+            fra_p := (OTHERS => '1');
+          when fp_is_inf =>             -- good * inf -> inf
+            exp_p := (OTHERS => '1');
+            fra_p := (OTHERS => '0');
+          when fp_is_zero =>            -- good * zero -> zero
+            exp_p := (OTHERS => '0');
+            fra_p := (OTHERS => '0');
+          when others =>                -- good * good: keep the datapath result
+            exp_p := exp_in;
+            fra_p := fra_in;
+        end case;
+    end case;
+
+    exp_out <= exp_p ;
+    fra_out <= fra_p ;
+  end process;
+
+  -- Sign of the product: '0' whenever the result is NaN, else XOR of the signs.
+  check_sig : process(type_A,type_B, sig_A,sig_B)
+    variable sig_p : std_logic;
+  begin
+    if (type_A = fp_is_NaN OR type_B = fp_is_NaN ) then
+      sig_p := '0';
+    elsif ( (type_A = fp_is_inf  AND type_B = fp_is_zero) OR
+            (type_A = fp_is_zero AND type_B = fp_is_inf ) ) then
+      sig_p := '0';
+    else
+      sig_p := sig_A XOR sig_B;
+    end if;
+
+    sig_out <= sig_p;
+  end process check_sig;
+   
+end estrutural;
+
+
+-- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+-- MULT_FLOAT
+-- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+library IEEE;
+use IEEE.std_logic_1164.all;
+use IEEE.numeric_std.all;
+use work.p_wires.all;
+
+entity mult32float is
+  port(AB_in          : in  std_logic_vector(31 downto 0);
+       rel,rst,wt_in  : in  std_logic;
+       sela,selb,selc : in  std_logic;
+       prod           : out std_logic_vector(31 downto 0);
+       pronto,wt_out  : out std_logic);
+end mult32float;
+-- Pipelined 32-bit floating-point multiplier; registers update on the rising edge of rel.
+architecture estrutural of mult32float is
+
+  component special_values is
+    port (in_a,in_b         : in  std_logic_vector;
+          type_A,type_b     : out FP_type;
+          denormA,denormB   : out std_logic);
+  end component special_values;
+
+  component data_check_mult is
+    port (type_A,type_B : in  FP_type;
+          sig_A,sig_B   : in  std_logic;
+          sig_out       : out std_logic;
+          exp_in        : in  std_logic_vector;
+          fra_in        : in  std_logic_vector;
+          exp_out       : out std_logic_vector;
+          fra_out       : out std_logic_vector);
+  end component data_check_mult;
+
+-- stage 0: latched operands, their classes and the 9-bit exponent arithmetic
+signal vlid_stg0,wt_stg0,flag,denormA,denormB: std_logic;
+signal type_A,type_B : FP_type;
+signal expA,expB,exp_ab,exp_p,desloc : std_logic_vector(8 downto 0);
+signal in_A,in_B : std_logic_vector(31 downto 0);
+-- stage 1: registered classes, signs, shift amount, exponent and fractions, plus the product
+signal vlid_stg1,wt_stg1,sign_A,sign_B : std_logic;
+signal type_A2,type_B2 : FP_type;
+signal deloc2 : std_logic_vector(4 downto 0);
+signal exp_stg1 : std_logic_vector(7 downto 0);
+signal frac_A,frac_B : std_logic_vector(23 downto 0);
+signal frac_p : std_logic_vector(47 downto 0);
+-- stage 2: registered product (signR) and the normalize/round logic
+signal vlid_stg2,wt_stg2,sign_A2,sign_B2,sign_f : std_logic;
+signal type_A3,type_B3 : FP_type;
+signal deloc3,deloc4 : std_logic_vector(4 downto 0);
+signal exp_stg2,exp_stg3,exp_f : std_logic_vector(7 downto 0);
+signal frac_f,mant : std_logic_vector(22 downto 0);
+signal frac_normed : std_logic_vector(23 downto 0);
+signal frac_rounded : std_logic_vector(25 downto 0);
+signal signR : std_logic_vector(47 downto 0);
+-- handshake: w_pronto = result available, w_intra = busy
+signal w_intra,w_pronto: std_logic;
+
+begin
+
+  -- INPUT: sela='1',selb='0' latches operand A; sela='0',selb='1' latches B and marks the pair valid
+  stg0_start: process(rel)
+  begin
+    if rising_edge(rel) then
+      if (rst = '0' or (selA = selB and flag = '0')) then
+        vlid_stg0 <= '0';
+        wt_stg0   <= '0';
+        flag      <= '0';
+        in_A      <= x"00000000";
+        in_B      <= x"00000000" ;
+      elsif (selA = '1' and selB = '0') then
+        vlid_stg0 <= '0';
+        wt_stg0   <= '1';
+        flag      <= '1';
+        in_A      <= AB_in;
+        in_B      <= x"00000000";
+      elsif (selA = '0' and selB = '1') then
+        vlid_stg0 <= '1';
+        wt_stg0   <= '1';
+        flag      <= '0';
+        in_A      <= in_A;
+        in_B      <= AB_in;
+      end if;
+    end if;
+  end process;
+  -- classify both operands (the sign bit is left out)
+  s_cases : special_values
+    port map (in_A(30 downto 0),in_B(30 downto 0),
+              type_A,type_B, denormA,denormB);
+
+  -- extract the exponents from the inputs, widened to 9 bits;
+  expA <= '0' & in_A(30 downto 23) when denormA = '1' else b"000000001";
+  -- the value 1 is used instead when denormA/denormB is '0'
+  expB <= '0' & in_B(30 downto 23) when denormB = '1' else b"000000001";
+
+  -- sum of the exponents
+  exp_ab <=  std_logic_vector(signed(expA) + signed(expB)); -- the sum still includes the bias
+  -- exp_p: 255 when the sum is >= 255, 0 when it is <= 127, else the sum minus 127 ("110000001")
+  exp_p <= '0' & x"ff" when exp_ab >= b"011111111" else
+           '0' & x"00" when exp_ab <= b"001111111" else
+           std_logic_vector(signed(exp_ab) + signed'("110000001"));
+  -- desloc: right-shift amount for small results, 128 minus the sum; 0 when the sum is > 127
+  desloc <= b"000000000" when exp_ab > b"001111111" else
+            std_logic_vector(signed'("010000000") - signed(exp_ab));
+
+  -- MULT: registers the stage-0 values; the shift amount is capped at 24 ("11000")
+  stg1_Mult: process(rel)
+  begin
+    if rising_edge(rel) then
+      if (rst = '0' or vlid_stg0 = '0') then
+        vlid_stg1   <= '0'            ;
+        wt_stg1     <= '0'            ;
+        type_A2     <= fp_is_good;
+        type_B2     <= fp_is_good;
+        sign_A      <= '0'            ;
+        sign_B      <= '0'            ;
+        deloc2      <= (OTHERS => '0');
+        exp_stg1    <= (OTHERS => '0');
+        frac_A      <= (OTHERS => '0');
+        frac_B      <= (OTHERS => '0');
+      else
+        vlid_stg1   <= '1';
+        wt_stg1     <= wt_stg0;
+        type_A2     <= type_A         ;
+        type_B2     <= type_B         ;
+        sign_A      <= in_A(31)       ;
+        sign_B      <= in_B(31)       ;
+        if (desloc < b"000011000") then
+          deloc2 <= desloc(4 downto 0) ;
+        else
+          deloc2 <= "11000";
+        end if;
+        exp_stg1    <= exp_p(7 downto 0);
+        frac_A       <= denormA & in_A(22 downto 0);
+        frac_B       <= denormB & in_B(22 downto 0);
+      end if;
+    end if;
+  end process;
+  -- 24 x 24 bit unsigned product of the two fractions
+  frac_p <= std_logic_vector(unsigned(frac_A) * unsigned(frac_B)) ;
+
+  -- NORMALIZE and ROUND: registers the product and the values carried along with it
+  stg2_round: process(rel)
+  begin
+    if rising_edge(rel) then
+      if (rst = '0' or vlid_stg1 = '0') then
+        vlid_stg2   <= '0'     ;
+        wt_stg2     <= '0'     ;
+        type_A3     <= fp_is_good;
+        type_B3     <= fp_is_good;
+        sign_A2     <= '0'     ;
+        sign_B2     <= '0'     ;
+        deloc3      <= (OTHERS => '0');
+        exp_stg2    <= (OTHERS => '0');
+        signR       <= (OTHERS => '0');
+      else
+        vlid_stg2   <= '1'        ;
+        wt_stg2     <= wt_stg1    ;
+        type_A3     <= type_A2    ;
+        type_B3     <= type_B2    ;
+        sign_A2     <= sign_A     ;
+        sign_B2     <= sign_B     ;
+        deloc3      <= deloc2     ;
+        exp_stg2    <= exp_stg1   ;
+        signR       <= frac_p     ;
+      end if;
+    end if;
+  end process;
+  -- take the top 25 product bits, starting at bit 47 when it is set (else bit 46), and add 1
+  frac_rounded <= std_logic_vector(unsigned('0' & signR(46 downto 22)) + 1) WHEN signR(47) = '0'
+               ELSE std_logic_vector(unsigned('0' & signR(47 downto 23)) + 1);
+
+  deloc4 <=  std_logic_vector(unsigned(deloc3) - 1) when (signR(47) = '1' or frac_rounded(25) = '1') and deloc3 > b"00000" else deloc3;
+
+  exp_stg3 <= x"00" when signR(47) = '0' and signR(46) = '0' else std_logic_vector(unsigned(exp_stg2) + 1) when (signR(47) = '1' or frac_rounded(25) = '1') and deloc3 = b"00000" else exp_stg2;
+
+  frac_normed <= b"0000" & x"00000" when exp_stg3 = x"ff"
+                 else std_logic_vector(unsigned(frac_rounded(25 downto 2)) srl to_integer(unsigned(deloc4))) when frac_rounded(25) = '1'
+                 else std_logic_vector(unsigned(frac_rounded(24 downto 1)) srl to_integer(unsigned(deloc4)));
+
+  mant <= frac_normed(22 downto 0);
+  -- override sign, exponent and fraction when an operand is a special value
+  finish : data_check_mult
+    port map (type_A3,type_B3,sign_A2,sign_B2,sign_f,exp_stg3,mant,exp_f,frac_f);
+
+  -- Final stage: latches the assembled product and raises w_pronto when stage 2 holds valid data
+  stg3_final: process(rel)
+  begin
+    if rising_edge(rel) then
+      if ( rst = '0' or
+           (selc = '1' and  w_intra = '0' and vlid_stg2 = '0') ) then
+        w_pronto <= '0';
+        prod   <= x"00000000";
+      elsif (selc = '0' and w_pronto = '1') then
+        w_pronto <= '1';
+      elsif (vlid_stg2 = '1') then
+        w_pronto <= '1';
+        prod   <= sign_f & exp_f  & frac_f;
+      end if;
+    end if;
+  end process;
+  -- wt_out is high while wt_in or any stage's wait flag is set and the result is not yet available
+  pronto <= w_pronto;
+  w_intra <= (wt_in or wt_stg0 or wt_stg1 or wt_stg2) and (not w_pronto) ;
+  wt_out <= w_intra;
+
+end estrutural;
+-- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+
+
+
+
+
+-- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+library IEEE;
+use IEEE.std_logic_1164.all;
+use IEEE.numeric_std.all;
+
+entity data_check_sum is
+  port(type_A,type_B : in  std_logic_vector ( 1 downto 0);
+       sig_A,sig_B   : in  std_logic;
+       sig_out       : out std_logic;
+       exp_in        : in  std_logic_vector ( 7 downto 0);
+       fra_in        : in  std_logic_vector (22 downto 0);
+       exp_out       : out std_logic_vector ( 7 downto 0);
+       fra_out       : out std_logic_vector (22 downto 0));
+end data_check_sum;
+-- Final fix-up of a sum: overrides the adder's sign/exponent/fraction for special operands.
+architecture estrutural of data_check_sum is
+  -- operand classes use the 2-bit encoding: "11" zero, "01" infinity, "10" NaN, "00" ordinary
+  signal sig_p : std_logic;
+  signal exp_p : std_logic_vector ( 7 downto 0);
+  signal fra_p : std_logic_vector (22 downto 0);
+  constant fra_all_zeroes : std_logic_vector (22 downto 0) := (others => '0');
+begin
+
+  check : process(type_A,type_B,sig_A,sig_B,exp_in,fra_in)
+  begin
+    if (type_A = "10" OR type_B = "10" ) then            -- NaN operand -> NaN
+      sig_p <= '0';
+      exp_p <= (OTHERS => '1');
+      fra_p <= (OTHERS => '1');
+    elsif (type_A = "01" AND sig_A /= sig_B AND type_B = "01") then  -- inf - inf -> NaN
+      sig_p <= '0';
+      exp_p <= (OTHERS => '1');
+      fra_p <= (OTHERS => '1');
+    elsif (type_A = "01") then                           -- inf + x -> inf, sign of A
+      sig_p <= sig_A;
+      exp_p <= (OTHERS => '1');
+      fra_p <= (OTHERS => '0');
+    elsif (type_B = "01") then                           -- x + inf -> inf, sign of B
+      sig_p <= sig_B;
+      exp_p <= (OTHERS => '1');
+      fra_p <= (OTHERS => '0');
+    elsif (type_A = "11" AND type_B = "11") then         -- 0 + 0 only (was OR: x + 0 gave 0)
+      sig_p <= sig_A AND sig_B;
+      exp_p <= (OTHERS => '0');
+      fra_p <= (OTHERS => '0');
+    elsif (exp_in = x"00") AND (fra_in = fra_all_zeroes) then  -- exact zero sum -> +0
+      sig_p <= '0';
+      exp_p <= (OTHERS => '0');
+      fra_p <= (OTHERS => '0');
+    else
+      sig_p <= sig_A;
+      exp_p <= exp_in;
+      fra_p <= fra_in;
+    end if;
+  end process;
+  -- the exact-zero test used a 13-bit literal against the 23-bit fra_in, so it never matched
+  sig_out <= sig_p ;
+  exp_out <= exp_p ;
+  fra_out <= fra_p ;
+
+end estrutural;
+-- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+
+
+-- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+-- SUM_FLOAT
+-- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+library IEEE;
+use IEEE.std_logic_1164.all;
+use IEEE.numeric_std.all;
+
+entity sum32float is
+  port (AB_in          : in  std_logic_vector (31 downto 0);
+        rel,rst,wt_in  : in  std_logic;
+        sela,selb,selc : in  std_logic;
+        prod           : out std_logic_vector (31 downto 0);
+        pronto,wt_out  : out std_logic);
+end sum32float;
+
+architecture estrutural of sum32float is
+
+  component special_values is
+    port (in_a,in_b        : in  std_logic_vector ;
+          type_A,type_B    : out std_logic_vector ;
+          denormA,denormB  : out std_logic);
+  end component special_values;
+
+  component data_check_sum is
+    port (type_A,type_B : in  std_logic_vector;
+          sig_A,sig_B   : in  std_logic;
+          sig_out       : out std_logic;
+          exp_in        : in  std_logic_vector ;
+          fra_in        : in  std_logic_vector ;
+          exp_out       : out std_logic_vector ;
+          fra_out       : out std_logic_vector);
+  end component data_check_sum;
+
+
+signal vlid_stg0,wt_stg0,flag,denorma,denormb,sub_sum  : std_logic ;
+signal type_A,type_B : std_logic_vector( 1 downto 0);
+signal expA,expB,exp_p,deloc : std_logic_vector( 7 downto 0);
+signal st0_sign_A,st0_sign_B_un,st0_sign_B,st0_sign_B2 : std_logic_vector(25 downto 0);
+signal in_A,in_B : std_logic_vector(31 downto 0);
+
+signal vlid_stg1,wt_stg1,s_A,s_b,sin : std_logic ;
+signal type_A2,type_B2,aux           : std_logic_vector( 1 downto 0);
+signal desloc                        : std_logic_vector( 4 downto 0);
+signal exp_stg1,delocn               : std_logic_vector( 7 downto 0);
+signal signA,signB,sum_AB            : std_logic_vector(25 downto 0);
+signal sum_AB2                       : std_logic_vector(26 downto 0);
+
+signal vlid_stg2,wt_stg2,s_A2,s_b2,sign_f : std_logic ;
+signal type_A3,type_B3                    : std_logic_vector( 1 downto 0);
+signal exp_stg2,exp_f,exp_stg21,exp_aux   : std_logic_vector( 7 downto 0);
+signal sum_f,frac_f                       : std_logic_vector(22 downto 0);
+signal sum_C                              : std_logic_vector(25 downto 0);
+signal rounded_sum                        : std_logic_vector(27 downto 0);
+
+signal w_pronto,w_intra : std_logic;
+
+begin
+
+  -- ENTRADA
+  stg0_start: process(rel)
+  begin
+    if rising_edge(rel) then
+      if (rst = '0' or (selA = selB and flag = '0')) then
+        vlid_stg0 <= '0';
+        wt_stg0   <= '0';
+        flag      <= '0';
+        in_A      <= x"00000000";
+        in_B      <= x"00000000" ;
+      elsif (selA = '1' and selB = '0') then
+        vlid_stg0 <= '0';
+        wt_stg0   <= '1';
+        flag      <= '1';
+        in_A      <= AB_in;
+        in_B      <= x"00000000";
+      elsif (selA = '0' and selB = '1') then
+        vlid_stg0 <= '1';
+        wt_stg0   <= '1';
+        flag      <= '0';
+        if ( unsigned(in_A(30 downto 0)) >= unsigned(AB_in(30 downto 0)) ) then
+          in_A      <= in_A;
+          in_B      <= AB_in;
+        else
+          in_A      <= AB_in;
+          in_B      <= in_A;
+        end if;
+      end if;
+    end if;
+  end process;
+
+  -- s_cases : special_values
+  --   port map (in_A(30 downto 0),in_B(30 downto 0),type_a,type_b,denormA,denormB);
+
+  expA <=  in_A(30 downto 23) when denormA = '1' else x"01";
+  expB <=  in_B(30 downto 23) when denormB = '1' else x"01";
+
+  deloc <= std_logic_vector(unsigned(expA) - unsigned(expB));
+
+  exp_p <= expA;
+
+  sub_sum <= in_A(31) xor in_B(31);
+
+  st0_sign_A <= b"00" & denormA & in_A(22 downto 0);
+  st0_sign_B_un <= b"00" & denormB & in_B(22 downto 0);
+  st0_sign_B2 <= std_logic_vector(unsigned(st0_sign_B_un) srl to_integer(unsigned(deloc)));
+  st0_sign_B <= std_logic_vector(-signed(st0_sign_B2)) when sub_sum = '1' else st0_sign_B2;
+
+  -- STAGE 1 register (the label says Mult, but this process only latches the aligned adder operands)
+  stg1_Mult: process(rel)
+  begin
+    if rising_edge(rel) then
+      if (rst = '0' or vlid_stg0 = '0') then   -- reset or nothing valid in stage 0: clear the stage
+        vlid_stg1   <= '0';
+        wt_stg1     <= '0';
+        type_A2     <= (OTHERS => '0');
+        type_B2     <= (OTHERS => '0');
+        s_A         <= '0';
+        s_b         <= '0';
+        exp_stg1    <= (OTHERS => '0');
+        signA       <= (OTHERS => '0');
+        signB       <= (OTHERS => '0');
+      else                                     -- copy the stage-0 values forward
+        vlid_stg1   <= '1';
+        wt_stg1     <= wt_stg0;
+        type_A2     <= type_A;
+        type_B2     <= type_B;
+        s_A         <= in_A(31);               -- operand sign bits
+        s_b         <= in_B(31);
+        exp_stg1    <= exp_p;
+        signA       <= st0_sign_A;
+        signB       <= st0_sign_B;             -- already shifted and, for a subtraction, negated
+      end if;
+    end if;
+  end process;
+
+  sum_AB <=  std_logic_vector(signed(signA) + signed(signB));  -- add the aligned operands
+  sum_ab2 <= sum_AB & '0';                                      -- one extra low-order bit appended
+  -- priority encoder on sum_AB(24 downto 0): desloc = shift distance, sin = '0' shifts right, sin = '1' shifts left
+  deloc_sum: process(sum_AB)
+  begin
+      if (sum_AB(24) = '1') then       -- leading one at bit 24: shift right by one
+        desloc <= b"00001";
+        sin <= '0';
+      elsif (sum_AB(23) = '1') then    -- leading one at bit 23: no shift
+        desloc <= b"00000";
+        sin <= '0';
+      elsif (sum_AB(22) = '1') then    -- leading one below bit 23: shift left by (23 - bit index)
+        desloc <= b"00001";
+        sin <= '1';
+      elsif (sum_AB(21) = '1') then
+        desloc <= b"00010";
+        sin <= '1';
+      elsif (sum_AB(20) = '1') then
+        desloc <= b"00011";
+        sin <= '1';
+      elsif (sum_AB(19) = '1') then
+        desloc <= b"00100";
+        sin <= '1';
+      elsif (sum_AB(18) = '1') then
+        desloc <= b"00101";
+        sin <= '1';
+      elsif (sum_AB(17) = '1') then
+        desloc <= b"00110";
+        sin <= '1';
+      elsif (sum_AB(16) = '1') then
+        desloc <= b"00111";
+        sin <= '1';
+      elsif (sum_AB(15) = '1') then
+        desloc <= b"01000";
+        sin <= '1';
+      elsif (sum_AB(14) = '1') then
+        desloc <= b"01001";
+        sin <= '1';
+      elsif (sum_AB(13) = '1') then
+        desloc <= b"01010";
+        sin <= '1';
+      elsif (sum_AB(12) = '1') then
+        desloc <= b"01011";
+        sin <= '1';
+      elsif (sum_AB(11) = '1') then
+        desloc <= b"01100";
+        sin <= '1';
+      elsif (sum_AB(10) = '1') then
+        desloc <= b"01101";
+        sin <= '1';
+      elsif (sum_AB(9) = '1') then
+        desloc <= b"01110";
+        sin <= '1';
+      elsif (sum_AB(8) = '1') then
+        desloc <= b"01111";
+        sin <= '1';
+      elsif (sum_AB(7) = '1') then
+        desloc <= b"10000";
+        sin <= '1';
+      elsif (sum_AB(6) = '1') then
+        desloc <= b"10001";
+        sin <= '1';
+      elsif (sum_AB(5) = '1') then
+        desloc <= b"10010";
+        sin <= '1';
+      elsif (sum_AB(4) = '1') then
+        desloc <= b"10011";
+        sin <= '1';
+      elsif (sum_AB(3) = '1') then
+        desloc <= b"10100";
+        sin <= '1';
+      elsif (sum_AB(2) = '1') then
+        desloc <= b"10101";
+        sin <= '1';
+      elsif (sum_AB(1) = '1') then
+        desloc <= b"10110";
+        sin <= '1';
+      elsif (sum_AB(0) = '1') then
+        desloc <= b"10111";
+        sin <= '1';
+      else                             -- no bit set in sum_AB(24 downto 0)
+        desloc <= b"11000";
+        sin <= '1';
+      end if;
+  end process;
+  -- aux: "01" = right shift by one with exp_stg1 = 0xFE; "10" = no bit set in sum_AB(24 downto 0); "00" otherwise
+  aux <= b"01" when exp_stg1 = b"11111110" and desloc = b"00001" and sin = '0' else b"10" when desloc = b"11000" and sin = '1' else "00";
+  -- delocn: shift distance actually applied; a left shift is capped at exp_stg1 - 1
+  delocn <=  "000" & desloc when (sin = '0') OR ( unsigned(desloc) <= (unsigned(exp_stg1) - unsigned'(b"00000001"))) else std_logic_vector(unsigned(exp_stg1) - unsigned'(b"00000001"));
+
+  -- NORMALIZE and ROUND: apply the shift chosen by deloc_sum to the sum and adjust the exponent to match
+  stg2_round: process(rel)
+  begin
+    if rising_edge(rel) then
+      if (rst = '0' or vlid_stg1 = '0') then   -- reset or nothing valid in stage 1: clear the stage
+        vlid_stg2   <= '0';
+        wt_stg2     <= '0';
+        type_A3     <= (OTHERS => '0');
+        type_B3     <= (OTHERS => '0');
+        s_A2        <= '0';
+        s_b2        <= '0';
+        exp_stg2    <= (OTHERS => '0');
+        sum_C       <= (OTHERS => '0');
+      else
+        vlid_stg2   <= '1';
+        wt_stg2     <= wt_stg1;
+        type_A3     <= type_A2;
+        type_B3     <= type_B2;
+        s_A2        <= s_A;
+        s_b2        <= s_b;
+        if (aux > b"00") then
+          exp_stg2 <= (OTHERS => aux(0));      -- aux = "01": all ones; aux = "10": all zeros
+        elsif (sin = '1' and unsigned(desloc) > (unsigned(exp_stg1) - unsigned'(b"00000001"))) then  -- underflow applies to left shifts only
+          exp_stg2 <= (OTHERS => '0');         -- left shift was capped by delocn: exponent field becomes zero
+        elsif (sin = '0') then                 -- right shift: exponent grows (also when exp_stg1 = 1, which the unguarded test used to zero)
+          exp_stg2 <= std_logic_vector(unsigned(exp_stg1) + unsigned(delocn));
+        else                                   -- left shift: exponent shrinks by the same distance
+          exp_stg2 <= std_logic_vector(unsigned(exp_stg1) - unsigned(delocn));
+        end if;
+        if (aux > b"00") then                  -- special cases flagged by aux carry a zero fraction
+          sum_C <= (OTHERS => '0');
+        elsif (sin = '0') then
+          sum_C <= std_logic_vector(unsigned(sum_AB2(25 downto 0)) srl to_integer(unsigned(delocn)));
+        else
+          sum_C <= std_logic_vector(unsigned(sum_AB2(25 downto 0)) sll to_integer(unsigned(delocn)));
+        end if;
+      end if;
+    end if;
+  end process;
+
+  rounded_sum <= std_logic_vector(unsigned( b"000" & sum_C(24 downto 0)) + unsigned'(x"0000001"));  -- add 1 at bit 0 of the normalized sum
+  -- exponent is incremented when the addition above carries into bit 25
+  exp_aux <= std_logic_vector(unsigned(exp_stg2) + unsigned'(b"00000001")) when rounded_sum(25) = '1' else exp_stg2;
+  -- fraction: zero when the carry happens at exponent 0xFE, bits 24..2 after a carry, bits 23..1 otherwise
+  sum_f <=  b"00000000000000000000000" when exp_stg2 = x"fe" and rounded_sum(25) = '1'
+            else rounded_sum(24 downto 2) when rounded_sum(25) = '1'
+           else rounded_sum(23 downto 1);
+  -- exponent: 1 when a zero exponent meets bit 24 set, 0xFF when the carry happens at 0xFE, exp_aux otherwise
+  exp_stg21 <= x"01" when exp_stg2 = x"00" and rounded_sum(24) = '1'
+            else x"ff" when exp_stg2 = x"fe" and rounded_sum(25) = '1'
+            else exp_aux;
+  -- data_check_sum is declared outside this section; presumably it handles the special operand types -- TODO confirm
+  finish : data_check_sum
+    port map (type_A3,type_B3,s_A2,s_b2,sign_f,exp_stg21,sum_f,exp_f,frac_f);
+
+  -- OUTPUT stage: capture the assembled result in prod and flag it with w_pronto
+  stg3_final: process(rel)
+  begin
+    if rising_edge(rel) then
+      if ( rst = '0' or (selc = '1' and  w_intra = '0' and vlid_stg2 = '0')) then  -- reset, or selc with nothing in flight and no new result
+        w_pronto <= '0';
+        prod   <= x"00000000";
+      elsif (selc = '0' and w_pronto = '1') then   -- result is ready and selc is not asserted: hold it
+        w_pronto <= '1';
+      elsif (vlid_stg2 = '1') then                 -- stage 2 holds a valid result: capture it
+        w_pronto <= '1';
+        prod   <= sign_f & exp_f & frac_f;
+      end if;
+    end if;
+  end process;
+
+  pronto <= w_pronto;                              -- "pronto" = ready
+  w_intra <= (wt_in or wt_stg0 or wt_stg1 or wt_stg2) and (not w_pronto) ;  -- some stage is busy and no result is ready yet
+  wt_out <= w_intra;
+
+end estrutural;
+-- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+
+-- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+-- FPU: decodes sel/wr/addr into operand and result strobes for the floating-point units
+-- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+library IEEE;
+use IEEE.std_logic_1164.all;
+use work.p_wires.all;
+
+entity FPU is
+  port(rst      : in    std_logic;
+       clk      : in    std_logic;
+       sel      : in    std_logic;     -- decoded below as active in '0'
+       rdy      : out   std_logic;     -- '0' while a result read must wait
+       wr       : in    std_logic;     -- '0' selects the operand strobes, '1' the result strobe
+       addr     : in    reg4;
+       data_inp : in    reg32;
+       data_out : out   reg32);
+end FPU;
+
+architecture estrutural of FPU is
+
+  component wait_states is
+    generic (NUM_WAIT_STATES :integer);
+    port(rst   : in  std_logic;
+         clk     : in  std_logic;
+         sel     : in  std_logic;         -- active in '0'
+         waiting : out std_logic);        -- active in '1'
+  end component wait_states;
+
+  component mult32float is port(
+    AB_in          : in  std_logic_vector;
+    rel,rst,wt_in  : in  std_logic ;
+    sela,selb,selc : in  std_logic ;
+    prod           : out std_logic_vector;
+    pronto,wt_out  : out std_logic);
+  end component mult32float;
+
+  component sum32float is port(
+    AB_in          : in  std_logic_vector;
+    rel,rst,wt_in  : in  std_logic ;
+    sela,selb,selc : in  std_logic ;
+    prod           : out std_logic_vector;
+    pronto,wt_out  : out std_logic);
+  end component sum32float;
+
+  --component div32float is port(
+  --AB_in          : in  std_logic_vector;
+  --rel,rst,wt_in  : in  std_logic ;
+  --sela,selb,selc : in  std_logic ;
+  --prod           : out std_logic_vector;
+  --pronto,wt_out  : out std_logic);
+  --end component div32float;
+
+  signal wt,wt0,pt0,selA_mul,selB_mul,selC_mul, wt_st0 : std_logic ;
+  signal    wt1,pt1,selA_sum,selB_sum,selC_sum : std_logic ;
+  signal    wt2,pt2,selA_div,selB_div,selC_div : std_logic ;
+  signal RES_MUL,RES_SUM,RES_DIV               : std_logic_vector(31 DOWNTO 0) ;
+
+begin
+
+  U_WAIT_ON_READS: component wait_states
+    generic map (1) port map (rst,clk,selC_mul,wt_st0);
+  -- NOTE(review): the component comment says sel is active in '0', but selC_mul is '1' when selected -- confirm polarity
+
+  U_Mult_float: mult32float
+    port map (data_inp,clk,rst,'0',selA_mul,selB_mul,selC_mul,RES_MUL,pt0,wt0);
+
+  -- U_Sum_float : sum32float
+  -- port map (data_inp,clk,rst,wt,selA_sum,selB_sum,selC_sum,RES_SUM,pt1,wt1);
+
+  -- U_Div_float : div32float
+  -- port map (data_inp,clk,rst,wt,selA_div,selB_div,selC_div,RES_DIV,pt2,wt2);
+
+
+  -- sel   wr  addr   (table follows the decode below; SW = store word, LW = load word)
+  --  0    0   0000   asserts selA (SW A) MUL
+  --  0    0   0001   asserts selB (SW B) MUL
+  --  0    1   0000   asserts selC (LW C) MUL
+  
+  --  0    0   0010   asserts selA (SW A) SUM
+  --  0    0   0011   asserts selB (SW B) SUM
+  --  0    1   0010   asserts selC (LW C) SUM
+  
+  --  0    0   0100   asserts selA (SW A) DIV  (this decode is commented out below)
+  --  0    0   0101   asserts selB (SW B) DIV
+  --  0    1   0100   asserts selC (LW C) DIV
+  
+  --  1    x   xxxx   no operation
+
+  selA_mul <= '1' when sel = '0' and addr = "0000" and wr = '0' else '0';
+  selB_mul <= '1' when sel = '0' and addr = "0001" and wr = '0' else '0';
+  selC_mul <= '1' when sel = '0' and addr = "0000" and wr = '1' else '0';
+
+  selA_sum <= '1' when sel = '0' and addr = "0010" and wr = '0' else '0';
+  selB_sum <= '1' when sel = '0' and addr = "0011" and wr = '0' else '0';
+  selC_sum <= '1' when sel = '0' and addr = "0010" and wr = '1' else '0';
+
+  --selA_div <= '1' when sel = '0' and addr = "0100" and wr = '0' else '0';
+  --selB_div <= '1' when sel = '0' and addr = "0101" and wr = '0' else '0';
+  --selC_div <= '1' when sel = '0' and addr = "0100" and wr = '1' else '0';
+
+  rdy <= not(wt_st0) and not((wt0 and selC_mul)); -- or (wt1 and selC_sum)); --or (wt2 and selC_div));
+
+  data_out <= RES_MUL when selC_mul = '1' else
+              RES_SUM when selC_sum = '1' else  -- NOTE(review): RES_SUM has no driver while U_Sum_float is commented out
+              (others => 'X');
+              --RES_DIV when selC_div = '1' else
+end estrutural;
+-- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+
+-- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+-- fake_FPU: stub with the FPU port names; rdy is always '1' and data_out is all 'X'
+-- ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+library IEEE;
+use IEEE.std_logic_1164.all;
+use work.p_wires.all;
+
+entity fake_FPU is
+  port(rst      : in    std_logic;     -- inputs are not read by the architecture below
+       clk      : in    std_logic;
+       sel      : in    std_logic;
+       rdy      : out   std_logic;
+       wr       : in    std_logic;
+       addr     : in    std_logic_vector;  -- unconstrained here; FPU declares addr as reg4
+       data_inp : in    reg32;
+       data_out : out   reg32);
+end fake_FPU;
+
+architecture estrutural of fake_FPU is
+begin
+  rdy <= '1';                          -- constant '1'
+  data_out <= (others => 'X');         -- no data is ever produced
+end estrutural;
diff --git a/cMIPS/vhdl/io.vhd b/cMIPS/vhdl/io.vhd
index 775410eaf62b7193ba1569f122fcee57ef4327f9..2a850f32bc448309665bedb699da201d890ab098 100644
--- a/cMIPS/vhdl/io.vhd
+++ b/cMIPS/vhdl/io.vhd
@@ -855,8 +855,8 @@ begin
         waiting    <= '0';
 
       when st_na =>
-        lcd_enable <= '0';              -- disable, stop waiting
-        lcd_read   <= '1';              --  hold inp data for 40ns
+        lcd_enable <= '0';              -- disable, still waiting
+        lcd_read   <= '1';
         waiting    <= '0';
 
       when st_nb =>
diff --git a/cMIPS/vhdl/packageWires.vhd b/cMIPS/vhdl/packageWires.vhd
index f037ec07978166bf9d2fd30d1d151eb515080a76..c5189b634010c2c83635f7b8d28c036fe44e9841 100644
--- a/cMIPS/vhdl/packageWires.vhd
+++ b/cMIPS/vhdl/packageWires.vhd
@@ -55,6 +55,7 @@ package p_WIRES is
   subtype reg19 is std_logic_vector(18 downto 0);
   subtype reg20 is std_logic_vector(19 downto 0);
   subtype reg21 is std_logic_vector(20 downto 0);
+  subtype reg23 is std_logic_vector(22 downto 0);
   subtype reg24 is std_logic_vector(23 downto 0);
   subtype reg28 is std_logic_vector(27 downto 0);
   subtype reg30 is std_logic_vector(29 downto 0);
@@ -165,6 +166,10 @@ package p_WIRES is
   end record;
 
   type t_rimm_mem is array (0 to 31) of t_rimm_type;
+
+  -- type for floating point numbers: 'good' number, infinity, NaN, zero
+  type FP_type is (fp_is_good, fp_is_inf, fp_is_NaN, fp_is_zero);
+
   
   function log2_ceil(n: natural) return natural;  
   function CONVERT_BOOLEAN(b: in boolean) return std_logic;
diff --git a/cMIPS/vhdl/tb_cMIPS.vhd b/cMIPS/vhdl/tb_cMIPS.vhd
index 93ff89519885a721aa51d719114c61f57e909681..f54e3cf03898c1ebca637f68803b7e24eab7f21e 100644
--- a/cMIPS/vhdl/tb_cMIPS.vhd
+++ b/cMIPS/vhdl/tb_cMIPS.vhd
@@ -139,6 +139,28 @@ architecture TB of tb_cMIPS is
           bit_rt  : out   std_logic_vector);-- communication speed - TB only
   end component simple_uart;
 
+  component FPU is
+    port (rst      : in   std_logic;
+          clk      : in   std_logic;
+          sel      : in   std_logic;
+          rdy      : out  std_logic;
+          wr       : in   std_logic;
+          addr     : in   std_logic_vector;
+          data_inp : in   std_logic_vector;
+          data_out : out  std_logic_vector);
+  end component FPU;
+
+  component fake_FPU is
+    port (rst      : in   std_logic;
+          clk      : in   std_logic;
+          sel      : in   std_logic;
+          rdy      : out  std_logic;
+          wr       : in   std_logic;
+          addr     : in   std_logic_vector;
+          data_inp : in   std_logic_vector;
+          data_out : out  std_logic_vector);
+  end component fake_FPU;
+
   component remota is
     generic(OUTPUT_FILE_NAME : string; INPUT_FILE_NAME : string);
     port(rst, clk  : in  std_logic;
@@ -478,7 +500,7 @@ begin  -- TB
   
   cpu_i_wait <= inst_wait;
   cpu_d_wait <= data_wait and io_wait;
-  io_wait    <= io_lcd_wait;  -- and io_fpu_wait;
+  io_wait    <= io_lcd_wait and io_fpu_wait;
 
   not_waiting <= (inst_wait and data_wait); --  and io_wait);
 
@@ -594,8 +616,10 @@ begin  -- TB
   U_uart_remota: remota generic map ("serial.out","serial.inp")
     port map (rst, clk, start_remota, uart_txd, uart_rxd, bit_rt);
 
-  -- U_FPU: FPU
-  -- port map (rst,clk, io_FPU_sel, io_FPU_wait, wr, d_addr, cpu_data);
+  -- U_FPU: fake_FPU
+  U_FPU: FPU
+    port map (rst,clk, io_FPU_sel,io_FPU_wait, wr, d_addr(5 downto 2),
+              cpu_data,fpu_d_out);
 
   -- U_sys_stats: sys_stats                -- CPU reads system counters
   --   port map (cpu_reset,clk, io_sstats_sel, wr, d_addr, sstats_d_out,
@@ -700,8 +724,8 @@ use IEEE.numeric_std.all;
 use work.p_wires.all;
 use work.p_memory.all;
 
-entity io_addr_decode is              -- CPU side triggers access
-  port (clk,rst     : in  std_logic;
+entity io_addr_decode is                -- CPU side triggers access
+  port (clk,rst     : in  std_logic;    -- clk separates back-to-back refs
         cpu_d_aVal  : in  std_logic;    -- CPU data addr valid (active=0)
         addr        : in  reg32;        -- CPU address
         dev_select  : out reg4;         -- select input to CPU
diff --git a/cMIPS/vhdl/units.vhd b/cMIPS/vhdl/units.vhd
index 73a16a7b1f6863eda0a729fa69e1268609450537..818573eb236d15206890712b6f69ea50c9064c12 100644
--- a/cMIPS/vhdl/units.vhd
+++ b/cMIPS/vhdl/units.vhd
@@ -668,8 +668,8 @@ begin
     (clk => clk, rst => rst, set => '1', D => cycle, Q => this);
 
   w_d <= this xor cycle;                -- active for ONE cycle only
-  
-  w <= not(w_d or sel);
+
+  w <= w_d and sel;
 
   waiting <= w and will_wait;