/*
 * Accumulate a matrix product into C:  C += A * B.
 *
 * All matrices are addressed row-major through their leading dimension,
 * i.e. element (r, c) of X lives at X[r * ldx + c].
 *
 * Dimension roles (taken from the loop bounds of this routine):
 *   A is N x M,  B is M x P,  C is N x P.
 * NOTE(review): the N-rows / M-inner assignment mirrors the original loop
 * bounds; confirm it matches what callers expect.
 *
 * Parameters:
 *   M    inner dimension (columns of A, rows of B)
 *   N    number of rows of A and C
 *   P    number of columns of B and C
 *   A    left operand, read only
 *   lda  distance, in elements, between consecutive rows of A
 *   B    right operand, read only
 *   ldb  distance, in elements, between consecutive rows of B
 *   C    output; the product is ADDED to its current contents, so the
 *        caller must initialise it (e.g. to zero) beforehand
 *   ldc  distance, in elements, between consecutive rows of C
 */
void matrixmultjulio(const int M, const int N, const int P,
                     const double* A, const int lda,
                     const double* B, const int ldb,
                     double* C, const int ldc ) {

    // i -> row of C and of A
    // j -> column of A and row of B
    // k -> column of C and of B
    //
    // Each outer iteration writes only row i of C, so the rows can be
    // handed to different threads without synchronisation. The loop
    // indices are declared inside the loops and are therefore private.
    #pragma omp parallel for
    for (int i = 0; i < N; i++) {
        // Row offsets are computed in long long so that i * ld cannot
        // overflow int for large matrices.
        const double *a_row = A + (long long)i * lda;
        double *c_row = C + (long long)i * ldc;

        for (int j = 0; j < M; j++) {
            // A(i, j) is invariant in the k loop; load it once.
            const double a_ij = a_row[j];
            const double *b_row = B + (long long)j * ldb;

            // k innermost keeps the accesses to B and C contiguous.
            for (int k = 0; k < P; k++) {
                c_row[k] += a_ij * b_row[k];
            }
        }
    }
}