From 5bd4dd3964c798ae37f94d77a42d15b44546ff11 Mon Sep 17 00:00:00 2001
From: Armando Luiz Nicolini Delgado <nicolui@inf.ufpr.br>
Date: Mon, 21 Jun 2021 14:47:58 -0300
Subject: [PATCH] =?UTF-8?q?Novos=20arquivos=20para=20exerc=C3=ADcio.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 HP_EliteDesk-Lab3.txt | 117 ++++++++++++++++++++
 Makefile              |  44 ++++++++
 README.md             |   4 +
 matmult.c             |  89 +++++++++++++++
 matriz.c              | 246 ++++++++++++++++++++++++++++++++++++++++++
 matriz.h              |  35 ++++++
 perfctr               |  12 +++
 7 files changed, 547 insertions(+)
 create mode 100644 HP_EliteDesk-Lab3.txt
 create mode 100644 Makefile
 create mode 100644 matmult.c
 create mode 100644 matriz.c
 create mode 100644 matriz.h
 create mode 100755 perfctr

diff --git a/HP_EliteDesk-Lab3.txt b/HP_EliteDesk-Lab3.txt
new file mode 100644
index 0000000..cfc0149
--- /dev/null
+++ b/HP_EliteDesk-Lab3.txt
@@ -0,0 +1,117 @@
+--------------------------------------------------------------------------------
+CPU name:	Intel(R) Core(TM) i5-7500 CPU @ 3.40GHz
+CPU type:	Intel Coffeelake processor
+CPU clock:	3.41 GHz
+CPU family:	6
+CPU model:	158
+CPU short:	skylake
+CPU stepping:	9
+CPU features:
+--------------------------------------------------------------------------------
+    Group name	Description
+--------------------------------------------------------------------------------
+        ICACHE	Instruction cache miss rate/ratio
+    PORT_USAGE	Execution port utilization
+      FLOPS_SP	Single Precision MFLOP/s
+     TLB_INSTR	L1 Instruction TLB miss rate/ratio
+      TLB_DATA	L2 data TLB miss rate/ratio
+    UOPS_ISSUE	UOPs issueing
+   UOPS_RETIRE	UOPs retirement
+      FLOPS_DP	Double Precision MFLOP/s
+           TMA	Top down cycle allocation
+       L2CACHE	L2 cache miss rate/ratio
+CYCLE_ACTIVITY	Cycle Activities
+         CLOCK	Power and Energy consumption
+     FLOPS_AVX	Packed AVX MFLOP/s
+            L3	L3 cache bandwidth in MBytes/s
+   FALSE_SHARE	False sharing
+        DIVIDE	Divide unit information
+  CYCLE_STALLS	Cycle Activities (Stalls)
+     UOPS_EXEC	UOPs execution
+       L3CACHE	L3 cache miss rate/ratio
+            L2	L2 cache bandwidth in MBytes/s
+          DATA	Load to store ratio
+      RECOVERY	Recovery duration
+        BRANCH	Branch prediction miss rate/ratio
+          UOPS	UOPs execution info
+        ENERGY	Power and Energy consumption
+
+********************************************************************************
+Hardware Thread Topology
+********************************************************************************
+Sockets:		1
+Cores per socket:	4
+Threads per core:	1
+--------------------------------------------------------------------------------
+HWThread	Thread		Core		Socket		Available
+0		0		0		0		*
+1		0		1		0		*
+2		0		2		0		*
+3		0		3		0		*
+--------------------------------------------------------------------------------
+Socket 0:		( 0 1 2 3 )
+--------------------------------------------------------------------------------
+********************************************************************************
+Cache Topology
+********************************************************************************
+Level:			1
+Size:			32 kB
+Type:			Data cache
+Associativity:		8
+Number of sets:		64
+Cache line size:	64
+Cache type:		Non Inclusive
+Shared by threads:	1
+Cache groups:		( 0 ) ( 1 ) ( 2 ) ( 3 )
+--------------------------------------------------------------------------------
+Level:			2
+Size:			256 kB
+Type:			Unified cache
+Associativity:		4
+Number of sets:		1024
+Cache line size:	64
+Cache type:		Non Inclusive
+Shared by threads:	1
+Cache groups:		( 0 ) ( 1 ) ( 2 ) ( 3 )
+--------------------------------------------------------------------------------
+Level:			3
+Size:			6 MB
+Type:			Unified cache
+Associativity:		12
+Number of sets:		8192
+Cache line size:	64
+Cache type:		Inclusive
+Shared by threads:	4
+Cache groups:		( 0 1 2 3 )
+--------------------------------------------------------------------------------
+********************************************************************************
+NUMA Topology
+********************************************************************************
+NUMA domains:		1
+--------------------------------------------------------------------------------
+Domain:			0
+Processors:		( 0 1 2 3 )
+Distances:		10
+Free memory:		3774.68 MB
+Total memory:		7858 MB
+--------------------------------------------------------------------------------
+
+
+********************************************************************************
+Graphical Topology
+********************************************************************************
+Socket 0:
++---------------------------------------------+
+| +--------+ +--------+ +--------+ +--------+ |
+| |    0   | |    1   | |    2   | |    3   | |
+| +--------+ +--------+ +--------+ +--------+ |
+| +--------+ +--------+ +--------+ +--------+ |
+| |  32 kB | |  32 kB | |  32 kB | |  32 kB | |
+| +--------+ +--------+ +--------+ +--------+ |
+| +--------+ +--------+ +--------+ +--------+ |
+| | 256 kB | | 256 kB | | 256 kB | | 256 kB | |
+| +--------+ +--------+ +--------+ +--------+ |
+| +-----------------------------------------+ |
+| |                   6 MB                  | |
+| +-----------------------------------------+ |
++---------------------------------------------+
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..e2d1543
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,44 @@
+    PROG = matmult
+    MODULOS = matriz
+
+    CC = gcc -std=c11 -g
+    OBJS = $(addsuffix .o,$(MODULOS))
+
+    LIKWID = /home/soft/likwid
+    LIKWID_FLAGS = -I$(LIKWID)/include
+    LIKWID_LIBS = -L$(LIKWID)/lib
+
+    AVX_FLAGS = -march=native -mavx -O3 -falign-functions=32 -falign-loops=32 -fstrict-aliasing
+    AVX_LOG_FLAGS = -fopt-info-vec -fopt-info-vec-missed
+
+    CFLAGS = $(LIKWID_FLAGS)
+    LFLAGS = $(LIKWID_LIBS) -lm
+
+.PHONY: all clean limpa purge faxina distclean debug avx likwid
+
+%.o: %.c %.h
+	$(CC) $(CFLAGS) -c $<
+
+all: $(PROG)
+
+debug: CFLAGS += -DDEBUG
+
+avx: CFLAGS += $(AVX_FLAGS) $(AVX_LOG_FLAGS)
+avx likwid: CFLAGS += -DLIKWID_PERFMON
+avx likwid: LFLAGS += -llikwid
+
+likwid avx debug: $(PROG)
+
+$(PROG): $(PROG).o
+
+$(PROG): $(OBJS)
+	$(CC) $(CFLAGS) -o $@ $^ $(LFLAGS)
+
+clean:
+	@echo "Limpando ...."
+	@rm -f *~ *.bak *.tmp
+
+purge distclean: clean
+	@echo "Faxina ...."
+	@rm -f $(PROG) *.o core a.out
+	@rm -f *.png marker.out *.log
diff --git a/README.md b/README.md
index f418fbe..ebbf64e 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,5 @@
 O enunciado do exercício está <A HREF="https://moodle.c3sl.ufpr.br/mod/assign/view.php?id=24939">aqui</a>
+<BR>
+<BR>
+O arquivo <B>perfctr</B> é um <I>script</I> shell para facilitar o uso de <I>likwid-perfctr</I>.
+<BR><BR>
diff --git a/matmult.c b/matmult.c
new file mode 100644
index 0000000..91595cc
--- /dev/null
+++ b/matmult.c
@@ -0,0 +1,89 @@
+#include <stdio.h>
+#include <stdlib.h>    /* exit, malloc, calloc, etc. */
+#include <string.h>
+#include <getopt.h>    /* getopt */
+
+#include <likwid.h>
+
+#include "matriz.h"
+
+/**
+ * Prints an error message describing how to invoke the program and
+ * terminates the process with exit status 1.
+ */
+
+static void usage(char *progname)
+{
+  fprintf(stderr, "Forma de uso: %s [ -n <ordem> ] \n", progname);
+  exit(1);
+}
+
+
+/**
+ * Main program
+ * Usage: matmult [ -n <ordem> ]
+ * -n <ordem>: order of the square matrix and size of the vectors (must be > 0)
+ *
+ */
+
+int main (int argc, char *argv[])
+{
+  int c, n=DEF_SIZE;
+  MatPtr mPtr;
+  MatRow mRow;
+  Vetor vet, resPtr, resRow;
+
+  /* =============== COMMAND-LINE HANDLING =============== */
+
+  char *opts = "n:";
+  c = getopt (argc, argv, opts);
+
+  while ( c != -1 ) {
+    switch (c) {
+    case 'n':  n = atoi(optarg); if (n <= 0) usage(argv[0]); break;
+    default:   usage(argv[0]);
+    }
+
+    c = getopt (argc, argv, opts);
+  }
+
+  /* ================ END OF COMMAND-LINE HANDLING ========= */
+
+  resPtr = (double *) calloc (n, sizeof(double));  /* zeroed: mult*Vet accumulate with += */
+  resRow = (double *) calloc (n, sizeof(double));
+
+  srand(20191);
+
+  mPtr = geraMatPtr (n, n);
+  mRow = geraMatRow (n, n);
+  vet = geraVetor (n);
+  if (!resPtr || !resRow || !mPtr || !mRow || !vet) {
+    fprintf (stderr, "Erro: falha na alocacao de memoria\n");
+    exit (2);
+  }
+#ifdef DEBUG
+    prnMatPtr (mPtr, n, n);
+    prnMatRow (mRow, n, n);
+    prnVetor (vet, n);
+    printf ("=================================\n\n");
+#endif /* DEBUG */
+
+  multMatPtrVet (mPtr, vet, n, n, resPtr);
+
+  multMatRowVet (mRow, vet, n, n, resRow);
+
+#ifdef DEBUG
+    prnVetor (resPtr, n);
+    prnVetor (resRow, n);
+#endif /* DEBUG */
+
+  liberaMatPtr (mPtr, n);
+  liberaVetor ((void*)mRow);
+  liberaVetor ((void*)vet);
+
+  free(resRow);
+  free(resPtr);
+
+  return 0;
+}
+
diff --git a/matriz.c b/matriz.c
new file mode 100644
index 0000000..8f7508a
--- /dev/null
+++ b/matriz.c
@@ -0,0 +1,246 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+
+#include "matriz.h"
+
+/**
+ * Generates a pseudo-random value to be stored in a matrix
+ * @param i,j coordinates of the element being generated (0<=i,j<n)
+ * @return value for position i,j: rand()/RAND_MAX, scaled by 2*BASE when i==j
+ */
+static inline double generateRandomA( unsigned int i, unsigned int j)
+{
+  static double invRandMax = 1.0 / (double)RAND_MAX;
+  return ( (i==j) ? (double)(BASE<<1) : 1.0 )  * (double)rand() * invRandMax;
+}
+
+/**
+ * Generates a pseudo-random value to be stored in a vector
+ * @return generated value: rand()/RAND_MAX scaled by 4*BASE
+ *
+ */
+static inline double generateRandomB(void)
+{
+  static double invRandMax = 1.0 / (double)RAND_MAX;
+  return (double)(BASE<<2) * (double)rand() * invRandMax;
+}
+
+
+/* ----------- FUNCTIONS ---------------- */
+
+/**
+ *  geraMatPtr: builds a matrix stored as an array of pointers to its rows
+ *
+ *  @param m  number of matrix rows
+ *  @param n  number of matrix columns
+ *  @return  pointer to the new matrix, or NULL if any allocation fails
+ *
+ */
+
+MatPtr geraMatPtr (int m, int n)
+{
+  MatPtr matriz = (double **) malloc(m * sizeof(double *));
+
+  if (matriz) {
+    for (int i=0; i < m; ++i) {
+      if ((matriz[i] = (double *) malloc(n * sizeof(double))) == NULL) {
+        liberaMatPtr (matriz, i);  /* release rows 0..i-1 and the pointer array */
+        return NULL;
+      }
+      for (int j=0; j < n; ++j)
+        matriz[i][j] = generateRandomA(i, j);
+    }
+  }
+
+  return (matriz);
+}
+
+/**
+ *  \brief: releases a matrix allocated as an array of pointers to its rows
+ *
+ *  @param  matriz pointer to the matrix (may be NULL); m is the number of rows to free
+ *
+ */
+void liberaMatPtr (MatPtr matriz, int m)
+{
+  if (matriz) {
+    for (int i=0; i < m; ++i) {
+      free (matriz[i]);
+    }
+    free (matriz);
+  }
+}
+
+
+
+/**
+ *  geraMatRow: builds a matrix stored as a single row-major ('row-oriented') array
+ *
+ *  @param m  number of matrix rows
+ *  @param n  number of matrix columns
+ *  @return  pointer to the new matrix, or NULL if the allocation fails
+ *
+ */
+
+MatRow geraMatRow (int m, int n)
+{
+  MatRow matriz = (double *) malloc((size_t)m * n * sizeof(double));
+
+  if (matriz) {
+    for (int i=0; i < m; ++i) {
+      for (int j=0; j < n; ++j)
+        matriz[i*n + j] = generateRandomA(i, j);  /* row stride is n (columns) */
+    }
+  }
+
+  return (matriz);
+}
+
+
+/**
+ *  geraVetor: builds a vector with 'n' elements
+ *
+ *  @param n  number of vector elements
+ *  @return  pointer to the new vector, or NULL if the allocation fails
+ *
+ */
+
+Vetor geraVetor (int n)
+{
+  Vetor vetor = (double *) malloc(n*sizeof(double));
+
+  if (vetor)
+    for (int i=0; i < n; ++i)
+      vetor[i] = generateRandomB();
+
+  return (vetor);
+}
+
+/**
+ *  \brief: releases a vector
+ *
+ *  @param  vet pointer to the vector
+ *
+ */
+void liberaVetor (void *vet)
+{
+  free(vet);
+}
+
+
+/**
+ *  multMatPtrVet: multiplies an 'm x n' matrix (array of row pointers) by a
+ *  vector of 'n' elements, accumulating each dot product into res[i]
+ *  @param mat 'm x n' matrix
+ *  @param v   vector with 'n' elements
+ *  @param m   number of matrix rows
+ *  @param n   number of matrix columns
+ *  @param res result vector of 'm' elements. Must be allocated by the caller
+ *             with all elements set to 0.0; nothing is done when it is NULL
+ *
+ */
+
+void multMatPtrVet (MatPtr mat, Vetor v, int m, int n, Vetor res)
+{
+
+  /* Perform the multiplication */
+  if (res) {
+    for (int i=0; i < m; ++i)
+      for (int j=0; j < n; ++j)
+        res[i] += mat[i][j] * v[j];
+  }
+}
+
+/**
+ *  prnMatPtr: prints the contents of a matrix to stdout
+ *  @param mat matrix
+ *  @param m number of matrix rows
+ *  @param n number of matrix columns
+ *
+ */
+
+void prnMatPtr (MatPtr mat, int m, int n)
+{
+  for (int i=0; i < m; ++i) {
+    for (int j=0; j < n; ++j)
+      printf(DBL_FIELD, mat[i][j]);
+    printf("\n");
+  }
+  printf(SEP_RES);
+}
+
+/**
+ *  multMatRowVet: multiplies an 'm x n' row-major matrix by a vector of 'n'
+ *  elements, accumulating each dot product into res[i]
+ *  @param mat 'm x n' matrix stored as a single array
+ *  @param v   vector with 'n' elements
+ *  @param m   number of matrix rows
+ *  @param n   number of matrix columns
+ *  @param res result vector of 'm' elements. Must be allocated by the caller
+ *             with all elements set to 0.0; nothing is done when it is NULL
+ *
+ */
+
+void multMatRowVet (MatRow mat, Vetor v, int m, int n, Vetor res)
+{
+
+  /* Perform the multiplication */
+  if (res) {
+    for (int i=0; i < m; ++i)
+      for (int j=0; j < n; ++j)
+        res[i] += mat[n*i + j] * v[j];  /* row stride is n (columns) */
+  }
+}
+
+/**
+ *  prnMatRow: prints the contents of a row-major matrix to stdout
+ *  @param mat matrix
+ *  @param m number of matrix rows
+ *  @param n number of matrix columns
+ *
+ */
+
+void prnMatRow (MatRow mat, int m, int n)
+{
+  for (int i=0; i < m; ++i) {
+    for (int j=0; j < n; ++j)
+      printf(DBL_FIELD, mat[n*i + j]);  /* row stride is n (columns) */
+    printf("\n");
+  }
+  printf(SEP_RES);
+}
+
+/**
+ *  prodEscalar: computes the dot product of two vectors
+ *  @param v1 vector with 'n' elements
+ *  @param v2 vector with 'n' elements
+ *  @param n number of elements in each vector
+ *  @return value of the dot product
+ */
+
+double prodEscalar (Vetor v1, Vetor v2, int n)
+{
+  double prod = 0.0;
+
+  for (int i=0; i < n; ++i)
+    prod += v1[i]*v2[i];
+
+  return prod;
+}
+
+
+/**
+ *  prnVetor: prints the contents of a vector to stdout
+ *  @param vet vector with 'n' elements
+ *  @param n number of vector elements
+ *
+ */
+
+void prnVetor (Vetor vet, int n)
+{
+  for (int i=0; i < n; ++i)
+    printf(DBL_FIELD, vet[i]);
+  printf(SEP_RES);
+}
+
diff --git a/matriz.h b/matriz.h
new file mode 100644
index 0000000..11a2025
--- /dev/null
+++ b/matriz.h
@@ -0,0 +1,35 @@
+#ifndef MATRIZ_H
+#define MATRIZ_H
+/* Constants */
+#define DBL_FIELD "%12.7lg"
+#define SEP_RES "\n\n\n"
+
+#define DEF_SIZE 128
+#define BASE 32
+
+#define ABS(num)  ((num) < 0.0 ? -(num) : (num))
+
+/* Matrix and vector representations */
+
+typedef double ** MatPtr;
+typedef double * MatRow;
+typedef double * Vetor;
+
+/* ----------- FUNCTIONS ---------------- */
+
+MatPtr geraMatPtr (int m, int n);
+MatRow geraMatRow (int m, int n);
+Vetor geraVetor (int n);
+
+void liberaMatPtr (MatPtr mPtr, int n);
+void liberaVetor (void *vet);
+
+void multMatPtrVet (MatPtr mat, Vetor v, int m, int n, Vetor res);
+void prnMatPtr (MatPtr mat, int m, int n);
+
+void multMatRowVet (MatRow mat, Vetor v, int m, int n, Vetor res);
+void prnMatRow (MatRow mat, int m, int n);
+
+double prodEscalar (Vetor v1, Vetor v2, int n);
+void prnVetor (Vetor vet, int n);
+#endif /* MATRIZ_H */
diff --git a/perfctr b/perfctr
new file mode 100755
index 0000000..24db0a1
--- /dev/null
+++ b/perfctr
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+LIKWID_CMD="likwid-perfctr -C $1 -g $2 -m"
+shift 2
+
+${LIKWID_CMD} "$@"
+
+# To list the available performance-counter groups:
+#      likwid-perfctr -a
+
+# To show the processor topology:
+#      likwid-topology -c -g
-- 
GitLab