diff --git a/Makefile b/Makefile index 04f1bf242510518b2ddd31e8c6ed84722394d66c..ca093cc9255b8db30b39637a4f7f1242305416ab 100644 --- a/Makefile +++ b/Makefile @@ -1,24 +1,33 @@ - # Diretorio base onde estarão os diretórios de biblioteca - PREFIX = ./ + FONTES=$(wildcard *.c) + OBJECTS=$(FONTES:.c=.o) - # Arquivo final + # Arquivo final FILE = pdeSolver CC = gcc - AR = ar -rcu - INSTALL = install - #OPTIMIZATION = -O2 - OPTIMIZATION = -O3 -mavx -march=native -.PHONY: clean install all + #FLAGS = -DLIKWID_PERFMON -O3 -mavx -march=native -lm + # Sem Likwid + #FLAGS = -O3 -lm -march=native + + # com Likwid + FLAGS = -DLIKWID_PERFMON -O3 -lm -mavx -march=native + + INCLUDE = -I/home/soft/likwid/include + LIKWID = -llikwid -L/usr/lib -L/home/soft/likwid/lib -DLIKWID_PERFMON + +.PHONY: all clean %.o: %.c - $(CC) -c $(CFLAGS) $< + $(CC) -c $< -o $@ -I/home/soft/likwid/include $(FLAGS) +# In the rule above, $< is the .c source and $@ is the .o target; '-o $<' would name the source file as the output. +all: pdeSolver -all: install $(FILE).o +pdeSolver: $(OBJECTS) + $(CC) -o $@ $^ $(LIKWID) $(FLAGS) -$(FILE).o: $(FILE).c - $(CC) -static -o $(PREFIX)$(FILE) $(OPTIMIZATION) $(FILE).c -lm +%.o: %.c + $(CC) -c $< -o $@ $(INCLUDE) $(FLAGS) clean: @rm -f *% *.bak *~ *.o $(FILE) core *.swp diff --git a/TestesVersao1 b/TestesVersao1 new file mode 100644 index 0000000000000000000000000000000000000000..ea12f1261054304b8213d5e49aa6c9a2631fab08 --- /dev/null +++ b/TestesVersao1 @@ -0,0 +1,289 @@ +10 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.34 -hy 0.34 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.34 -hy 0.34 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.34 -hy 0.34 -i 20 -o graph + +Result: +SOR: + Time: 0.000297 + Memory Bandwidth: 1.619730e+04 + Data Cache Miss Ratio: 1.719345e-02 + DP MFlOP/s: 3.279365e+02 +Residuo: + Time: 0.000295 + Memory Bandwidth: 1.779971e+04 + Data Cache Miss Ratio: 1.687931e-02 + DP MFlOP/s: 3.416050e+02 + +++++++++++++++++++++++++++++++++++++++++++++++++++++ +100 Pontos +likwid-perfctr -f -C 1 -g MEM -m 
./pdeSolver -hx 0.0317 -hy 0.0317 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.0317 -hy 0.0317 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.0317 -hy 0.0317 -i 20 -o graph + +Result: +SOR: + Time:0.002310 + Memory Bandwidth: 3.829102e+02 + Data Cache Miss Ratio: 1.194167e-03 + DP MFlOP/s:8.087655e+02 +Residuo: + Time:0.001934 + Memory Bandwidth: 3.768171e+02 + Data Cache Miss Ratio: 1.268701e-03 + DP MFlOP/s:9.656009e+02 + +++++++++++++++++++++++++++++++++++++++++++++++++++++ +127 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.025 -hy 0.025 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.025 -hy 0.025 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.025 -hy 0.025 -i 20 -o graph + +Result: +SOR: + Time:0.003715 + Memory Bandwidth:3.978275e+02 + Data Cache Miss Ratio:1.093879e-03 + DP MFlOP/s:8.948557e+02 +Residuo: + Time:0.003342 + Memory Bandwidth:4.362378e+02 + Data Cache Miss Ratio:1.098131e-03 + DP MFlOP/s:9.917492e+02 + +++++++++++++++++++++++++++++++++++++++++++++++++++++ +128 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.0248 -hy 0.0248 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.0248 -hy 0.0248 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.0248 -hy 0.0248 -i 20 -o graph + +Result: +SOR: + Time:0.003616 + Memory Bandwidth:4.212521e+02 + Data Cache Miss Ratio:9.156894e-04 + DP MFlOP/s:8.418422e+02 +Residuo: + Time:0.003246 + Memory Bandwidth:4.360278e+02 + Data Cache Miss Ratio:1.002241e-03 + DP MFlOP/s:9.337266e+02 + +++++++++++++++++++++++++++++++++++++++++++++++++++++ +200 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.0158 -hy 0.0158 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.0158 -hy 0.0158 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.0158 -hy 0.0158 -i 20 -o graph + +Result: +SOR: + Time:0.008326 + Memory 
Bandwidth:4.108838e+02 + Data Cache Miss Ratio:7.725517e-04 + DP MFlOP/s:8.882177e+02 +Residuo: + Time:0.007583 + Memory Bandwidth:3.810756e+02 + Data Cache Miss Ratio:7.828366e-04 + DP MFlOP/s:9.707222e+02 + +++++++++++++++++++++++++++++++++++++++++++++++++++++ +255 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.01235 -hy 0.01235 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.01235 -hy 0.01235 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.01235 -hy 0.01235 -i 20 -o graph + +Result: +SOR: + Time:0.013354 + Memory Bandwidth:3.862520e+02 + Data Cache Miss Ratio:1.253632e-03 + DP MFlOP/s:8.984721e+02 +Residuo: + Time:0.012168 + Memory Bandwidth:3.874120e+02 + Data Cache Miss Ratio:1.220100e-03 + DP MFlOP/s:9.810839e+02 + +++++++++++++++++++++++++++++++++++++++++++++++++++++ +256 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.0123 -hy 0.0123 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.0123 -hy 0.0123 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.0123 -hy 0.0123 -i 20 -o graph + +Result: +SOR: + Time:0.013252 + Memory Bandwidth:3.873149e+02 + Data Cache Miss Ratio:1.332275e-03 + DP MFlOP/s:9.176340e+02 +Residuo: + Time:0.012253 + Memory Bandwidth:3.796060e+02 + Data Cache Miss Ratio:1.245257e-03 + DP MFlOP/s:9.873832e+02 + +++++++++++++++++++++++++++++++++++++++++++++++++++++ +500 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.0063 -hy 0.0063 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.0063 -hy 0.0063 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.0063 -hy 0.0063 -i 20 -o graph + +Result: +SOR: + Time:0.049730 + Memory Bandwidth:4.638628e+02 + Data Cache Miss Ratio:1.155280e-03 + DP MFlOP/s:9.275850e+02 +Residuo: + Time:0.046418 + Memory Bandwidth:4.763581e+02 + Data Cache Miss Ratio:1.140699e-03 + DP MFlOP/s:9.883993e+02 + +++++++++++++++++++++++++++++++++++++++++++++++++++++ +511 Pontos 
+likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.00616 -hy 0.00616 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.00616 -hy 0.00616 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.00616 -hy 0.00616 -i 20 -o graph + +Result: +SOR: + Time:0.051865 + Memory Bandwidth:5.385358e+02 + Data Cache Miss Ratio:1.986409e-03 + DP MFlOP/s:9.292904e+02 +Residuo: + Time:0.048035 + Memory Bandwidth:3.882145e+02 + Data Cache Miss Ratio:1.932972e-03 + DP MFlOP/s:9.980287e+02 + +++++++++++++++++++++++++++++++++++++++++++++++++++++ +512 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.00615 -hy 0.00615 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.00615 -hy 0.00615 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.00615 -hy 0.00615 -i 20 -o graph + +Result: +SOR: + Time:0.053218 + Memory Bandwidth:5.314457e+02 + Data Cache Miss Ratio:3.108666e-03 + DP MFlOP/s:9.099833e+02 +Residuo: + Time:0.049579 + Memory Bandwidth:4.019147e+02 + Data Cache Miss Ratio:3.231441e-03 + DP MFlOP/s:9.715378e+02 + +++++++++++++++++++++++++++++++++++++++++++++++++++++ +1000 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.003145 -hy 0.003145 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.003145 -hy 0.003145 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.003145 -hy 0.003145 -i 20 -o graph + +Result: +SOR: + Time:0.195197 + Memory Bandwidth:4.412162e+02 + Data Cache Miss Ratio:1.953739e-03 + DP MFlOP/s:9.427951e+02 +Residuo: + Time:0.180556 + Memory Bandwidth:4.341473e+02 + Data Cache Miss Ratio:1.967067e-03 + DP MFlOP/s:1.013719e+03 + +++++++++++++++++++++++++++++++++++++++++++++++++++++ +1023 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.003073 -hy 0.003073 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.003073 -hy 0.003073 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.003073 -hy 0.003073 
-i 20 -o graph + +Result: +SOR: + Time:0.229308 + Memory Bandwidth:4.315215e+02 + Data Cache Miss Ratio:2.595456e-03 + DP MFlOP/s:8.391381e+02 +Residuo: + Time:0.217192 + Memory Bandwidth:4.462077e+02 + Data Cache Miss Ratio:2.582323e-03 + DP MFlOP/s:8.811371e+02 + +++++++++++++++++++++++++++++++++++++++++++++++++++++ +1024 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.00307 -hy 0.00307 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.00307 -hy 0.00307 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.00307 -hy 0.00307 -i 20 -o graph + +Result: +SOR: + Time:0.205620 + Memory Bandwidth:5.651425e+02 + Data Cache Miss Ratio:9.227765e-04 + DP MFlOP/s:9.388747e+02 +Residuo: + Time:0.190238 + Memory Bandwidth:6.757122e+02 + Data Cache Miss Ratio:2.553477e-03 + DP MFlOP/s:1.009309e+03 + +++++++++++++++++++++++++++++++++++++++++++++++++++++ +2000 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.0015715 -hy 0.0015715 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.0015715 -hy 0.0015715 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.0015715 -hy 0.0015715 -i 20 -o graph + +Result: +SOR: + Time:0.772440 + Memory Bandwidth:4.496510e+02 + Data Cache Miss Ratio:1.885395e-03 + DP MFlOP/s:9.524161e+02 +Residuo: + Time:0.716187 + Memory Bandwidth:4.366796e+02 + Data Cache Miss Ratio:1.921168e-03 + DP MFlOP/s:1.021636e+03 + +++++++++++++++++++++++++++++++++++++++++++++++++++++ +2047 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.0015355 -hy 0.0015355 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.0015355 -hy 0.0015355 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.0015355 -hy 0.0015355 -i 20 -o graph + +Result: +SOR: + Time:0.809721 + Memory Bandwidth:4.516483e+02 + Data Cache Miss Ratio:2.487427e-03 + DP MFlOP/s:9.510150e+02 +Residuo: + Time:0.749794 + Memory Bandwidth:4.481116e+02 + Data Cache Miss 
Ratio:2.480431e-03 + DP MFlOP/s:1.022546e+03 + +++++++++++++++++++++++++++++++++++++++++++++++++++++ +2048 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.001535 -hy 0.001535 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.001535 -hy 0.001535 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.001535 -hy 0.001535 -i 20 -o graph + +Result: +SOR: + Time:0.813544 + Memory Bandwidth:4.636085e+02 + Data Cache Miss Ratio:2.634287e-03 + DP MFlOP/s:9.492556e+02 +Residuo: + Time:0.752515 + Memory Bandwidth:4.552954e+02 + Data Cache Miss Ratio:2.668889e-03 + DP MFlOP/s:1.018746e+03 + +++++++++++++++++++++++++++++++++++++++++++++++++++++ +*/ \ No newline at end of file diff --git a/TestesVersao2 b/TestesVersao2 new file mode 100644 index 0000000000000000000000000000000000000000..f5e729f31904ff6de0e8b3c2a5108c94cd09ea61 --- /dev/null +++ b/TestesVersao2 @@ -0,0 +1,345 @@ +10 Pontos +========= +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.34 -hy 0.34 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.34 -hy 0.34 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.34 -hy 0.34 -i 20 -o graph + +Result: +SOR + Time: 0.000140 + Memory Bandwidth: 1.007957e+04 + Data Cache Miss Ratio: 9.412551e-02 + DP MFLOP/s: 1.447379e+02 + +Residue + Time: 0.000132 + Memory Bandwidth: 2.295315e+04 + Data Cache Miss Ratio: 9.167250e-02 + DP MFLOP/s: 2.008486e+02 + + + +100 Pontos +========== +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.0317 -hy 0.0317 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.0317 -hy 0.0317 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.0317 -hy 0.0317 -i 20 -o graph + +Result: +SOR + Time: 0.000202 + Memory Bandwidth: 1.240943e+04 + Data Cache Miss Ratio: 2.432181e-02 + DP MFLOP/s: 3.411563e+02 + +Residue + Time: 0.000097 + Memory Bandwidth: 2.320394e+04 + Data Cache Miss Ratio: 4.118495e-02 + DP MFLOP/s: 9.004255e+02 + + + 
+127 Pontos +========== +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.025 -hy 0.025 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.025 -hy 0.025 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.025 -hy 0.025 -i 20 -o graph + + +Result: +SOR + Time: 0.000177 + Memory Bandwidth: 4.497731e+02 + Data Cache Miss Ratio: 2.370301e-02 + DP MFLOP/s: 3.220837e+02 + +Residue + Time: 0.000078 + Memory Bandwidth: 7.438936e+02 + Data Cache Miss Ratio: 4.031123e-02 + DP MFLOP/s: 8.503135e+02 + + + + +128 Pontos +========== +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.0248 -hy 0.0248 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.0248 -hy 0.0248 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.0248 -hy 0.0248 -i 20 -o graph + +Result: +SOR + Time: 0.000194 + Memory Bandwidth: 9.235842e+02 + Data Cache Miss Ratio: 2.360449e-02 + DP MFLOP/s: 5.199345e+02 + +Residue + Time: 0.000077 + Memory Bandwidth: 1.617785e+03 + Data Cache Miss Ratio: 4.227584e-02 + DP MFLOP/s: 1.377613e+03 + + + +200 Pontos +========== +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.0158 -hy 0.0158 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.0158 -hy 0.0158 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.0158 -hy 0.0158 -i 20 -o graph + +Result: +SOR + Time: 0.000355 + Memory Bandwidth: 7.799725e+02 + Data Cache Miss Ratio: 2.346966e-02 + DP MFLOP/s: 5.105652e+02 + +Residue + Time: 0.000114 + Memory Bandwidth: 1.084110e+03 + Data Cache Miss Ratio: 3.915971e-02 + DP MFLOP/s: 1.335525e+03 + + + +255 Pontos +========== +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.01235 -hy 0.01235 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.01235 -hy 0.01235 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.01235 -hy 0.01235 -i 20 -o graph + +Result: +SOR + Time: 0.000566 + Memory Bandwidth: 6.773897e+03 + Data Cache Miss 
Ratio: 3.048646e-02 + DP MFLOP/s: 4.984077e+02 + +Residue + Time: 0.000159 + Memory Bandwidth: 7.940529e+03 + Data Cache Miss Ratio: 4.035269e-02 + DP MFLOP/s: 1.391218e+03 + + + +256 Pontos +========== +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.0123 -hy 0.0123 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.0123 -hy 0.0123 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.0123 -hy 0.0123 -i 20 -o graph + +Result: +SOR + Time: 0.000568 + Memory Bandwidth: 6.212922e+03 + Data Cache Miss Ratio: 2.831647e-02 + DP MFLOP/s: 5.008122e+02 + +Residue + Time: 0.000157 + Memory Bandwidth: 7.468366e+03 + Data Cache Miss Ratio: 4.404526e-02 + DP MFLOP/s: 1.379471e+03 + + + +500 Pontos +========== +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.0063 -hy 0.0063 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.0063 -hy 0.0063 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.0063 -hy 0.0063 -i 20 -o graph + +Result: +SOR + Time: 0.001982 + Memory Bandwidth: 5.060868e+02 + Data Cache Miss Ratio: 2.411791e-02 + DP MFLOP/s: 5.190291e+02 + +Residue + Time: 0.000476 + Memory Bandwidth: 4.136519e+02 + Data Cache Miss Ratio: 4.232655e-02 + DP MFLOP/s: 1.417728e+03 + + + + +511 Pontos +========== +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.00616 -hy 0.00616 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.00616 -hy 0.00616 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.00616 -hy 0.00616 -i 20 -o graph + +Result: +SOR + Time: 0.003423 + Memory Bandwidth: 3.910980e+02 + Data Cache Miss Ratio: 1.249207e-01 + DP MFLOP/s: 3.023504e+02 + +Residue + Time: 0.000574 + Memory Bandwidth: 3.890882e+02 + Data Cache Miss Ratio: 7.374434e-02 + DP MFLOP/s: 1.148963e+03 + + + + +512 Pontos +========== +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.00615 -hy 0.00615 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.00615 -hy 0.00615 
-i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.00615 -hy 0.00615 -i 20 -o graph + +Result: +SOR + Time: 0.004417 + Memory Bandwidth: 3.975009e+02 + Data Cache Miss Ratio: 2.092028e-01 + DP MFLOP/s: 2.367900e+02 + +Residue + Time: 0.000598 + Memory Bandwidth: 3.981432e+02 + Data Cache Miss Ratio: 8.294147e-02 + DP MFLOP/s: 1.144478e+03 + + + + +1000 Pontos +=========== +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.003145 -hy 0.003145 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.003145 -hy 0.003145 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.003145 -hy 0.003145 -i 20 -o graph + +Result: +SOR + Time: 0.008146 + Memory Bandwidth: 3.334609e+03 + Data Cache Miss Ratio: 2.950550e-02 + DP MFLOP/s: 4.909609e+02 + +Residue + Time: 0.003774 + Memory Bandwidth: 6.536244e+03 + Data Cache Miss Ratio: 9.187801e-02 + DP MFLOP/s: 6.335770e+02 + + + + +1023 Pontos +=========== +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.003073 -hy 0.003073 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.003073 -hy 0.003073 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.003073 -hy 0.003073 -i 20 -o graph + +Result: +SOR + Time: 0.013825 + Memory Bandwidth: 2.254284e+03 + Data Cache Miss Ratio: 1.237276e-01 + DP MFLOP/s: 3.046950e+02 + +Residue + Time: 0.003997 + Memory Bandwidth: 6.419650e+03 + Data Cache Miss Ratio: 9.549587e-02 + DP MFLOP/s: 6.204512e+02 + + + + +1024 Pontos +=========== +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.00307 -hy 0.00307 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.00307 -hy 0.00307 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.00307 -hy 0.00307 -i 20 -o graph + +Result: +SOR + Time: 0.017605 + Memory Bandwidth: 1.839885e+03 + Data Cache Miss Ratio: 1.869319e-01 + DP MFLOP/s: 2.385549e+02 + +Residue + Time: 0.004082 + Memory Bandwidth: 6.200531e+03 + Data Cache Miss Ratio: 
9.727541e-02 + DP MFLOP/s: 6.099872e+02 + + + + +2000 Pontos +=========== +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.0015715 -hy 0.0015715 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.0015715 -hy 0.0015715 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.0015715 -hy 0.0015715 -i 20 -o graph + +Result: +SOR + Time: 0.032355 + Memory Bandwidth: 3.748434e+03 + Data Cache Miss Ratio: 2.925384e-02 + DP MFLOP/s: 4.933160e+02 + +Residue + Time: 0.011399 + Memory Bandwidth: 6.716217e+03 + Data Cache Miss Ratio: 9.262099e-02 + DP MFLOP/s: 8.375121e+02 + + + + +2047 Pontos +=========== +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.0015355 -hy 0.0015355 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.0015355 -hy 0.0015355 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.0015355 -hy 0.0015355 -i 20 -o graph + +Result: +SOR + Time: 0.055417 + Memory Bandwidth: 2.461350e+03 + Data Cache Miss Ratio: 1.241280e-01 + DP MFLOP/s: 3.025465e+02 + +Residue + Time: 0.012243 + Memory Bandwidth: 6.990283e+03 + Data Cache Miss Ratio: 9.543078e-02 + DP MFLOP/s: 8.052487e+02 + + + + +2048 Pontos +=========== +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.001535 -hy 0.001535 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.001535 -hy 0.001535 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.001535 -hy 0.001535 -i 20 -o graph + +Result: +SOR + Time: 0.069957 + Memory Bandwidth: 1.995682e+03 + Data Cache Miss Ratio: 1.869591e-01 + DP MFLOP/s: 2.398127e+02 + +Residue + Time: 0.012756 + Memory Bandwidth: 6.563676e+03 + Data Cache Miss Ratio: 9.750622e-02 + DP MFLOP/s: 7.734637e+02 + + diff --git a/borderTest.c b/borderTest.c deleted file mode 100644 index 6feb19c0401d816c0917b72d451b5e44726f4a74..0000000000000000000000000000000000000000 --- a/borderTest.c +++ /dev/null @@ -1,67 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include <math.h> - 
-#define N 100000 - -double timestamp(void) { - struct timeval tp; - gettimeofday(&tp, NULL); - return((double)(tp.tv_sec + tp.tv_usec/1000000.0)); -} - -int main() { - int i, nx, ny, count, alpha; - double beta, gama, sigma, hx, *x, used, begin; - - nx = 100; - ny = 100; - x = malloc(nx * ny * sizeof(double)); - hx = 0.01; - - used = 0.0f; - - // Versão anterior, sem armazenar cálculos repetidos em variáveis (exceto sigma) - - for(count=0; count<N; ++count) { - begin = timestamp(); - - for(i = nx; i < nx * ny - nx; ++i) { - x[i] = 0.0f; - } - - for(i=0; i<nx; ++i) { - x[i] = sin(2 * M_PI * (M_PI - (i * hx))) * sigma; - x[nx*ny-nx+i] = sin(2 * M_PI * (i * hx)) * sigma; - } - - used += timestamp() - begin; - } - - // Versão atual, armazenando. -/* - for(count=0; count<N; ++count) { - begin = timestamp(); - - sigma = sinh(M_PI * M_PI); - alpha = nx * ny - nx; - beta = 2 * M_PI * hx; - gama = 2 * M_PI * M_PI; - - for(i = nx; i < alpha; ++i) { - x[i] = 0.0f; - } - - for(i=0; i<nx; ++i) { - x[i] = sin(gama - (i * hx)) * sigma; - x[alpha+i] = sin(beta * i) * sigma; - } - - used += timestamp() - begin; - } -*/ - // Deixe uma das duas versões comentadas e teste a outra. 
- printf("Time used: %.15lf\n", used/N); - - return 1; -} diff --git a/cacheres1.dat b/cacheres1.dat new file mode 100644 index 0000000000000000000000000000000000000000..bef8d72f61b6f682271a2fa9dcee840b18b6f8a3 --- /dev/null +++ b/cacheres1.dat @@ -0,0 +1,16 @@ +10 1.687931e-02 +100 1.268701e-03 +127 1.098131e-03 +128 1.002241e-03 +200 7.828366e-04 +255 1.220100e-03 +256 1.245257e-03 +500 1.140699e-03 +511 1.932972e-03 +512 3.231441e-03 +1000 1.967067e-03 +1023 2.582323e-03 +1024 2.553477e-03 +2000 1.921168e-03 +2047 2.480431e-03 +2048 2.668889e-03 \ No newline at end of file diff --git a/cacheres2.dat b/cacheres2.dat new file mode 100644 index 0000000000000000000000000000000000000000..2ad3f0a714ce618d039d996cb23d4e23d2a367d5 --- /dev/null +++ b/cacheres2.dat @@ -0,0 +1,16 @@ +10 9.167250e-02 +100 4.118495e-02 +127 4.031123e-02 +128 4.227584e-02 +200 3.915971e-02 +255 4.035269e-02 +256 4.404526e-02 +500 4.232655e-02 +511 7.374434e-02 +512 8.294147e-02 +1000 9.187801e-02 +1023 9.549587e-02 +1024 9.727541e-02 +2000 9.262099e-02 +2047 9.543078e-02 +2048 9.750622e-02 \ No newline at end of file diff --git a/cachesor1.dat b/cachesor1.dat new file mode 100644 index 0000000000000000000000000000000000000000..dbcedc35c0bba7ee096763ced206f39313d6f95e --- /dev/null +++ b/cachesor1.dat @@ -0,0 +1,16 @@ +10 1.719345e-02 +100 1.194167e-03 +127 1.093879e-03 +128 9.156894e-04 +200 7.725517e-04 +255 1.253632e-03 +256 1.332275e-03 +500 1.155280e-03 +511 1.986409e-03 +512 3.108666e-03 +1000 1.953739e-03 +1023 2.595456e-03 +1024 9.227765e-04 +2000 1.885395e-03 +2047 2.487427e-03 +2048 2.634287e-03 \ No newline at end of file diff --git a/cachesor2.dat b/cachesor2.dat new file mode 100644 index 0000000000000000000000000000000000000000..c3e05a4ec48d633e5bf8cd26dae62f28b7f5304f --- /dev/null +++ b/cachesor2.dat @@ -0,0 +1,16 @@ +10 9.412551e-02 +100 2.432181e-02 +127 2.370301e-02 +128 2.360449e-02 +200 2.346966e-02 +255 3.048646e-02 +256 2.831647e-02 +500 2.411791e-02 +511 1.249207e-01 
+512 2.092028e-01 +1000 2.950550e-02 +1023 1.237276e-01 +1024 1.869319e-01 +2000 2.925384e-02 +2047 1.241280e-01 +2048 1.869591e-01 \ No newline at end of file diff --git a/dpmflopsres1.dat b/dpmflopsres1.dat new file mode 100644 index 0000000000000000000000000000000000000000..b00f28d8cbf9ab2223688d210bff8fb3c2e53fdf --- /dev/null +++ b/dpmflopsres1.dat @@ -0,0 +1,16 @@ +10 3.416050e+02 +100 9.656009e+02 +127 9.917492e+02 +128 9.337266e+02 +200 9.707222e+02 +255 9.810839e+02 +256 9.873832e+02 +500 9.883993e+02 +511 9.980287e+02 +512 9.715378e+02 +1000 1.013719e+03 +1023 8.811371e+02 +1024 1.009309e+03 +2000 1.021636e+03 +2047 1.022546e+03 +2048 1.018746e+03 \ No newline at end of file diff --git a/dpmflopsres2.dat b/dpmflopsres2.dat new file mode 100644 index 0000000000000000000000000000000000000000..68db867b44edf4d207264b4852da07f8d8b00972 --- /dev/null +++ b/dpmflopsres2.dat @@ -0,0 +1,16 @@ +10 2.008486e+02 +100 9.004255e+02 +127 8.503135e+02 +128 1.377613e+03 +200 1.335525e+03 +255 1.391218e+03 +256 1.379471e+03 +500 1.417728e+03 +511 1.148963e+03 +512 1.144478e+03 +1000 6.335770e+02 +1023 6.204512e+02 +1024 6.099872e+02 +2000 8.375121e+02 +2047 8.052487e+02 +2048 7.734637e+02 \ No newline at end of file diff --git a/dpmflopssor1.dat b/dpmflopssor1.dat new file mode 100644 index 0000000000000000000000000000000000000000..f9918e0ded8629d95433fa88926e673c97a1bdba --- /dev/null +++ b/dpmflopssor1.dat @@ -0,0 +1,16 @@ +10 3.279365e+02 +100 8.087655e+02 +127 8.948557e+02 +128 8.418422e+02 +200 8.882177e+02 +255 8.984721e+02 +256 9.176340e+02 +500 9.275850e+02 +511 9.292904e+02 +512 9.099833e+02 +1000 9.427951e+02 +1023 8.391381e+02 +1024 9.388747e+02 +2000 9.524161e+02 +2047 9.510150e+02 +2048 9.492556e+02 \ No newline at end of file diff --git a/dpmflopssor2.dat b/dpmflopssor2.dat new file mode 100644 index 0000000000000000000000000000000000000000..592ff0cb4c7cdaba0f9531298629ace53c360d7b --- /dev/null +++ b/dpmflopssor2.dat @@ -0,0 +1,16 @@ +10 1.447379e+02 +100 
3.411563e+02 +127 3.220837e+02 +128 5.199345e+02 +200 5.105652e+02 +255 4.984077e+02 +256 5.008122e+02 +500 5.190291e+02 +511 3.023504e+02 +512 2.367900e+02 +1000 4.909609e+02 +1023 3.046950e+02 +1024 2.385549e+02 +2000 4.933160e+02 +2047 3.025465e+02 +2048 2.398127e+02 \ No newline at end of file diff --git a/header.h b/header.h index f871af048ccc5c7b86095e57a41aa9320756fdd2..d7833a709426223ba4f027a9cbb755ae5cf7215f 100644 --- a/header.h +++ b/header.h @@ -4,6 +4,7 @@ #include <math.h> #include <errno.h> #include <time.h> +#include <likwid.h> #define ARGS_NUM 9 #define BLOCK_SIZE 3 @@ -13,4 +14,5 @@ FILE* getParams(int argc, char* argv[], double *hx, double *hy, int *maxI); double f(int i, int j, double hx, double hy); double calcU(int n, double *u, double *fMem, double uDivisor, double hx, double hy, int nx, double coef1, double coef2, double coef3, double coef4); double subsRow(int n, double *u, double uDivisor, double hx, double hy, int nx, double coef1, double coef2, double coef3, double coef4); +//void sor(double *x, double *r, double *fMem, double *timeSor, double *timeResNorm, double w, double divided, double hx, double hy, int nx, int ny, int maxI); void sor(double *x, double *r, double *fMem, double *timeSor, double *timeResNorm, double w, double uDivisor, double hx, double hy, int nx, int ny, int maxI, int e); \ No newline at end of file diff --git a/howToPlot b/howToPlot new file mode 100644 index 0000000000000000000000000000000000000000..f7d0ce18a7d5fca4e01198de8c4a135b110c82be --- /dev/null +++ b/howToPlot @@ -0,0 +1,67 @@ +Tutorial (Sr. Romano, leia isto antes de sair tentando): +1- Execute os 8 comandos de General Options. +2- Escolha um dos 4 blocos de plot abaixo (Time, Memory Bandwidth, Data Cache Miss Ratio ou DP MFLOP/s) e execute-o. +Sim, os gráficos estão feios; fique à vontade para ser o designer. +Olhe o gráfico de Cache Miss Ratio: os pontos que são potência de 2 (256, 512, 1024, 2048) estão com resultados muito ruins. +Acho que vamos ter que eliminar esse cache thrashing, senão o Daniel vai penalizar a gente no trabalho. 
+ + + +General Options: + +set termoption dash +set grid +set xlabel 'Points' +set ylabel 'Time (seconds)' +set style line 1 lt 1 lc rgb 'red' lw 2 +set style line 2 lt 1 lc rgb "orange" lw 2 +set style line 3 lt 2 lc rgb "yellow" lw 2 +set style line 4 lt 2 lc rgb "green" lw 2 + +-------------------------------------------- +Time: + +set logscale y 2 +set key left box +set title 'Time' + +plot 'timesor1.dat' u 1:2 w l t 'SOR - Version 1' ls 1\ + , 'timesor2.dat' u 1:2 w l t 'SOR - Version 2' ls 2\ + , 'timeres1.dat' u 1:2 w l t 'Residue - Version 1' ls 3\ + , 'timeres2.dat' u 1:2 w l t 'Residue - Version 2' ls 4 + +--------------------------------------------- +Memory Bandwidth + +set key right box +set title 'Memory Bandwidth [MBytes/s]' +set logscale y 2 + +plot 'membandwidthsor1.dat' u 1:2 w l t 'SOR - Version 1' ls 1\ + , 'membandwidthsor2.dat' u 1:2 w l t 'SOR - Version 2' ls 2\ + , 'membandwidthres1.dat' u 1:2 w l t 'Residue - Version 1' ls 3\ + , 'membandwidthres2.dat' u 1:2 w l t 'Residue - Version 2' ls 4 + + +--------------------------------------------- +Data Cache Miss Ratio + +set title 'Data Cache Miss Ratio' +set logscale y 2 + +plot 'cachesor1.dat' u 1:2 w l t 'SOR - Version 1' ls 1\ + , 'cachesor2.dat' u 1:2 w l t 'SOR - Version 2' ls 2\ + , 'cacheres1.dat' u 1:2 w l t 'Residue - Version 1' ls 3\ + , 'cacheres2.dat' u 1:2 w l t 'Residue - Version 2' ls 4 + + +--------------------------------------------- +DP MFLOP/s + +set title 'DP MFLOP/s' +set logscale y 2 + +plot 'dpmflopssor1.dat' u 1:2 w l t 'SOR - Version 1' ls 1\ + , 'dpmflopssor2.dat' u 1:2 w l t 'SOR - Version 2' ls 2\ + , 'dpmflopsres1.dat' u 1:2 w l t 'Residue - Version 1' ls 3\ + , 'dpmflopsres2.dat' u 1:2 w l t 'Residue - Version 2' ls 4 \ No newline at end of file diff --git a/membandwidthres1.dat b/membandwidthres1.dat new file mode 100644 index 0000000000000000000000000000000000000000..a5fabfedf8ba9f94f06a7afa614edc542d44dd49 --- /dev/null +++ b/membandwidthres1.dat @@ -0,0 +1,16 
@@ +10 1.779971e+04 +100 3.768171e+02 +127 4.362378e+02 +128 4.360278e+02 +200 3.810756e+02 +255 3.874120e+02 +256 3.796060e+02 +500 4.763581e+02 +511 3.882145e+02 +512 4.019147e+02 +1000 4.341473e+02 +1023 4.462077e+02 +1024 6.757122e+02 +2000 4.366796e+02 +2047 4.481116e+02 +2048 4.552954e+02 \ No newline at end of file diff --git a/membandwidthres2.dat b/membandwidthres2.dat new file mode 100644 index 0000000000000000000000000000000000000000..5e22336c8f0be4f1f52cdd0e792c99a663edf2ff --- /dev/null +++ b/membandwidthres2.dat @@ -0,0 +1,16 @@ +10 2.295315e+04 +100 2.320394e+04 +127 7.438936e+02 +128 1.617785e+03 +200 1.084110e+03 +255 7.940529e+03 +256 7.468366e+03 +500 4.136519e+02 +511 3.890882e+02 +512 3.981432e+02 +1000 6.536244e+03 +1023 6.419650e+03 +1024 6.200531e+03 +2000 6.716217e+03 +2047 6.990283e+03 +2048 6.563676e+03 \ No newline at end of file diff --git a/membandwidthsor1.dat b/membandwidthsor1.dat new file mode 100644 index 0000000000000000000000000000000000000000..55f96f67edc38847612b40fb52d77c2771216f3e --- /dev/null +++ b/membandwidthsor1.dat @@ -0,0 +1,16 @@ +10 1.619730e+04 +100 3.829102e+02 +127 3.978275e+02 +128 4.212521e+02 +200 4.108838e+02 +255 3.862520e+02 +256 3.873149e+02 +500 4.638628e+02 +511 5.385358e+02 +512 5.314457e+02 +1000 4.412162e+02 +1023 4.315215e+02 +1024 5.651425e+02 +2000 4.496510e+02 +2047 4.516483e+02 +2048 4.636085e+02 \ No newline at end of file diff --git a/membandwidthsor2.dat b/membandwidthsor2.dat new file mode 100644 index 0000000000000000000000000000000000000000..c2fcfddc9a493c08677897e157afae1467093393 --- /dev/null +++ b/membandwidthsor2.dat @@ -0,0 +1,16 @@ +10 1.007957e+04 +100 1.240943e+04 +127 4.497731e+02 +128 9.235842e+02 +200 7.799725e+02 +255 6.773897e+03 +256 6.212922e+03 +500 5.060868e+02 +511 3.910980e+02 +512 3.975009e+02 +1000 3.334609e+03 +1023 2.254284e+03 +1024 1.839885e+03 +2000 3.748434e+03 +2047 2.461350e+03 +2048 1.995682e+03 \ No newline at end of file diff --git a/pdeSolver.c 
b/pdeSolver.c index 018efdaf9a102052f56bc20d7ac54a39ee261f60..fcc752cb6f8b8964c13a045fb826e1518fe47a12 100644 --- a/pdeSolver.c +++ b/pdeSolver.c @@ -1,4 +1,96 @@ #include "header.h" +/* +Banda de Memória: utilizar o grupo MEM do likwid, e apresentar o resultado de "Memory bandwidth [MBytes/s]"; +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.05 -hy 0.05 -i 20 -o graph | grep "Memory bandwidth" | cut -c 39-50 + +Cache miss: utilizar o grupo CACHE do likwid, e apresentar o resultado de "data cache miss ratio"; +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.05 -hy 0.05 -i 20 -o graph + +Operações aritméticas: utilizar o grupo FLOPS_DP do likwid, e apresentar o resultado de "DP MFLOP/s"; +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.05 -hy 0.05 -i 20 -o graph + +--------- + +10 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.34 -hy 0.34 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.34 -hy 0.34 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.34 -hy 0.34 -i 20 -o graph + +100 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.0317 -hy 0.0317 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.0317 -hy 0.0317 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.0317 -hy 0.0317 -i 20 -o graph + +127 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.025 -hy 0.025 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.025 -hy 0.025 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.025 -hy 0.025 -i 20 -o graph + +128 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.0248 -hy 0.0248 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.0248 -hy 0.0248 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.0248 -hy 0.0248 -i 20 -o graph + +200 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.0158 -hy 0.0158 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m 
./pdeSolver -hx 0.0158 -hy 0.0158 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.0158 -hy 0.0158 -i 20 -o graph + +255 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.01235 -hy 0.01235 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.01235 -hy 0.01235 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.01235 -hy 0.01235 -i 20 -o graph + +256 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.0123 -hy 0.0123 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.0123 -hy 0.0123 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.0123 -hy 0.0123 -i 20 -o graph + +500 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.0063 -hy 0.0063 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.0063 -hy 0.0063 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.0063 -hy 0.0063 -i 20 -o graph + +511 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.00616 -hy 0.00616 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.00616 -hy 0.00616 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.00616 -hy 0.00616 -i 20 -o graph + +512 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.00615 -hy 0.00615 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.00615 -hy 0.00615 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.00615 -hy 0.00615 -i 20 -o graph + +1000 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.003145 -hy 0.003145 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.003145 -hy 0.003145 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.003145 -hy 0.003145 -i 20 -o graph + +1023 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.003073 -hy 0.003073 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.003073 -hy 0.003073 -i 20 -o graph +likwid-perfctr -f 
-C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.003073 -hy 0.003073 -i 20 -o graph + +1024 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.00307 -hy 0.00307 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.00307 -hy 0.00307 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.00307 -hy 0.00307 -i 20 -o graph + +2000 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.0015715 -hy 0.0015715 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.0015715 -hy 0.0015715 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.0015715 -hy 0.0015715 -i 20 -o graph + +2047 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.0015355 -hy 0.0015355 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.0015355 -hy 0.0015355 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.0015355 -hy 0.0015355 -i 20 -o graph + +2048 Pontos +likwid-perfctr -f -C 1 -g MEM -m ./pdeSolver -hx 0.001535 -hy 0.001535 -i 20 -o graph +likwid-perfctr -f -C 1 -g CACHE -m ./pdeSolver -hx 0.001535 -hy 0.001535 -i 20 -o graph +likwid-perfctr -f -C 1 -g FLOPS_DP -m ./pdeSolver -hx 0.001535 -hy 0.001535 -i 20 -o graph +*/ double timestamp(void) { struct timeval tp; @@ -51,7 +143,7 @@ FILE* getParams(int argc, char* argv[], double *hx, double *hy, int *maxI) { inline double f(int i, int j, double hx, double hy) { /* f(x,y) = 4π²[ sin(2πx)sinh(πy) + sin(2π(π−x))sinh(π(π−y)) ] */ - double x = j * hx, y = i * hy; + //double x = j * hx, y = i * hy; return (4*M_PI*M_PI * ( (sin(2*M_PI*x)) * (sinh(M_PI*y)) + (sin(2*M_PI*(M_PI-x))) * (sinh(M_PI*(M_PI-y))) )); } @@ -80,6 +172,7 @@ f(x,y) = 2/Δx²+2/Δy²+4π² * u(i,j) - (u(i+1,j) * 1/(Δx(Δx-2)) + u(i-1,j) void sor(double *x, double *r, double *fMem, double *timeSor, double *timeResNorm, double w, double uDivisor, double hx, double hy, int nx, int ny, int maxI, int e) { int i, j, k, l, m, row, inx, index, nxe; + double now, res, tRes, maxRes = 0, divided; 
// tRes is total residue in this iteration, maxRes is the biggest residue. double coef1, coef2, coef3, coef4; e=0; @@ -88,11 +181,13 @@ void sor(double *x, double *r, double *fMem, double *timeSor, double *timeResNor coef2 = (1/(hx*hx)) + (1/(2*hx)); // u(i-1,j) coef3 = (1/(hy*hy)) - (1/(2*hy)); // u(i,j+1) coef4 = (1/(hy*hy)) + (1/(2*hy)); // u(i,j-1) + nxe = nx + e; divided = 1 / uDivisor; for(k=0; k<maxI; ++k) { now = timestamp(); // Starting iteration time counter. + LIKWID_MARKER_START("sor"); for(i=1; i<ny-1; i+=BLOCK_SIZE) { inx = i*nxe; @@ -107,12 +202,14 @@ void sor(double *x, double *r, double *fMem, double *timeSor, double *timeResNor } } + LIKWID_MARKER_STOP("sor"); *timeSor += timestamp() - now; // Get iteration time. now = timestamp(); // Start residue norm time counter. + LIKWID_MARKER_START("residue"); tRes = 0.0f; - for(i=1; i<ny-1; ++i) { // Ignoring borders. + for(i=1; i<ny-1; ++i) { // Ignoring borders index = i * nxe; for(j=1; j<nx-1; ++j) { // Ignoring borders as well. res = fMem[index+j] - subsRow(index+j,x,uDivisor,hx,hy,nxe,coef1,coef2,coef3,coef4); @@ -124,6 +221,7 @@ void sor(double *x, double *r, double *fMem, double *timeSor, double *timeResNor r[k] = sqrt(tRes); // Store the norm of the residue in a vector (r). + LIKWID_MARKER_STOP("residue"); *timeResNorm += timestamp() - now; // Get residue norm time. 
} @@ -138,8 +236,11 @@ int main(int argc, char *argv[]) { fpExit = getParams(argc,argv,&hx,&hy,&maxI); + LIKWID_MARKER_INIT; + nx = (round(M_PI/hx)) + 1; ny = (round(M_PI/hy)) + 1; + printf("Nx = %d, Ny = %d\n",nx,ny); w = 2 - ((hx + hy) / 2); uDivisor = (2 / (hx * hx)) + (2 / (hy * hy)) + 4 * M_PI * M_PI; @@ -217,6 +318,8 @@ int main(int argc, char *argv[]) { fclose(fpExit); fclose(fpData); + LIKWID_MARKER_CLOSE; + return 0; } diff --git a/timeres1.dat b/timeres1.dat new file mode 100644 index 0000000000000000000000000000000000000000..020fe252057399977967835459c1c0e848946562 --- /dev/null +++ b/timeres1.dat @@ -0,0 +1,16 @@ +10 0.000295 +100 0.001934 +127 0.003342 +128 0.003246 +200 0.007583 +255 0.012168 +256 0.012253 +500 0.046418 +511 0.048035 +512 0.049579 +1000 0.180556 +1023 0.217192 +1024 0.190238 +2000 0.716187 +2047 0.749794 +2048 0.752515 \ No newline at end of file diff --git a/timeres2.dat b/timeres2.dat new file mode 100644 index 0000000000000000000000000000000000000000..2647c4495bb27de141c21ca49beccc5732674383 --- /dev/null +++ b/timeres2.dat @@ -0,0 +1,16 @@ +10 0.000132 +100 0.000097 +127 0.000078 +128 0.000077 +200 0.000114 +255 0.000159 +256 0.000157 +500 0.000476 +511 0.000574 +512 0.000598 +1000 0.003774 +1023 0.003997 +1024 0.004082 +2000 0.011399 +2047 0.012243 +2048 0.012756 \ No newline at end of file diff --git a/timesor1.dat b/timesor1.dat new file mode 100644 index 0000000000000000000000000000000000000000..cf91052cad37dadfcfd1693fa4dda52ed24ab679 --- /dev/null +++ b/timesor1.dat @@ -0,0 +1,16 @@ +10 0.000297 +100 0.002310 +127 0.003715 +128 0.003616 +200 0.008326 +255 0.013354 +256 0.013252 +500 0.049730 +511 0.051865 +512 0.053218 +1000 0.195197 +1023 0.229308 +1024 0.205620 +2000 0.772440 +2047 0.809721 +2048 0.813544 \ No newline at end of file diff --git a/timesor2.dat b/timesor2.dat new file mode 100644 index 0000000000000000000000000000000000000000..7ac30e1dd0bc7618f3245f89c8c64a5ba13230d5 --- /dev/null +++ b/timesor2.dat @@ -0,0 
+1,16 @@ +10 0.000140 +100 0.000202 +127 0.000177 +128 0.000194 +200 0.000355 +255 0.000566 +256 0.000568 +500 0.001982 +511 0.003423 +512 0.004417 +1000 0.008146 +1023 0.013825 +1024 0.017605 +2000 0.032355 +2047 0.055417 +2048 0.069957 \ No newline at end of file