diff --git a/Makefile b/Makefile index 48d550dda769624ea222af1144ebfed67505cff3..04f1bf242510518b2ddd31e8c6ed84722394d66c 100644 --- a/Makefile +++ b/Makefile @@ -4,11 +4,11 @@ # Arquivo final FILE = pdeSolver - CC = gcc -g + CC = gcc AR = ar -rcu INSTALL = install - OPTIMIZATION = -O2 - #OPTIMIZATION = -O3 -mavx + #OPTIMIZATION = -O2 + OPTIMIZATION = -O3 -mavx -march=native .PHONY: clean install all diff --git a/header.h b/header.h index 9f39c4175e080240622a6620520d39c903296e05..163d235c8962c506caedbec809917c3cfc30fcb8 100644 --- a/header.h +++ b/header.h @@ -10,10 +10,7 @@ double timestamp(void); FILE* getParams(int argc, char* argv[], double *hx, double *hy, int *maxI); -//double f(int n, double hx, double hy, int nx); double f(int i, int j, double hx, double hy, int nx); double calcU(int n, double *u, double *fMem, double uDivisor, double hx, double hy, int nx, double coef1, double coef2, double coef3, double coef4); -//double calcU(int n, double *u, double uDivisor, double hx, double hy, int nx); double subsRow(int n, double *u, double uDivisor, double hx, double hy, int nx, double coef1, double coef2, double coef3, double coef4); -void sor(double *x, double *r, double *fMem, double *timeSor, double *timeResNorm, double w, double uDivisor, double hx, double hy, int nx, int ny, int maxI); -//void sor(double *x, double *r, double *timeSor, double *timeResNorm, double w, double uDivisor, double hx, double hy, int nx, int ny, int maxI); +void sor(double *x, double *r, double *fMem, double *timeSor, double *timeResNorm, double w, double uDivisor, double hx, double hy, int nx, int ny, int maxI); \ No newline at end of file diff --git a/pdeSolver.c b/pdeSolver.c index 984bd98f6b5ac28f2dface596f70e99a5fe63537..c568aa5621d2e04a4103bcc9c05c3d902b59e533 100644 --- a/pdeSolver.c +++ b/pdeSolver.c @@ -49,14 +49,8 @@ FILE* getParams(int argc, char* argv[], double *hx, double *hy, int *maxI) { return fp; } - -//double f(int n, double hx, double hy, int nx) { double f(int i, int j, double hx, double hy, int nx) { -/* -f(x,y) = 4π²[ sin(2πx)sinh(πy) + sin(2π(π−x))sinh(π(π−y)) ] -*/ - //int i = n % nx, j = n / nx; - //double x = i * hx, y = j * hy; +/* f(x,y) = 4π²[ sin(2πx)sinh(πy) + sin(2π(π−x))sinh(π(π−y)) ] */ double x = j * hx, y = i * hy; return (4*M_PI*M_PI * ( (sin(2*M_PI*x)) * (sinh(M_PI*y)) + (sin(2*M_PI*(M_PI-x))) * (sinh(M_PI*(M_PI-y))) )); } @@ -64,27 +58,12 @@ f(x,y) = 4π²[ sin(2πx)sinh(πy) + sin(2π(π−x))sinh(π(π−y)) ] inline double calcU(int n, double *u, double *fMem, double uDivisor, double hx, double hy, int nx, double coef1, double coef2, double coef3, double coef4) { /* Final version of simplified equation: -u(i,j) = f(x,y) + (u(i+1,j) + u(i-1,j))/Δx² + (u(i,j+1) + u(i,j-1))/Δy² + (-u(i+1,j)+u(i-1,j))/2Δx + (-u(i,j+1)+u(i,j-1))/2Δy - -------------------------------------------------------------------------------------------------------------------- - 2/Δx² + 2/Δy² + 4π² -Final version 2: - coef1 coef2 coef3 coef4 -u(i,j) = f(x,y) + u(i+1,j) * 1/(Δx(Δx-2)) + u(i-1,j) * 1/(Δx(Δx+2)) + u(i,j+1) * 1/(Δy(Δy-2)) + u(i,j-1) * 1/(Δy(Δy+2)) + coef1 coef2 coef3 coef4 +u(i,j) = f(x,y) + u(i+1,j)*(1/Δx²-1/2Δx) + u(i-1,j)*(1/Δx²+1/2Δx) + u(i,j+1)*1/(1/Δy²-1/2Δy) + u(i,j-1)*(1/Δy²+1/2Δy) -------------------------------------------------------------------------------------------------------------- 2/Δx² + 2/Δy² + 4π² */ -/* - double res = 0; - res += fMem[n] + (u[n+nx] + u[n-nx] ) / (hx * hx) + (u[n+1] + u[n-1]) / (hy * hy); - //res += f(n,hx,hy,nx) + (u[n+nx] + u[n-nx] ) / (hx * hx) + (u[n+1] + u[n-1]) / (hy * hy); - res += (u[n-nx] - u[n+nx]) / (2 * hx) + (u[n-1] - u[n+1]) / (2 * hy); - res = res / uDivisor; - return res; -*/ - - return ((fMem[n] + (u[n+nx] + u[n-nx] ) / (hx * hx) + (u[n+1] + u[n-1]) / (hy * hy) + (u[n-nx] - u[n+nx]) / (2 * hx) + (u[n-1] - u[n+1]) / (2 * hy)) / uDivisor); - - //return ((fMem[n] + u[n+nx]*coef1 + u[n-nx]*coef2 + u[n-1]*coef4 + u[n+1]*coef3) / uDivisor); + return ((fMem[n] + u[n+nx] * coef1 + u[n-nx] * coef2 + u[n+1] * coef3 + u[n-1] * coef4) / uDivisor); } inline double subsRow(int n, double *u, double uDivisor, double hx, double hy, int nx, double coef1, double coef2, double coef3, double coef4) { @@ -95,45 +74,23 @@ f(x,y) = /* 2/Δx²+2/Δy²+4π² * u(i,j) = f(x,y) + u(i+1,j) * 1/(Δx(Δx-2)) + u(i-1,j) * 1/(Δx(Δx+2)) + u(i,j+1) * 1/(Δy(Δy-2)) + u(i,j-1) * 1/(Δy(Δy+2)) f(x,y) = 2/Δx²+2/Δy²+4π² * u(i,j) - (u(i+1,j) * 1/(Δx(Δx-2)) + u(i-1,j) * 1/(Δx(Δx+2)) + u(i,j+1) * 1/(Δy(Δy-2)) + u(i,j-1) * 1/(Δy(Δy+2))) -*/ -/* - double res = 0; - res = uDivisor * u[n]; - res -= ((u[n+nx] + u[n-nx]) / (hx * hx) + (u[n+1] + u[n-1]) / (hy * hy) + (u[n-nx] - u[n+nx]) / (2 * hx) + (u[n-1] - u[n+1]) / (2 * hy)); - return res; -*/ -/* - return uDivisor * u[n] - ((u[n+nx] + u[n-nx]) / (hx * hx) + (u[n+1] + u[n-1]) / (hy * hy) + (u[n-nx] - u[n+nx]) - / (2 * hx) + (u[n-1] - u[n+1]) / (2 * hy)); */ return (uDivisor * u[n] + (-1 * (u[n+nx]*coef1 + u[n-nx]*coef2 + u[n+1]*coef3 + u[n-1]*coef4))); } void sor(double *x, double *r, double *fMem, double *timeSor, double *timeResNorm, double w, double uDivisor, double hx, double hy, int nx, int ny, int maxI) { -//void sor(double *x, double *r, double *timeSor, double *timeResNorm, double w, double uDivisor, double hx, double hy, int nx, int ny, int maxI) { int i, j, k, l, m, row, col; - double sigma, now, fxy, res, maxRes = 0, tRes = 0; // maxRes is the biggest residue, tRes is total residue in this iteration. + double now, res, tRes, maxRes = 0; // tRes is total residue in this iteration, maxRes is the biggest residue. double coef1, coef2, coef3, coef4; - coef1 = (1/hx*hx) - (1/(2*hx)); // u(i+1,j) - coef2 = (1/hx*hx) + (1/(2*hx)); // u(i-1,j) - coef3 = (1/hy*hy) - (1/(2*hy)); // u(i,j+1) - coef4 = (1/hy*hy) + (1/(2*hy)); // u(i,j-1) + coef1 = (1/(hx*hx)) - (1/(2*hx)); // u(i+1,j) + coef2 = (1/(hx*hx)) + (1/(2*hx)); // u(i-1,j) + coef3 = (1/(hy*hy)) - (1/(2*hy)); // u(i,j+1) + coef4 = (1/(hy*hy)) + (1/(2*hy)); // u(i,j-1) for(k=0; k<maxI; ++k) { now = timestamp(); // Starting iteration time counter. - /*for(i = 1 + nx; i < nx * ny - nx - 1; ++i) { // Start at u[1][1], which means u[ny+1] and do not calculate borders - x[i] = x[i] + w * (calcU(i,x,fMem,uDivisor,hx,hy,nx) - x[i]); - //x[i] = x[i] + w * (calcU(i,x,uDivisor,hx,hy,nx) - x[i]); - }*/ - /* - for(i=1; i<ny-1; ++i) { // Ignoring borders. - for(j=1; j<nx-1; ++j) { // Ignoring borders as well. - x[i*nx+j] = x[i*nx+j] + w * (calcU(i*nx+j,x,fMem,uDivisor,hx,hy,nx,coef1,coef2,coef3,coef4) - x[i*nx+j]); - } - } - */ - + for(i=1; i<ny-1; i+=BLOCK_SIZE) { for(j=1; j<nx-1; j+=BLOCK_SIZE) { for(l=0; l<BLOCK_SIZE && l+i<ny-1; ++l) { @@ -145,10 +102,12 @@ void sor(double *x, double *r, double *fMem, double *timeSor, double *timeResNor } } } - + *timeSor += timestamp() - now; // Get iteration time. now = timestamp(); // Start residue norm time counter. + tRes = 0.0f; + for(i=1; i<ny-1; ++i) { // Ignoring borders. for(j=1; j<nx-1; ++j) { // Ignoring borders as well. res = fMem[i*nx+j] - subsRow(i*nx+j,x,uDivisor,hx,hy,nx,coef1,coef2,coef3,coef4); @@ -157,18 +116,9 @@ void sor(double *x, double *r, double *fMem, double *timeSor, double *timeResNor tRes += res * res; // Adds res² to the total residue of this iteration. } } - /* - for(i = 1 + nx; i < nx * ny - nx - 1; ++i) { - res = fMem[i]; - //res = f(i,hx,hy,nx); // res = f(x,y) - res -= subsRow(i,x,uDivisor,hx,hy,nx); // res = f(x,y) - (a0 * x0 + a1 * x1 + ... + an * xn) - if(res > maxRes) - maxRes = res; - tRes += res * res; // Adds res² to the total residue of this iteration. - } - */ + r[k] = sqrt(tRes); // Store the norm of the residue in a vector (r). - tRes = 0.0f; + *timeResNorm += timestamp() - now; // Get residue norm time. } @@ -177,8 +127,8 @@ void sor(double *x, double *r, double *fMem, double *timeSor, double *timeResNor } int main(int argc, char *argv[]) { - int i, j, k, nx, ny, maxI, alpha; - double hx, hy, w, beta, gama, sigma, uDivisor, *x, *r, *fMem, timeSor, timeResNorm; + int i, j, nx, ny, maxI, alpha; + double hx, hy, w, beta, sigma, uDivisor, *x, *r, *fMem, timeSor, timeResNorm; FILE *fpExit, *fpData; fpExit = getParams(argc,argv,&hx,&hy,&maxI); @@ -201,7 +151,7 @@ int main(int argc, char *argv[]) { fprintf(stderr,"Could not allocate memory."); exit(-5); } - fMem -= nx; // Save some memory. + fMem -= nx; // Save some memory, but I will not be able to access fMem[0]..fMem[nx-1]!! */ if((fMem = malloc(nx * ny * sizeof(double))) == NULL) { fprintf(stderr,"Could not allocate memory."); @@ -216,43 +166,29 @@ int main(int argc, char *argv[]) { timeSor = 0.0f; timeResNorm = 0.0f; - sigma = sinh(M_PI * M_PI); - alpha = nx * ny - nx; - beta = 2 * M_PI * hx; -/* - for(i = nx; i < nx * ny - nx; ++i) { // Initialize central points (with left and right borders) as 0. - x[i] = 0.0f; - } -*/ for(i = 1; i < ny - 1; ++i) { // This 'for' has to ignore borders. - for(j = 0; j < nx; ++j) { + for(j = 0; j < nx; ++j) { // This 'for' cant ignore borders. x[i*nx+j] = 0.0f; } } + sigma = sinh(M_PI * M_PI); + alpha = nx * ny - nx; + beta = 2 * M_PI * hx; + for(i=0; i<nx; ++i) { // Creating borders x[i] = sin(2 * M_PI * (M_PI - (i * hx))) * sigma; x[alpha+i] = sin(beta * i) * sigma; } -/* - for(i=0; i<nx; ++i) { // Creating borders - x[i] = sin(2 * M_PI * (M_PI - (i * hx))) * sigma; - x[nx*ny-nx+i] = sin(2 * M_PI * (i * hx)) * sigma; - } -*/ // Initializing f(x,y) for(i=1; i<ny-1; ++i) { // Ignoring borders. for(j=1; j<nx-1; ++j) { // Ignoring borders as well. - //fMem[i*nx+j] = f(i*nx+j,hx,hy,nx); // Please check those indexes. (looking to lines 181-185, it looks OK). - //fMem[i*nx+j] = f(i*nx,j,hx,hy,nx); fMem[i*nx+j] = f(i,j,hx,hy,nx); - //printf("%d - %d = %lf\n",i,j,fMem[i*nx+j]); } } sor(x,r,fMem,&timeSor,&timeResNorm,w,uDivisor,hx,hy,nx,ny,maxI); - //sor(x,r,&timeSor,&timeResNorm,w,uDivisor,hx,hy,nx,ny,maxI); fprintf(fpExit,"\n\n\n###########\n# Tempo Método SOR: %lf\n# Tempo Resíduo: %lf\n\n# Norma do Resíduo\n",timeSor,timeResNorm);