X-Git-Url: http://git.rrze.uni-erlangen.de/gitweb/?p=LbmBenchmarkKernelsPublic.git;a=blobdiff_plain;f=src%2FBenchKernelD3Q19.c;fp=src%2FBenchKernelD3Q19.c;h=62989c89e747ff32c74fee3906a507c32ce4efa1;hp=48529483ca31c1d3ffc52249ba2c7b76d125e6fa;hb=8cafd9ea08a6b1103eab29811227a7ae536dffa6;hpb=0fde6e45e9be83893afae896cf49a799777f6d7c diff --git a/src/BenchKernelD3Q19.c b/src/BenchKernelD3Q19.c index 4852948..62989c8 100644 --- a/src/BenchKernelD3Q19.c +++ b/src/BenchKernelD3Q19.c @@ -101,6 +101,8 @@ void FNAME(D3Q19Kernel)(LatticeDesc * ld, KernelData * kernelData, CaseData * cd } #endif + X_KERNEL_START(kernelData); + for (int iter = 0; iter < maxIterations; ++iter) { X_LIKWID_START("os"); @@ -125,7 +127,7 @@ void FNAME(D3Q19Kernel)(LatticeDesc * ld, KernelData * kernelData, CaseData * cd #pragma vector always #pragma simd #endif - for (int z = oZ; z < nZ + oZ; ++z) { + for (int z = oZ; z < nZ + oZ; ++z) { // LOOP os #define I(x, y, z, dir) P_INDEX_5(gDims, (x), (y), (z), (dir)) #ifdef PROP_MODEL_PUSH @@ -320,6 +322,7 @@ void FNAME(D3Q19Kernel)(LatticeDesc * ld, KernelData * kernelData, CaseData * cd // Stop counters before bounce back. Else computing loop balance will be incorrect. X_LIKWID_STOP("os"); + // Fixup bounce back PDFs. #ifdef _OPENMP #pragma omp parallel for default(none) \ @@ -355,6 +358,8 @@ void FNAME(D3Q19Kernel)(LatticeDesc * ld, KernelData * kernelData, CaseData * cd } // for (int iter = 0; ... + X_KERNEL_END(kernelData); + #ifdef VTK_OUTPUT if (cd->VtkOutput) { @@ -445,6 +450,8 @@ void FNAME(D3Q19BlkKernel)(LatticeDesc * ld, KernelData * kernelData, CaseData * nThreads = omp_get_max_threads(); #endif + X_KERNEL_START(kernelData); + for (int iter = 0; iter < maxIterations; ++iter) { #ifdef _OPENMP @@ -729,6 +736,8 @@ void FNAME(D3Q19BlkKernel)(LatticeDesc * ld, KernelData * kernelData, CaseData * } // for (int iter = 0; ... + X_KERNEL_END(kernelData); + #ifdef VTK_OUTPUT if (cd->VtkOutput) {