X-Git-Url: http://git.rrze.uni-erlangen.de/gitweb/?p=LbmBenchmarkKernelsPublic.git;a=blobdiff_plain;f=src%2FBenchKernelD3Q19AaVecSl.c;fp=src%2FBenchKernelD3Q19AaVecSl.c;h=989e3d0d187770286cdacc220b9dc12d4bdb08b4;hp=885a065c8fbdc8b8c7eec262721b4870d2662eef;hb=8cafd9ea08a6b1103eab29811227a7ae536dffa6;hpb=0fde6e45e9be83893afae896cf49a799777f6d7c diff --git a/src/BenchKernelD3Q19AaVecSl.c b/src/BenchKernelD3Q19AaVecSl.c index 885a065..989e3d0 100644 --- a/src/BenchKernelD3Q19AaVecSl.c +++ b/src/BenchKernelD3Q19AaVecSl.c @@ -121,6 +121,8 @@ void FNAME(D3Q19AaVecSlKernel)(LatticeDesc * ld, KernelData * kd, CaseData * cd) Assert((maxIterations % 2) == 0); + X_KERNEL_START(kd); + #ifdef _OPENMP #pragma omp parallel default(none) shared(kda, kd, ld, cd, src, maxIterations) #endif @@ -238,6 +240,8 @@ void FNAME(D3Q19AaVecSlKernel)(LatticeDesc * ld, KernelData * kd, CaseData * cd) } // for (int iter = 0; ... } // omp parallel + X_KERNEL_END(kd); + #ifdef VTK_OUTPUT if (cd->VtkOutput) { @@ -379,7 +383,7 @@ static void KernelEven(LatticeDesc * ld, KernelData * kd, CaseData * cd) // {{{ // threadId, indexStart, indexEnd, threadStart, threadEnd); - for (int i = threadStart; i < threadEnd; i += VSIZE) { + for (int i = threadStart; i < threadEnd; i += VSIZE) { // LOOP aa-vec-sl-even // Load PDFs of local cell: pdf_N = src[I(x, y, z, D3Q19_N)]; ... // #define X(name, idx, idxinv, _x, _y, _z) JOIN(vpdf_,name) = VLDU(&src[I(x, y, z, idx)]); @@ -605,7 +609,7 @@ startX , startY , startZ , startX + _x, startY + _y, startZ + _z); #endif // DEBUG_EXTENDED - for (int i = threadStart; i < threadEnd; i += VSIZE) { + for (int i = threadStart; i < threadEnd; i += VSIZE) { // LOOP aa-vec-sl-odd #if DEBUG_EXTENDED #define X(name, idx, idxinv, _x, _y, _z) Assert((unsigned long)(JOIN(ppdf_,idx)) >= (unsigned long)(JOIN(ppdf_start_,idx))); Assert((unsigned long)(JOIN(ppdf_,idx)) <= (unsigned long)(JOIN(ppdf_end_,idx)));