X-Git-Url: http://git.rrze.uni-erlangen.de/gitweb/?p=LbmBenchmarkKernelsPublic.git;a=blobdiff_plain;f=src%2FBenchKernelD3Q19ListPullSplitNt.c;h=05d1d0a5e59a223c3b41669c67055ab32ded894a;hp=dfab54afbdc574c3936d146e28e9925e479a1088;hb=8cafd9ea08a6b1103eab29811227a7ae536dffa6;hpb=109880839321408644c94a34eb31208460b9f46d diff --git a/src/BenchKernelD3Q19ListPullSplitNt.c b/src/BenchKernelD3Q19ListPullSplitNt.c index dfab54a..05d1d0a 100644 --- a/src/BenchKernelD3Q19ListPullSplitNt.c +++ b/src/BenchKernelD3Q19ListPullSplitNt.c @@ -29,6 +29,7 @@ #include "Memory.h" #include "Vtk.h" #include "Vector.h" +#include "LikwidIf.h" #include #include @@ -54,8 +55,8 @@ void FNAME(KernelPullSplitNt1S)(LatticeDesc * ld, KernelData * kernelData, CaseD Assert(kernelData != NULL); Assert(cd != NULL); - Assert(cd->Omega > 0.0); - Assert(cd->Omega < 2.0); + Assert(cd->Omega > F(0.0)); + Assert(cd->Omega < F(2.0)); KernelData * kd = (KernelData *)kernelData; KernelDataList * kdl = KDL(kernelData); @@ -64,16 +65,16 @@ void FNAME(KernelPullSplitNt1S)(LatticeDesc * ld, KernelData * kernelData, CaseD PdfT omega = cd->Omega; const PdfT omegaEven = omega; - PdfT magicParam = 1.0 / 12.0; - const PdfT omegaOdd = 1.0 / (0.5 + magicParam / (1.0 / omega - 0.5)); + PdfT magicParam = F(1.0) / F(12.0); + const PdfT omegaOdd = F(1.0) / (F(0.5) + magicParam / (F(1.0) / omega - F(0.5))); - const PdfT w_0 = 1.0 / 3.0; - const PdfT w_1 = 1.0 / 18.0; - const PdfT w_2 = 1.0 / 36.0; + const PdfT w_0 = F(1.0) / F( 3.0); + const PdfT w_1 = F(1.0) / F(18.0); + const PdfT w_2 = F(1.0) / F(36.0); - const PdfT w_1_x3 = w_1 * 3.0; const PdfT w_1_nine_half = w_1 * 9.0 / 2.0; - const PdfT w_2_x3 = w_2 * 3.0; const PdfT w_2_nine_half = w_2 * 9.0 / 2.0; + const PdfT w_1_x3 = w_1 * F(3.0); const PdfT w_1_nine_half = w_1 * F(9.0) / F(2.0); + const PdfT w_2_x3 = w_2 * F(3.0); const PdfT w_2_nine_half = w_2 * F(9.0) / F(2.0); const VPDFT vw_1_x3 = VSET(w_1_x3); const VPDFT vw_2_x3 = VSET(w_2_x3); @@ -84,7 +85,7 @@ void FNAME(KernelPullSplitNt1S)(LatticeDesc * ld, KernelData * kernelData, CaseD const VPDFT vomegaEven = VSET(omegaEven); const VPDFT vomegaOdd = VSET(omegaOdd); - const VPDFT voneHalf = VSET(0.5); + const VPDFT voneHalf = VSET(F(0.5)); // uint32_t nConsecNodes = kdlr->nConsecNodes; // uint32_t * consecNodes = kdlr->ConsecNodes; @@ -118,6 +119,10 @@ void FNAME(KernelPullSplitNt1S)(LatticeDesc * ld, KernelData * kernelData, CaseD KernelStatistics(kd, ld, cd, 0); #endif + X_KERNEL_START(kernelData); + + X_LIKWID_START("list-pull-split-nt-1s"); + #ifdef _OPENMP #pragma omp parallel default(none) \ shared(nFluid, nCells, kd, kdl, adjList, src, dst, \ @@ -184,6 +189,7 @@ void FNAME(KernelPullSplitNt1S)(LatticeDesc * ld, KernelData * kernelData, CaseD for(int iter = 0; iter < maxIterations; ++iter) { + #if 1 #define INDEX_START blIndexStart #define INDEX_STOP blIndexVec @@ -201,6 +207,8 @@ void FNAME(KernelPullSplitNt1S)(LatticeDesc * ld, KernelData * kernelData, CaseD #define INDEX_STOP blIndexStop #include "BenchKernelD3Q19ListPullSplitNt1SScalar.h" #endif + + #pragma omp barrier #pragma omp single @@ -235,6 +243,11 @@ void FNAME(KernelPullSplitNt1S)(LatticeDesc * ld, KernelData * kernelData, CaseD MemFree((void **)&tmpArray); } + + X_LIKWID_STOP("list-pull-split-nt-1s"); + + X_KERNEL_END(kernelData); + #ifdef VTK_OUTPUT if (cd->VtkOutput) { kd->PdfsActive = src; @@ -257,8 +270,8 @@ void FNAME(KernelPullSplitNt2S)(LatticeDesc * ld, KernelData * kernelData, CaseD Assert(kernelData != NULL); Assert(cd != NULL); - Assert(cd->Omega > 0.0); - Assert(cd->Omega < 2.0); + Assert(cd->Omega > F(0.0)); + Assert(cd->Omega < F(2.0)); KernelData * kd = (KernelData *)kernelData; KernelDataList * kdl = KDL(kernelData); @@ -267,16 +280,15 @@ void FNAME(KernelPullSplitNt2S)(LatticeDesc * ld, KernelData * kernelData, CaseD PdfT omega = cd->Omega; const PdfT omegaEven = omega; - PdfT magicParam = 1.0 / 12.0; - const PdfT omegaOdd = 1.0 / (0.5 + magicParam / (1.0 / omega - 0.5)); + PdfT magicParam = F(1.0) / F(12.0); + const PdfT omegaOdd = F(1.0) / (F(0.5) + magicParam / (F(1.0) / omega - F(0.5))); + const PdfT w_0 = F(1.0) / F( 3.0); + const PdfT w_1 = F(1.0) / F(18.0); + const PdfT w_2 = F(1.0) / F(36.0); - const PdfT w_0 = 1.0 / 3.0; - const PdfT w_1 = 1.0 / 18.0; - const PdfT w_2 = 1.0 / 36.0; - - const PdfT w_1_x3 = w_1 * 3.0; const PdfT w_1_nine_half = w_1 * 9.0 / 2.0; - const PdfT w_2_x3 = w_2 * 3.0; const PdfT w_2_nine_half = w_2 * 9.0 / 2.0; + const PdfT w_1_x3 = w_1 * F(3.0); const PdfT w_1_nine_half = w_1 * F(9.0) / F(2.0); + const PdfT w_2_x3 = w_2 * F(3.0); const PdfT w_2_nine_half = w_2 * F(9.0) / F(2.0); const VPDFT vw_1_x3 = VSET(w_1_x3); const VPDFT vw_2_x3 = VSET(w_2_x3); @@ -287,7 +299,7 @@ void FNAME(KernelPullSplitNt2S)(LatticeDesc * ld, KernelData * kernelData, CaseD const VPDFT vomegaEven = VSET(omegaEven); const VPDFT vomegaOdd = VSET(omegaOdd); - const VPDFT voneHalf = VSET(0.5); + const VPDFT voneHalf = VSET(F(0.5)); // uint32_t nConsecNodes = kdlr->nConsecNodes; // uint32_t * consecNodes = kdlr->ConsecNodes; @@ -321,6 +333,12 @@ void FNAME(KernelPullSplitNt2S)(LatticeDesc * ld, KernelData * kernelData, CaseD KernelStatistics(kd, ld, cd, 0); #endif + + X_KERNEL_START(kernelData); + + X_LIKWID_START("list-pull-split-nt-2s"); + + #ifdef _OPENMP #pragma omp parallel default(none) \ shared(nFluid, nCells, kd, kdl, adjList, src, dst, \ @@ -406,6 +424,7 @@ void FNAME(KernelPullSplitNt2S)(LatticeDesc * ld, KernelData * kernelData, CaseD #endif #pragma omp barrier + #pragma omp single { #ifdef VERIFICATION @@ -438,6 +457,10 @@ void FNAME(KernelPullSplitNt2S)(LatticeDesc * ld, KernelData * kernelData, CaseD MemFree((void **)&tmpArray); } + X_LIKWID_STOP("list-pull-split-nt-2s"); + + X_KERNEL_END(kernelData); + #ifdef VTK_OUTPUT if (cd->VtkOutput) { kd->PdfsActive = src;