X-Git-Url: http://git.rrze.uni-erlangen.de/gitweb/?p=LbmBenchmarkKernelsPublic.git;a=blobdiff_plain;f=src%2FBenchKernelD3Q19ListPullSplitNt.c;h=05d1d0a5e59a223c3b41669c67055ab32ded894a;hp=0132dc9058f26d7f18f153e8a0c11f37d38efa74;hb=8cafd9ea08a6b1103eab29811227a7ae536dffa6;hpb=e3f82424829ebb623343ce0092238f83b4a1b8c2 diff --git a/src/BenchKernelD3Q19ListPullSplitNt.c b/src/BenchKernelD3Q19ListPullSplitNt.c index 0132dc9..05d1d0a 100644 --- a/src/BenchKernelD3Q19ListPullSplitNt.c +++ b/src/BenchKernelD3Q19ListPullSplitNt.c @@ -55,8 +55,8 @@ void FNAME(KernelPullSplitNt1S)(LatticeDesc * ld, KernelData * kernelData, CaseD Assert(kernelData != NULL); Assert(cd != NULL); - Assert(cd->Omega > 0.0); - Assert(cd->Omega < 2.0); + Assert(cd->Omega > F(0.0)); + Assert(cd->Omega < F(2.0)); KernelData * kd = (KernelData *)kernelData; KernelDataList * kdl = KDL(kernelData); @@ -65,16 +65,16 @@ void FNAME(KernelPullSplitNt1S)(LatticeDesc * ld, KernelData * kernelData, CaseD PdfT omega = cd->Omega; const PdfT omegaEven = omega; - PdfT magicParam = 1.0 / 12.0; - const PdfT omegaOdd = 1.0 / (0.5 + magicParam / (1.0 / omega - 0.5)); + PdfT magicParam = F(1.0) / F(12.0); + const PdfT omegaOdd = F(1.0) / (F(0.5) + magicParam / (F(1.0) / omega - F(0.5))); - const PdfT w_0 = 1.0 / 3.0; - const PdfT w_1 = 1.0 / 18.0; - const PdfT w_2 = 1.0 / 36.0; + const PdfT w_0 = F(1.0) / F( 3.0); + const PdfT w_1 = F(1.0) / F(18.0); + const PdfT w_2 = F(1.0) / F(36.0); - const PdfT w_1_x3 = w_1 * 3.0; const PdfT w_1_nine_half = w_1 * 9.0 / 2.0; - const PdfT w_2_x3 = w_2 * 3.0; const PdfT w_2_nine_half = w_2 * 9.0 / 2.0; + const PdfT w_1_x3 = w_1 * F(3.0); const PdfT w_1_nine_half = w_1 * F(9.0) / F(2.0); + const PdfT w_2_x3 = w_2 * F(3.0); const PdfT w_2_nine_half = w_2 * F(9.0) / F(2.0); const VPDFT vw_1_x3 = VSET(w_1_x3); const VPDFT vw_2_x3 = VSET(w_2_x3); @@ -85,7 +85,7 @@ void FNAME(KernelPullSplitNt1S)(LatticeDesc * ld, KernelData * kernelData, CaseD const VPDFT vomegaEven = VSET(omegaEven); const VPDFT vomegaOdd = VSET(omegaOdd); - const VPDFT voneHalf = VSET(0.5); + const VPDFT voneHalf = VSET(F(0.5)); // uint32_t nConsecNodes = kdlr->nConsecNodes; // uint32_t * consecNodes = kdlr->ConsecNodes; @@ -119,8 +119,10 @@ void FNAME(KernelPullSplitNt1S)(LatticeDesc * ld, KernelData * kernelData, CaseD KernelStatistics(kd, ld, cd, 0); #endif + X_KERNEL_START(kernelData); + + X_LIKWID_START("list-pull-split-nt-1s"); - X_LIKWID_START("list-pull-split-nt-1s"); #ifdef _OPENMP #pragma omp parallel default(none) \ shared(nFluid, nCells, kd, kdl, adjList, src, dst, \ @@ -244,6 +246,8 @@ void FNAME(KernelPullSplitNt1S)(LatticeDesc * ld, KernelData * kernelData, CaseD X_LIKWID_STOP("list-pull-split-nt-1s"); + X_KERNEL_END(kernelData); + #ifdef VTK_OUTPUT if (cd->VtkOutput) { kd->PdfsActive = src; @@ -266,8 +270,8 @@ void FNAME(KernelPullSplitNt2S)(LatticeDesc * ld, KernelData * kernelData, CaseD Assert(kernelData != NULL); Assert(cd != NULL); - Assert(cd->Omega > 0.0); - Assert(cd->Omega < 2.0); + Assert(cd->Omega > F(0.0)); + Assert(cd->Omega < F(2.0)); KernelData * kd = (KernelData *)kernelData; KernelDataList * kdl = KDL(kernelData); @@ -276,16 +280,15 @@ void FNAME(KernelPullSplitNt2S)(LatticeDesc * ld, KernelData * kernelData, CaseD PdfT omega = cd->Omega; const PdfT omegaEven = omega; - PdfT magicParam = 1.0 / 12.0; - const PdfT omegaOdd = 1.0 / (0.5 + magicParam / (1.0 / omega - 0.5)); - + PdfT magicParam = F(1.0) / F(12.0); + const PdfT omegaOdd = F(1.0) / (F(0.5) + magicParam / (F(1.0) / omega - F(0.5))); - const PdfT w_0 = 1.0 / 3.0; - const PdfT w_1 = 1.0 / 18.0; - const PdfT w_2 = 1.0 / 36.0; + const PdfT w_0 = F(1.0) / F( 3.0); + const PdfT w_1 = F(1.0) / F(18.0); + const PdfT w_2 = F(1.0) / F(36.0); - const PdfT w_1_x3 = w_1 * 3.0; const PdfT w_1_nine_half = w_1 * 9.0 / 2.0; - const PdfT w_2_x3 = w_2 * 3.0; const PdfT w_2_nine_half = w_2 * 9.0 / 2.0; + const PdfT w_1_x3 = w_1 * F(3.0); const PdfT w_1_nine_half = w_1 * F(9.0) / F(2.0); + const PdfT w_2_x3 = w_2 * F(3.0); const PdfT w_2_nine_half = w_2 * F(9.0) / F(2.0); const VPDFT vw_1_x3 = VSET(w_1_x3); const VPDFT vw_2_x3 = VSET(w_2_x3); @@ -296,7 +299,7 @@ void FNAME(KernelPullSplitNt2S)(LatticeDesc * ld, KernelData * kernelData, CaseD const VPDFT vomegaEven = VSET(omegaEven); const VPDFT vomegaOdd = VSET(omegaOdd); - const VPDFT voneHalf = VSET(0.5); + const VPDFT voneHalf = VSET(F(0.5)); // uint32_t nConsecNodes = kdlr->nConsecNodes; // uint32_t * consecNodes = kdlr->ConsecNodes; @@ -331,7 +334,9 @@ void FNAME(KernelPullSplitNt2S)(LatticeDesc * ld, KernelData * kernelData, CaseD #endif - X_LIKWID_START("list-pull-split-nt-2s"); + X_KERNEL_START(kernelData); + + X_LIKWID_START("list-pull-split-nt-2s"); #ifdef _OPENMP @@ -452,7 +457,9 @@ void FNAME(KernelPullSplitNt2S)(LatticeDesc * ld, KernelData * kernelData, CaseD MemFree((void **)&tmpArray); } - X_LIKWID_STOP("list-pull-split-nt-2s"); + X_LIKWID_STOP("list-pull-split-nt-2s"); + + X_KERNEL_END(kernelData); #ifdef VTK_OUTPUT if (cd->VtkOutput) {