1 // --------------------------------------------------------------------------
4 // Markus Wittmann, 2016-2017
5 // RRZE, University of Erlangen-Nuremberg, Germany
6 // markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
9 // LSS, University of Erlangen-Nuremberg, Germany
11 // This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
13 // LbmBenchKernels is free software: you can redistribute it and/or modify
14 // it under the terms of the GNU General Public License as published by
15 // the Free Software Foundation, either version 3 of the License, or
16 // (at your option) any later version.
18 // LbmBenchKernels is distributed in the hope that it will be useful,
19 // but WITHOUT ANY WARRANTY; without even the implied warranty of
20 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 // GNU General Public License for more details.
23 // You should have received a copy of the GNU General Public License
24 // along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>.
26 // --------------------------------------------------------------------------
27 #include "BenchKernelD3Q19Common.h"
36 // Forward declarations of the two kernel entry points.
// In the code visible in this file D3Q19BlkInit stores D3Q19BlkKernel in
// kd->Kernel and D3Q19Init replaces it with D3Q19Kernel. Their definitions
// are not part of this listing.
37 void FNAME(D3Q19Kernel)(LatticeDesc * ld, struct KernelData_ * kd, CaseData * cd);
39 void FNAME(D3Q19BlkKernel)(LatticeDesc * ld, struct KernelData_ * kd, CaseData * cd);
// Reads one PDF with direction index dir for node (x, y, z) from the
// currently active PDF array and stores it in *pdf.
//
// kd       kernel data; PdfsActive must be non-NULL and equal to Pdfs[0] or Pdfs[1].
// x, y, z  node coordinates; each is asserted to be below the matching kd->Dims entry.
// dir      direction index; asserted to be below N_D3Q19.
// pdf      output location receiving the value.
43 static void FNAME(BcGetPdf)(KernelData * kd, int x, int y, int z, int dir, PdfT * pdf)
46 Assert(kd->PdfsActive != NULL);
47 Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
53 Assert(x < kd->Dims[0]);
54 Assert(y < kd->Dims[1]);
55 Assert(z < kd->Dims[2]);
57 Assert(dir < N_D3Q19);
// The offsets are added to the coordinates before indexing with GlobalDims.
59 int oX = kd->Offsets[0];
60 int oY = kd->Offsets[1];
61 int oZ = kd->Offsets[2];
63 #ifdef PROP_MODEL_PUSH
// NOTE(review): lines appear to be missing between this #ifdef and the
// statements below, so the preprocessor branch they belong to cannot be
// determined here. BcSetPdf has the same three statements under
// PROP_MODEL_PULL - confirm against the full file.
68 int nx = x - D3Q19_X[dir];
69 int ny = y - D3Q19_Y[dir];
70 int nz = z - D3Q19_Z[dir];
// I() maps (x, y, z, dir) to a linear index via P_INDEX_5 and GlobalDims.
73 #define I(x, y, z, dir) P_INDEX_5(kd->GlobalDims, (x), (y), (z), (dir))
74 *pdf = kd->PdfsActive[I(nx + oX, ny + oY, nz + oZ, dir)];
// Writes the value pdf into the active PDF array for direction index dir of
// node (x, y, z). Counterpart of BcGetPdf.
//
// kd       kernel data; PdfsActive must be non-NULL and equal to Pdfs[0] or Pdfs[1].
// x, y, z  node coordinates; each is asserted to be below the matching kd->Dims entry.
// dir      direction index; asserted to be below N_D3Q19.
// pdf      value to store.
80 static void FNAME(BcSetPdf)(KernelData * kd, int x, int y, int z, int dir, PdfT pdf)
83 Assert(kd->PdfsActive != NULL);
84 Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
88 Assert(x < kd->Dims[0]);
89 Assert(y < kd->Dims[1]);
90 Assert(z < kd->Dims[2]);
92 Assert(dir < N_D3Q19);
// The offsets are added to the coordinates before indexing with GlobalDims.
94 int oX = kd->Offsets[0];
95 int oY = kd->Offsets[1];
96 int oZ = kd->Offsets[2];
// NOTE(review): the body of the PUSH branch is not visible in this listing.
98 #ifdef PROP_MODEL_PUSH
102 #elif PROP_MODEL_PULL
// Pull model: the slot that is written lies one step against direction dir.
103 int nx = x - D3Q19_X[dir];
104 int ny = y - D3Q19_Y[dir];
105 int nz = z - D3Q19_Z[dir];
// I() maps (x, y, z, dir) to a linear index via P_INDEX_5 and GlobalDims.
108 #define I(x, y, z, dir) P_INDEX_5(kd->GlobalDims, (x), (y), (z), (dir))
109 kd->PdfsActive[I(nx + oX, ny + oY, nz + oZ, dir)] = pdf;
// Copies the PDFs of node (x, y, z) from the active PDF array into pdfs.
//
// kd       kernel data; PdfsActive must be non-NULL and equal to Pdfs[0] or Pdfs[1].
// x, y, z  node coordinates; each is asserted to be below the matching kd->Dims entry.
// pdfs     output array, must not be NULL; it is indexed by direction index.
//          Presumably it must hold N_D3Q19 entries - confirm against the caller.
117 static void FNAME(GetNode)(KernelData * kd, int x, int y, int z, PdfT * pdfs)
120 Assert(kd->PdfsActive != NULL);
121 Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
122 Assert(pdfs != NULL);
126 Assert(x < kd->Dims[0]);
127 Assert(y < kd->Dims[1]);
128 Assert(z < kd->Dims[2]);
130 int oX = kd->Offsets[0];
131 int oY = kd->Offsets[1];
132 int oZ = kd->Offsets[2];
// I() maps (x, y, z, dir) to a linear index via P_INDEX_5 and GlobalDims.
135 #define I(x, y, z, dir) P_INDEX_5(kd->GlobalDims, (x), (y), (z), (dir))
// X() describes the copy for a single direction. Presumably it is expanded
// once per direction by a list macro that is not visible here - confirm.
136 #ifdef PROP_MODEL_PUSH
// Push model: every direction is read from the node itself.
137 #define X(name, idx, idxinv, _x, _y, _z) pdfs[idx] = kd->PdfsActive[I(x + oX, y + oY, z + oZ, idx)];
138 #elif PROP_MODEL_PULL
// Pull model: each direction is read from the node shifted by -(_x, _y, _z).
139 #define X(name, idx, idxinv, _x, _y, _z) pdfs[idx] = kd->PdfsActive[I(x + oX - (_x), y + oY - (_y), z + oZ - (_z), idx)];
// Diagnostic: report the first NaN found and dump all values of the node.
// NOTE(review): the loop bounds use the literal 19 instead of N_D3Q19, and
// this section may be guarded by a preprocessor condition that is not
// visible here - confirm.
147 for (int d = 0; d < 19; ++d) {
148 if (isnan(pdfs[d])) {
149 printf("%d %d %d %d nan! get node\n", x, y, z, d);
151 for (int d2 = 0; d2 < 19; ++d2) {
152 printf("%d: %e\n", d2, pdfs[d2]);
// Copies the values in pdfs into the active PDF array for node (x, y, z).
// Counterpart of GetNode.
//
// kd       kernel data; PdfsActive must be non-NULL and equal to Pdfs[0] or Pdfs[1].
// x, y, z  node coordinates; each is asserted to be below the matching kd->Dims entry.
// pdfs     input array, must not be NULL; it is indexed by direction index.
//          Presumably it must hold N_D3Q19 entries - confirm against the caller.
165 static void FNAME(SetNode)(KernelData * kd, int x, int y, int z, PdfT * pdfs)
168 Assert(kd->PdfsActive != NULL);
169 Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
170 Assert(pdfs != NULL);
175 Assert(x < kd->Dims[0]);
176 Assert(y < kd->Dims[1]);
177 Assert(z < kd->Dims[2]);
179 int oX = kd->Offsets[0];
180 int oY = kd->Offsets[1];
181 int oZ = kd->Offsets[2];
// I() maps (x, y, z, dir) to a linear index via P_INDEX_5 and GlobalDims.
183 #define I(x, y, z, dir) P_INDEX_5(kd->GlobalDims, (x), (y), (z), (dir))
// X() describes the store for a single direction. Presumably it is expanded
// once per direction by a list macro that is not visible here - confirm.
184 #ifdef PROP_MODEL_PUSH
// Push model: every direction is written to the node itself.
185 #define X(name, idx, idxinv, _x, _y, _z) kd->PdfsActive[I(x + oX, y + oY, z + oZ, idx)] = pdfs[idx];
186 #elif PROP_MODEL_PULL
// Pull model: each direction is written to the node shifted by -(_x, _y, _z).
187 #define X(name, idx, idxinv, _x, _y, _z) kd->PdfsActive[I(x + oX - (_x), y + oY - (_y), z + oZ - (_z), idx)] = pdfs[idx];
// Prints the command line options understood by the blocked kernel to stdout.
197 static void ParameterUsage()
199 printf("Kernel parameters:\n");
// -blk sets one blocking factor for all axes, -blk-x/-y/-z address one axis.
200 printf(" [-blk <n>] [-blk-[xyz] <n>]\n");
// Parses the kernel arguments in params->KernelArgs and fills the blocking
// factors.
//
// params  source of the arguments (KernelArgs, nKernelArgs are read).
// blk     output array with three entries; all are reset to 0 first. A value
//         of 0 is later replaced by the full extent in D3Q19BlkInit.
//
// Recognized options, each with one or two leading dashes: blk, blk-x,
// blk-y, blk-z, plus -h, -help and --help. Anything else is reported as
// an unknown kernel parameter.
// NOTE(review): what happens after an error message is printed (exit, return)
// is not visible in this listing.
205 static void ParseParameters(Parameters * params, int * blk)
209 blk[0] = 0; blk[1] = 0; blk[2] = 0;
// ARG_IS compares the current argument with a literal. NEXT_ARG_PRESENT
// checks that a value follows the current option and prints an error if not.
211 #define ARG_IS(param) (!strcmp(params->KernelArgs[i], param))
212 #define NEXT_ARG_PRESENT() \
214 if (i + 1 >= params->nKernelArgs) { \
215 printf("ERROR: argument %s requires a parameter.\n", params->KernelArgs[i]); \
221 for (int i = 0; i < params->nKernelArgs; ++i) {
// Every blocking option consumes the following argument as its value (++i).
222 if (ARG_IS("-blk") || ARG_IS("--blk")) {
// NOTE(review): strtol returns long and is narrowed to int here; no end
// pointer is passed, so conversion errors cannot be detected - confirm that
// this is acceptable. Base 0 accepts decimal, octal and hexadecimal input.
225 int tmp = strtol(params->KernelArgs[++i], NULL, 0);
228 printf("ERROR: blocking parameter must be > 0.\n");
// One value for all three axes.
232 blk[0] = blk[1] = blk[2] = tmp;
234 else if (ARG_IS("-blk-x") || ARG_IS("--blk-x")) {
237 int tmp = strtol(params->KernelArgs[++i], NULL, 0);
240 printf("ERROR: blocking parameter must be > 0.\n");
// Presumably tmp is stored in blk[0]; the assignment is not visible here.
246 else if (ARG_IS("-blk-y") || ARG_IS("--blk-y")) {
249 int tmp = strtol(params->KernelArgs[++i], NULL, 0);
252 printf("ERROR: blocking parameter must be > 0.\n");
// Presumably tmp is stored in blk[1]; the assignment is not visible here.
258 else if (ARG_IS("-blk-z") || ARG_IS("--blk-z")) {
261 int tmp = strtol(params->KernelArgs[++i], NULL, 0);
264 printf("ERROR: blocking parameter must be > 0.\n");
// Presumably tmp is stored in blk[2]; the assignment is not visible here.
270 else if (ARG_IS("-h") || ARG_IS("-help") || ARG_IS("--help")) {
275 printf("ERROR: unknown kernel parameter.\n");
282 #undef NEXT_ARG_PRESENT
// Sets up the kernel data for the blocked D3Q19 kernel.
//
// From the code visible here the routine
//   - allocates a KernelDataEx and works on its member kd,
//   - copies the lattice dimensions and sets GlobalDims to Dims + 2 per axis,
//   - parses the blocking factors (0 means: use the full extent of the axis),
//   - allocates both PDF arrays, touches them block-wise in parallel and then
//     sets every entry to 0.0,
//   - counts and then records source/destination index pairs for PDFs that
//     need bounce-back or periodic treatment,
//   - installs the accessor and kernel function pointers and selects Pdfs[0]
//     as the active array.
//
// ld          lattice description; Dims, Lattice, nObst and Periodic[XYZ] are read.
// kernelData  output handle. NOTE(review): the assignment to *kernelData is
//             not visible in this listing - confirm.
// params      kernel arguments, forwarded to ParseParameters.
288 void FNAME(D3Q19BlkInit)(LatticeDesc * ld, KernelData ** kernelData, Parameters * params)
290 KernelDataEx * kdex = NULL;
291 MemAlloc((void **)&kdex, sizeof(KernelDataEx));
293 kdex->Blk[0] = 0; kdex->Blk[1] = 0; kdex->Blk[2] = 0;
// All generic fields live in the KernelData member of the extended structure.
295 KernelData * kd = &kdex->kd;
298 kd->nObstIndices = ld->nObst;
300 // Adjust the dimensions according to padding, if used (the visible code copies them unchanged).
301 kd->Dims[0] = ld->Dims[0];
302 kd->Dims[1] = ld->Dims[1];
303 kd->Dims[2] = ld->Dims[2];
306 int * lDims = ld->Dims;
307 int * gDims = kd->GlobalDims;
// The allocated lattice has two more cells per axis than the local lattice,
// presumably one ghost layer on each side - confirm.
309 gDims[0] = lDims[0] + 2;
310 gDims[1] = lDims[1] + 2;
311 gDims[2] = lDims[2] + 2;
// NOTE(review): kd->Offsets is read here; where it is written is not visible
// in this listing.
325 int oX = kd->Offsets[0];
326 int oY = kd->Offsets[1];
327 int oZ = kd->Offsets[2];
// gX, gY, gZ (and lX, lY, lZ used further down) are declared in lines not
// shown; presumably they alias gDims[0..2] and lDims[0..2] - confirm.
// NOTE(review): this product is computed in int, before the range check on
// nPdfs further down.
331 int nCells = gX * gY * gZ;
335 ParseParameters(params, blk);
// A blocking factor of 0 means no blocking along that axis.
337 if (blk[0] == 0) blk[0] = gX;
338 if (blk[1] == 0) blk[1] = gY;
339 if (blk[2] == 0) blk[2] = gZ;
341 printf("# blocking x: %3d y: %3d z: %3d\n", blk[0], blk[1], blk[2]);
344 kdex->Blk[0] = blk[0]; kdex->Blk[1] = blk[1]; kdex->Blk[2] = blk[2];
// NOTE(review): the byte count argument has type size_t but is printed with
// %lu; %zu would be the portable conversion specifier.
347 printf("# allocating data for %d LB nodes with padding (%lu bytes = %f MiB for both lattices)\n",
348 nCells, 2 * sizeof(PdfT) * nCells * N_D3Q19,
349 2 * sizeof(PdfT) * nCells * N_D3Q19 / 1024.0 / 1024.0);
// Two full lattices are allocated; PdfsActive later selects one of them.
351 MemAlloc((void **)&pdfs[0], sizeof(PdfT) * nCells * N_D3Q19);
352 MemAlloc((void **)&pdfs[1], sizeof(PdfT) * nCells * N_D3Q19);
354 kd->Pdfs[0] = pdfs[0];
355 kd->Pdfs[1] = pdfs[1];
357 // Initialize PDFs with some (arbitrary) data for correct NUMA placement.
358 // This depends on the chosen data layout.
359 // The structure of the loop should resemble the same "execution layout"
// The three block loops are collapsed into one parallel iteration space.
362 #pragma omp parallel for collapse(3)
365 for (int bZ = 0; bZ < gZ; bZ += blk[2]) {
366 for (int bY = 0; bY < gY; bY += blk[1]) {
367 for (int bX = 0; bX < gX; bX += blk[0]) {
369 // Must do everything here, else it would break collapse.
// Clamp the block end to the lattice extent for partial blocks at the border.
370 int eZ = MIN(bZ + blk[2], gZ);
371 int eY = MIN(bY + blk[1], gY);
372 int eX = MIN(bX + blk[0], gX);
374 for (int z = bZ; z < eZ; ++z) {
375 for (int y = bY; y < eY; ++y) {
376 for (int x = bX; x < eX; ++x) {
378 for (int d = 0; d < N_D3Q19; ++d) {
// The value written here is overwritten with 0.0 by the loop that follows.
379 pdfs[0][P_INDEX_5(gDims, x, y, z, d)] = 1.0;
380 pdfs[1][P_INDEX_5(gDims, x, y, z, d)] = 1.0;
390 // Initialize all PDFs to some standard value.
391 for (int z = 0; z < gZ; ++z) {
392 for (int y = 0; y < gY; ++y) {
393 for (int x = 0; x < gX; ++x) {
394 for (int d = 0; d < N_D3Q19; ++d) {
395 pdfs[0][P_INDEX_5(gDims, x, y, z, d)] = 0.0;
396 pdfs[1][P_INDEX_5(gDims, x, y, z, d)] = 0.0;
403 // Count how many *PDFs* need bounce back treatment.
405 uint64_t nPdfs = ((uint64_t)19) * gX * gY * gZ;
// NOTE(review): (2LU << 31) - 1 evaluates to 2^32 - 1, while the message
// below speaks of 2^31 and the index arrays further down are allocated with
// sizeof(int) elements - confirm the intended limit. The literal 19 is used
// instead of N_D3Q19.
407 if (nPdfs > ((2LU << 31) - 1)) {
408 printf("ERROR: number of PDFs exceed 2^31.\n");
412 // Compiler bug? Incorrect computation of nBounceBackPdfs when using icc 15.0.2.
413 // Works when declaring nBounceBackPdfs as int64_t or using volatile.
414 volatile int nBounceBackPdfs = 0;
415 // int64_t nBounceBackPdfs = 0;
416 int nx, ny, nz, px, py, pz;
418 // TODO: apply blocking?
// First pass: only count the entries so that the index arrays can be sized.
420 for (int z = 0; z < lZ; ++z) {
421 for (int y = 0; y < lY; ++y) {
422 for (int x = 0; x < lX; ++x) {
// Only non-obstacle nodes are inspected.
424 if (ld->Lattice[L_INDEX_4(ld->Dims, x, y, z)] != LAT_CELL_OBSTACLE) {
425 for (int d = 0; d < N_D3Q19; ++d) {
// nx, ny, nz are computed per propagation model in lines not shown here.
426 #ifdef PROP_MODEL_PUSH
430 #elif PROP_MODEL_PULL
435 #error PROP_MODEL_NAME unknown.
437 // Check if neighbor is inside the lattice.
438 // if(nx < 0 || ny < 0 || nz < 0 || nx >= lX || ny >= lY || nz >= lZ) {
// A neighbor that leaves the lattice along an axis with periodicity enabled
// is counted; the second pass creates exactly one entry for each such case.
441 if ((nx < 0 || nx >= lX) && ld->PeriodicX) {
442 ++nBounceBackPdfs; // Compiler bug --> see above
444 else if ((ny < 0 || ny >= lY) && ld->PeriodicY) {
445 ++nBounceBackPdfs; // Compiler bug --> see above
447 else if ((nz < 0 || nz >= lZ) && ld->PeriodicZ) {
448 ++nBounceBackPdfs; // Compiler bug --> see above
// Neighbor outside the lattice without periodicity: the body of this branch
// is not visible in this listing.
450 else if (nx < 0 || ny < 0 || nz < 0 || nx >= lX || ny >= lY || nz >= lZ) {
// Neighbor inside the lattice and an obstacle: counted.
453 else if (ld->Lattice[L_INDEX_4(lDims, nx, ny, nz)] == LAT_CELL_OBSTACLE) {
454 ++nBounceBackPdfs; // Compiler bug --> see above
463 printf("# allocating %d indices for bounce back pdfs (%s for source and destination array)\n", nBounceBackPdfs, ByteToHuman(sizeof(int) * nBounceBackPdfs * 2));
// NOTE(review): + 100 is outside the sizeof(int) product, so it adds 100
// bytes rather than 100 elements - confirm the intent.
465 MemAlloc((void **) & (kd->BounceBackPdfsSrc), sizeof(int) * nBounceBackPdfs + 100);
466 MemAlloc((void **) & (kd->BounceBackPdfsDst), sizeof(int) * nBounceBackPdfs + 100);
468 kd->nBounceBackPdfs = nBounceBackPdfs;
// Second pass: same traversal, now recording the index pairs.
// NOTE(review): the VerifyMsg checks below require the counter to be smaller
// than kd->nBounceBackPdfs, so it is presumably reset to 0 in a line not
// shown here - confirm.
474 for (int z = 0; z < lZ; ++z) {
475 for (int y = 0; y < lY; ++y) {
476 for (int x = 0; x < lX; ++x) {
478 if (ld->Lattice[L_INDEX_4(ld->Dims, x, y, z)] != LAT_CELL_OBSTACLE) {
479 for (int d = 0; d < N_D3Q19; ++d) {
// nx, ny, nz are computed per propagation model in lines not shown here.
480 #ifdef PROP_MODEL_PUSH
484 #elif PROP_MODEL_PULL
489 #error PROP_MODEL_NAME unknown.
// Same periodic cases as in the counting pass, folded into one condition.
492 if ( ((nx < 0 || nx >= lX) && ld->PeriodicX) ||
493 ((ny < 0 || ny >= lY) && ld->PeriodicY) ||
494 ((nz < 0 || nz >= lZ) && ld->PeriodicZ)
496 // Implement periodic boundary in X direction.
498 // If the target node reached through propagation is outside the lattice
499 // the kernel stores it in some buffer around the domain.
500 // From this position the PDF must be transported to the other side of the
503 // Take PDF from outside the domain.
// px, py, pz are computed in lines not shown; presumably they are the
// neighbor coordinates wrapped to the opposite side - confirm.
// If the node at (px, py, pz) is an obstacle the PDF is bounced back instead
// of being transported across the periodic boundary.
535 if (ld->Lattice[L_INDEX_4(lDims, px, py, pz)] == LAT_CELL_OBSTACLE) {
536 #ifdef PROP_MODEL_PUSH
// Push: take direction d from the neighbor slot, store it as the inverse
// direction of the node itself.
537 srcIndex = P_INDEX_5(gDims, nx + oX, ny + oY, nz + oZ, d);
538 dstIndex = P_INDEX_5(gDims, x + oX, y + oY, z + oZ, D3Q19_INV[d]);
539 #elif PROP_MODEL_PULL
// Pull: source and destination swap roles compared to push.
540 srcIndex = P_INDEX_5(gDims, x + oX, y + oY, z + oZ, D3Q19_INV[d]);
541 dstIndex = P_INDEX_5(gDims, nx + oX, ny + oY, nz + oZ, d);
// Otherwise (branch header not visible here): periodic transport, the
// direction d is kept.
546 #ifdef PROP_MODEL_PUSH
547 srcIndex = P_INDEX_5(gDims, nx + oX, ny + oY, nz + oZ, d);
548 // Put it on the other side back into the domain.
549 dstIndex = P_INDEX_5(gDims, px + oX, py + oY, pz + oZ, d);
550 #elif PROP_MODEL_PULL
551 srcIndex = P_INDEX_5(gDims, px + oX, py + oY, pz + oZ, d);
552 // Put it on the other side back into the ghost layer.
553 dstIndex = P_INDEX_5(gDims, nx + oX, ny + oY, nz + oZ, d);
// Guard against writing more entries than were counted in the first pass.
556 VerifyMsg(nBounceBackPdfs < kd->nBounceBackPdfs, "nBBPdfs %d < kd->nBBPdfs %d xyz: %d %d %d d: %d\n", nBounceBackPdfs, kd->nBounceBackPdfs, x, y, z, d);
560 kd->BounceBackPdfsSrc[nBounceBackPdfs] = srcIndex;
561 kd->BounceBackPdfsDst[nBounceBackPdfs] = dstIndex;
// Neighbor outside the lattice without periodicity: the body of this branch
// is not visible in this listing.
566 else if (nx < 0 || ny < 0 || nz < 0 || nx >= lX || ny >= lY || nz >= lZ) {
// Neighbor inside the lattice and an obstacle: regular bounce back.
569 else if (ld->Lattice[L_INDEX_4(lDims, nx, ny, nz)] == LAT_CELL_OBSTACLE) {
570 #ifdef PROP_MODEL_PUSH
571 srcIndex = P_INDEX_5(gDims, nx + oX, ny + oY, nz + oZ, d);
572 dstIndex = P_INDEX_5(gDims, x + oX, y + oY, z + oZ, D3Q19_INV[d]);
573 #elif PROP_MODEL_PULL
574 srcIndex = P_INDEX_5(gDims, x + oX, y + oY, z + oZ, D3Q19_INV[d]);
575 dstIndex = P_INDEX_5(gDims, nx + oX, ny + oY, nz + oZ, d);
576 // srcIndex = P_INDEX_5(gDims, x + oX, y + oY, z + oZ, d);
577 // dstIndex = P_INDEX_5(gDims, nx + oX, ny + oY, nz + oZ, D3Q19_INV[d]);
580 VerifyMsg(nBounceBackPdfs < kd->nBounceBackPdfs, "nBBPdfs %d < kd->nBBPdfs %d xyz: %d %d %d d: %d\n", nBounceBackPdfs, kd->nBounceBackPdfs, x, y, z, d);
582 kd->BounceBackPdfsSrc[nBounceBackPdfs] = srcIndex;
583 kd->BounceBackPdfsDst[nBounceBackPdfs] = dstIndex;
594 // Fill remaining KernelData structures
595 kd->GetNode = FNAME(GetNode);
596 kd->SetNode = FNAME(SetNode);
598 kd->BoundaryConditionsGetPdf = FNAME(BcGetPdf);
599 kd->BoundaryConditionsSetPdf = FNAME(BcSetPdf);
// The blocked kernel is the default; D3Q19Init overrides this pointer.
601 kd->Kernel = FNAME(D3Q19BlkKernel);
// Pdfs[0] is the active array after initialization.
604 kd->PdfsActive = kd->Pdfs[0];
// Releases what D3Q19BlkInit allocated: both PDF arrays, the two bounce-back
// index arrays and finally the kernel data object itself.
//
// ld          not used in the visible code.
// kernelData  handle to the kernel data to be released.
609 void FNAME(D3Q19BlkDeinit)(LatticeDesc * ld, KernelData ** kernelData)
611 MemFree((void **) & ((*kernelData)->Pdfs[0]));
612 MemFree((void **) & ((*kernelData)->Pdfs[1]));
614 MemFree((void **) & ((*kernelData)->BounceBackPdfsSrc));
615 MemFree((void **) & ((*kernelData)->BounceBackPdfsDst));
// NOTE(review): D3Q19BlkInit allocates a KernelDataEx and works on its member
// kd. Releasing through the KernelData handle presumably relies on kd being
// the first member of KernelDataEx - confirm against the type definition.
617 MemFree((void **)kernelData);
622 // Kernels without blocking perform the same initialization/deinitialization as with
623 // blocking, except that a different kernel is called. Hence, no arguments are allowed.
// Initialization for the kernel without blocking. It reports an error if any
// kernel arguments were given, then reuses D3Q19BlkInit with a local
// Parameters object p and finally replaces the kernel function pointer with
// D3Q19Kernel.
//
// ld          lattice description, forwarded to D3Q19BlkInit.
// kernelData  output handle, filled in by D3Q19BlkInit.
// params      only nKernelArgs, nArgs and Args are read in the visible code.
625 void FNAME(D3Q19Init)(LatticeDesc * ld, KernelData ** kernelData, Parameters * params)
// NOTE(review): what follows the two messages (exit, return) is not visible
// in this listing.
629 if (params->nKernelArgs != 0) {
630 printf("ERROR: unknown kernel parameter.\n");
631 printf("This kernels accepts no parameters.\n");
635 // Setup an empty parameters structure.
// p is declared in a line not shown. Presumably its kernel argument fields
// are set to empty in further lines that are not visible - confirm.
636 p.nArgs = params->nArgs;
637 p.Args = params->Args;
641 // Call init routine for blocking kernel and override the
642 // kernel function to be called later on.
643 FNAME(D3Q19BlkInit)(ld, kernelData, &p);
645 (*kernelData)->Kernel = FNAME(D3Q19Kernel);
// Deinitialization for the kernel without blocking. It forwards both
// arguments unchanged to D3Q19BlkDeinit.
651 void FNAME(D3Q19Deinit)(LatticeDesc * ld, KernelData ** kernelData)
653 FNAME(D3Q19BlkDeinit)(ld, kernelData);