version 0.1
[LbmBenchmarkKernelsPublic.git] / src / BenchKernelD3Q19ListAaCommon.c
CommitLineData
10988083
MW
1// --------------------------------------------------------------------------
2//
3// Copyright
4// Markus Wittmann, 2016-2017
5// RRZE, University of Erlangen-Nuremberg, Germany
6// markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
7//
8// Viktor Haag, 2016
9// LSS, University of Erlangen-Nuremberg, Germany
10//
11// This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
12//
13// LbmBenchKernels is free software: you can redistribute it and/or modify
14// it under the terms of the GNU General Public License as published by
15// the Free Software Foundation, either version 3 of the License, or
16// (at your option) any later version.
17//
18// LbmBenchKernels is distributed in the hope that it will be useful,
19// but WITHOUT ANY WARRANTY; without even the implied warranty of
20// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21// GNU General Public License for more details.
22//
23// You should have received a copy of the GNU General Public License
24// along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>.
25//
26// --------------------------------------------------------------------------
27#include "BenchKernelD3Q19ListAaCommon.h"
28
29#include "Memory.h"
30#include "Vtk.h"
31
32#include <math.h>
33
34
35// Forward definition.
36void FNAME(D3Q19ListAaKernel)(LatticeDesc * ld, struct KernelData_ * kd, CaseData * cd);
37
38
39
40
41// -----------------------------------------------------------------------
42// Functions which are used as callback by the kernel to read or write
43// PDFs and nodes.
44
45static void FNAME(BCGetPdf)(KernelData * kd, int x, int y, int z, int dir, PdfT * pdf)
46{
47 Assert(kd != NULL);
48 Assert(kd->PdfsActive != NULL);
49 Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
50 Assert(pdf != NULL);
51
52 Assert(x >= 0); Assert(y >= 0); Assert(z >= 0);
53 Assert(x < kd->Dims[0]); Assert(y < kd->Dims[1]); Assert(z < kd->Dims[2]);
54 Assert(dir >= 0); Assert(dir < N_D3Q19);
55
56 KernelDataList * kdl = (KernelDataList *)kd;
57
58 if (kdl->Iteration % 2 == 0) {
59 // Pdfs are stored inverse, local PDFs are located in remote nodes
60
61 uint32_t nodeIndex = KDL(kd)->Grid[L_INDEX_4(kd->Dims, x, y, z)];
62
63 if (dir != D3Q19_C) {
64 uint32_t adjListIndex = nodeIndex * N_D3Q19_IDX;
65
66 *pdf = kd->PdfsActive[KDL(kd)->AdjList[adjListIndex + D3Q19_INV[dir]]];
67 }
68 else {
69 *pdf = kd->PdfsActive[P_INDEX_3(KDL(kd)->nCells, nodeIndex, dir)];
70 }
71
72 }
73 else {
74 *pdf = kd->PdfsActive[P_INDEX_5(KDL(kd), x, y, z, dir)];
75 }
76
77
78 return;
79}
80
81static void FNAME(BCSetPdf)(KernelData * kd, int x, int y, int z, int dir, PdfT pdf)
82{
83 Assert(kd != NULL);
84 Assert(kd->PdfsActive != NULL);
85 Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
86 Assert(x >= 0); Assert(y >= 0); Assert(z >= 0);
87 Assert(x < kd->Dims[0]); Assert(y < kd->Dims[1]); Assert(z < kd->Dims[2]);
88 Assert(dir >= 0); Assert(dir < N_D3Q19);
89
90 if (isnan(pdf)) {
91 printf("ERROR: setting nan %d %d %d %d %s\n", x, y, z, dir, D3Q19_NAMES[dir]);
92 DEBUG_BREAK_POINT();
93 exit(1);
94 }
95
96 KernelDataList * kdl = (KernelDataList *)kd;
97
98 if (kdl->Iteration % 2 == 0) {
99 // Pdfs are stored inverse, local PDFs are located in remote nodes
100
101 uint32_t nodeIndex = KDL(kd)->Grid[L_INDEX_4(kd->Dims, x, y, z)];
102
103 if (dir != D3Q19_C) {
104 uint32_t adjListIndex = nodeIndex * N_D3Q19_IDX;
105
106 kd->PdfsActive[KDL(kd)->AdjList[adjListIndex + D3Q19_INV[dir]]] = pdf;
107 }
108 else {
109 kd->PdfsActive[P_INDEX_3(KDL(kd)->nCells, nodeIndex, dir)] = pdf;
110 }
111
112 }
113 else {
114 kd->PdfsActive[P_INDEX_5(KDL(kd), x, y, z, dir)] = pdf;
115 }
116
117 return;
118}
119
120
121static void GetNode(KernelData * kd, int x, int y, int z, PdfT * pdfs)
122{
123 Assert(kd != NULL);
124 Assert(kd->PdfsActive != NULL);
125 Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
126 Assert(pdfs != NULL);
127 Assert(x >= 0); Assert(y >= 0); Assert(z >= 0);
128 Assert(x < kd->Dims[0]); Assert(y < kd->Dims[1]); Assert(z < kd->Dims[2]);
129
130 KernelDataList * kdl = (KernelDataList *)kd;
131
132 if(kdl->Iteration % 2 == 0){
133
134 uint32_t nodeIndex = kdl->Grid[L_INDEX_4(kdl->kd.Dims, x, y, z)];
135 uint32_t adjListIndex = nodeIndex * N_D3Q19_IDX;
136
137 // Load PDFs of local cell: pdf_N = src[adjList[adjListIndex + D3Q19_S]]; ...
138 pdfs[D3Q19_C] = kd->PdfsActive[P_INDEX_3(kdl->nCells, nodeIndex, D3Q19_C)];
139
140 #define X(name, idx, idxinv, _x, _y, _z) pdfs[idx] = kd->PdfsActive[kdl->AdjList[adjListIndex + idxinv]];
141 D3Q19_LIST_WO_C
142 #undef X
143
144 } else {
145
146 #define I(x, y, z, dir) P_INDEX_5(KDL(kd), (x), (y), (z), (dir))
147 #define X(name, idx, idxinv, _x, _y, _z) pdfs[idx] = kd->PdfsActive[I(x, y, z, idx)];
148 D3Q19_LIST
149 #undef X
150 #undef I
151
152 }
153
154#if 0
155 // Detect NaNs
156 for (int d = 0; d < 19; ++d) {
157 if(isnan(pdfs[d]) || isinf(pdfs[d])) {
158 printf("%d %d %d %d nan! get node\n", x, y, z, d);
159 for (int d2 = 0; d2 < 19; ++d2) {
160 printf("%d: %e\n", d2, pdfs[d2]);
161 }
162 exit(1);
163 }
164 }
165#endif
166
167 return;
168}
169
170
171static void SetNode(KernelData * kd, int x, int y, int z, PdfT * pdfs)
172{
173 Assert(kd != NULL);
174 Assert(kd->PdfsActive != NULL);
175 Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
176 Assert(pdfs != NULL);
177
178 Assert(x >= 0); Assert(y >= 0); Assert(z >= 0);
179 Assert(x < kd->Dims[0]); Assert(y < kd->Dims[1]); Assert(z < kd->Dims[2]);
180
181#if 0
182 // Detect NaNs
183 for (int d = 0; d < 19; ++d) {
184 if(isnan(pdfs[d])) {
185 printf("%d %d %d %d nan! get node\n", x, y, z, d);
186 for (int d2 = 0; d2 < 19; ++d2) {
187 printf("%d: %e\n", d2, pdfs[d2]);
188 }
189 exit(1);
190 }
191 }
192#endif
193
194 KernelDataList * kdl = (KernelDataList *)kd;
195
196 if(kdl->Iteration % 2 == 0){
197
198 uint32_t nodeIndex = kdl->Grid[L_INDEX_4(kdl->kd.Dims, x, y, z)];
199 uint32_t adjListIndex = nodeIndex * N_D3Q19_IDX;
200
201 // Load PDFs of local cell: pdf_N = src[adjList[adjListIndex + D3Q19_S]]; ...
202 kd->PdfsActive[P_INDEX_3(kdl->nCells, nodeIndex, D3Q19_C)] = pdfs[D3Q19_C];
203
204 #define X(name, idx, idxinv, _x, _y, _z) kd->PdfsActive[kdl->AdjList[adjListIndex + idxinv]] = pdfs[idx];
205 D3Q19_LIST_WO_C
206 #undef X
207
208 } else {
209
210 #define I(x, y, z, dir) P_INDEX_5(KDL(kd), (x), (y), (z), (dir))
211 #define X(name, idx, idxinv, _x, _y, _z) kd->PdfsActive[I(x, y, z, idx)] = pdfs[idx];
212 D3Q19_LIST
213 #undef X
214 #undef I
215
216 }
217
218 return;
219}
220
221static void ParameterUsage()
222{
223 printf("Kernel parameters:\n");
224 printf(" [-blk <n>] [-blk-[xyz] <n>]\n");
225
226 return;
227}
228
229static void ParseParameters(Parameters * params, int * blk)
230{
231 Assert(blk != NULL);
232
233 blk[0] = 0; blk[1] = 0; blk[2] = 0;
234
235 #define ARG_IS(param) (!strcmp(params->KernelArgs[i], param))
236 #define NEXT_ARG_PRESENT() \
237 do { \
238 if (i + 1 >= params->nKernelArgs) { \
239 printf("ERROR: argument %s requires a parameter.\n", params->KernelArgs[i]); \
240 exit(1); \
241 } \
242 } while (0)
243
244
245 for (int i = 0; i < params->nKernelArgs; ++i) {
246 if (ARG_IS("-blk") || ARG_IS("--blk")) {
247 NEXT_ARG_PRESENT();
248
249 int tmp = strtol(params->KernelArgs[++i], NULL, 0);
250
251 if (tmp <= 0) {
252 printf("ERROR: blocking parameter must be > 0.\n");
253 exit(1);
254 }
255
256 blk[0] = blk[1] = blk[2] = tmp;
257 }
258 else if (ARG_IS("-blk-x") || ARG_IS("--blk-x")) {
259 NEXT_ARG_PRESENT();
260
261 int tmp = strtol(params->KernelArgs[++i], NULL, 0);
262
263 if (tmp <= 0) {
264 printf("ERROR: blocking parameter must be > 0.\n");
265 exit(1);
266 }
267
268 blk[0] = tmp;
269 }
270 else if (ARG_IS("-blk-y") || ARG_IS("--blk-y")) {
271 NEXT_ARG_PRESENT();
272
273 int tmp = strtol(params->KernelArgs[++i], NULL, 0);
274
275 if (tmp <= 0) {
276 printf("ERROR: blocking parameter must be > 0.\n");
277 exit(1);
278 }
279
280 blk[1] = tmp;
281 }
282 else if (ARG_IS("-blk-z") || ARG_IS("--blk-z")) {
283 NEXT_ARG_PRESENT();
284
285 int tmp = strtol(params->KernelArgs[++i], NULL, 0);
286
287 if (tmp <= 0) {
288 printf("ERROR: blocking parameter must be > 0.\n");
289 exit(1);
290 }
291
292 blk[2] = tmp;
293 }
294 else if (ARG_IS("-h") || ARG_IS("-help") || ARG_IS("--help")) {
295 ParameterUsage();
296 exit(1);
297 }
298 else {
299 printf("ERROR: unknown kernel parameter.\n");
300 ParameterUsage();
301 exit(1);
302 }
303 }
304
305 #undef ARG_IS
306 #undef NEXT_ARG_PRESENT
307
308 return;
309}
310
311void FNAME(D3Q19ListAaInit)(LatticeDesc * ld, KernelData ** kernelData, Parameters * params)
312{
313 KernelData * kd;
314 KernelDataList * kdl;
315 MemAlloc((void **)&kdl, sizeof(KernelDataList));
316
317 kd = (KernelData *)kdl;
318 *kernelData = kd;
319
320#ifdef DEBUG
321 kd->Pdfs[0] = NULL;
322 kd->Pdfs[1] = NULL;
323 kd->PdfsActive = NULL;
324 kd->DstPdfs = NULL;
325 kd->SrcPdfs = NULL;
326 kd->Dims[0] = -1;
327 kd->Dims[1] = -1;
328 kd->Dims[2] = -1;
329 kd->GlobalDims[0] = -1;
330 kd->GlobalDims[1] = -1;
331 kd->GlobalDims[2] = -1;
332 kd->Offsets[0] = -1;
333 kd->Offsets[1] = -1;
334 kd->Offsets[2] = -1;
335
336 kd->ObstIndices = NULL;
337 kd->nObstIndices = -1;
338 kd->BounceBackPdfsSrc = NULL;
339 kd->BounceBackPdfsDst = NULL;
340 kd->nBounceBackPdfs = -1;
341
342 kdl->AdjList = NULL;
343 kdl->Coords = NULL;
344 kdl->Grid = NULL;
345 kdl->nCells = -1;
346 kdl->nFluid = -1;
347#endif
348
349 // Ajust the dimensions according to padding, if used.
350 kd->Dims[0] = kd->GlobalDims[0] = ld->Dims[0];
351 kd->Dims[1] = kd->GlobalDims[1] = ld->Dims[1];
352 kd->Dims[2] = kd->GlobalDims[2] = ld->Dims[2];
353
354 int * lDims = ld->Dims;
355
356 int lX = lDims[0];
357 int lY = lDims[1];
358 int lZ = lDims[2];
359
360 int nTotalCells = lX * lY * lZ;
361 int nCells = ld->nFluid; // TODO: + padding
362 int nFluid = ld->nFluid;
363
364 kdl->nCells = nCells;
365 kdl->nFluid = nFluid;
366
367 PdfT * pdfs[2];
368
369 int blk[3] = { 0 };
370
371 ParseParameters(params, blk);
372
373 if (blk[0] == 0) blk[0] = lX;
374 if (blk[1] == 0) blk[1] = lY;
375 if (blk[2] == 0) blk[2] = lZ;
376
377 printf("# blocking x: %3d y: %3d z: %3d\n", blk[0], blk[1], blk[2]);
378
379 printf("# allocating data for %d fluid LB nodes with padding (%lu bytes = %f MiB for both lattices)\n",
380 nCells, 2 * sizeof(PdfT) * nCells * N_D3Q19,
381 2 * sizeof(PdfT) * nCells * N_D3Q19 / 1024.0 / 1024.0);
382
383 MemAlloc((void **)&pdfs[0], sizeof(PdfT) * nCells * N_D3Q19);
384
385 kd->Pdfs[0] = pdfs[0];
386
387 // Initialize PDFs with some (arbitrary) data for correct NUMA placement.
388 // Here we touch only the fluid nodes as this loop is OpenMP parallel and
389 // we want the same scheduling as in the kernel.
390 #ifdef _OPENMP
391 #pragma omp parallel for
392 #endif
393 for (int i = 0; i < nFluid; ++i) { for(int d = 0; d < N_D3Q19; ++d) {
394 pdfs[0][P_INDEX_3(nCells, i, d)] = 1.0;
395 } }
396
397 // Initialize all PDFs to some standard value.
398 for (int i = 0; i < nFluid; ++i) { for(int d = 0; d < N_D3Q19; ++d) {
399 pdfs[0][P_INDEX_3(nCells, i, d)] = 0.0;
400 } }
401
402 // ----------------------------------------------------------------------
403 // create grid which will hold the index numbers of the fluid nodes
404
405 uint32_t * grid;
406
407 if (MemAlloc((void **)&grid, nTotalCells * sizeof(uint32_t))) {
408 printf("ERROR: allocating grid for numbering failed: %lu bytes.\n", nTotalCells * sizeof(uint32_t));
409 exit(1);
410 }
411 kdl->Grid = grid;
412
413 int latticeIndex;
414
415#ifdef DEBUG
416 for(int z = 0; z < lZ; ++z) {
417 for(int y = 0; y < lY; ++y) {
418 for(int x = 0; x < lX; ++x) {
419
420 latticeIndex = L_INDEX_4(ld->Dims, x, y, z);
421
422 grid[latticeIndex] = ~0;
423 }
424 }
425 }
426#endif
427
428 // ----------------------------------------------------------------------
429 // generate numbering over grid
430
431 uint32_t * coords;
432
433 if (MemAlloc((void **)&coords, nFluid * sizeof(uint32_t) * 3)) {
434 printf("ERROR: allocating coords array failed: %lu bytes.\n", nFluid * sizeof(uint32_t) * 3);
435 exit(1);
436 }
437
438 kdl->Coords = coords;
439
440 // Index for the PDF nodes can start at 0 as we distinguish solid and fluid nodes
441 // through the ld->Lattice array.
442 int counter = 0;
443
444 // Blocking is implemented via setup of the adjacency list. The kernel later will
445 // walk through the lattice blocked automatically.
446 for (int bZ = 0; bZ < lZ; bZ += blk[2]) {
447 for (int bY = 0; bY < lY; bY += blk[1]) {
448 for (int bX = 0; bX < lX; bX += blk[0]) {
449
450 int eX = MIN(bX + blk[0], lX);
451 int eY = MIN(bY + blk[1], lY);
452 int eZ = MIN(bZ + blk[2], lZ);
453
454
455 for (int z = bZ; z < eZ; ++z) {
456 for (int y = bY; y < eY; ++y) {
457 for (int x = bX; x < eX; ++x) {
458
459 latticeIndex = L_INDEX_4(lDims, x, y, z);
460
461 if (ld->Lattice[latticeIndex] != LAT_CELL_OBSTACLE) {
462 grid[latticeIndex] = counter;
463
464 coords[C_INDEX_X(counter)] = x;
465 coords[C_INDEX_Y(counter)] = y;
466 coords[C_INDEX_Z(counter)] = z;
467
468 ++counter;
469 }
470 } } }
471 } } }
472
473 Verify(counter == nFluid);
474
475 uint32_t * adjList;
476
477 // AdjList only requires 18 instead of 19 entries per node, as
478 // the center PDF needs no addressing.
479 if (MemAlloc((void **)&adjList, nFluid * sizeof(uint32_t) * N_D3Q19_IDX)) {
480 printf("ERROR: allocating adjList array failed: %lu bytes.\n", nFluid * sizeof(uint32_t) * N_D3Q19_IDX);
481 exit(1);
482 }
483
484 kdl->AdjList = adjList;
485
486 int x, y, z;
487
488 uint32_t neighborIndex;
489 uint32_t dstIndex;
490
491 int nx, ny, nz, px, py, pz;
492
493 // Loop over all fluid nodes and compute the indices to the neighboring
494 // PDFs for configure data layout (AoS/SoA).
495 // TODO: Parallelized loop to ensure correct NUMA placement.
496 // #ifdef _OPENMP --> add line continuation
497 // #pragma omp parallel for default(none)
498 // shared(nFluid, nCells, coords, D3Q19_INV, D3Q19_X, D3Q19_Y, D3Q19_Z,
499 // stderr,
500 // lDims, grid, ld, lX, lY, lZ, adjList)
501 // private(x, y, z, nx, ny, nz, neighborIndex, dstIndex)
502 // #endif
503 for (int index = 0; index < nFluid; ++index) {
504 x = coords[C_INDEX_X(index)];
505 y = coords[C_INDEX_Y(index)];
506 z = coords[C_INDEX_Z(index)];
507
508 Assert(x >= 0 && x < lX);
509 Assert(y >= 0 && y < lY);
510 Assert(z >= 0 && z < lZ);
511
512 Assert(ld->Lattice[L_INDEX_4(lDims, x, y, z)] != LAT_CELL_OBSTACLE);
513
514 // Loop over all directions except the center one.
515 for(int d = 0; d < N_D3Q19 - 1; ++d) {
516 Assert(d != D3Q19_C);
517
518#ifdef PROP_MODEL_PUSH
519 nx = x + D3Q19_X[d];
520 ny = y + D3Q19_Y[d];
521 nz = z + D3Q19_Z[d];
522
523#elif PROP_MODEL_PULL
524 nx = x - D3Q19_X[d];
525 ny = y - D3Q19_Y[d];
526 nz = z - D3Q19_Z[d];
527#else
528 #error No implementation for this PROP_MODEL_NAME.
529#endif
530 // If the neighbor is outside the latcie in X direction and we have a
531 // periodic boundary then we need to wrap around.
532 if ( ((nx < 0 || nx >= lX) && ld->PeriodicX) ||
533 ((ny < 0 || ny >= lY) && ld->PeriodicY) ||
534 ((nz < 0 || nz >= lZ) && ld->PeriodicZ)
535 ){
536 // x periodic
537
538 if (nx < 0) {
539 px = lX - 1;
540 }
541 else if (nx >= lX) {
542 px = 0;
543 } else {
544 px = nx;
545 }
546 // y periodic
547 if (ny < 0) {
548 py = lY - 1;
549 }
550 else if (ny >= lY) {
551 py = 0;
552 } else {
553 py = ny;
554 }
555
556 // z periodic
557 if (nz < 0) {
558 pz = lZ - 1;
559 }
560 else if (nz >= lZ) {
561 pz = 0;
562 } else {
563 pz = nz;
564 }
565
566 if (ld->Lattice[L_INDEX_4(lDims, px, py, pz)] == LAT_CELL_OBSTACLE) {
567 dstIndex = P_INDEX_3(nCells, index, D3Q19_INV[d]);
568 }
569 else {
570 neighborIndex = grid[L_INDEX_4(lDims, px, py, pz)];
571
572 AssertMsg(neighborIndex != ~0, "Neighbor has no Index. (%d %d %d) direction %s (%d)\n", px, py, pz, D3Q19_NAMES[d], d);
573
574 dstIndex = P_INDEX_3(nCells, neighborIndex, d);
575 }
576 }
577 else if (nx < 0 || ny < 0 || nz < 0 || nx >= lX || ny >= lY || nz >= lZ) {
578 dstIndex = P_INDEX_3(nCells, index, D3Q19_INV[d]);
579 }
580 else if (ld->Lattice[L_INDEX_4(lDims, nx, ny, nz)] == LAT_CELL_OBSTACLE) {
581 dstIndex = P_INDEX_3(nCells, index, D3Q19_INV[d]);
582 }
583 else {
584 neighborIndex = grid[L_INDEX_4(lDims, nx, ny, nz)];
585
586 Assert(neighborIndex != ~0);
587
588 dstIndex = P_INDEX_3(nCells, neighborIndex, d);
589 }
590
591 Assert(dstIndex >= 0);
592 Assert(dstIndex < nCells * N_D3Q19);
593
594 adjList[index * N_D3Q19_IDX + d] = dstIndex;
595 }
596 }
597
598
599 // Fill remaining KernelData structures
600 kd->GetNode = GetNode;
601 kd->SetNode = SetNode;
602
603 kd->BoundaryConditionsGetPdf = FNAME(BCGetPdf);
604 kd->BoundaryConditionsSetPdf = FNAME(BCSetPdf);
605
606 kd->Kernel = FNAME(D3Q19ListAaKernel);
607
608 kd->DstPdfs = NULL;
609 kd->PdfsActive = kd->Pdfs[0];
610
611 return;
612}
613
614void FNAME(D3Q19ListAaDeinit)(LatticeDesc * ld, KernelData ** kernelData)
615{
616 KernelDataList ** kdl = (KernelDataList **)kernelData;
617
618 MemFree((void **)&((*kernelData)->Pdfs[0]));
619
620 MemFree((void **)&((*kdl)->AdjList));
621 MemFree((void **)&((*kdl)->Coords));
622 MemFree((void **)&((*kdl)->Grid));
623
624 MemFree((void **)kernelData);
625
626 return;
627}
628
This page took 0.100151 seconds and 5 git commands to generate.