add acknowledgements in doc and README
[LbmBenchmarkKernelsPublic.git] / src / BenchKernelD3Q19ListCommon.c
... / ...
CommitLineData
1// --------------------------------------------------------------------------
2//
3// Copyright
4// Markus Wittmann, 2016-2017
5// RRZE, University of Erlangen-Nuremberg, Germany
6// markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
7//
8// Viktor Haag, 2016
9// LSS, University of Erlangen-Nuremberg, Germany
10//
11// This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
12//
13// LbmBenchKernels is free software: you can redistribute it and/or modify
14// it under the terms of the GNU General Public License as published by
15// the Free Software Foundation, either version 3 of the License, or
16// (at your option) any later version.
17//
18// LbmBenchKernels is distributed in the hope that it will be useful,
19// but WITHOUT ANY WARRANTY; without even the implied warranty of
20// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21// GNU General Public License for more details.
22//
23// You should have received a copy of the GNU General Public License
24// along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>.
25//
26// --------------------------------------------------------------------------
27#include "BenchKernelD3Q19ListCommon.h"
28
29#include "Memory.h"
30#include "Vtk.h"
31
32#include <math.h>
33
34
35// Forward definition.
36void FNAME(D3Q19ListKernel)(LatticeDesc * ld, struct KernelData_ * kd, CaseData * cd);
37
38
39
40
41// -----------------------------------------------------------------------
42// Functions which are used as callback by the kernel to read or write
43// PDFs and nodes.
44
45static void FNAME(BCGetPdf)(KernelData * kd, int x, int y, int z, int dir, PdfT * pdf)
46{
47 Assert(kd != NULL);
48 Assert(kd->PdfsActive != NULL);
49 Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
50 Assert(pdf != NULL);
51
52 Assert(x >= 0); Assert(y >= 0); Assert(z >= 0);
53 Assert(x < kd->Dims[0]); Assert(y < kd->Dims[1]); Assert(z < kd->Dims[2]);
54 Assert(dir >= 0); Assert(dir < N_D3Q19);
55
56#if 0
57 *pdf = kd->PdfsActive[P_INDEX_5(KDL(kd), x, y, z, dir)];
58#else
59#ifdef PROP_MODEL_PUSH
60 *pdf = kd->PdfsActive[P_INDEX_5(KDL(kd), x, y, z, dir)];
61#elif PROP_MODEL_PULL
62
63
64 // The relevant PDFs here are the ones, which will get streamed in later
65 // during propagation. So we must return the *remote* PDFs.
66 uint32_t nodeIndex = KDL(kd)->Grid[L_INDEX_4(kd->Dims, x, y, z)];
67
68 if (dir != D3Q19_C) {
69
70 uint32_t adjListIndex = nodeIndex * N_D3Q19_IDX;
71
72 *pdf = kd->PdfsActive[KDL(kd)->AdjList[adjListIndex + dir]];
73 }
74 else {
75 *pdf = kd->PdfsActive[P_INDEX_3(KDL(kd)->nCells, nodeIndex, dir)];
76
77 }
78#endif
79#endif
80
81 return;
82}
83
84static void FNAME(BCSetPdf)(KernelData * kd, int x, int y, int z, int dir, PdfT pdf)
85{
86 Assert(kd != NULL);
87 Assert(kd->PdfsActive != NULL);
88 Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
89 Assert(x >= 0); Assert(y >= 0); Assert(z >= 0);
90 Assert(x < kd->Dims[0]); Assert(y < kd->Dims[1]); Assert(z < kd->Dims[2]);
91 Assert(dir >= 0); Assert(dir < N_D3Q19);
92
93#if 0
94 if (isnan(pdf)) {
95 printf("ERROR: setting nan %d %d %d %d %s\n", x, y, z, dir, D3Q19_NAMES[dir]);
96 DEBUG_BREAK_POINT();
97 exit(1);
98 }
99#endif
100
101#if 0
102 kd->PdfsActive[P_INDEX_5(KDL(kd), x, y, z, dir)] = pdf;
103#else
104#ifdef PROP_MODEL_PUSH
105 kd->PdfsActive[P_INDEX_5(KDL(kd), x, y, z, dir)] = pdf;
106#elif PROP_MODEL_PULL
107
108 // The relevant PDFs here are the ones, which will get streamed in later
109 // during propagation. So we must set this *remote* PDFs.
110 uint32_t nodeIndex = KDL(kd)->Grid[L_INDEX_4(kd->Dims, x, y, z)];
111
112 if (dir != D3Q19_C) {
113
114 uint32_t adjListIndex = nodeIndex * N_D3Q19_IDX;
115
116 kd->PdfsActive[KDL(kd)->AdjList[adjListIndex + dir]] = pdf;
117 }
118 else {
119 kd->PdfsActive[P_INDEX_3(KDL(kd)->nCells, nodeIndex, dir)] = pdf;
120
121 }
122#endif
123#endif
124
125 return;
126}
127
128
129static void GetNode(KernelData * kd, int x, int y, int z, PdfT * pdfs)
130{
131 Assert(kd != NULL);
132 Assert(kd->PdfsActive != NULL);
133 Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
134 Assert(pdfs != NULL);
135 Assert(x >= 0); Assert(y >= 0); Assert(z >= 0);
136 Assert(x < kd->Dims[0]); Assert(y < kd->Dims[1]); Assert(z < kd->Dims[2]);
137
138 PdfT sum = 0.0;
139
140 // TODO: pull scheme?
141 #define I(x, y, z, dir) P_INDEX_5(KDL(kd), (x), (y), (z), (dir))
142 #define X(name, idx, idxinv, _x, _y, _z) pdfs[idx] = kd->PdfsActive[I(x, y, z, idx)]; sum += pdfs[idx];
143 D3Q19_LIST
144 #undef X
145 #undef I
146
147 // if (sum < 0.0) {
148 // printf("%d %d %d negative density \n", x, y, z);
149 // exit(1);
150 // }
151
152#if 0
153 for (int d = 0; d < 19; ++d) {
154 if(isnan(pdfs[d]) || isinf(pdfs[d])) {
155 printf("%d %d %d %d nan! get node\n", x, y, z, d);
156 for (int d2 = 0; d2 < 19; ++d2) {
157 printf("%d: %e\n", d2, pdfs[d2]);
158 }
159 exit(1);
160 }
161 }
162#endif
163 return;
164}
165
166
167static void SetNode(KernelData * kd, int x, int y, int z, PdfT * pdfs)
168{
169 Assert(kd != NULL);
170 Assert(kd->PdfsActive != NULL);
171 Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
172 Assert(pdfs != NULL);
173
174 Assert(x >= 0); Assert(y >= 0); Assert(z >= 0);
175 Assert(x < kd->Dims[0]); Assert(y < kd->Dims[1]); Assert(z < kd->Dims[2]);
176
177#if 0
178 for (int d = 0; d < 19; ++d) {
179 if(isnan(pdfs[d])) {
180 printf("%d %d %d %d nan! get node\n", x, y, z, d);
181 for (int d2 = 0; d2 < 19; ++d2) {
182 printf("%d: %e\n", d2, pdfs[d2]);
183 }
184 exit(1);
185 }
186 }
187#endif
188
189 // TODO: pull scheme?
190 #define I(x, y, z, dir) P_INDEX_5(KDL(kd), (x), (y), (z), (dir))
191 #define X(name, idx, idxinv, _x, _y, _z) kd->PdfsActive[I(x, y, z, idx)] = pdfs[idx];
192 D3Q19_LIST
193 #undef X
194 #undef I
195
196 return;
197}
198
199static void ParameterUsage()
200{
201 printf("Kernel parameters:\n");
202 printf(" [-blk <n>] [-blk-[xyz] <n>]\n");
203
204 return;
205}
206
207static void ParseParameters(Parameters * params, int * blk)
208{
209 Assert(blk != NULL);
210
211 blk[0] = 0; blk[1] = 0; blk[2] = 0;
212
213 #define ARG_IS(param) (!strcmp(params->KernelArgs[i], param))
214 #define NEXT_ARG_PRESENT() \
215 do { \
216 if (i + 1 >= params->nKernelArgs) { \
217 printf("ERROR: argument %s requires a parameter.\n", params->KernelArgs[i]); \
218 exit(1); \
219 } \
220 } while (0)
221
222
223 for (int i = 0; i < params->nKernelArgs; ++i) {
224 if (ARG_IS("-blk") || ARG_IS("--blk")) {
225 NEXT_ARG_PRESENT();
226
227 int tmp = strtol(params->KernelArgs[++i], NULL, 0);
228
229 if (tmp <= 0) {
230 printf("ERROR: blocking parameter must be > 0.\n");
231 exit(1);
232 }
233
234 blk[0] = blk[1] = blk[2] = tmp;
235 }
236 else if (ARG_IS("-blk-x") || ARG_IS("--blk-x")) {
237 NEXT_ARG_PRESENT();
238
239 int tmp = strtol(params->KernelArgs[++i], NULL, 0);
240
241 if (tmp <= 0) {
242 printf("ERROR: blocking parameter must be > 0.\n");
243 exit(1);
244 }
245
246 blk[0] = tmp;
247 }
248 else if (ARG_IS("-blk-y") || ARG_IS("--blk-y")) {
249 NEXT_ARG_PRESENT();
250
251 int tmp = strtol(params->KernelArgs[++i], NULL, 0);
252
253 if (tmp <= 0) {
254 printf("ERROR: blocking parameter must be > 0.\n");
255 exit(1);
256 }
257
258 blk[1] = tmp;
259 }
260 else if (ARG_IS("-blk-z") || ARG_IS("--blk-z")) {
261 NEXT_ARG_PRESENT();
262
263 int tmp = strtol(params->KernelArgs[++i], NULL, 0);
264
265 if (tmp <= 0) {
266 printf("ERROR: blocking parameter must be > 0.\n");
267 exit(1);
268 }
269
270 blk[2] = tmp;
271 }
272 else if (ARG_IS("-h") || ARG_IS("-help") || ARG_IS("--help")) {
273 ParameterUsage();
274 exit(1);
275 }
276 else {
277 printf("ERROR: unknown kernel parameter.\n");
278 ParameterUsage();
279 exit(1);
280 }
281 }
282
283 #undef ARG_IS
284 #undef NEXT_ARG_PRESENT
285
286 return;
287}
288
289void FNAME(D3Q19ListInit)(LatticeDesc * ld, KernelData ** kernelData, Parameters * params)
290{
291 KernelData * kd;
292 KernelDataList * kdl;
293 MemAlloc((void **)&kdl, sizeof(KernelDataList));
294
295 kd = (KernelData *)kdl;
296 *kernelData = kd;
297
298#ifdef DEBUG
299 kd->Pdfs[0] = NULL;
300 kd->Pdfs[1] = NULL;
301 kd->PdfsActive = NULL;
302 kd->DstPdfs = NULL;
303 kd->SrcPdfs = NULL;
304 kd->Dims[0] = -1;
305 kd->Dims[1] = -1;
306 kd->Dims[2] = -1;
307 kd->GlobalDims[0] = -1;
308 kd->GlobalDims[1] = -1;
309 kd->GlobalDims[2] = -1;
310 kd->Offsets[0] = -1;
311 kd->Offsets[1] = -1;
312 kd->Offsets[2] = -1;
313
314 kd->ObstIndices = NULL;
315 kd->nObstIndices = -1;
316 kd->BounceBackPdfsSrc = NULL;
317 kd->BounceBackPdfsDst = NULL;
318 kd->nBounceBackPdfs = -1;
319
320 kdl->AdjList = NULL;
321 kdl->Coords = NULL;
322 kdl->Grid = NULL;
323 kdl->nCells = -1;
324 kdl->nFluid = -1;
325#endif
326
327 // Ajust the dimensions according to padding, if used.
328 kd->Dims[0] = kd->GlobalDims[0] = ld->Dims[0];
329 kd->Dims[1] = kd->GlobalDims[1] = ld->Dims[1];
330 kd->Dims[2] = kd->GlobalDims[2] = ld->Dims[2];
331
332 int * lDims = ld->Dims;
333
334 int lX = lDims[0];
335 int lY = lDims[1];
336 int lZ = lDims[2];
337
338 int nTotalCells = lX * lY * lZ;
339 int nCells = ld->nFluid; // TODO: + padding
340 int nFluid = ld->nFluid;
341
342 kdl->nCells = nCells;
343 kdl->nFluid = nFluid;
344
345 PdfT * pdfs[2];
346
347 int blk[3] = { 0 };
348
349 ParseParameters(params, blk);
350
351 if (blk[0] == 0) blk[0] = lX;
352 if (blk[1] == 0) blk[1] = lY;
353 if (blk[2] == 0) blk[2] = lZ;
354
355 printf("# blocking x: %3d y: %3d z: %3d\n", blk[0], blk[1], blk[2]);
356
357 printf("# allocating data for %d fluid LB nodes with padding (%lu bytes = %f MiB for both lattices)\n",
358 nCells, 2 * sizeof(PdfT) * nCells * N_D3Q19,
359 2 * sizeof(PdfT) * nCells * N_D3Q19 / 1024.0 / 1024.0);
360
361 MemAlloc((void **)&pdfs[0], sizeof(PdfT) * nCells * N_D3Q19);
362 MemAlloc((void **)&pdfs[1], sizeof(PdfT) * nCells * N_D3Q19);
363
364 kd->Pdfs[0] = pdfs[0];
365 kd->Pdfs[1] = pdfs[1];
366
367 // Initialize PDFs with some (arbitrary) data for correct NUMA placement.
368 // Here we touch only the fluid nodes as this loop is OpenMP parallel and
369 // we want the same scheduling as in the kernel.
370 #ifdef _OPENMP
371 #pragma omp parallel for
372 #endif
373 for (int i = 0; i < nFluid; ++i) { for(int d = 0; d < N_D3Q19; ++d) {
374 pdfs[0][P_INDEX_3(nCells, i, d)] = 1.0;
375 pdfs[1][P_INDEX_3(nCells, i, d)] = 1.0;
376 } }
377
378 // Initialize all PDFs to some standard value.
379 for (int i = 0; i < nFluid; ++i) { for(int d = 0; d < N_D3Q19; ++d) {
380 pdfs[0][P_INDEX_3(nCells, i, d)] = 0.0;
381 pdfs[1][P_INDEX_3(nCells, i, d)] = 0.0;
382 } }
383
384 // ----------------------------------------------------------------------
385 // create grid which will hold the index numbers of the fluid nodes
386
387 uint32_t * grid;
388
389 if (MemAlloc((void **)&grid, nTotalCells * sizeof(uint32_t))) {
390 printf("ERROR: allocating grid for numbering failed: %lu bytes.\n", nTotalCells * sizeof(uint32_t));
391 exit(1);
392 }
393 kdl->Grid = grid;
394
395 int latticeIndex;
396
397#ifdef DEBUG
398 for(int z = 0; z < lZ; ++z) {
399 for(int y = 0; y < lY; ++y) {
400 for(int x = 0; x < lX; ++x) {
401
402 latticeIndex = L_INDEX_4(ld->Dims, x, y, z);
403
404 grid[latticeIndex] = ~0;
405 }
406 }
407 }
408#endif
409
410 // ----------------------------------------------------------------------
411 // generate numbering over grid
412
413 uint32_t * coords;
414
415 if (MemAlloc((void **)&coords, nFluid * sizeof(uint32_t) * 3)) {
416 printf("ERROR: allocating coords array failed: %lu bytes.\n", nFluid * sizeof(uint32_t) * 3);
417 exit(1);
418 }
419
420 kdl->Coords = coords;
421
422 // Index for the PDF nodes can start at 0 as we distinguish solid and fluid nodes
423 // through the ld->Lattice array.
424 int counter = 0;
425
426 // Blocking is implemented via setup of the adjacency list. The kernel later will
427 // walk through the lattice blocked automatically.
428 for (int bZ = 0; bZ < lZ; bZ += blk[2]) {
429 for (int bY = 0; bY < lY; bY += blk[1]) {
430 for (int bX = 0; bX < lX; bX += blk[0]) {
431
432 int eX = MIN(bX + blk[0], lX);
433 int eY = MIN(bY + blk[1], lY);
434 int eZ = MIN(bZ + blk[2], lZ);
435
436
437 for (int z = bZ; z < eZ; ++z) {
438 for (int y = bY; y < eY; ++y) {
439 for (int x = bX; x < eX; ++x) {
440
441 latticeIndex = L_INDEX_4(lDims, x, y, z);
442
443 if (ld->Lattice[latticeIndex] != LAT_CELL_OBSTACLE) {
444 grid[latticeIndex] = counter;
445
446 coords[C_INDEX_X(counter)] = x;
447 coords[C_INDEX_Y(counter)] = y;
448 coords[C_INDEX_Z(counter)] = z;
449
450 ++counter;
451 }
452 } } }
453 } } }
454
455 Verify(counter == nFluid);
456
457 uint32_t * adjList;
458
459 // AdjList only requires 18 instead of 19 entries per node, as
460 // the center PDF needs no addressing.
461 if (MemAlloc((void **)&adjList, nFluid * sizeof(uint32_t) * N_D3Q19_IDX)) {
462 printf("ERROR: allocating adjList array failed: %lu bytes.\n", nFluid * sizeof(uint32_t) * N_D3Q19_IDX);
463 exit(1);
464 }
465
466 kdl->AdjList = adjList;
467
468 int x, y, z;
469
470 uint32_t neighborIndex;
471 uint32_t dstIndex;
472
473 int nx, ny, nz, px, py, pz;
474
475 // Loop over all fluid nodes and compute the indices to the neighboring
476 // PDFs for configured data layout (AoS/SoA).
477 // TODO: Parallelized loop to ensure correct NUMA placement.
478 // #ifdef _OPENMP --> add line continuation
479 // #pragma omp parallel for default(none)
480 // shared(nFluid, nCells, coords, D3Q19_INV, D3Q19_X, D3Q19_Y, D3Q19_Z,
481 // stderr,
482 // lDims, grid, ld, lX, lY, lZ, adjList)
483 // private(x, y, z, nx, ny, nz, neighborIndex, dstIndex)
484 // #endif
485 for (int index = 0; index < nFluid; ++index) {
486 x = coords[C_INDEX_X(index)];
487 y = coords[C_INDEX_Y(index)];
488 z = coords[C_INDEX_Z(index)];
489
490 Assert(x >= 0 && x < lX);
491 Assert(y >= 0 && y < lY);
492 Assert(z >= 0 && z < lZ);
493
494 Assert(ld->Lattice[L_INDEX_4(lDims, x, y, z)] != LAT_CELL_OBSTACLE);
495
496 // Loop over all directions except the center one.
497 for(int d = 0; d < N_D3Q19 - 1; ++d) {
498 Assert(d != D3Q19_C);
499#ifdef PROP_MODEL_PUSH
500 nx = x + D3Q19_X[d];
501 ny = y + D3Q19_Y[d];
502 nz = z + D3Q19_Z[d];
503#elif PROP_MODEL_PULL
504 nx = x - D3Q19_X[d];
505 ny = y - D3Q19_Y[d];
506 nz = z - D3Q19_Z[d];
507#else
508 #error No implementation for this PROP_MODEL_NAME.
509#endif
510 // If the neighbor is outside the latcie in X direction and we have a
511 // periodic boundary then we need to wrap around.
512 if ( ((nx < 0 || nx >= lX) && ld->PeriodicX) ||
513 ((ny < 0 || ny >= lY) && ld->PeriodicY) ||
514 ((nz < 0 || nz >= lZ) && ld->PeriodicZ)
515 ){
516 // x periodic
517
518 if (nx < 0) {
519 px = lX - 1;
520 }
521 else if (nx >= lX) {
522 px = 0;
523 } else {
524 px = nx;
525 }
526 // y periodic
527 if (ny < 0) {
528 py = lY - 1;
529 }
530 else if (ny >= lY) {
531 py = 0;
532 } else {
533 py = ny;
534 }
535
536 // z periodic
537 if (nz < 0) {
538 pz = lZ - 1;
539 }
540 else if (nz >= lZ) {
541 pz = 0;
542 } else {
543 pz = nz;
544 }
545
546 if (ld->Lattice[L_INDEX_4(lDims, px, py, pz)] == LAT_CELL_OBSTACLE) {
547 dstIndex = P_INDEX_3(nCells, index, D3Q19_INV[d]);
548 }
549 else {
550 neighborIndex = grid[L_INDEX_4(lDims, px, py, pz)];
551
552 AssertMsg(neighborIndex != ~0, "Neighbor has no Index. (%d %d %d) direction %s (%d)\n", px, py, pz, D3Q19_NAMES[d], d);
553
554 dstIndex = P_INDEX_3(nCells, neighborIndex, d);
555 }
556 }
557 else if (nx < 0 || ny < 0 || nz < 0 || nx >= lX || ny >= lY || nz >= lZ) {
558 dstIndex = P_INDEX_3(nCells, index, D3Q19_INV[d]);
559 }
560 else if (ld->Lattice[L_INDEX_4(lDims, nx, ny, nz)] == LAT_CELL_OBSTACLE) {
561 dstIndex = P_INDEX_3(nCells, index, D3Q19_INV[d]);
562 }
563 else {
564 neighborIndex = grid[L_INDEX_4(lDims, nx, ny, nz)];
565
566 Assert(neighborIndex != ~0);
567
568 dstIndex = P_INDEX_3(nCells, neighborIndex, d);
569 }
570
571 Assert(dstIndex >= 0);
572 Assert(dstIndex < nCells * N_D3Q19);
573
574 adjList[index * N_D3Q19_IDX + d] = dstIndex;
575 }
576 }
577
578
579 // Fill remaining KernelData structures
580 kd->GetNode = GetNode;
581 kd->SetNode = SetNode;
582
583 kd->BoundaryConditionsGetPdf = FNAME(BCGetPdf);
584 kd->BoundaryConditionsSetPdf = FNAME(BCSetPdf);
585
586 kd->Kernel = FNAME(D3Q19ListKernel);
587
588 kd->DstPdfs = NULL;
589 kd->PdfsActive = kd->Pdfs[0];
590
591 return;
592}
593
594void FNAME(D3Q19ListDeinit)(LatticeDesc * ld, KernelData ** kernelData)
595{
596 KernelDataList ** kdl = (KernelDataList **)kernelData;
597
598 MemFree((void **)&((*kernelData)->Pdfs[0]));
599 MemFree((void **)&((*kernelData)->Pdfs[1]));
600
601 MemFree((void **)&((*kdl)->AdjList));
602 MemFree((void **)&((*kdl)->Coords));
603 MemFree((void **)&((*kdl)->Grid));
604
605 MemFree((void **)kernelData);
606
607 return;
608}
609
This page took 0.172438 seconds and 5 git commands to generate.