version 0.1
[LbmBenchmarkKernelsPublic.git] / src / BenchKernelD3Q19ListCommon.c
1 // --------------------------------------------------------------------------
2 //
3 // Copyright
4 //   Markus Wittmann, 2016-2017
5 //   RRZE, University of Erlangen-Nuremberg, Germany
6 //   markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
7 //
8 //   Viktor Haag, 2016
9 //   LSS, University of Erlangen-Nuremberg, Germany
10 //
11 //  This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
12 //
13 //  LbmBenchKernels is free software: you can redistribute it and/or modify
14 //  it under the terms of the GNU General Public License as published by
15 //  the Free Software Foundation, either version 3 of the License, or
16 //  (at your option) any later version.
17 //
18 //  LbmBenchKernels is distributed in the hope that it will be useful,
19 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
20 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21 //  GNU General Public License for more details.
22 //
23 //  You should have received a copy of the GNU General Public License
24 //  along with LbmBenchKernels.  If not, see <http://www.gnu.org/licenses/>.
25 //
26 // --------------------------------------------------------------------------
27 #include "BenchKernelD3Q19ListCommon.h"
28
29 #include "Memory.h"
30 #include "Vtk.h"
31
32 #include <math.h>
33
34
35 // Forward definition.
36 void FNAME(D3Q19ListKernel)(LatticeDesc * ld, struct KernelData_ * kd, CaseData * cd);
37
38
39
40
41 // -----------------------------------------------------------------------
42 // Functions which are used as callback by the kernel to read or write
43 // PDFs and nodes.
44
45 static void FNAME(BCGetPdf)(KernelData * kd, int x, int y, int z, int dir, PdfT * pdf)
46 {
47         Assert(kd != NULL);
48         Assert(kd->PdfsActive != NULL);
49         Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
50         Assert(pdf != NULL);
51
52         Assert(x >= 0); Assert(y >= 0); Assert(z >= 0);
53         Assert(x < kd->Dims[0]); Assert(y < kd->Dims[1]); Assert(z < kd->Dims[2]);
54         Assert(dir >= 0); Assert(dir < N_D3Q19);
55
56 #if 0
57         *pdf = kd->PdfsActive[P_INDEX_5(KDL(kd), x, y, z, dir)];
58 #else
59 #ifdef PROP_MODEL_PUSH
60         *pdf = kd->PdfsActive[P_INDEX_5(KDL(kd), x, y, z, dir)];
61 #elif PROP_MODEL_PULL
62
63
64         // The relevant PDFs here are the ones, which will get streamed in later
65         // during propagation. So we must return the *remote* PDFs.
66         uint32_t nodeIndex = KDL(kd)->Grid[L_INDEX_4(kd->Dims, x, y, z)];
67
68         if (dir != D3Q19_C) {
69
70                 uint32_t adjListIndex = nodeIndex * N_D3Q19_IDX;
71
72                 *pdf = kd->PdfsActive[KDL(kd)->AdjList[adjListIndex + dir]];
73         }
74         else {
75                 *pdf = kd->PdfsActive[P_INDEX_3(KDL(kd)->nCells, nodeIndex, dir)];
76
77         }
78 #endif
79 #endif
80
81         return;
82 }
83
84 static void FNAME(BCSetPdf)(KernelData * kd, int x, int y, int z, int dir, PdfT pdf)
85 {
86         Assert(kd != NULL);
87         Assert(kd->PdfsActive != NULL);
88         Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
89         Assert(x >= 0); Assert(y >= 0); Assert(z >= 0);
90         Assert(x < kd->Dims[0]); Assert(y < kd->Dims[1]); Assert(z < kd->Dims[2]);
91         Assert(dir >= 0); Assert(dir < N_D3Q19);
92
93 #if 0
94         if (isnan(pdf)) {
95                 printf("ERROR: setting nan %d %d %d %d %s\n", x, y, z, dir, D3Q19_NAMES[dir]);
96                 DEBUG_BREAK_POINT();
97                 exit(1);
98         }
99 #endif
100
101 #if 0
102         kd->PdfsActive[P_INDEX_5(KDL(kd), x, y, z, dir)] = pdf;
103 #else
104 #ifdef PROP_MODEL_PUSH
105         kd->PdfsActive[P_INDEX_5(KDL(kd), x, y, z, dir)] = pdf;
106 #elif PROP_MODEL_PULL
107
108         // The relevant PDFs here are the ones, which will get streamed in later
109         // during propagation. So we must set this *remote* PDFs.
110         uint32_t nodeIndex = KDL(kd)->Grid[L_INDEX_4(kd->Dims, x, y, z)];
111
112         if (dir != D3Q19_C) {
113
114                 uint32_t adjListIndex = nodeIndex * N_D3Q19_IDX;
115
116                 kd->PdfsActive[KDL(kd)->AdjList[adjListIndex + dir]] = pdf;
117         }
118         else {
119                 kd->PdfsActive[P_INDEX_3(KDL(kd)->nCells, nodeIndex, dir)] = pdf;
120
121         }
122 #endif
123 #endif
124
125         return;
126 }
127
128
129 static void GetNode(KernelData * kd, int x, int y, int z, PdfT * pdfs)
130 {
131         Assert(kd != NULL);
132         Assert(kd->PdfsActive != NULL);
133         Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
134         Assert(pdfs != NULL);
135         Assert(x >= 0); Assert(y >= 0); Assert(z >= 0);
136         Assert(x < kd->Dims[0]); Assert(y < kd->Dims[1]); Assert(z < kd->Dims[2]);
137
138         PdfT sum = 0.0;
139
140         // TODO: pull scheme?
141         #define I(x, y, z, dir) P_INDEX_5(KDL(kd), (x), (y), (z), (dir))
142         #define X(name, idx, idxinv, _x, _y, _z)        pdfs[idx] = kd->PdfsActive[I(x, y, z, idx)]; sum += pdfs[idx];
143         D3Q19_LIST
144         #undef X
145         #undef I
146
147         // if (sum < 0.0) {
148         //              printf("%d %d %d negative density \n", x, y, z);
149         //              exit(1);
150         // }
151
152 #if 0
153         for (int d = 0; d < 19; ++d) {
154                 if(isnan(pdfs[d]) || isinf(pdfs[d])) {
155                         printf("%d %d %d %d nan! get node\n", x, y, z, d);
156                                                 for (int d2 = 0; d2 < 19; ++d2) {
157                                                         printf("%d: %e\n", d2, pdfs[d2]);
158                                                 }
159                         exit(1);
160                 }
161         }
162 #endif
163         return;
164 }
165
166
167 static void SetNode(KernelData * kd, int x, int y, int z, PdfT * pdfs)
168 {
169         Assert(kd != NULL);
170         Assert(kd->PdfsActive != NULL);
171         Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
172         Assert(pdfs != NULL);
173
174         Assert(x >= 0); Assert(y >= 0); Assert(z >= 0);
175         Assert(x < kd->Dims[0]); Assert(y < kd->Dims[1]); Assert(z < kd->Dims[2]);
176
177 #if 0
178         for (int d = 0; d < 19; ++d) {
179                 if(isnan(pdfs[d])) {
180                         printf("%d %d %d %d nan! get node\n", x, y, z, d);
181                                                 for (int d2 = 0; d2 < 19; ++d2) {
182                                                         printf("%d: %e\n", d2, pdfs[d2]);
183                                                 }
184                         exit(1);
185                 }
186         }
187 #endif
188
189         // TODO: pull scheme?
190         #define I(x, y, z, dir) P_INDEX_5(KDL(kd), (x), (y), (z), (dir))
191         #define X(name, idx, idxinv, _x, _y, _z)        kd->PdfsActive[I(x, y, z, idx)] = pdfs[idx];
192         D3Q19_LIST
193         #undef X
194         #undef I
195
196         return;
197 }
198
199 static void ParameterUsage()
200 {
201         printf("Kernel parameters:\n");
202         printf("  [-blk <n>] [-blk-[xyz] <n>]\n");
203
204         return;
205 }
206
207 static void ParseParameters(Parameters * params, int * blk)
208 {
209         Assert(blk != NULL);
210
211         blk[0] = 0; blk[1] = 0; blk[2] = 0;
212
213         #define ARG_IS(param)                   (!strcmp(params->KernelArgs[i], param))
214         #define NEXT_ARG_PRESENT() \
215                 do { \
216                         if (i + 1 >= params->nKernelArgs) { \
217                                 printf("ERROR: argument %s requires a parameter.\n", params->KernelArgs[i]); \
218                                 exit(1); \
219                         } \
220                 } while (0)
221
222
223         for (int i = 0; i < params->nKernelArgs; ++i) {
224                 if (ARG_IS("-blk") || ARG_IS("--blk")) {
225                         NEXT_ARG_PRESENT();
226
227                         int tmp = strtol(params->KernelArgs[++i], NULL, 0);
228
229                         if (tmp <= 0) {
230                                 printf("ERROR: blocking parameter must be > 0.\n");
231                                 exit(1);
232                         }
233
234                         blk[0] = blk[1] = blk[2] = tmp;
235                 }
236                 else if (ARG_IS("-blk-x") || ARG_IS("--blk-x")) {
237                         NEXT_ARG_PRESENT();
238
239                         int tmp = strtol(params->KernelArgs[++i], NULL, 0);
240
241                         if (tmp <= 0) {
242                                 printf("ERROR: blocking parameter must be > 0.\n");
243                                 exit(1);
244                         }
245
246                         blk[0] = tmp;
247                 }
248                 else if (ARG_IS("-blk-y") || ARG_IS("--blk-y")) {
249                         NEXT_ARG_PRESENT();
250
251                         int tmp = strtol(params->KernelArgs[++i], NULL, 0);
252
253                         if (tmp <= 0) {
254                                 printf("ERROR: blocking parameter must be > 0.\n");
255                                 exit(1);
256                         }
257
258                         blk[1] = tmp;
259                 }
260                 else if (ARG_IS("-blk-z") || ARG_IS("--blk-z")) {
261                         NEXT_ARG_PRESENT();
262
263                         int tmp = strtol(params->KernelArgs[++i], NULL, 0);
264
265                         if (tmp <= 0) {
266                                 printf("ERROR: blocking parameter must be > 0.\n");
267                                 exit(1);
268                         }
269
270                         blk[2] = tmp;
271                 }
272                 else if (ARG_IS("-h") || ARG_IS("-help") || ARG_IS("--help")) {
273                         ParameterUsage();
274                         exit(1);
275                 }
276                 else {
277                         printf("ERROR: unknown kernel parameter.\n");
278                         ParameterUsage();
279                         exit(1);
280                 }
281         }
282
283         #undef ARG_IS
284         #undef NEXT_ARG_PRESENT
285
286         return;
287 }
288
289 void FNAME(D3Q19ListInit)(LatticeDesc * ld, KernelData ** kernelData, Parameters * params)
290 {
291         KernelData * kd;
292         KernelDataList * kdl;
293         MemAlloc((void **)&kdl, sizeof(KernelDataList));
294
295         kd = (KernelData *)kdl;
296         *kernelData = kd;
297
298 #ifdef DEBUG
299         kd->Pdfs[0] = NULL;
300         kd->Pdfs[1] = NULL;
301         kd->PdfsActive = NULL;
302         kd->DstPdfs = NULL;
303         kd->SrcPdfs = NULL;
304         kd->Dims[0] = -1;
305         kd->Dims[1] = -1;
306         kd->Dims[2] = -1;
307         kd->GlobalDims[0] = -1;
308         kd->GlobalDims[1] = -1;
309         kd->GlobalDims[2] = -1;
310         kd->Offsets[0] = -1;
311         kd->Offsets[1] = -1;
312         kd->Offsets[2] = -1;
313
314         kd->ObstIndices = NULL;
315         kd->nObstIndices = -1;
316         kd->BounceBackPdfsSrc = NULL;
317         kd->BounceBackPdfsDst = NULL;
318         kd->nBounceBackPdfs = -1;
319
320         kdl->AdjList = NULL;
321         kdl->Coords = NULL;
322         kdl->Grid = NULL;
323         kdl->nCells = -1;
324         kdl->nFluid = -1;
325 #endif
326
327         // Ajust the dimensions according to padding, if used.
328         kd->Dims[0] = kd->GlobalDims[0] = ld->Dims[0];
329         kd->Dims[1] = kd->GlobalDims[1] = ld->Dims[1];
330         kd->Dims[2] = kd->GlobalDims[2] = ld->Dims[2];
331
332         int * lDims = ld->Dims;
333
334         int lX = lDims[0];
335         int lY = lDims[1];
336         int lZ = lDims[2];
337
338         int nTotalCells = lX * lY * lZ;
339         int nCells = ld->nFluid; // TODO: + padding
340         int nFluid = ld->nFluid;
341
342         kdl->nCells = nCells;
343         kdl->nFluid = nFluid;
344
345         PdfT * pdfs[2];
346
347         int blk[3] = { 0 };
348
349         ParseParameters(params, blk);
350
351         if (blk[0] == 0) blk[0] = lX;
352         if (blk[1] == 0) blk[1] = lY;
353         if (blk[2] == 0) blk[2] = lZ;
354
355         printf("# blocking               x: %3d y: %3d z: %3d\n", blk[0], blk[1], blk[2]);
356
357         printf("# allocating data for %d fluid LB nodes with padding (%lu bytes = %f MiB for both lattices)\n",
358                 nCells, 2 * sizeof(PdfT) * nCells * N_D3Q19,
359                 2 * sizeof(PdfT) * nCells * N_D3Q19 / 1024.0 / 1024.0);
360
361         MemAlloc((void **)&pdfs[0], sizeof(PdfT) * nCells * N_D3Q19);
362         MemAlloc((void **)&pdfs[1], sizeof(PdfT) * nCells * N_D3Q19);
363
364         kd->Pdfs[0] = pdfs[0];
365         kd->Pdfs[1] = pdfs[1];
366
367         // Initialize PDFs with some (arbitrary) data for correct NUMA placement.
368         // Here we touch only the fluid nodes as this loop is OpenMP parallel and
369         // we want the same scheduling as in the kernel.
370         #ifdef _OPENMP
371                 #pragma omp parallel for
372         #endif
373         for (int i = 0; i < nFluid; ++i) { for(int d = 0; d < N_D3Q19; ++d) {
374                 pdfs[0][P_INDEX_3(nCells, i, d)] = 1.0;
375                 pdfs[1][P_INDEX_3(nCells, i, d)] = 1.0;
376         } }
377
378         // Initialize all PDFs to some standard value.
379         for (int i = 0; i < nFluid; ++i) { for(int d = 0; d < N_D3Q19; ++d) {
380                 pdfs[0][P_INDEX_3(nCells, i, d)] = 0.0;
381                 pdfs[1][P_INDEX_3(nCells, i, d)] = 0.0;
382         } }
383
384         // ----------------------------------------------------------------------
385         // create grid which will hold the index numbers of the fluid nodes
386
387         uint32_t * grid;
388
389         if (MemAlloc((void **)&grid, nTotalCells * sizeof(uint32_t))) {
390                 printf("ERROR: allocating grid for numbering failed: %lu bytes.\n", nTotalCells * sizeof(uint32_t));
391                 exit(1);
392         }
393         kdl->Grid = grid;
394
395         int latticeIndex;
396
397 #ifdef DEBUG
398         for(int z = 0; z < lZ; ++z) {
399                 for(int y = 0; y < lY; ++y) {
400                         for(int x = 0; x < lX; ++x) {
401
402                                 latticeIndex = L_INDEX_4(ld->Dims, x, y, z);
403
404                                 grid[latticeIndex] = ~0;
405                         }
406                 }
407         }
408 #endif
409
410         // ----------------------------------------------------------------------
411         // generate numbering over grid
412
413         uint32_t * coords;
414
415         if (MemAlloc((void **)&coords, nFluid * sizeof(uint32_t) * 3)) {
416                 printf("ERROR: allocating coords array failed: %lu bytes.\n", nFluid * sizeof(uint32_t) * 3);
417                 exit(1);
418         }
419
420         kdl->Coords = coords;
421
422         // Index for the PDF nodes can start at 0 as we distinguish solid and fluid nodes
423         // through the ld->Lattice array.
424         int counter = 0;
425
426         // Blocking is implemented via setup of the adjacency list. The kernel later will
427         // walk through the lattice blocked automatically.
428         for (int bZ = 0; bZ < lZ; bZ += blk[2]) {
429         for (int bY = 0; bY < lY; bY += blk[1]) {
430         for (int bX = 0; bX < lX; bX += blk[0]) {
431
432                 int eX = MIN(bX + blk[0], lX);
433                 int eY = MIN(bY + blk[1], lY);
434                 int eZ = MIN(bZ + blk[2], lZ);
435
436
437                 for (int z = bZ; z < eZ; ++z) {
438                 for (int y = bY; y < eY; ++y) {
439                 for (int x = bX; x < eX; ++x) {
440
441                         latticeIndex = L_INDEX_4(lDims, x, y, z);
442
443                         if (ld->Lattice[latticeIndex] != LAT_CELL_OBSTACLE) {
444                                 grid[latticeIndex] = counter;
445
446                                 coords[C_INDEX_X(counter)] = x;
447                                 coords[C_INDEX_Y(counter)] = y;
448                                 coords[C_INDEX_Z(counter)] = z;
449
450                                 ++counter;
451                         }
452                 } } }
453         } } }
454
455         Verify(counter == nFluid);
456
457         uint32_t * adjList;
458
459         // AdjList only requires 18 instead of 19 entries per node, as
460         // the center PDF needs no addressing.
461         if (MemAlloc((void **)&adjList, nFluid * sizeof(uint32_t) * N_D3Q19_IDX)) {
462                 printf("ERROR: allocating adjList array failed: %lu bytes.\n", nFluid * sizeof(uint32_t) * N_D3Q19_IDX);
463                 exit(1);
464         }
465
466         kdl->AdjList = adjList;
467
468         int x, y, z;
469
470         uint32_t neighborIndex;
471         uint32_t dstIndex;
472
473         int nx, ny, nz, px, py, pz;
474
475         // Loop over all fluid nodes and compute the indices to the neighboring
476         // PDFs for configured data layout (AoS/SoA).
477         // TODO: Parallelized loop to ensure correct NUMA placement.
478         // #ifdef _OPENMP  --> add line continuation
479         //      #pragma omp parallel for default(none)
480         //              shared(nFluid, nCells, coords, D3Q19_INV, D3Q19_X, D3Q19_Y, D3Q19_Z,
481         //                              stderr,
482         //                              lDims, grid, ld, lX, lY, lZ, adjList)
483         //              private(x, y, z, nx, ny, nz, neighborIndex, dstIndex)
484         // #endif
485         for (int index = 0; index < nFluid; ++index) {
486                 x = coords[C_INDEX_X(index)];
487                 y = coords[C_INDEX_Y(index)];
488                 z = coords[C_INDEX_Z(index)];
489
490                 Assert(x >= 0 && x < lX);
491                 Assert(y >= 0 && y < lY);
492                 Assert(z >= 0 && z < lZ);
493
494                 Assert(ld->Lattice[L_INDEX_4(lDims, x, y, z)] != LAT_CELL_OBSTACLE);
495
496                 // Loop over all directions except the center one.
497                 for(int d = 0; d < N_D3Q19 - 1; ++d) {
498                         Assert(d != D3Q19_C);
499 #ifdef PROP_MODEL_PUSH
500                         nx = x + D3Q19_X[d];
501                         ny = y + D3Q19_Y[d];
502                         nz = z + D3Q19_Z[d];
503 #elif PROP_MODEL_PULL
504                         nx = x - D3Q19_X[d];
505                         ny = y - D3Q19_Y[d];
506                         nz = z - D3Q19_Z[d];
507 #else
508                         #error No implementation for this PROP_MODEL_NAME.
509 #endif
510                         // If the neighbor is outside the latcie in X direction and we have a
511                         // periodic boundary then we need to wrap around.
512                         if (    ((nx < 0 || nx >= lX) && ld->PeriodicX) ||
513                                         ((ny < 0 || ny >= lY) && ld->PeriodicY) ||
514                                         ((nz < 0 || nz >= lZ) && ld->PeriodicZ)
515                                                                                                                                 ){
516                                 // x periodic
517
518                                 if (nx < 0) {
519                                         px = lX - 1;
520                                 }
521                                 else if (nx >= lX) {
522                                         px = 0;
523                                 } else {
524                                         px = nx;
525                                 }
526                                 // y periodic
527                                 if (ny < 0) {
528                                         py = lY - 1;
529                                 }
530                                 else if (ny >= lY) {
531                                         py = 0;
532                                 } else {
533                                         py = ny;
534                                 }
535
536                                 // z periodic
537                                 if (nz < 0) {
538                                         pz = lZ - 1;
539                                 }
540                                 else if (nz >= lZ) {
541                                         pz = 0;
542                                 } else {
543                                         pz = nz;
544                                 }
545
546                                 if (ld->Lattice[L_INDEX_4(lDims, px, py, pz)] == LAT_CELL_OBSTACLE) {
547                                         dstIndex = P_INDEX_3(nCells, index, D3Q19_INV[d]);
548                                 }
549                                 else {
550                                         neighborIndex = grid[L_INDEX_4(lDims, px, py, pz)];
551
552                                         AssertMsg(neighborIndex != ~0, "Neighbor has no Index. (%d %d %d) direction %s (%d)\n", px, py, pz, D3Q19_NAMES[d], d);
553
554                                         dstIndex = P_INDEX_3(nCells, neighborIndex, d);
555                                 }
556                         }
557                         else if (nx < 0 || ny < 0 || nz < 0 || nx >= lX || ny >= lY || nz >= lZ) {
558                                 dstIndex = P_INDEX_3(nCells, index, D3Q19_INV[d]);
559                         }
560                         else if (ld->Lattice[L_INDEX_4(lDims, nx, ny, nz)] == LAT_CELL_OBSTACLE) {
561                                 dstIndex = P_INDEX_3(nCells, index, D3Q19_INV[d]);
562                         }
563                         else {
564                                 neighborIndex = grid[L_INDEX_4(lDims, nx, ny, nz)];
565
566                                 Assert(neighborIndex != ~0);
567
568                                 dstIndex = P_INDEX_3(nCells, neighborIndex, d);
569                         }
570
571                         Assert(dstIndex >= 0);
572                         Assert(dstIndex < nCells * N_D3Q19);
573
574                         adjList[index * N_D3Q19_IDX + d] = dstIndex;
575                 }
576         }
577
578
579         // Fill remaining KernelData structures
580         kd->GetNode = GetNode;
581         kd->SetNode = SetNode;
582
583         kd->BoundaryConditionsGetPdf = FNAME(BCGetPdf);
584         kd->BoundaryConditionsSetPdf = FNAME(BCSetPdf);
585
586         kd->Kernel = FNAME(D3Q19ListKernel);
587
588         kd->DstPdfs = NULL;
589         kd->PdfsActive = kd->Pdfs[0];
590
591         return;
592 }
593
594 void FNAME(D3Q19ListDeinit)(LatticeDesc * ld, KernelData ** kernelData)
595 {
596         KernelDataList ** kdl = (KernelDataList **)kernelData;
597
598         MemFree((void **)&((*kernelData)->Pdfs[0]));
599         MemFree((void **)&((*kernelData)->Pdfs[1]));
600
601         MemFree((void **)&((*kdl)->AdjList));
602         MemFree((void **)&((*kdl)->Coords));
603         MemFree((void **)&((*kdl)->Grid));
604
605         MemFree((void **)kernelData);
606
607         return;
608 }
609
This page took 0.091749 seconds and 4 git commands to generate.