version 0.1
[LbmBenchmarkKernelsPublic.git] / src / BenchKernelD3Q19ListPullSplitNtCommon.c
CommitLineData
10988083
MW
1// --------------------------------------------------------------------------
2//
3// Copyright
4// Markus Wittmann, 2016-2017
5// RRZE, University of Erlangen-Nuremberg, Germany
6// markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
7//
8// Viktor Haag, 2016
9// LSS, University of Erlangen-Nuremberg, Germany
10//
11// This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
12//
13// LbmBenchKernels is free software: you can redistribute it and/or modify
14// it under the terms of the GNU General Public License as published by
15// the Free Software Foundation, either version 3 of the License, or
16// (at your option) any later version.
17//
18// LbmBenchKernels is distributed in the hope that it will be useful,
19// but WITHOUT ANY WARRANTY; without even the implied warranty of
20// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21// GNU General Public License for more details.
22//
23// You should have received a copy of the GNU General Public License
24// along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>.
25//
26// --------------------------------------------------------------------------
27#include "BenchKernelD3Q19ListPullSplitNtCommon.h"
28
29#include "Memory.h"
30#include "Vtk.h"
31#include "Vector.h"
32
33#include <math.h>
34
35#ifdef _OPENMP
36 #include <omp.h>
37#endif
38
39// Forward definition.
40void FNAME(KernelPullSplitNt1S)(LatticeDesc * ld, struct KernelData_ * kd, CaseData * cd);
41void FNAME(KernelPullSplitNt2S)(LatticeDesc * ld, struct KernelData_ * kd, CaseData * cd);
42
43void FNAME(KernelPullSplitNtRia1S)(LatticeDesc * ld, struct KernelData_ * kd, CaseData * cd);
44void FNAME(KernelPullSplitNtRia2S)(LatticeDesc * ld, struct KernelData_ * kd, CaseData * cd);
45
46
47
48
49// -----------------------------------------------------------------------
50// Functions which are used as callback by the kernel to read or write
51// PDFs and nodes.
52
53static void FNAME(BCGetPdf)(KernelData * kd, int x, int y, int z, int dir, PdfT * pdf)
54{
55 Assert(kd != NULL);
56 Assert(kd->PdfsActive != NULL);
57 Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
58 Assert(pdf != NULL);
59
60 Assert(x >= 0); Assert(y >= 0); Assert(z >= 0);
61 Assert(x < kd->Dims[0]); Assert(y < kd->Dims[1]); Assert(z < kd->Dims[2]);
62 Assert(dir >= 0); Assert(dir < N_D3Q19);
63
64 // The relevant PDFs here are the ones, which will get streamed in later
65 // during propagation. So we must return the *remote* PDFs.
66 uint32_t nodeIndex = KDL(kd)->Grid[L_INDEX_4(kd->Dims, x, y, z)];
67
68 if (dir != D3Q19_C) {
69
70 uint32_t adjListIndex = nodeIndex * N_D3Q19_IDX;
71
72 *pdf = kd->PdfsActive[KDL(kd)->AdjList[adjListIndex + dir]];
73 }
74 else {
75 *pdf = kd->PdfsActive[P_INDEX_3(KDL(kd)->nCells, nodeIndex, dir)];
76
77 }
78
79 return;
80}
81
82static void FNAME(BCSetPdf)(KernelData * kd, int x, int y, int z, int dir, PdfT pdf)
83{
84 Assert(kd != NULL);
85 Assert(kd->PdfsActive != NULL);
86 Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
87 Assert(x >= 0); Assert(y >= 0); Assert(z >= 0);
88 Assert(x < kd->Dims[0]); Assert(y < kd->Dims[1]); Assert(z < kd->Dims[2]);
89 Assert(dir >= 0); Assert(dir < N_D3Q19);
90
91 if (isnan(pdf)) {
92 printf("ERROR: setting nan %d %d %d %d %s\n", x, y, z, dir, D3Q19_NAMES[dir]);
93 DEBUG_BREAK_POINT();
94 exit(1);
95 }
96
97 // The relevant PDFs here are the ones, which will get streamed in later
98 // during propagation. So we must set this *remote* PDFs.
99 uint32_t nodeIndex = KDL(kd)->Grid[L_INDEX_4(kd->Dims, x, y, z)];
100
101 if (dir != D3Q19_C) {
102
103 uint32_t adjListIndex = nodeIndex * N_D3Q19_IDX;
104
105 kd->PdfsActive[KDL(kd)->AdjList[adjListIndex + dir]] = pdf;
106 }
107 else {
108 kd->PdfsActive[P_INDEX_3(KDL(kd)->nCells, nodeIndex, dir)] = pdf;
109
110 }
111
112 return;
113}
114
115
116static void GetNode(KernelData * kd, int x, int y, int z, PdfT * pdfs)
117{
118 Assert(kd != NULL);
119 Assert(kd->PdfsActive != NULL);
120 Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
121 Assert(pdfs != NULL);
122 Assert(x >= 0); Assert(y >= 0); Assert(z >= 0);
123 Assert(x < kd->Dims[0]); Assert(y < kd->Dims[1]); Assert(z < kd->Dims[2]);
124
125 PdfT sum = 0.0;
126
127 // TODO: pull scheme?
128
129 #define I(x, y, z, dir) P_INDEX_5(KDL(kd), (x), (y), (z), (dir))
130 #define X(name, idx, idxinv, _x, _y, _z) pdfs[idx] = kd->PdfsActive[I(x, y, z, idx)]; sum += pdfs[idx];
131 D3Q19_LIST
132 #undef X
133 #undef I
134
135#ifdef DETECT_NANS
136 for (int d = 0; d < 19; ++d) {
137 if(isnan(pdfs[d]) || isinf(pdfs[d])) {
138 printf("%d %d %d %d nan! get node\n", x, y, z, d);
139 for (int d2 = 0; d2 < 19; ++d2) {
140 printf("%d: %e\n", d2, pdfs[d2]);
141 }
142 exit(1);
143 }
144 }
145#endif
146 return;
147}
148
149
150static void SetNode(KernelData * kd, int x, int y, int z, PdfT * pdfs)
151{
152 Assert(kd != NULL);
153 Assert(kd->PdfsActive != NULL);
154 Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
155 Assert(pdfs != NULL);
156
157 Assert(x >= 0); Assert(y >= 0); Assert(z >= 0);
158 Assert(x < kd->Dims[0]); Assert(y < kd->Dims[1]); Assert(z < kd->Dims[2]);
159
160#ifdef DETECT_NANS
161 for (int d = 0; d < 19; ++d) {
162 if(isnan(pdfs[d])) {
163 printf("%d %d %d %d nan! get node\n", x, y, z, d);
164 for (int d2 = 0; d2 < 19; ++d2) {
165 printf("%d: %e\n", d2, pdfs[d2]);
166 }
167 exit(1);
168 }
169 }
170#endif
171
172 // TODO: pull scheme?
173 #define I(x, y, z, dir) P_INDEX_5(KDL(kd), (x), (y), (z), (dir))
174 #define X(name, idx, idxinv, _x, _y, _z) kd->PdfsActive[I(x, y, z, idx)] = pdfs[idx];
175 D3Q19_LIST
176 #undef X
177 #undef I
178
179 return;
180}
181
182static void ParameterUsage()
183{
184 printf("Kernel parameters:\n");
185 printf(" [-blk <n>] [-blk-[xyz] <n>] [-n-tmp-array <n>]\n");
186
187 return;
188}
189
190static void ParseParameters(Parameters * params, int * blk, int * nTmpArray)
191{
192 Assert(blk != NULL);
193
194 blk[0] = 0; blk[1] = 0; blk[2] = 0;
195 *nTmpArray = 152;
196
197 #define ARG_IS(param) (!strcmp(params->KernelArgs[i], param))
198 #define NEXT_ARG_PRESENT() \
199 do { \
200 if (i + 1 >= params->nKernelArgs) { \
201 printf("ERROR: argument %s requires a parameter.\n", params->KernelArgs[i]); \
202 exit(1); \
203 } \
204 } while (0)
205
206
207 for (int i = 0; i < params->nKernelArgs; ++i) {
208 if (ARG_IS("-blk") || ARG_IS("--blk")) {
209 NEXT_ARG_PRESENT();
210
211 int tmp = strtol(params->KernelArgs[++i], NULL, 0);
212
213 if (tmp <= 0) {
214 printf("ERROR: blocking parameter must be > 0.\n");
215 exit(1);
216 }
217
218 blk[0] = blk[1] = blk[2] = tmp;
219 }
220 else if (ARG_IS("-blk-x") || ARG_IS("--blk-x")) {
221 NEXT_ARG_PRESENT();
222
223 int tmp = strtol(params->KernelArgs[++i], NULL, 0);
224
225 if (tmp <= 0) {
226 printf("ERROR: blocking parameter must be > 0.\n");
227 exit(1);
228 }
229
230 blk[0] = tmp;
231 }
232 else if (ARG_IS("-blk-y") || ARG_IS("--blk-y")) {
233 NEXT_ARG_PRESENT();
234
235 int tmp = strtol(params->KernelArgs[++i], NULL, 0);
236
237 if (tmp <= 0) {
238 printf("ERROR: blocking parameter must be > 0.\n");
239 exit(1);
240 }
241
242 blk[1] = tmp;
243 }
244 else if (ARG_IS("-blk-z") || ARG_IS("--blk-z")) {
245 NEXT_ARG_PRESENT();
246
247 int tmp = strtol(params->KernelArgs[++i], NULL, 0);
248
249 if (tmp <= 0) {
250 printf("ERROR: blocking parameter must be > 0.\n");
251 exit(1);
252 }
253
254 blk[2] = tmp;
255 }
256 else if (ARG_IS("-n-tmp-array") || ARG_IS("--n-tmp-array")) {
257 NEXT_ARG_PRESENT();
258
259 int tmp = strtol(params->KernelArgs[++i], NULL, 0);
260
261 if (tmp <= 0) {
262 printf("ERROR: -n-tmp-array parameter must be > 0.\n");
263 exit(1);
264 }
265
266 if (tmp % VSIZE != 0) {
267 printf("ERROR: value for -n-tmp-array must be a multiple of %d.\n", VSIZE);
268 exit(1);
269 }
270
271 *nTmpArray = tmp;
272 }
273 else if (ARG_IS("-h") || ARG_IS("-help") || ARG_IS("--help")) {
274 ParameterUsage();
275 exit(1);
276 }
277 else {
278 printf("ERROR: unknown kernel parameter.\n");
279 ParameterUsage();
280 exit(1);
281 }
282 }
283
284 #undef ARG_IS
285 #undef NEXT_ARG_PRESENT
286
287 return;
288}
289
290static void SetupConsecNodes(LatticeDesc * ld, KernelDataListRia * kdlr, int nThreads)
291{
292 Assert(ld != NULL);
293 Assert(kdlr != NULL);
294 Assert(nThreads > 0);
295
296 uint32_t * adjList = kdlr->kdl.AdjList;
297
298 uint32_t nConsecNodes = 0;
299 uint32_t consecIndex = 0;
300
301 int nFluid = kdlr->kdl.nFluid;
302
303 uint32_t * consecThreadIndices = (uint32_t *)malloc(sizeof(uint32_t) * (nThreads + 1));
304
305 int nNodesPerThread = nFluid / nThreads;
306
307 for (int i = 0; i < nThreads; ++i) {
308 consecThreadIndices[i] = i * nNodesPerThread + MinI(i, nFluid % nThreads);
309 }
310 consecThreadIndices[nThreads] = -1;
311
312 int indexThread = 1;
313
314 // We execute following code two times.
315 // - The first time to get the count of how many entries we need for the
316 // consecNodes array.
317 // - The second time to fill the array.
318
319 // Loop over adjacency list of all nodes.
320 // Compare if adjacent nodes share the same access pattern.
321 for (int index = 1; index < nFluid; ++index) {
322
323 int different = 0;
324
325 // Loop over all directions except the center one.
326 for(int d = 0; d < N_D3Q19 - 1; ++d) {
327 Assert(d != D3Q19_C);
328
329 if (adjList[index * N_D3Q19_IDX + d] != adjList[(index - 1) * N_D3Q19_IDX + d] + 1) {
330 // Different access pattern.
331 different = 1;
332 break;
333 }
334 }
335
336 if (consecThreadIndices[indexThread] == index) {
337 // We are at a thread boundary. Starting from this index the fluids
338 // belong to another thread. Force a break, if nodes are consecutive.
339 ++indexThread;
340 different = 1;
341 }
342
343 if (different) {
344 ++consecIndex;
345 }
346 }
347
348 if (nFluid > 0) {
349 nConsecNodes = consecIndex + 1;
350 }
351
352 uint32_t * consecNodes;
353 MemAlloc((void **)&consecNodes, sizeof(uint32_t) * nConsecNodes);
354
355 consecIndex = 0;
356
357 if (nFluid > 0) {
358 consecNodes[consecIndex] = 1;
359 }
360
361 indexThread = 1;
362 consecThreadIndices[0] = 0;
363
364 // Loop over adjacency list of all nodes.
365 // Compare if adjacent nodes share the same access pattern.
366 for (int index = 1; index < nFluid; ++index) {
367
368 int different = 0;
369
370 // Loop over all directions except the center one.
371 for(int d = 0; d < N_D3Q19 - 1; ++d) {
372 Assert(d != D3Q19_C);
373
374 if (adjList[index * N_D3Q19_IDX + d] != adjList[(index - 1) * N_D3Q19_IDX + d] + 1) {
375 // Different access pattern.
376 different = 1;
377 break;
378 }
379 }
380
381 if (consecThreadIndices[indexThread] == index) {
382 // We are at a thread boundary. Starting from this index the fluids
383 // belong to another thread. Force a break, if nodes are consecutive.
384 consecThreadIndices[indexThread] = consecIndex + 1;
385 ++indexThread;
386 different = 1;
387 }
388
389 if (different) {
390 ++consecIndex;
391 Assert(consecIndex < nConsecNodes);
392 consecNodes[consecIndex] = 1;
393 }
394 else {
395 Assert(consecIndex < nConsecNodes);
396 consecNodes[consecIndex] += 1;
397 }
398 }
399
400
401 kdlr->ConsecNodes = consecNodes;
402 kdlr->nConsecNodes = nConsecNodes;
403
404 kdlr->ConsecThreadIndices = consecThreadIndices;
405 kdlr->nConsecThreadIndices = nThreads;
406
407 // printf("# total fluid nodes: %d consecutive blocks: %d\n", nFluid, nConsecNodes);
408
409 return;
410}
411
412
413static void FNAME(Init)(LatticeDesc * ld, KernelData ** kernelData, Parameters * params)
414{
415 KernelData * kd;
416 KernelDataList * kdl;
417 KernelDataListRia * kdlr;
418 MemAlloc((void **)&kdlr, sizeof(KernelDataListRia));
419
420 kd = (KernelData *)kdlr;
421 kdl = KDL(kdlr);
422
423 *kernelData = kd;
424
425#ifdef DEBUG
426 kd->Pdfs[0] = NULL;
427 kd->Pdfs[1] = NULL;
428 kd->PdfsActive = NULL;
429 kd->DstPdfs = NULL;
430 kd->SrcPdfs = NULL;
431 kd->Dims[0] = -1;
432 kd->Dims[1] = -1;
433 kd->Dims[2] = -1;
434 kd->GlobalDims[0] = -1;
435 kd->GlobalDims[1] = -1;
436 kd->GlobalDims[2] = -1;
437 kd->Offsets[0] = -1;
438 kd->Offsets[1] = -1;
439 kd->Offsets[2] = -1;
440
441 kd->ObstIndices = NULL;
442 kd->nObstIndices = -1;
443 kd->BounceBackPdfsSrc = NULL;
444 kd->BounceBackPdfsDst = NULL;
445 kd->nBounceBackPdfs = -1;
446
447 kdl->AdjList = NULL;
448 kdl->Coords = NULL;
449 kdl->Grid = NULL;
450 kdl->nCells = -1;
451 kdl->nFluid = -1;
452
453 kdlr->ConsecNodes = NULL;
454 kdlr->nConsecNodes = 0;
455 kdlr->ConsecThreadIndices = NULL;
456 kdlr->nConsecThreadIndices = 0;
457#endif
458
459 // Ajust the dimensions according to padding, if used.
460 kd->Dims[0] = kd->GlobalDims[0] = ld->Dims[0];
461 kd->Dims[1] = kd->GlobalDims[1] = ld->Dims[1];
462 kd->Dims[2] = kd->GlobalDims[2] = ld->Dims[2];
463
464 int * lDims = ld->Dims;
465
466 int lX = lDims[0];
467 int lY = lDims[1];
468 int lZ = lDims[2];
469
470 int nTotalCells = lX * lY * lZ;
471 int nCells = ld->nFluid;
472 int nFluid = ld->nFluid;
473
474 // We padd each stream of a PDF array for a complete cache line.
475 // TODO: padding for L1/L2 and TLB.
476 nCells = nCells + (8 - nCells % 8);
477
478 Assert(nCells % VSIZE == 0);
479
480 kdl->nCells = nCells;
481 kdl->nFluid = nFluid;
482
483 PdfT * pdfs[2];
484
485 int blk[3] = { 0 };
486
487 ParseParameters(params, blk, &kdlr->nTmpArray);
488
489 if (blk[0] == 0) blk[0] = lX;
490 if (blk[1] == 0) blk[1] = lY;
491 if (blk[2] == 0) blk[2] = lZ;
492
493 printf("# blocking x: %3d y: %3d z: %3d\n", blk[0], blk[1], blk[2]);
494 printf("# temporary array size: %d PDFs, %lu b\n", kdlr->nTmpArray, kdlr->nTmpArray * sizeof(PdfT) * 23);
495
496 double latMiB = nCells * sizeof(PdfT) * N_D3Q19 / 1024.0 / 1024.0;
497 double latFluidMib = nFluid * sizeof(PdfT) * N_D3Q19 / 1024.0 / 1024.0;
498 double latPadMib = (nCells - nFluid) * sizeof(PdfT) * N_D3Q19 / 1024.0 / 1024.0;
499
500 printf("# lattice size: %e MiB total: %e MiB\n", latMiB, latMiB * 2);
501 printf("# fluid lattice size: %e MiB total: %e MiB\n", latFluidMib, latFluidMib * 2);
502 printf("# lattice padding: %e MiB total: %e MiB\n", latPadMib, latPadMib * 2);
503
504#define PAGE_4K 4096
505
506 printf("# aligning lattices to: %d b\n", PAGE_4K);
507
508 MemAllocAligned((void **)&pdfs[0], sizeof(PdfT) * nCells * N_D3Q19, PAGE_4K);
509 MemAllocAligned((void **)&pdfs[1], sizeof(PdfT) * nCells * N_D3Q19, PAGE_4K);
510
511 kd->Pdfs[0] = pdfs[0];
512 kd->Pdfs[1] = pdfs[1];
513
514 // Initialize PDFs with some (arbitrary) data for correct NUMA placement.
515 // Here we touch only the fluid nodes as this loop is OpenMP parallel and
516 // we want the same scheduling as in the kernel.
517 #ifdef _OPENMP
518 #pragma omp parallel for
519 #endif
520 for (int i = 0; i < nFluid; ++i) { for(int d = 0; d < N_D3Q19; ++d) {
521 pdfs[0][P_INDEX_3(nCells, i, d)] = 1.0;
522 pdfs[1][P_INDEX_3(nCells, i, d)] = 1.0;
523 } }
524
525 // Initialize all PDFs to some standard value.
526 for (int i = 0; i < nFluid; ++i) { for(int d = 0; d < N_D3Q19; ++d) {
527 pdfs[0][P_INDEX_3(nCells, i, d)] = 0.0;
528 pdfs[1][P_INDEX_3(nCells, i, d)] = 0.0;
529 } }
530
531 // ----------------------------------------------------------------------
532 // create grid which will hold the index numbers of the fluid nodes
533
534 uint32_t * grid;
535
536 if (MemAlloc((void **)&grid, nTotalCells * sizeof(uint32_t))) {
537 printf("ERROR: allocating grid for numbering failed: %lu bytes.\n", nTotalCells * sizeof(uint32_t));
538 exit(1);
539 }
540 kdl->Grid = grid;
541
542 int latticeIndex;
543
544#ifdef DEBUG
545 for(int z = 0; z < lZ; ++z) {
546 for(int y = 0; y < lY; ++y) {
547 for(int x = 0; x < lX; ++x) {
548
549 latticeIndex = L_INDEX_4(ld->Dims, x, y, z);
550
551 grid[latticeIndex] = ~0;
552 }
553 }
554 }
555#endif
556
557 // ----------------------------------------------------------------------
558 // generate numbering over grid
559
560 uint32_t * coords;
561
562 if (MemAlloc((void **)&coords, nFluid * sizeof(uint32_t) * 3)) {
563 printf("ERROR: allocating coords array failed: %lu bytes.\n", nFluid * sizeof(uint32_t) * 3);
564 exit(1);
565 }
566
567 kdl->Coords = coords;
568
569 // Index for the PDF nodes can start at 0 as we distinguish solid and fluid nodes
570 // through the ld->Lattice array.
571 int counter = 0;
572
573 // Blocking is implemented via setup of the adjacency list. The kernel later will
574 // walk through the lattice blocked automatically.
575 for (int bZ = 0; bZ < lZ; bZ += blk[2]) {
576 for (int bY = 0; bY < lY; bY += blk[1]) {
577 for (int bX = 0; bX < lX; bX += blk[0]) {
578
579 int eX = MIN(bX + blk[0], lX);
580 int eY = MIN(bY + blk[1], lY);
581 int eZ = MIN(bZ + blk[2], lZ);
582
583
584 for (int z = bZ; z < eZ; ++z) {
585 for (int y = bY; y < eY; ++y) {
586 for (int x = bX; x < eX; ++x) {
587
588 latticeIndex = L_INDEX_4(lDims, x, y, z);
589
590 if (ld->Lattice[latticeIndex] != LAT_CELL_OBSTACLE) {
591 grid[latticeIndex] = counter;
592
593 coords[C_INDEX_X(counter)] = x;
594 coords[C_INDEX_Y(counter)] = y;
595 coords[C_INDEX_Z(counter)] = z;
596
597 ++counter;
598 }
599 } } }
600 } } }
601
602 Verify(counter == nFluid);
603
604 uint32_t * adjList;
605
606 double indexMib = nFluid * sizeof(uint32_t) * N_D3Q19_IDX / 1024.0 / 1024.0;
607
608 printf("# index size: %e MiB\n", indexMib);
609
610
611 // AdjList only requires 18 instead of 19 entries per node, as
612 // the center PDF needs no addressing.
613 if (MemAlloc((void **)&adjList, nFluid * sizeof(uint32_t) * N_D3Q19_IDX)) {
614 printf("ERROR: allocating adjList array failed: %lu bytes.\n", nFluid * sizeof(uint32_t) * N_D3Q19_IDX);
615 exit(1);
616 }
617
618 kdl->AdjList = adjList;
619
620 int x, y, z;
621
622 uint32_t neighborIndex;
623 uint32_t dstIndex;
624
625 int nx, ny, nz, px, py, pz;
626
627 // Loop over all fluid nodes and compute the indices to the neighboring
628 // PDFs for configured data layout (AoS/SoA).
629 // Parallelized loop to ensure correct NUMA placement.
630 // #ifdef _OPENMP --> add line continuation
631 // #pragma omp parallel for default(none)
632 // shared(nFluid, nCells, coords, D3Q19_INV, D3Q19_X, D3Q19_Y, D3Q19_Z,
633 // stderr,
634 // lDims, grid, ld, lX, lY, lZ, adjList)
635 // private(x, y, z, nx, ny, nz, neighborIndex, dstIndex)
636 // #endif
637 for (int index = 0; index < nFluid; ++index) {
638 x = coords[C_INDEX_X(index)];
639 y = coords[C_INDEX_Y(index)];
640 z = coords[C_INDEX_Z(index)];
641
642 Assert(x >= 0 && x < lX);
643 Assert(y >= 0 && y < lY);
644 Assert(z >= 0 && z < lZ);
645
646 Assert(ld->Lattice[L_INDEX_4(lDims, x, y, z)] != LAT_CELL_OBSTACLE);
647
648 // Loop over all directions except the center one.
649 for(int d = 0; d < N_D3Q19 - 1; ++d) {
650 Assert(d != D3Q19_C);
651
652#ifdef PROP_MODEL_PUSH
653 nx = x + D3Q19_X[d];
654 ny = y + D3Q19_Y[d];
655 nz = z + D3Q19_Z[d];
656
657#elif PROP_MODEL_PULL
658 nx = x - D3Q19_X[d];
659 ny = y - D3Q19_Y[d];
660 nz = z - D3Q19_Z[d];
661#else
662 #error No implementation for this PROP_MODEL_NAME.
663#endif
664 // If the neighbor is outside the latcie in X direction and we have a
665 // periodic boundary then we need to wrap around.
666 if ( ((nx < 0 || nx >= lX) && ld->PeriodicX) ||
667 ((ny < 0 || ny >= lY) && ld->PeriodicY) ||
668 ((nz < 0 || nz >= lZ) && ld->PeriodicZ)
669 ){
670 // x periodic
671
672 if (nx < 0) {
673 px = lX - 1;
674 }
675 else if (nx >= lX) {
676 px = 0;
677 } else {
678 px = nx;
679 }
680 // y periodic
681 if (ny < 0) {
682 py = lY - 1;
683 }
684 else if (ny >= lY) {
685 py = 0;
686 } else {
687 py = ny;
688 }
689
690 // z periodic
691 if (nz < 0) {
692 pz = lZ - 1;
693 }
694 else if (nz >= lZ) {
695 pz = 0;
696 } else {
697 pz = nz;
698 }
699
700 if (ld->Lattice[L_INDEX_4(lDims, px, py, pz)] == LAT_CELL_OBSTACLE) {
701 dstIndex = P_INDEX_3(nCells, index, D3Q19_INV[d]);
702 }
703 else {
704 neighborIndex = grid[L_INDEX_4(lDims, px, py, pz)];
705
706 AssertMsg(neighborIndex != ~0, "Neighbor has no Index. (%d %d %d) direction %s (%d)\n", px, py, pz, D3Q19_NAMES[d], d);
707
708 dstIndex = P_INDEX_3(nCells, neighborIndex, d);
709 }
710 }
711 else if (nx < 0 || ny < 0 || nz < 0 || nx >= lX || ny >= lY || nz >= lZ) {
712 dstIndex = P_INDEX_3(nCells, index, D3Q19_INV[d]);
713 }
714 else if (ld->Lattice[L_INDEX_4(lDims, nx, ny, nz)] == LAT_CELL_OBSTACLE) {
715 dstIndex = P_INDEX_3(nCells, index, D3Q19_INV[d]);
716 }
717 else {
718 neighborIndex = grid[L_INDEX_4(lDims, nx, ny, nz)];
719
720 Assert(neighborIndex != ~0);
721
722 dstIndex = P_INDEX_3(nCells, neighborIndex, d);
723 }
724
725 Assert(dstIndex >= 0);
726 Assert(dstIndex < nCells * N_D3Q19);
727
728 adjList[index * N_D3Q19_IDX + d] = dstIndex;
729 }
730 }
731
732 int nThreads = 1;
733
734#ifdef _OPENMP
735 nThreads = omp_get_max_threads();
736#endif
737
738 SetupConsecNodes(ld, KDLR(kd), nThreads);
739
740
741 // Fill remaining KernelData structures
742 kd->GetNode = GetNode;
743 kd->SetNode = SetNode;
744
745 kd->BoundaryConditionsGetPdf = FNAME(BCGetPdf);
746 kd->BoundaryConditionsSetPdf = FNAME(BCSetPdf);
747
748 kd->Kernel = NULL; // FNAME(KernelPullSplitNt2S);
749
750 kd->DstPdfs = NULL;
751 kd->PdfsActive = kd->Pdfs[0];
752
753 return;
754}
755
756void FNAME(D3Q19ListPullSplitNt1SInit)(LatticeDesc * ld, KernelData ** kernelData, Parameters * params)
757{
758 FNAME(Init)(ld, kernelData, params);
759 (*kernelData)->Kernel = FNAME(KernelPullSplitNt1S);
760
761 double loopBalance = 2.0 * 19 * sizeof(PdfT) + (18 * 4.0);
762 printf("# loop balance: %.2f B/FLUP\n", loopBalance);
763}
764
765void FNAME(D3Q19ListPullSplitNt2SInit)(LatticeDesc * ld, KernelData ** kernelData, Parameters * params)
766{
767 FNAME(Init)(ld, kernelData, params);
768 (*kernelData)->Kernel = FNAME(KernelPullSplitNt2S);
769
770 double loopBalance = 2.0 * 19 * sizeof(PdfT) + (18 * 4.0);
771 printf("# loop balance: %.2f B/FLUP\n", loopBalance);
772}
773
774
775void FNAME(D3Q19ListPullSplitNtDeinit)(LatticeDesc * ld, KernelData ** kernelData)
776{
777 KernelDataListRia ** kdlr = (KernelDataListRia **)kernelData;
778
779 MemFree((void **)&((*kdlr)->ConsecNodes));
780
781 if ((*kdlr)->ConsecThreadIndices != NULL) {
782 MemFree((void **)&((*kdlr)->ConsecThreadIndices));
783 }
784
785 KernelDataList ** kdl = (KernelDataList **)kernelData;
786
787 MemFree((void **)&((*kdl)->AdjList));
788 MemFree((void **)&((*kdl)->Coords));
789 MemFree((void **)&((*kdl)->Grid));
790
791 MemFree((void **)&((*kernelData)->Pdfs[0]));
792 MemFree((void **)&((*kernelData)->Pdfs[1]));
793
794 MemFree((void **)kernelData);
795 return;
796}
797
This page took 0.110512 seconds and 5 git commands to generate.