add citation information
[LbmBenchmarkKernelsPublic.git] / src / BenchKernelD3Q19ListAaRiaCommon.c
CommitLineData
10988083
MW
1// --------------------------------------------------------------------------
2//
3// Copyright
4// Markus Wittmann, 2016-2017
5// RRZE, University of Erlangen-Nuremberg, Germany
6// markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
7//
8// Viktor Haag, 2016
9// LSS, University of Erlangen-Nuremberg, Germany
10//
11// This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
12//
13// LbmBenchKernels is free software: you can redistribute it and/or modify
14// it under the terms of the GNU General Public License as published by
15// the Free Software Foundation, either version 3 of the License, or
16// (at your option) any later version.
17//
18// LbmBenchKernels is distributed in the hope that it will be useful,
19// but WITHOUT ANY WARRANTY; without even the implied warranty of
20// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21// GNU General Public License for more details.
22//
23// You should have received a copy of the GNU General Public License
24// along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>.
25//
26// --------------------------------------------------------------------------
27#include "BenchKernelD3Q19ListAaRiaCommon.h"
28
29#include "Memory.h"
30#include "Vtk.h"
e3f82424 31#include "Padding.h"
10988083
MW
32
33#include <math.h>
34
35#ifdef _OPENMP
36 #include <omp.h>
37#endif
38
39// Forward definition.
40void FNAME(D3Q19ListAaRiaKernel)(LatticeDesc * ld, struct KernelData_ * kd, CaseData * cd);
41
42
43
44
45// -----------------------------------------------------------------------
46// Functions which are used as callback by the kernel to read or write
47// PDFs and nodes.
48
49static void FNAME(BCGetPdf)(KernelData * kd, int x, int y, int z, int dir, PdfT * pdf)
50{
51 Assert(kd != NULL);
52 Assert(kd->PdfsActive != NULL);
53 Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
54 Assert(pdf != NULL);
55
56 Assert(x >= 0); Assert(y >= 0); Assert(z >= 0);
57 Assert(x < kd->Dims[0]); Assert(y < kd->Dims[1]); Assert(z < kd->Dims[2]);
58 Assert(dir >= 0); Assert(dir < N_D3Q19);
59
60 KernelDataList * kdl = (KernelDataList *)kd;
61
62 if (kdl->Iteration % 2 == 0) {
63 // Pdfs are stored inverse, local PDFs are located in remote nodes
64
65 uint32_t nodeIndex = KDL(kd)->Grid[L_INDEX_4(kd->Dims, x, y, z)];
66
67 if (dir != D3Q19_C) {
68 uint32_t adjListIndex = nodeIndex * N_D3Q19_IDX;
69
70 *pdf = kd->PdfsActive[KDL(kd)->AdjList[adjListIndex + D3Q19_INV[dir]]];
71 }
72 else {
73 *pdf = kd->PdfsActive[P_INDEX_3(KDL(kd)->nCells, nodeIndex, dir)];
74 }
75
76 }
77 else {
78 *pdf = kd->PdfsActive[P_INDEX_5(KDL(kd), x, y, z, dir)];
79 }
80
81
82 return;
83}
84
85static void FNAME(BCSetPdf)(KernelData * kd, int x, int y, int z, int dir, PdfT pdf)
86{
87 Assert(kd != NULL);
88 Assert(kd->PdfsActive != NULL);
89 Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
90 Assert(x >= 0); Assert(y >= 0); Assert(z >= 0);
91 Assert(x < kd->Dims[0]); Assert(y < kd->Dims[1]); Assert(z < kd->Dims[2]);
92 Assert(dir >= 0); Assert(dir < N_D3Q19);
93
94#if 0
95 if (isnan(pdf)) {
96 printf("ERROR: setting nan %d %d %d %d %s\n", x, y, z, dir, D3Q19_NAMES[dir]);
97 DEBUG_BREAK_POINT();
98 exit(1);
99 }
100#endif
101
102 KernelDataList * kdl = (KernelDataList *)kd;
103
104 if (kdl->Iteration % 2 == 0) {
105 // Pdfs are stored inverse, local PDFs are located in remote nodes
106
107 uint32_t nodeIndex = KDL(kd)->Grid[L_INDEX_4(kd->Dims, x, y, z)];
108
109 if (dir != D3Q19_C) {
110 uint32_t adjListIndex = nodeIndex * N_D3Q19_IDX;
111
112 kd->PdfsActive[KDL(kd)->AdjList[adjListIndex + D3Q19_INV[dir]]] = pdf;
113 }
114 else {
115 kd->PdfsActive[P_INDEX_3(KDL(kd)->nCells, nodeIndex, dir)] = pdf;
116 }
117
118 }
119 else {
120 kd->PdfsActive[P_INDEX_5(KDL(kd), x, y, z, dir)] = pdf;
121 }
122
123 return;
124}
125
126
127static void GetNode(KernelData * kd, int x, int y, int z, PdfT * pdfs)
128{
129 Assert(kd != NULL);
130 Assert(kd->PdfsActive != NULL);
131 Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
132 Assert(pdfs != NULL);
133 Assert(x >= 0); Assert(y >= 0); Assert(z >= 0);
134 Assert(x < kd->Dims[0]); Assert(y < kd->Dims[1]); Assert(z < kd->Dims[2]);
135
136 KernelDataList * kdl = (KernelDataList *)kd;
137
138 if(kdl->Iteration % 2 == 0){
139
140 uint32_t nodeIndex = kdl->Grid[L_INDEX_4(kdl->kd.Dims, x, y, z)];
141 uint32_t adjListIndex = nodeIndex * N_D3Q19_IDX;
142
143 // Load PDFs of local cell: pdf_N = src[adjList[adjListIndex + D3Q19_S]]; ...
144 pdfs[D3Q19_C] = kd->PdfsActive[P_INDEX_3(kdl->nCells, nodeIndex, D3Q19_C)];
145
146 #define X(name, idx, idxinv, _x, _y, _z) pdfs[idx] = kd->PdfsActive[kdl->AdjList[adjListIndex + idxinv]];
147 D3Q19_LIST_WO_C
148 #undef X
149
150 } else {
151
152 #define I(x, y, z, dir) P_INDEX_5(KDL(kd), (x), (y), (z), (dir))
153 #define X(name, idx, idxinv, _x, _y, _z) pdfs[idx] = kd->PdfsActive[I(x, y, z, idx)];
154 D3Q19_LIST
155 #undef X
156 #undef I
157
158 }
159
160#if 0
161 for (int d = 0; d < 19; ++d) {
162 if(isnan(pdfs[d]) || isinf(pdfs[d])) {
163 printf("%d %d %d %d nan! get node\n", x, y, z, d);
164 for (int d2 = 0; d2 < 19; ++d2) {
165 printf("%d: %e\n", d2, pdfs[d2]);
166 }
167 exit(1);
168 }
169 }
170#endif
171
172 return;
173}
174
175
176static void SetNode(KernelData * kd, int x, int y, int z, PdfT * pdfs)
177{
178 Assert(kd != NULL);
179 Assert(kd->PdfsActive != NULL);
180 Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
181 Assert(pdfs != NULL);
182
183 Assert(x >= 0); Assert(y >= 0); Assert(z >= 0);
184 Assert(x < kd->Dims[0]); Assert(y < kd->Dims[1]); Assert(z < kd->Dims[2]);
185
186#if 0
187 for (int d = 0; d < 19; ++d) {
188 if(isnan(pdfs[d])) {
189 printf("%d %d %d %d nan! get node\n", x, y, z, d);
190 for (int d2 = 0; d2 < 19; ++d2) {
191 printf("%d: %e\n", d2, pdfs[d2]);
192 }
193 exit(1);
194 }
195 }
196#endif
197
198 KernelDataList * kdl = (KernelDataList *)kd;
199
200 if(kdl->Iteration % 2 == 0){
201
202 uint32_t nodeIndex = kdl->Grid[L_INDEX_4(kdl->kd.Dims, x, y, z)];
203 uint32_t adjListIndex = nodeIndex * N_D3Q19_IDX;
204
205 // Load PDFs of local cell: pdf_N = src[adjList[adjListIndex + D3Q19_S]]; ...
206 kd->PdfsActive[P_INDEX_3(kdl->nCells, nodeIndex, D3Q19_C)] = pdfs[D3Q19_C];
207
208 #define X(name, idx, idxinv, _x, _y, _z) kd->PdfsActive[kdl->AdjList[adjListIndex + idxinv]] = pdfs[idx];
209 D3Q19_LIST_WO_C
210 #undef X
211
212 } else {
213
214 #define I(x, y, z, dir) P_INDEX_5(KDL(kd), (x), (y), (z), (dir))
215 #define X(name, idx, idxinv, _x, _y, _z) kd->PdfsActive[I(x, y, z, idx)] = pdfs[idx];
216 D3Q19_LIST
217 #undef X
218 #undef I
219
220 }
221
222 return;
223}
224
// Prints the kernel-specific command line options to stdout.
// The padding option is only listed when the SoA data layout is compiled in.
static void ParameterUsage()
{
	fputs("Kernel parameters:\n", stdout);
	fputs("  [-blk <n>] [-blk-[xyz] <n>]\n", stdout);
#ifdef DATA_LAYOUT_SOA
	fputs("  [-pad auto|modulus_1+offset_1(,modulus_n+offset_n)*]\n", stdout);
#endif

	return;
}
235
e3f82424 236static void ParseParameters(Parameters * params, int * blk, PadInfo ** padInfo)
10988083
MW
237{
238 Assert(blk != NULL);
239
240 blk[0] = 0; blk[1] = 0; blk[2] = 0;
e3f82424 241 *padInfo = NULL;
10988083
MW
242
243 #define ARG_IS(param) (!strcmp(params->KernelArgs[i], param))
244 #define NEXT_ARG_PRESENT() \
245 do { \
246 if (i + 1 >= params->nKernelArgs) { \
247 printf("ERROR: argument %s requires a parameter.\n", params->KernelArgs[i]); \
248 exit(1); \
249 } \
250 } while (0)
251
252
253 for (int i = 0; i < params->nKernelArgs; ++i) {
254 if (ARG_IS("-blk") || ARG_IS("--blk")) {
255 NEXT_ARG_PRESENT();
256
257 int tmp = strtol(params->KernelArgs[++i], NULL, 0);
258
e3f82424
MW
259 if (tmp < 0) {
260 printf("ERROR: blocking parameter must be >= 0.\n");
10988083
MW
261 exit(1);
262 }
263
264 blk[0] = blk[1] = blk[2] = tmp;
265 }
266 else if (ARG_IS("-blk-x") || ARG_IS("--blk-x")) {
267 NEXT_ARG_PRESENT();
268
269 int tmp = strtol(params->KernelArgs[++i], NULL, 0);
270
e3f82424
MW
271 if (tmp < 0) {
272 printf("ERROR: blocking parameter must be >= 0.\n");
10988083
MW
273 exit(1);
274 }
275
276 blk[0] = tmp;
277 }
278 else if (ARG_IS("-blk-y") || ARG_IS("--blk-y")) {
279 NEXT_ARG_PRESENT();
280
281 int tmp = strtol(params->KernelArgs[++i], NULL, 0);
282
e3f82424
MW
283 if (tmp < 0) {
284 printf("ERROR: blocking parameter must be >= 0.\n");
10988083
MW
285 exit(1);
286 }
287
288 blk[1] = tmp;
289 }
290 else if (ARG_IS("-blk-z") || ARG_IS("--blk-z")) {
291 NEXT_ARG_PRESENT();
292
293 int tmp = strtol(params->KernelArgs[++i], NULL, 0);
294
e3f82424
MW
295 if (tmp < 0) {
296 printf("ERROR: blocking parameter must be >= 0.\n");
10988083
MW
297 exit(1);
298 }
299
300 blk[2] = tmp;
301 }
e3f82424
MW
302#ifdef DATA_LAYOUT_SOA
303 else if (ARG_IS("-pad") || ARG_IS("--pad")) {
304 NEXT_ARG_PRESENT();
305
306 *padInfo = PadInfoFromStr(params->KernelArgs[++i]);
307 }
308#endif
10988083
MW
309 else if (ARG_IS("-h") || ARG_IS("-help") || ARG_IS("--help")) {
310 ParameterUsage();
311 exit(1);
312 }
313 else {
314 printf("ERROR: unknown kernel parameter.\n");
315 ParameterUsage();
316 exit(1);
317 }
318 }
319
320 #undef ARG_IS
321 #undef NEXT_ARG_PRESENT
322
323 return;
324}
325
326static void SetupConsecNodes(LatticeDesc * ld, KernelDataListRia * kdlr, int nThreads)
327{
328 Assert(ld != NULL);
329 Assert(kdlr != NULL);
330 Assert(nThreads > 0);
331
332 uint32_t * adjList = kdlr->kdl.AdjList;
333
334 uint32_t nConsecNodes = 0;
335 uint32_t consecIndex = 0;
336
337 int nFluid = kdlr->kdl.nFluid;
338
339 uint32_t * consecThreadIndices = (uint32_t *)malloc(sizeof(uint32_t) * (nThreads + 1));
340 int * fluidNodeThreadIndices = (int *)malloc(sizeof(int) * (nThreads + 1));
341
342 int nNodesPerThread = nFluid / nThreads;
343
344 for (int i = 0; i < nThreads; ++i) {
345 consecThreadIndices[i] = i * nNodesPerThread + MinI(i, nFluid % nThreads);
346 fluidNodeThreadIndices[i] = consecThreadIndices[i];
347 }
348 consecThreadIndices[nThreads] = -1;
349 fluidNodeThreadIndices[nThreads] = nFluid;
350
351 int indexThread = 1;
352
353 // We execute following code two times.
354 // - The first time to get the count of how many entries we need for the
355 // consecNodes array.
356 // - The second time to fill the array.
357
358 // Loop over adjacency list of all nodes.
359 // Compare if adjacent nodes share the same access pattern.
360 for (int index = 1; index < nFluid; ++index) {
361
362 int different = 0;
363
364 // Loop over all directions except the center one.
365 for(int d = 0; d < N_D3Q19 - 1; ++d) {
366 Assert(d != D3Q19_C);
367
368 if (adjList[index * N_D3Q19_IDX + d] != adjList[(index - 1) * N_D3Q19_IDX + d] + 1) {
369 // Different access pattern.
370 different = 1;
371 break;
372 }
373 }
374
375 if (consecThreadIndices[indexThread] == index) {
376 // We are at a thread boundary. Starting from this index the fluids
377 // belong to another thread. Force a break, if nodes are consecutive.
378 ++indexThread;
379 different = 1;
380 }
381
382 if (different) {
383 ++consecIndex;
384 }
385 }
386
387 if (nFluid > 0) {
388 nConsecNodes = consecIndex + 1;
389 }
390
391 uint32_t * consecNodes;
392 MemAlloc((void **)&consecNodes, sizeof(uint32_t) * nConsecNodes);
393
394 consecIndex = 0;
395
396 if (nFluid > 0) {
397 consecNodes[consecIndex] = 1;
398 }
399
400 indexThread = 1;
401 consecThreadIndices[0] = 0;
402
403 // Loop over adjacency list of all nodes.
404 // Compare if adjacent nodes share the same access pattern.
405 for (int index = 1; index < nFluid; ++index) {
406
407 int different = 0;
408
409 // Loop over all directions except the center one.
410 for(int d = 0; d < N_D3Q19 - 1; ++d) {
411 Assert(d != D3Q19_C);
412
413 if (adjList[index * N_D3Q19_IDX + d] != adjList[(index - 1) * N_D3Q19_IDX + d] + 1) {
414 // Different access pattern.
415 different = 1;
416 break;
417 }
418 }
419
420 if (consecThreadIndices[indexThread] == index) {
421 // We are at a thread boundary. Starting from this index the fluids
422 // belong to another thread. Force a break, if nodes are consecutive.
423 consecThreadIndices[indexThread] = consecIndex + 1;
424 ++indexThread;
425 different = 1;
426 }
427
428 if (different) {
429 ++consecIndex;
430 Assert(consecIndex < nConsecNodes);
431 consecNodes[consecIndex] = 1;
432 }
433 else {
434 Assert(consecIndex < nConsecNodes);
435 consecNodes[consecIndex] += 1;
436 }
437 }
438
439
440 kdlr->ConsecNodes = consecNodes;
441 kdlr->nConsecNodes = nConsecNodes;
442
443 kdlr->ConsecThreadIndices = consecThreadIndices;
444 kdlr->nConsecThreadIndices = nThreads;
445
446 kdlr->FluidNodeThreadIndices = fluidNodeThreadIndices;
447 kdlr->nFluidNodeThreadIndices = nThreads;
448
449 printf("# total fluid nodes: %d consecutive blocks: %d\n", nFluid, nConsecNodes);
450
451 return;
452}
453
454void FNAME(D3Q19ListAaRiaInit)(LatticeDesc * ld, KernelData ** kernelData, Parameters * params)
455{
456 KernelData * kd;
457 KernelDataList * kdl;
458 KernelDataListRia * kdlr;
459 MemAlloc((void **)&kdlr, sizeof(KernelDataListRia));
460
461 kd = (KernelData *)kdlr;
462 kdl = KDL(kdlr);
463
464 *kernelData = kd;
465
466#ifdef DEBUG
467 kd->Pdfs[0] = NULL;
468 kd->Pdfs[1] = NULL;
469 kd->PdfsActive = NULL;
470 kd->DstPdfs = NULL;
471 kd->SrcPdfs = NULL;
472 kd->Dims[0] = -1;
473 kd->Dims[1] = -1;
474 kd->Dims[2] = -1;
475 kd->GlobalDims[0] = -1;
476 kd->GlobalDims[1] = -1;
477 kd->GlobalDims[2] = -1;
478 kd->Offsets[0] = -1;
479 kd->Offsets[1] = -1;
480 kd->Offsets[2] = -1;
481
482 kd->ObstIndices = NULL;
483 kd->nObstIndices = -1;
484 kd->BounceBackPdfsSrc = NULL;
485 kd->BounceBackPdfsDst = NULL;
486 kd->nBounceBackPdfs = -1;
487
488 kdl->AdjList = NULL;
489 kdl->Coords = NULL;
490 kdl->Grid = NULL;
491 kdl->nCells = -1;
492 kdl->nFluid = -1;
493
494 kdlr->ConsecNodes = NULL;
495 kdlr->nConsecNodes = 0;
496 kdlr->ConsecThreadIndices = NULL;
497 kdlr->nConsecThreadIndices = 0;
498#endif
499
e3f82424
MW
500 int blk[3] = { 0 };
501 PadInfo * padInfo = NULL;
502
503 ParseParameters(params, blk, &padInfo);
504
10988083
MW
505 // Ajust the dimensions according to padding, if used.
506 kd->Dims[0] = kd->GlobalDims[0] = ld->Dims[0];
507 kd->Dims[1] = kd->GlobalDims[1] = ld->Dims[1];
508 kd->Dims[2] = kd->GlobalDims[2] = ld->Dims[2];
509
510 int * lDims = ld->Dims;
511
512 int lX = lDims[0];
513 int lY = lDims[1];
514 int lZ = lDims[2];
515
516 int nTotalCells = lX * lY * lZ;
517 int nCells = ld->nFluid; // TODO: + padding
518 int nFluid = ld->nFluid;
519
e3f82424
MW
520#ifdef DATA_LAYOUT_SOA
521 {
522 nCells = PadCellsAndReport(nCells, sizeof(PdfT), &padInfo);
523 PadInfoFree(padInfo); padInfo = NULL;
524 }
525#endif
526
10988083
MW
527 kdl->nCells = nCells;
528 kdl->nFluid = nFluid;
529
530 PdfT * pdfs[2];
531
10988083
MW
532
533 if (blk[0] == 0) blk[0] = lX;
534 if (blk[1] == 0) blk[1] = lY;
535 if (blk[2] == 0) blk[2] = lZ;
536
537 printf("# blocking x: %3d y: %3d z: %3d\n", blk[0], blk[1], blk[2]);
538
539 double latMiB = nCells * sizeof(PdfT) * N_D3Q19 / 1024.0 / 1024.0;
540 double latFluidMib = nFluid * sizeof(PdfT) * N_D3Q19 / 1024.0 / 1024.0;
541 double latPadMib = (nCells - nFluid) * sizeof(PdfT) * N_D3Q19 / 1024.0 / 1024.0;
542
543 printf("# lattice size: %e MiB\n", latMiB);
544 printf("# fluid lattice size: %e MiB\n", latFluidMib);
545 printf("# lattice padding: %e MiB\n", latPadMib);
546
547#define PAGE_4K 4096
548
549 printf("# aligning lattices to: %d b\n", PAGE_4K);
550
551 MemAllocAligned((void **)&pdfs[0], sizeof(PdfT) * nCells * N_D3Q19, PAGE_4K);
552
553 kd->Pdfs[0] = pdfs[0];
554
555 // Initialize PDFs with some (arbitrary) data for correct NUMA placement.
556 // Here we touch only the fluid nodes as this loop is OpenMP parallel and
557 // we want the same scheduling as in the kernel.
558 #ifdef _OPENMP
559 #pragma omp parallel for
560 #endif
561 for (int i = 0; i < nFluid; ++i) { for(int d = 0; d < N_D3Q19; ++d) {
562 pdfs[0][P_INDEX_3(nCells, i, d)] = 1.0;
563 } }
564
565 // Initialize all PDFs to some standard value.
566 for (int i = 0; i < nFluid; ++i) { for(int d = 0; d < N_D3Q19; ++d) {
567 pdfs[0][P_INDEX_3(nCells, i, d)] = 0.0;
568 } }
569
570 // ----------------------------------------------------------------------
571 // create grid which will hold the index numbers of the fluid nodes
572
573 uint32_t * grid;
574
575 if (MemAlloc((void **)&grid, nTotalCells * sizeof(uint32_t))) {
576 printf("ERROR: allocating grid for numbering failed: %lu bytes.\n", nTotalCells * sizeof(uint32_t));
577 exit(1);
578 }
579 kdl->Grid = grid;
580
581 int latticeIndex;
582
583#ifdef DEBUG
584 for(int z = 0; z < lZ; ++z) {
585 for(int y = 0; y < lY; ++y) {
586 for(int x = 0; x < lX; ++x) {
587
588 latticeIndex = L_INDEX_4(ld->Dims, x, y, z);
589
590 grid[latticeIndex] = ~0;
591 }
592 }
593 }
594#endif
595
596 // ----------------------------------------------------------------------
597 // generate numbering over grid
598
599 uint32_t * coords;
600
601 if (MemAlloc((void **)&coords, nFluid * sizeof(uint32_t) * 3)) {
602 printf("ERROR: allocating coords array failed: %lu bytes.\n", nFluid * sizeof(uint32_t) * 3);
603 exit(1);
604 }
605
606 kdl->Coords = coords;
607
608 // Index for the PDF nodes can start at 0 as we distinguish solid and fluid nodes
609 // through the ld->Lattice array.
610 int counter = 0;
611
612 // Blocking is implemented via setup of the adjacency list. The kernel later will
613 // walk through the lattice blocked automatically.
10988083 614 for (int bX = 0; bX < lX; bX += blk[0]) {
e3f82424
MW
615 for (int bY = 0; bY < lY; bY += blk[1]) {
616 for (int bZ = 0; bZ < lZ; bZ += blk[2]) {
10988083
MW
617
618 int eX = MIN(bX + blk[0], lX);
619 int eY = MIN(bY + blk[1], lY);
620 int eZ = MIN(bZ + blk[2], lZ);
621
10988083 622 for (int x = bX; x < eX; ++x) {
e3f82424
MW
623 for (int y = bY; y < eY; ++y) {
624 for (int z = bZ; z < eZ; ++z) {
10988083
MW
625
626 latticeIndex = L_INDEX_4(lDims, x, y, z);
627
628 if (ld->Lattice[latticeIndex] != LAT_CELL_OBSTACLE) {
629 grid[latticeIndex] = counter;
630
631 coords[C_INDEX_X(counter)] = x;
632 coords[C_INDEX_Y(counter)] = y;
633 coords[C_INDEX_Z(counter)] = z;
634
635 ++counter;
636 }
637 } } }
638 } } }
639
640 Verify(counter == nFluid);
641
642 uint32_t * adjList;
643
644 double indexMib = nFluid * sizeof(uint32_t) * N_D3Q19_IDX / 1024.0 / 1024.0;
645
646 printf("# index size: %e MiB\n", indexMib);
647
648 // AdjList only requires 18 instead of 19 entries per node, as
649 // the center PDF needs no addressing.
650 if (MemAlloc((void **)&adjList, nFluid * sizeof(uint32_t) * N_D3Q19_IDX)) {
651 printf("ERROR: allocating adjList array failed: %lu bytes.\n", nFluid * sizeof(uint32_t) * N_D3Q19_IDX);
652 exit(1);
653 }
654
655 kdl->AdjList = adjList;
656
657 int x, y, z;
658
659 uint32_t neighborIndex;
660 uint32_t dstIndex;
661
662 int nx, ny, nz, px, py, pz;
663
664 // Loop over all fluid nodes and compute the indices to the neighboring
665 // PDFs for configured data layout (AoS/SoA).
e3f82424
MW
666 #ifdef _OPENMP
667 #pragma omp parallel for
668 #endif
669 for (int index = 0; index < nFluid; ++index) {
670 for (int d = 0; d < N_D3Q19_IDX; ++d) {
671 adjList[index * N_D3Q19_IDX + d] = -1;
672 }
673 }
674
10988083
MW
675 // #ifdef _OPENMP --> add line continuation
676 // #pragma omp parallel for default(none)
677 // shared(nFluid, nCells, coords, D3Q19_INV, D3Q19_X, D3Q19_Y, D3Q19_Z,
678 // stderr,
679 // lDims, grid, ld, lX, lY, lZ, adjList)
680 // private(x, y, z, nx, ny, nz, neighborIndex, dstIndex)
681 // #endif
682 for (int index = 0; index < nFluid; ++index) {
683 x = coords[C_INDEX_X(index)];
684 y = coords[C_INDEX_Y(index)];
685 z = coords[C_INDEX_Z(index)];
686
687 Assert(x >= 0 && x < lX);
688 Assert(y >= 0 && y < lY);
689 Assert(z >= 0 && z < lZ);
690
691 Assert(ld->Lattice[L_INDEX_4(lDims, x, y, z)] != LAT_CELL_OBSTACLE);
692
693 // Loop over all directions except the center one.
694 for(int d = 0; d < N_D3Q19 - 1; ++d) {
695 Assert(d != D3Q19_C);
696
697#ifdef PROP_MODEL_PUSH
698 nx = x + D3Q19_X[d];
699 ny = y + D3Q19_Y[d];
700 nz = z + D3Q19_Z[d];
701
702#elif PROP_MODEL_PULL
703 nx = x - D3Q19_X[d];
704 ny = y - D3Q19_Y[d];
705 nz = z - D3Q19_Z[d];
706#else
707 #error No implementation for this PROP_MODEL_NAME.
708#endif
709 // If the neighbor is outside the latcie in X direction and we have a
710 // periodic boundary then we need to wrap around.
711 if ( ((nx < 0 || nx >= lX) && ld->PeriodicX) ||
712 ((ny < 0 || ny >= lY) && ld->PeriodicY) ||
713 ((nz < 0 || nz >= lZ) && ld->PeriodicZ)
714 ){
715 // x periodic
716
717 if (nx < 0) {
718 px = lX - 1;
719 }
720 else if (nx >= lX) {
721 px = 0;
722 } else {
723 px = nx;
724 }
725 // y periodic
726 if (ny < 0) {
727 py = lY - 1;
728 }
729 else if (ny >= lY) {
730 py = 0;
731 } else {
732 py = ny;
733 }
734
735 // z periodic
736 if (nz < 0) {
737 pz = lZ - 1;
738 }
739 else if (nz >= lZ) {
740 pz = 0;
741 } else {
742 pz = nz;
743 }
744
745 if (ld->Lattice[L_INDEX_4(lDims, px, py, pz)] == LAT_CELL_OBSTACLE) {
746 dstIndex = P_INDEX_3(nCells, index, D3Q19_INV[d]);
747 }
748 else {
749 neighborIndex = grid[L_INDEX_4(lDims, px, py, pz)];
750
751 AssertMsg(neighborIndex != ~0, "Neighbor has no Index. (%d %d %d) direction %s (%d)\n", px, py, pz, D3Q19_NAMES[d], d);
752
753 dstIndex = P_INDEX_3(nCells, neighborIndex, d);
754 }
755 }
756 else if (nx < 0 || ny < 0 || nz < 0 || nx >= lX || ny >= lY || nz >= lZ) {
757 dstIndex = P_INDEX_3(nCells, index, D3Q19_INV[d]);
758 }
759 else if (ld->Lattice[L_INDEX_4(lDims, nx, ny, nz)] == LAT_CELL_OBSTACLE) {
760 dstIndex = P_INDEX_3(nCells, index, D3Q19_INV[d]);
761 }
762 else {
763 neighborIndex = grid[L_INDEX_4(lDims, nx, ny, nz)];
764
765 Assert(neighborIndex != ~0);
766
767 dstIndex = P_INDEX_3(nCells, neighborIndex, d);
768 }
769
770 Assert(dstIndex >= 0);
771 Assert(dstIndex < nCells * N_D3Q19);
772
773 adjList[index * N_D3Q19_IDX + d] = dstIndex;
774 }
775 }
776
777 int nThreads = 1;
778
779#ifdef _OPENMP
780 nThreads = omp_get_max_threads();
781#endif
782
783 SetupConsecNodes(ld, KDLR(kd), nThreads);
784
785 double loopBalanceEven = 2.0 * 19 * sizeof(PdfT);
786 double loopBalanceOdd = 2.0 * 19 * sizeof(PdfT) + (double)kdlr->nConsecNodes / nFluid * (18 * 4.0 + 4.0);
787 double loopBalance = (loopBalanceEven + loopBalanceOdd) / 2.0;
788
789 printf("# loop balance: %.2f B/FLUP even: %.2f B/FLUP odd %.2f B/FLUP\n",
790 loopBalance, loopBalanceEven, loopBalanceOdd);
791
792 // Fill remaining KernelData structures
793 kd->GetNode = GetNode;
794 kd->SetNode = SetNode;
795
796 kd->BoundaryConditionsGetPdf = FNAME(BCGetPdf);
797 kd->BoundaryConditionsSetPdf = FNAME(BCSetPdf);
798
799 kd->Kernel = FNAME(D3Q19ListAaRiaKernel);
800
801 kd->DstPdfs = NULL;
802 kd->PdfsActive = kd->Pdfs[0];
803
804 return;
805}
806
807void FNAME(D3Q19ListAaRiaDeinit)(LatticeDesc * ld, KernelData ** kernelData)
808{
809 KernelDataListRia ** kdlr = (KernelDataListRia **)kernelData;
810
811 MemFree((void **)&((*kdlr)->ConsecNodes));
812
813 if ((*kdlr)->ConsecThreadIndices != NULL) {
814 MemFree((void **)&((*kdlr)->ConsecThreadIndices));
815 }
816
817 if ((*kdlr)->FluidNodeThreadIndices != NULL) {
818 MemFree((void **)&((*kdlr)->FluidNodeThreadIndices));
819 }
820
821 KernelDataList ** kdl = (KernelDataList **)kernelData;
822
823 MemFree((void **)&((*kdl)->AdjList));
824 MemFree((void **)&((*kdl)->Coords));
825 MemFree((void **)&((*kdl)->Grid));
826
827 MemFree((void **)&((*kernelData)->Pdfs[0]));
828
829 MemFree((void **)kernelData);
830 return;
831}
832
This page took 0.123084 seconds and 5 git commands to generate.