add citation information
[LbmBenchmarkKernelsPublic.git] / src / BenchKernelD3Q19Common.c
1 // --------------------------------------------------------------------------
2 //
3 // Copyright
4 //   Markus Wittmann, 2016-2017
5 //   RRZE, University of Erlangen-Nuremberg, Germany
6 //   markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
7 //
8 //   Viktor Haag, 2016
9 //   LSS, University of Erlangen-Nuremberg, Germany
10 //
11 //  This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
12 //
13 //  LbmBenchKernels is free software: you can redistribute it and/or modify
14 //  it under the terms of the GNU General Public License as published by
15 //  the Free Software Foundation, either version 3 of the License, or
16 //  (at your option) any later version.
17 //
18 //  LbmBenchKernels is distributed in the hope that it will be useful,
19 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
20 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21 //  GNU General Public License for more details.
22 //
23 //  You should have received a copy of the GNU General Public License
24 //  along with LbmBenchKernels.  If not, see <http://www.gnu.org/licenses/>.
25 //
26 // --------------------------------------------------------------------------
27 #include "BenchKernelD3Q19Common.h"
28
29 #include "Memory.h"
30 #include "Vtk.h"
31
32 #include <inttypes.h>
33 #include <math.h>
34
35 #ifdef _OPENMP
36         #include <omp.h>
37 #endif
38
39 // Forward definition.
40 void FNAME(D3Q19Kernel)(LatticeDesc * ld, struct KernelData_ * kd, CaseData * cd);
41
42 void FNAME(D3Q19BlkKernel)(LatticeDesc * ld, struct KernelData_ * kd, CaseData * cd);
43
44
45
46 static void FNAME(BcGetPdf)(KernelData * kd, int x, int y, int z, int dir, PdfT * pdf)
47 {
48         Assert(kd != NULL);
49         Assert(kd->PdfsActive != NULL);
50         Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
51         Assert(pdf != NULL);
52
53         Assert(x >= 0);
54         Assert(y >= 0);
55         Assert(z >= 0);
56         Assert(x < kd->Dims[0]);
57         Assert(y < kd->Dims[1]);
58         Assert(z < kd->Dims[2]);
59         Assert(dir >= 0);
60         Assert(dir < N_D3Q19);
61
62         int oX = kd->Offsets[0];
63         int oY = kd->Offsets[1];
64         int oZ = kd->Offsets[2];
65
66 #ifdef PROP_MODEL_PUSH
67         int nx = x;
68         int ny = y;
69         int nz = z;
70 #elif PROP_MODEL_PULL
71         int nx = x - D3Q19_X[dir];
72         int ny = y - D3Q19_Y[dir];
73         int nz = z - D3Q19_Z[dir];
74 #endif
75
76         #define I(x, y, z, dir) P_INDEX_5(kd->GlobalDims, (x), (y), (z), (dir))
77         *pdf = kd->PdfsActive[I(nx + oX, ny + oY, nz + oZ, dir)];
78         #undef I
79
80         return;
81 }
82
83 static void FNAME(BcSetPdf)(KernelData * kd, int x, int y, int z, int dir, PdfT pdf)
84 {
85         Assert(kd != NULL);
86         Assert(kd->PdfsActive != NULL);
87         Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
88         Assert(x >= 0);
89         Assert(y >= 0);
90         Assert(z >= 0);
91         Assert(x < kd->Dims[0]);
92         Assert(y < kd->Dims[1]);
93         Assert(z < kd->Dims[2]);
94         Assert(dir >= 0);
95         Assert(dir < N_D3Q19);
96
97         int oX = kd->Offsets[0];
98         int oY = kd->Offsets[1];
99         int oZ = kd->Offsets[2];
100
101 #ifdef PROP_MODEL_PUSH
102         int nx = x;
103         int ny = y;
104         int nz = z;
105 #elif PROP_MODEL_PULL
106         int nx = x - D3Q19_X[dir];
107         int ny = y - D3Q19_Y[dir];
108         int nz = z - D3Q19_Z[dir];
109 #endif
110
111         #define I(x, y, z, dir) P_INDEX_5(kd->GlobalDims, (x), (y), (z), (dir))
112         kd->PdfsActive[I(nx + oX, ny + oY, nz + oZ, dir)] = pdf;
113         #undef I
114
115
116         return;
117 }
118
119
120 static void FNAME(GetNode)(KernelData * kd, int x, int y, int z, PdfT * pdfs)
121 {
122         Assert(kd != NULL);
123         Assert(kd->PdfsActive != NULL);
124         Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
125         Assert(pdfs != NULL);
126         Assert(x >= 0);
127         Assert(y >= 0);
128         Assert(z >= 0);
129         Assert(x < kd->Dims[0]);
130         Assert(y < kd->Dims[1]);
131         Assert(z < kd->Dims[2]);
132
133         int oX = kd->Offsets[0];
134         int oY = kd->Offsets[1];
135         int oZ = kd->Offsets[2];
136
137
138         #define I(x, y, z, dir) P_INDEX_5(kd->GlobalDims, (x), (y), (z), (dir))
139 #ifdef PROP_MODEL_PUSH
140         #define X(name, idx, idxinv, _x, _y, _z)        pdfs[idx] = kd->PdfsActive[I(x + oX, y + oY, z + oZ, idx)];
141 #elif PROP_MODEL_PULL
142         #define X(name, idx, idxinv, _x, _y, _z)        pdfs[idx] = kd->PdfsActive[I(x + oX - (_x), y + oY - (_y), z + oZ - (_z), idx)];
143 #endif
144         D3Q19_LIST
145         #undef X
146         #undef I
147
148 #if 0           // DETECT NANs
149
150         for (int d = 0; d < 19; ++d) {
151                 if (isnan(pdfs[d])) {
152                         printf("%d %d %d %d nan! get node\n", x, y, z, d);
153
154                         for (int d2 = 0; d2 < 19; ++d2) {
155                                 printf("%d: %e\n", d2, pdfs[d2]);
156                         }
157
158                         exit(1);
159                 }
160         }
161
162 #endif
163
164         return;
165 }
166
167
168 static void FNAME(SetNode)(KernelData * kd, int x, int y, int z, PdfT * pdfs)
169 {
170         Assert(kd != NULL);
171         Assert(kd->PdfsActive != NULL);
172         Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
173         Assert(pdfs != NULL);
174
175         Assert(x >= 0);
176         Assert(y >= 0);
177         Assert(z >= 0);
178         Assert(x < kd->Dims[0]);
179         Assert(y < kd->Dims[1]);
180         Assert(z < kd->Dims[2]);
181
182         int oX = kd->Offsets[0];
183         int oY = kd->Offsets[1];
184         int oZ = kd->Offsets[2];
185
186         #define I(x, y, z, dir) P_INDEX_5(kd->GlobalDims, (x), (y), (z), (dir))
187 #ifdef PROP_MODEL_PUSH
188         #define X(name, idx, idxinv, _x, _y, _z)        kd->PdfsActive[I(x + oX, y + oY, z + oZ, idx)] = pdfs[idx];
189 #elif PROP_MODEL_PULL
190         #define X(name, idx, idxinv, _x, _y, _z)        kd->PdfsActive[I(x + oX - (_x), y + oY - (_y), z + oZ - (_z), idx)] = pdfs[idx];
191 #endif
192         D3Q19_LIST
193         #undef X
194         #undef I
195
196         return;
197 }
198
199
200 static void ParameterUsage()
201 {
202         printf("Kernel parameters:\n");
203         printf("  [-blk <n>] [-blk-[xyz] <n>]\n");
204
205         return;
206 }
207
208 static void ParseParameters(Parameters * params, int * blk)
209 {
210         Assert(blk != NULL);
211
212         blk[0] = 0; blk[1] = 0; blk[2] = 0;
213
214         #define ARG_IS(param)                   (!strcmp(params->KernelArgs[i], param))
215         #define NEXT_ARG_PRESENT() \
216                 do { \
217                         if (i + 1 >= params->nKernelArgs) { \
218                                 printf("ERROR: argument %s requires a parameter.\n", params->KernelArgs[i]); \
219                                 exit(1); \
220                         } \
221                 } while (0)
222
223
224         for (int i = 0; i < params->nKernelArgs; ++i) {
225                 if (ARG_IS("-blk") || ARG_IS("--blk")) {
226                         NEXT_ARG_PRESENT();
227
228                         int tmp = strtol(params->KernelArgs[++i], NULL, 0);
229
230                         if (tmp < 0) {
231                                 printf("ERROR: blocking parameter must be >= 0.\n");
232                                 exit(1);
233                         }
234
235                         blk[0] = blk[1] = blk[2] = tmp;
236                 }
237                 else if (ARG_IS("-blk-x") || ARG_IS("--blk-x")) {
238                         NEXT_ARG_PRESENT();
239
240                         int tmp = strtol(params->KernelArgs[++i], NULL, 0);
241
242                         if (tmp < 0) {
243                                 printf("ERROR: blocking parameter must be >= 0.\n");
244                                 exit(1);
245                         }
246
247                         blk[0] = tmp;
248                 }
249                 else if (ARG_IS("-blk-y") || ARG_IS("--blk-y")) {
250                         NEXT_ARG_PRESENT();
251
252                         int tmp = strtol(params->KernelArgs[++i], NULL, 0);
253
254                         if (tmp < 0) {
255                                 printf("ERROR: blocking parameter must be >= 0.\n");
256                                 exit(1);
257                         }
258
259                         blk[1] = tmp;
260                 }
261                 else if (ARG_IS("-blk-z") || ARG_IS("--blk-z")) {
262                         NEXT_ARG_PRESENT();
263
264                         int tmp = strtol(params->KernelArgs[++i], NULL, 0);
265
266                         if (tmp < 0) {
267                                 printf("ERROR: blocking parameter must be >= 0.\n");
268                                 exit(1);
269                         }
270
271                         blk[2] = tmp;
272                 }
273                 else if (ARG_IS("-h") || ARG_IS("-help") || ARG_IS("--help")) {
274                         ParameterUsage();
275                         exit(1);
276                 }
277                 else {
278                         printf("ERROR: unknown kernel parameter.\n");
279                         ParameterUsage();
280                         exit(1);
281                 }
282         }
283
284         #undef ARG_IS
285         #undef NEXT_ARG_PRESENT
286
287         return;
288 }
289
290
291 static void D3Q19InternalInit(int blocking, LatticeDesc * ld, KernelData ** kernelData, Parameters * params)
292 {
293         KernelDataEx * kdex = NULL;
294         MemAlloc((void **)&kdex, sizeof(KernelDataEx));
295
296         kdex->Blk[0] = 0; kdex->Blk[1] = 0; kdex->Blk[2] = 0;
297
298         KernelData * kd = &kdex->kd;
299         *kernelData = kd;
300
301         kd->nObstIndices = ld->nObst;
302
303         // Ajust the dimensions according to padding, if used.
304         kd->Dims[0] = ld->Dims[0];
305         kd->Dims[1] = ld->Dims[1];
306         kd->Dims[2] = ld->Dims[2];
307
308
309         int * lDims = ld->Dims;
310         int * gDims = kd->GlobalDims;
311
312         gDims[0] = lDims[0] + 2;
313         gDims[1] = lDims[1] + 2;
314         gDims[2] = lDims[2] + 2;
315
316         kd->Offsets[0] = 1;
317         kd->Offsets[1] = 1;
318         kd->Offsets[2] = 1;
319
320         int lX = lDims[0];
321         int lY = lDims[1];
322         int lZ = lDims[2];
323
324         int gX = gDims[0];
325         int gY = gDims[1];
326         int gZ = gDims[2];
327
328         int oX = kd->Offsets[0];
329         int oY = kd->Offsets[1];
330         int oZ = kd->Offsets[2];
331
332         int blk[3] = { 0 };
333
334         int nCells = gX * gY * gZ;
335
336         PdfT * pdfs[2];
337
338         if (blocking) {
339                 ParseParameters(params, blk);
340         }
341         else {
342                 if (params->nKernelArgs != 0) {
343                         printf("ERROR: unknown kernel parameter.\n");
344                         printf("This kernels accepts no parameters.\n");
345                         exit(1);
346                 }
347         }
348
349
350         if (blk[0] == 0) blk[0] = gX;
351         if (blk[1] == 0) blk[1] = gY;
352         if (blk[2] == 0) blk[2] = gZ;
353
354         printf("# blocking x: %3d y: %3d z: %3d\n", blk[0], blk[1], blk[2]);
355
356
357         kdex->Blk[0] = blk[0]; kdex->Blk[1] = blk[1]; kdex->Blk[2] = blk[2];
358
359
360         printf("# allocating data for %d LB nodes with padding (%lu bytes = %f MiB for both lattices)\n",
361                nCells, 2 * sizeof(PdfT) * nCells * N_D3Q19,
362                2 * sizeof(PdfT) * nCells * N_D3Q19 / 1024.0 / 1024.0);
363
364         // MemAlloc((void **)&pdfs[0], sizeof(PdfT) * nCells * N_D3Q19);
365         // MemAlloc((void **)&pdfs[1], sizeof(PdfT) * nCells * N_D3Q19);
366         MemAllocAligned((void **)&pdfs[0], sizeof(PdfT) * nCells * N_D3Q19, 2 * 1024 * 1024);
367         MemAllocAligned((void **)&pdfs[1], sizeof(PdfT) * nCells * N_D3Q19, 2 * 1024 * 1024);
368
369         printf("# pdfs[0] = %p\n", pdfs[0]);
370         printf("# pdfs[1] = %p\n", pdfs[1]);
371
372         kd->Pdfs[0] = pdfs[0];
373         kd->Pdfs[1] = pdfs[1];
374
375         // Initialize PDFs with some (arbitrary) data for correct NUMA placement.
376         // This depends on the chosen data layout.
377         // The structure of the loop should resemble the same "execution layout"
378         // as in the kernel!
379
380         if (blocking) {
381                 int nX = ld->Dims[0];
382                 int nY = ld->Dims[1];
383                 int nZ = ld->Dims[2];
384
385                 int nThreads = 1;
386
387                 #ifdef _OPENMP
388                 nThreads = omp_get_max_threads();
389                 #endif
390
391                 #ifdef _OPENMP
392                         #pragma omp parallel for default(none) \
393                                 shared(pdfs, gDims, nX, nY, nZ, oX, oY, oZ, blk, nThreads)
394                 #endif
395                 for (int i = 0; i < nThreads; ++i) {
396
397                         int threadStartX = nX / nThreads * i;
398                         int threadEndX   = nX / nThreads * (i + 1);
399
400                         if (nX % nThreads > 0) {
401                                 if (nX % nThreads > i) {
402                                         threadStartX += i;
403                                         threadEndX   += i + 1;
404                                 }
405                                 else {
406                                         threadStartX += nX % nThreads;
407                                         threadEndX   += nX % nThreads;
408                                 }
409                         }
410
411                         for (int bX = oX + threadStartX; bX < threadEndX + oX; bX += blk[0]) {
412                                 for (int bY = oY; bY < nY + oY; bY += blk[1]) {
413                                         for (int bZ = oZ; bZ < nZ + oZ; bZ += blk[2]) {
414
415                                                 int eZ = MIN(bZ + blk[2], nZ + oZ);
416                                                 int eY = MIN(bY + blk[1], nY + oY);
417                                                 int eX = MIN(bX + blk[0], threadEndX + oX);
418
419                                                 for (int x = bX; x < eX; ++x) {
420                                                         for (int y = bY; y < eY; ++y) {
421                                                                 for (int z = bZ; z < eZ; ++z) {
422                                                                         for (int d = 0; d < N_D3Q19; ++d) {
423                                                                                 pdfs[0][P_INDEX_5(gDims, x, y, z, d)] = 1.0;
424                                                                                 pdfs[1][P_INDEX_5(gDims, x, y, z, d)] = 1.0;
425                                                                         }
426                                                                 }
427                                                         }
428                                                 }
429                                         }
430                                 }
431                         }
432                 }
433
434         }
435         else {
436                 #ifdef _OPENMP
437                         #pragma omp parallel for collapse(3)
438                 #endif
439                 for (int x = 0; x < gX; ++x) {
440                         for (int y = 0; y < gY; ++y) {
441                                 for (int z = 0; z < gZ; ++z) {
442                                         for (int d = 0; d < N_D3Q19; ++d) {
443                                                 pdfs[0][P_INDEX_5(gDims, x, y, z, d)] = 1.0;
444                                                 pdfs[1][P_INDEX_5(gDims, x, y, z, d)] = 1.0;
445                                         }
446                                 }
447                         }
448                 }
449         }
450
451         // Initialize all PDFs to some standard value.
452         for (int x = 0; x < gX; ++x) {
453                 for (int y = 0; y < gY; ++y) {
454                         for (int z = 0; z < gZ; ++z) {
455                                 for (int d = 0; d < N_D3Q19; ++d) {
456                                         pdfs[0][P_INDEX_5(gDims, x, y, z, d)] = 0.0;
457                                         pdfs[1][P_INDEX_5(gDims, x, y, z, d)] = 0.0;
458                                 }
459                         }
460                 }
461         }
462
463
464         // Count how many *PDFs* need bounce back treatment.
465
466         uint64_t nPdfs = ((uint64_t)19) * gX * gY * gZ;
467
468         if (nPdfs > ((2LU << 31) - 1)) {
469                 printf("ERROR: number of PDFs exceed 2^31.\n");
470                 exit(1);
471         }
472
473         // Compiler bug? Incorrect computation of nBounceBackPdfs when using icc 15.0.2.
474         // Works when declaring nBounceBackPdfs as int64_t or using volatile.
475         volatile int nBounceBackPdfs = 0;
476         // int64_t nBounceBackPdfs = 0;
477         int nx, ny, nz, px, py, pz;
478
479         // TODO: apply blocking?
480
481         for (int x = 0; x < lX; ++x) {
482                 for (int y = 0; y < lY; ++y) {
483                         for (int z = 0; z < lZ; ++z) {
484
485                                 if (ld->Lattice[L_INDEX_4(ld->Dims, x, y, z)] != LAT_CELL_OBSTACLE) {
486                                         for (int d = 0; d < N_D3Q19; ++d) {
487 #ifdef PROP_MODEL_PUSH
488                                                 nx = x + D3Q19_X[d];
489                                                 ny = y + D3Q19_Y[d];
490                                                 nz = z + D3Q19_Z[d];
491 #elif PROP_MODEL_PULL
492                                                 nx = x - D3Q19_X[d];
493                                                 ny = y - D3Q19_Y[d];
494                                                 nz = z - D3Q19_Z[d];
495 #else
496         #error PROP_MODEL_NAME unknown.
497 #endif
498                                                 // Check if neighbor is inside the lattice.
499                                                 // if(nx < 0 || ny < 0 || nz < 0 || nx >= lX || ny >= lY || nz >= lZ) {
500                                                 //      continue;
501                                                 // }
502                                                 if ((nx < 0 || nx >= lX) && ld->PeriodicX) {
503                                                         ++nBounceBackPdfs; // Compiler bug --> see above
504                                                 }
505                                                 else if ((ny < 0 || ny >= lY) && ld->PeriodicY) {
506                                                         ++nBounceBackPdfs; // Compiler bug --> see above
507                                                 }
508                                                 else if ((nz < 0 || nz >= lZ) && ld->PeriodicZ) {
509                                                         ++nBounceBackPdfs; // Compiler bug --> see above
510                                                 }
511                                                 else if (nx < 0 || ny < 0 || nz < 0 || nx >= lX || ny >= lY || nz >= lZ) {
512                                                         continue;
513                                                 }
514                                                 else if (ld->Lattice[L_INDEX_4(lDims, nx, ny, nz)] == LAT_CELL_OBSTACLE) {
515                                                         ++nBounceBackPdfs; // Compiler bug --> see above
516                                                 }
517                                         }
518                                 }
519                         }
520                 }
521         }
522
523
524         printf("# allocating %d indices for bounce back pdfs (%s for source and destination array)\n", nBounceBackPdfs, ByteToHuman(sizeof(int) * nBounceBackPdfs * 2));
525
526         MemAlloc((void **) & (kd->BounceBackPdfsSrc), sizeof(int) * nBounceBackPdfs + 100);
527         MemAlloc((void **) & (kd->BounceBackPdfsDst), sizeof(int) * nBounceBackPdfs + 100);
528
529         kd->nBounceBackPdfs = nBounceBackPdfs;
530         nBounceBackPdfs = 0;
531
532         int srcIndex;
533         int dstIndex;
534
535         for (int x = 0; x < lX; ++x) {
536                 for (int y = 0; y < lY; ++y) {
537                         for (int z = 0; z < lZ; ++z) {
538
539                                 if (ld->Lattice[L_INDEX_4(ld->Dims, x, y, z)] != LAT_CELL_OBSTACLE) {
540                                         for (int d = 0; d < N_D3Q19; ++d) {
541 #ifdef PROP_MODEL_PUSH
542                                                 nx = x + D3Q19_X[d];
543                                                 ny = y + D3Q19_Y[d];
544                                                 nz = z + D3Q19_Z[d];
545 #elif PROP_MODEL_PULL
546                                                 nx = x - D3Q19_X[d];
547                                                 ny = y - D3Q19_Y[d];
548                                                 nz = z - D3Q19_Z[d];
549 #else
550         #error PROP_MODEL_NAME unknown.
551 #endif
552
553                                                 if (    ((nx < 0 || nx >= lX) && ld->PeriodicX) ||
554                                                                 ((ny < 0 || ny >= lY) && ld->PeriodicY) ||
555                                                                 ((nz < 0 || nz >= lZ) && ld->PeriodicZ)
556                                                 ){
557                                                         // Implement periodic boundary in X direction.
558
559                                                         // If the target node reached through propagation is outside the lattice
560                                                         // the kernel stores it in some buffer around the domain.
561                                                         // From this position the PDF must be transported to the other side of the
562                                                         // geometry.
563
564                                                         // Take PDF from outside the domain.
565
566                                                         // x periodic
567                                                         if (nx < 0) {
568                                                                 px = lX - 1;
569                                                         }
570                                                         else if (nx >= lX) {
571                                                                 px = 0;
572                                                         } else {
573                                                                 px = nx;
574                                                         }
575
576                                                         // y periodic
577                                                         if (ny < 0) {
578                                                                 py = lY - 1;
579                                                         }
580                                                         else if (ny >= lY) {
581                                                                 py = 0;
582                                                         } else {
583                                                                 py = ny;
584                                                         }
585
586                                                         // z periodic
587                                                         if (nz < 0) {
588                                                                 pz = lZ - 1;
589                                                         }
590                                                         else if (nz >= lZ) {
591                                                                 pz = 0;
592                                                         } else {
593                                                                 pz = nz;
594                                                         }
595
596                                                         if (ld->Lattice[L_INDEX_4(lDims, px, py, pz)] == LAT_CELL_OBSTACLE) {
597 #ifdef PROP_MODEL_PUSH
598                                                                 srcIndex = P_INDEX_5(gDims, nx + oX, ny + oY, nz + oZ, d);
599                                                                 dstIndex = P_INDEX_5(gDims,  x + oX,  y + oY,  z + oZ, D3Q19_INV[d]);
600 #elif PROP_MODEL_PULL
601                                                                 srcIndex = P_INDEX_5(gDims,  x + oX,  y + oY,  z + oZ, D3Q19_INV[d]);
602                                                                 dstIndex = P_INDEX_5(gDims, nx + oX, ny + oY, nz + oZ, d);
603 #endif
604                                                         }
605                                                         else {
606
607 #ifdef PROP_MODEL_PUSH
608                                                                 srcIndex = P_INDEX_5(gDims, nx + oX, ny + oY, nz + oZ, d);
609                                                                 // Put it on the other side back into the domain.
610                                                                 dstIndex = P_INDEX_5(gDims, px + oX, py + oY, pz + oZ, d);
611 #elif PROP_MODEL_PULL
612                                                                 srcIndex = P_INDEX_5(gDims, px + oX, py + oY, pz + oZ, d);
613                                                                 // Put it on the other side back into the ghost layer.
614                                                                 dstIndex = P_INDEX_5(gDims, nx + oX, ny + oY, nz + oZ, d);
615 #endif
616
617                                                                 VerifyMsg(nBounceBackPdfs < kd->nBounceBackPdfs, "nBBPdfs %d < kd->nBBPdfs %d  xyz: %d %d %d d: %d\n", nBounceBackPdfs, kd->nBounceBackPdfs, x, y, z, d);
618
619                                                         }
620
621                                                         kd->BounceBackPdfsSrc[nBounceBackPdfs] = srcIndex;
622                                                         kd->BounceBackPdfsDst[nBounceBackPdfs] = dstIndex;
623
624                                                         ++nBounceBackPdfs;
625
626                                                 }
627                                                 else if (nx < 0 || ny < 0 || nz < 0 || nx >= lX || ny >= lY || nz >= lZ) {
628                                                         continue;
629                                                 }
630                                                 else if (ld->Lattice[L_INDEX_4(lDims, nx, ny, nz)] == LAT_CELL_OBSTACLE) {
631 #ifdef PROP_MODEL_PUSH
632                                                         srcIndex = P_INDEX_5(gDims, nx + oX, ny + oY, nz + oZ, d);
633                                                         dstIndex = P_INDEX_5(gDims,  x + oX,  y + oY,  z + oZ, D3Q19_INV[d]);
634 #elif PROP_MODEL_PULL
635                                                         srcIndex = P_INDEX_5(gDims,  x + oX,  y + oY,  z + oZ, D3Q19_INV[d]);
636                                                         dstIndex = P_INDEX_5(gDims, nx + oX, ny + oY, nz + oZ, d);
637                                                         // srcIndex = P_INDEX_5(gDims,  x + oX,  y + oY,  z + oZ, d);
638                                                         // dstIndex = P_INDEX_5(gDims, nx + oX, ny + oY, nz + oZ, D3Q19_INV[d]);
639 #endif
640
641                                                         VerifyMsg(nBounceBackPdfs < kd->nBounceBackPdfs, "nBBPdfs %d < kd->nBBPdfs %d  xyz: %d %d %d d: %d\n", nBounceBackPdfs, kd->nBounceBackPdfs, x, y, z, d);
642
643                                                         kd->BounceBackPdfsSrc[nBounceBackPdfs] = srcIndex;
644                                                         kd->BounceBackPdfsDst[nBounceBackPdfs] = dstIndex;
645
646                                                         ++nBounceBackPdfs;
647                                                 }
648                                         }
649                                 }
650                         }
651                 }
652         }
653
654
655         // Fill remaining KernelData structures
656         kd->GetNode = FNAME(GetNode);
657         kd->SetNode = FNAME(SetNode);
658
659         kd->BoundaryConditionsGetPdf = FNAME(BcGetPdf);
660         kd->BoundaryConditionsSetPdf = FNAME(BcSetPdf);
661
662         if (blocking) {
663                 kd->Kernel = FNAME(D3Q19BlkKernel);
664         }
665         else {
666                 kd->Kernel = FNAME(D3Q19Kernel);
667         }
668
669         kd->DstPdfs = NULL;
670         kd->PdfsActive = kd->Pdfs[0];
671
672         return;
673 }
674
675
676 void FNAME(D3Q19BlkInit)(LatticeDesc * ld, KernelData ** kernelData, Parameters * params)
677 {
678         D3Q19InternalInit(1, ld, kernelData, params);
679 }
680
681
682 void FNAME(D3Q19BlkDeinit)(LatticeDesc * ld, KernelData ** kernelData)
683 {
684         MemFree((void **) & ((*kernelData)->Pdfs[0]));
685         MemFree((void **) & ((*kernelData)->Pdfs[1]));
686
687         MemFree((void **) & ((*kernelData)->BounceBackPdfsSrc));
688         MemFree((void **) & ((*kernelData)->BounceBackPdfsDst));
689
690         MemFree((void **)kernelData);
691
692         return;
693 }
694
695 // Kernels without blocking perform the same initialization/deinitialization as with
696 // blocking, except that a different kernel is called. Hence, no arguments are allowed.
697
698 void FNAME(D3Q19Init)(LatticeDesc * ld, KernelData ** kernelData, Parameters * params)
699 {
700         D3Q19InternalInit(0, ld, kernelData, params);
701
702 }
703
704 void FNAME(D3Q19Deinit)(LatticeDesc * ld, KernelData ** kernelData)
705 {
706         FNAME(D3Q19BlkDeinit)(ld, kernelData);
707         return;
708 }
This page took 0.083248 seconds and 4 git commands to generate.