add citation information
[LbmBenchmarkKernelsPublic.git] / src / BenchKernelD3Q19AaVecCommon.c
1 // --------------------------------------------------------------------------
2 //
3 // Copyright
4 //   Markus Wittmann, 2016-2017
5 //   RRZE, University of Erlangen-Nuremberg, Germany
6 //   markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
7 //
8 //   Viktor Haag, 2016
9 //   LSS, University of Erlangen-Nuremberg, Germany
10 //
11 //  This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
12 //
13 //  LbmBenchKernels is free software: you can redistribute it and/or modify
14 //  it under the terms of the GNU General Public License as published by
15 //  the Free Software Foundation, either version 3 of the License, or
16 //  (at your option) any later version.
17 //
18 //  LbmBenchKernels is distributed in the hope that it will be useful,
19 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
20 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21 //  GNU General Public License for more details.
22 //
23 //  You should have received a copy of the GNU General Public License
24 //  along with LbmBenchKernels.  If not, see <http://www.gnu.org/licenses/>.
25 //
26 // --------------------------------------------------------------------------
27 #include "BenchKernelD3Q19AaVecCommon.h"
28
29 #include "Memory.h"
30 #include "Vtk.h"
31 #include "Vector.h"
32
33 #include <inttypes.h>
34 #include <math.h>
35
36 #ifdef _OPENMP
37         #include <omp.h>
38 #endif
39
40 // Forward definition.
41 void FNAME(D3Q19AaVecKernel)(LatticeDesc * ld, struct KernelData_ * kd, CaseData * cd);
42
43
44 static void FNAME(BcGetPdf)(KernelData * kd, int x, int y, int z, int dir, PdfT * pdf)
45 {
46         Assert(kd != NULL);
47         Assert(kd->PdfsActive != NULL);
48         Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
49         Assert(pdf != NULL);
50
51         Assert(x >= 0); Assert(y >= 0); Assert(z >= 0);
52         Assert(x < kd->Dims[0]); Assert(y < kd->Dims[1]); Assert(z < kd->Dims[2]);
53         Assert(dir >= 0); Assert(dir < N_D3Q19);
54
55         KernelDataAa * kda = KDA(kd);
56
57         int oX = kd->Offsets[0];
58         int oY = kd->Offsets[1];
59         int oZ = kd->Offsets[2];
60
61         if (kda->Iteration % 2 == 0) {
62                 // Pdfs are stored inverse, local PDFs are located in remote nodes
63                 int nx = x - D3Q19_X[dir];
64                 int ny = y - D3Q19_Y[dir];
65                 int nz = z - D3Q19_Z[dir];
66
67                 #define I(x, y, z, dir) P_INDEX_5(kd->GlobalDims, (x), (y), (z), (dir))
68                 *pdf = kd->PdfsActive[I(nx + oX, ny + oY, nz + oZ, D3Q19_INV[dir])];
69                 #undef I
70         }
71         else {
72                 int nx = x;
73                 int ny = y;
74                 int nz = z;
75
76                 #define I(x, y, z, dir) P_INDEX_5(kd->GlobalDims, (x), (y), (z), (dir))
77                 *pdf = kd->PdfsActive[I(nx + oX, ny + oY, nz + oZ, dir)];
78                 #undef I
79         }
80
81
82         return;
83 }
84
85 static void FNAME(BcSetPdf)(KernelData * kd, int x, int y, int z, int dir, PdfT pdf)
86 {
87         Assert(kd != NULL);
88         Assert(kd->PdfsActive != NULL);
89         Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
90
91         Assert(x >= 0); Assert(y >= 0); Assert(z >= 0);
92         Assert(x < kd->Dims[0]); Assert(y < kd->Dims[1]); Assert(z < kd->Dims[2]);
93         Assert(dir >= 0); Assert(dir < N_D3Q19);
94
95         KernelDataAa * kda = KDA(kd);
96
97         int oX = kd->Offsets[0];
98         int oY = kd->Offsets[1];
99         int oZ = kd->Offsets[2];
100
101         if (kda->Iteration % 2 == 0) {
102                 // Pdfs are stored inverse, local PDFs are located in remote nodes
103                 int nx = x - D3Q19_X[dir];
104                 int ny = y - D3Q19_Y[dir];
105                 int nz = z - D3Q19_Z[dir];
106
107                 #define I(x, y, z, dir) P_INDEX_5(kd->GlobalDims, (x), (y), (z), (dir))
108                 pdf = kd->PdfsActive[I(nx + oX, ny + oY, nz + oZ, D3Q19_INV[dir])] = pdf;
109                 #undef I
110         }
111         else {
112                 int nx = x;
113                 int ny = y;
114                 int nz = z;
115
116                 #define I(x, y, z, dir) P_INDEX_5(kd->GlobalDims, (x), (y), (z), (dir))
117                 kd->PdfsActive[I(nx + oX, ny + oY, nz + oZ, dir)] = pdf;
118                 #undef I
119         }
120
121         return;
122 }
123
124
125 static void FNAME(GetNode)(KernelData * kd, int x, int y, int z, PdfT * pdfs)
126 {
127         Assert(kd != NULL);
128         Assert(kd->PdfsActive != NULL);
129         Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
130         Assert(pdfs != NULL);
131
132         Assert(x >= 0); Assert(y >= 0); Assert(z >= 0);
133         Assert(x < kd->Dims[0]); Assert(y < kd->Dims[1]); Assert(z < kd->Dims[2]);
134
135         KernelDataAa * kda = KDA(kd);
136
137         int oX = kd->Offsets[0];
138         int oY = kd->Offsets[1];
139         int oZ = kd->Offsets[2];
140
141
142         if (kda->Iteration % 2 == 0) {
143                 // Pdfs are stored inverse, local PDFs are located in remote nodes
144
145                 #define I(x, y, z, dir) P_INDEX_5(kd->GlobalDims, (x), (y), (z), (dir))
146                 #define X(name, idx, idxinv, _x, _y, _z)        pdfs[idx] = kd->PdfsActive[I(x + oX - _x, y + oY - _y, z + oZ - _z, D3Q19_INV[idx])];
147                 D3Q19_LIST
148                 #undef X
149                 #undef I
150         }
151         else {
152                 #define I(x, y, z, dir) P_INDEX_5(kd->GlobalDims, (x), (y), (z), (dir))
153                 #define X(name, idx, idxinv, _x, _y, _z)        pdfs[idx] = kd->PdfsActive[I(x + oX, y + oY, z + oZ, idx)];
154                 D3Q19_LIST
155                 #undef X
156                 #undef I
157
158         }
159
160 #if 0           // DETECT NANs
161
162         for (int d = 0; d < 19; ++d) {
163                 if (isnan(pdfs[d])) {
164                         printf("%d %d %d %d nan! get node\n", x, y, z, d);
165
166                         for (int d2 = 0; d2 < 19; ++d2) {
167                                 printf("%d: %e\n", d2, pdfs[d2]);
168                         }
169
170                         exit(1);
171                 }
172         }
173
174 #endif
175
176         return;
177 }
178
179
180 static void FNAME(SetNode)(KernelData * kd, int x, int y, int z, PdfT * pdfs)
181 {
182         Assert(kd != NULL);
183         Assert(kd->PdfsActive != NULL);
184         Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]);
185         Assert(pdfs != NULL);
186
187         Assert(x >= 0); Assert(y >= 0); Assert(z >= 0);
188         Assert(x < kd->Dims[0]); Assert(y < kd->Dims[1]); Assert(z < kd->Dims[2]);
189
190         KernelDataAa * kda = KDA(kd);
191
192         int oX = kd->Offsets[0];
193         int oY = kd->Offsets[1];
194         int oZ = kd->Offsets[2];
195
196         if (kda->Iteration % 2 == 0) {
197                 // Pdfs are stored inverse, local PDFs are located in remote nodes
198
199                 #define I(x, y, z, dir) P_INDEX_5(kd->GlobalDims, (x), (y), (z), (dir))
200                 #define X(name, idx, idxinv, _x, _y, _z)        kd->PdfsActive[I(x + oX - _x, y + oY - _y, z + oZ - _z, D3Q19_INV[idx])] = pdfs[idx];
201                 D3Q19_LIST
202                 #undef X
203                 #undef I
204         }
205         else {
206                 #define I(x, y, z, dir) P_INDEX_5(kd->GlobalDims, (x), (y), (z), (dir))
207                 #define X(name, idx, idxinv, _x, _y, _z)        kd->PdfsActive[I(x + oX, y + oY, z + oZ, idx)] = pdfs[idx];
208                 D3Q19_LIST
209                 #undef X
210                 #undef I
211         }
212         return;
213 }
214
215
216 static void ParameterUsage()
217 {
218         printf("Kernel parameters:\n");
219         printf("  [-blk <n>] [-blk-[xyz] <n>]\n");
220
221         return;
222 }
223
224 static void ParseParameters(Parameters * params, int * blk)
225 {
226         Assert(blk != NULL);
227
228         blk[0] = 0; blk[1] = 0; blk[2] = 0;
229
230         #define ARG_IS(param)                   (!strcmp(params->KernelArgs[i], param))
231         #define NEXT_ARG_PRESENT() \
232                 do { \
233                         if (i + 1 >= params->nKernelArgs) { \
234                                 printf("ERROR: argument %s requires a parameter.\n", params->KernelArgs[i]); \
235                                 exit(1); \
236                         } \
237                 } while (0)
238
239
240         for (int i = 0; i < params->nKernelArgs; ++i) {
241                 if (ARG_IS("-blk") || ARG_IS("--blk")) {
242                         NEXT_ARG_PRESENT();
243
244                         int tmp = strtol(params->KernelArgs[++i], NULL, 0);
245
246                         if (tmp < 0) {
247                                 printf("ERROR: blocking parameter must be >= 0.\n");
248                                 exit(1);
249                         }
250
251                         blk[0] = blk[1] = blk[2] = tmp;
252                 }
253                 else if (ARG_IS("-blk-x") || ARG_IS("--blk-x")) {
254                         NEXT_ARG_PRESENT();
255
256                         int tmp = strtol(params->KernelArgs[++i], NULL, 0);
257
258                         if (tmp < 0) {
259                                 printf("ERROR: blocking parameter must be >= 0.\n");
260                                 exit(1);
261                         }
262
263                         blk[0] = tmp;
264                 }
265                 else if (ARG_IS("-blk-y") || ARG_IS("--blk-y")) {
266                         NEXT_ARG_PRESENT();
267
268                         int tmp = strtol(params->KernelArgs[++i], NULL, 0);
269
270                         if (tmp < 0) {
271                                 printf("ERROR: blocking parameter must be >= 0.\n");
272                                 exit(1);
273                         }
274
275                         blk[1] = tmp;
276                 }
277                 else if (ARG_IS("-blk-z") || ARG_IS("--blk-z")) {
278                         NEXT_ARG_PRESENT();
279
280                         int tmp = strtol(params->KernelArgs[++i], NULL, 0);
281
282                         if (tmp < 0) {
283                                 printf("ERROR: blocking parameter must be >= 0.\n");
284                                 exit(1);
285                         }
286
287                         blk[2] = tmp;
288                 }
289                 else if (ARG_IS("-h") || ARG_IS("-help") || ARG_IS("--help")) {
290                         ParameterUsage();
291                         exit(1);
292                 }
293                 else {
294                         printf("ERROR: unknown kernel parameter.\n");
295                         ParameterUsage();
296                         exit(1);
297                 }
298         }
299
300         #undef ARG_IS
301         #undef NEXT_ARG_PRESENT
302
303         return;
304 }
305
306
307 void FNAME(D3Q19AaVecInit)(LatticeDesc * ld, KernelData ** kernelData, Parameters * params)
308 {
309         KernelDataAa * kda = NULL;
310         MemAlloc((void **)&kda, sizeof(KernelDataAa));
311
312         kda->Blk[0] = 0; kda->Blk[1] = 0; kda->Blk[2] = 0;
313         kda->Iteration = -1;
314
315         KernelData * kd = &kda->kd;
316         *kernelData = kd;
317
318         kd->nObstIndices = ld->nObst;
319
320         // Ajust the dimensions according to padding, if used.
321         kd->Dims[0] = ld->Dims[0];
322         kd->Dims[1] = ld->Dims[1];
323         kd->Dims[2] = ld->Dims[2];
324
325
326         int * lDims = ld->Dims;
327         int * gDims = kd->GlobalDims;
328
329         Assert(VSIZE <= 4);
330
331         // TODO: only add enough ghost cells so we can compute everything vectorized.
332         gDims[0] = lDims[0] + 2;
333         gDims[1] = lDims[1] + 2;
334         // TODO: fix this for aa-vec2-soa
335         gDims[2] = lDims[2] + 2 + VSIZE - 2; // one ghost cell in front, one in the back, plus at most two at the back for VSIZE = 4
336
337         kd->Offsets[0] = 1;
338         kd->Offsets[1] = 1;
339         kd->Offsets[2] = 1;
340
341         int lX = lDims[0];
342         int lY = lDims[1];
343         int lZ = lDims[2];
344
345         int gX = gDims[0];
346         int gY = gDims[1];
347         int gZ = gDims[2];
348
349         int oX = kd->Offsets[0];
350         int oY = kd->Offsets[1];
351         int oZ = kd->Offsets[2];
352
353         int blk[3] = { 0 };
354
355         int nCells = gX * gY * gZ;
356
357         PdfT * pdfs[2] = { NULL, NULL };
358
359         ParseParameters(params, blk);
360
361         if (blk[2] % VSIZE != 0) {
362                 blk[2] -= blk[2] % VSIZE;
363                 printf("WARNING: blocking factor for z direction must be a multiple of VSIZE = %d, adjusting it to %d.\n", VSIZE, blk[2]);
364         }
365
366         if (blk[0] == 0) blk[0] = gX;
367         if (blk[1] == 0) blk[1] = gY;
368         if (blk[2] == 0) blk[2] = gZ;
369
370         printf("# blocking x: %3d y: %3d z: %3d\n", blk[0], blk[1], blk[2]);
371
372         kda->Blk[0] = blk[0]; kda->Blk[1] = blk[1]; kda->Blk[2] = blk[2];
373
374
375         printf("# allocating data for %d LB nodes with padding (%lu bytes = %f MiB for the single lattice)\n",
376                nCells,
377                    sizeof(PdfT) * nCells * N_D3Q19,
378                sizeof(PdfT) * nCells * N_D3Q19 / 1024.0 / 1024.0);
379
380 #define PAGE_4K         4096
381
382         MemAllocAligned((void **)&pdfs[0], sizeof(PdfT) * nCells * N_D3Q19, PAGE_4K);
383
384         kd->Pdfs[0] = pdfs[0];
385         kd->Pdfs[1] = NULL;
386
387
388         // Initialize PDFs with some (arbitrary) data for correct NUMA placement.
389         // This depends on the chosen data layout.
390         // The structure of the loop should resemble the same "execution layout"
391         // as in the kernel!
392
393         int nThreads;
394 #ifdef _OPENMP
395         nThreads = omp_get_max_threads();
396 #endif
397
398 #ifdef _OPENMP
399         #pragma omp parallel for \
400                                 shared(gDims, pdfs, \
401                                 oX, oY, oZ, lX, lY, lZ, blk, nThreads, ld)
402 #endif
403         for (int i = 0; i < nThreads; ++i) {
404
405                 int threadStartX = lX / nThreads * i;
406                 int threadEndX   = lX / nThreads * (i + 1);
407
408                 if (lX % nThreads > 0) {
409                         if (lX % nThreads > i) {
410                                 threadStartX += i;
411                                 threadEndX   += i + 1;
412                         }
413                         else {
414                                 threadStartX += lX % nThreads;
415                                 threadEndX   += lX % nThreads;
416                         }
417                 }
418
419                 for (int bX = oX + threadStartX; bX < threadEndX + oX; bX += blk[0]) {
420                 for (int bY = oY; bY < lY + oY; bY += blk[1]) {
421                 for (int bZ = oZ; bZ < lZ + oZ; bZ += blk[2]) {
422
423                         int eX = MIN(bX + blk[0], threadEndX + oX);
424                         int eY = MIN(bY + blk[1], lY + oY);
425                         int eZ = MIN(bZ + blk[2], lZ + oZ);
426
427                         // printf("%d: %d-%d  %d-%d  %d-%d  %d - %d\n", omp_get_thread_num(), bZ, eZ, bY, eY, bX, eX, threadStartX, threadEndX);
428
429                         for (int x = bX; x < eX; ++x) {
430                         for (int y = bY; y < eY; ++y) {
431                         for (int z = bZ; z < eZ; ++z) {
432
433                                 for (int d = 0; d < N_D3Q19; ++d) {
434                                         pdfs[0][P_INDEX_5(gDims, x, y, z, d)] = -50.0;
435                                 }
436
437                         } } } // x, y, z
438                 } } } // bx, by, bz
439         }
440
441
442         // Initialize all PDFs to some standard value.
443         for (int x = oX; x < lX + oX; ++x) {
444         for (int y = oY; y < lY + oY; ++y) {
445         for (int z = oZ; z < lZ + oZ; ++z) {
446                 for (int d = 0; d < N_D3Q19; ++d) {
447                         pdfs[0][P_INDEX_5(gDims, x, y, z, d)] = 0.0;
448                 }
449         } } } // x, y, z
450
451
452         // Count how many *PDFs* need bounce back treatment.
453
454         uint64_t nPdfs = ((uint64_t)19) * gX * gY * gZ;
455
456         if (nPdfs > ((2LU << 31) - 1)) {
457                 printf("ERROR: number of PDFs exceed 2^31.\n");
458                 exit(1);
459         }
460
461         // Compiler bug? Incorrect computation of nBounceBackPdfs when using icc 15.0.2.
462         // Works when declaring nBounceBackPdfs as int64_t or using volatile.
463         volatile int nBounceBackPdfs = 0;
464         // int64_t nBounceBackPdfs = 0;
465         int nx, ny, nz, px, py, pz;
466
467
468         for (int x = 0; x < lX; ++x) {
469                 for (int y = 0; y < lY; ++y) {
470                         for (int z = 0; z < lZ; ++z) {
471
472                                 if (ld->Lattice[L_INDEX_4(ld->Dims, x, y, z)] != LAT_CELL_OBSTACLE) {
473                                         for (int d = 0; d < N_D3Q19; ++d) {
474                                                 nx = x - D3Q19_X[d];
475                                                 ny = y - D3Q19_Y[d];
476                                                 nz = z - D3Q19_Z[d];
477
478                                                 // Check if neighbor is inside the lattice.
479                                                 // if(nx < 0 || ny < 0 || nz < 0 || nx >= lX || ny >= lY || nz >= lZ) {
480                                                 //      continue;
481                                                 // }
482                                                 if ((nx < 0 || nx >= lX) && ld->PeriodicX) {
483                                                         ++nBounceBackPdfs; // Compiler bug --> see above
484                                                 }
485                                                 else if ((ny < 0 || ny >= lY) && ld->PeriodicY) {
486                                                         ++nBounceBackPdfs; // Compiler bug --> see above
487                                                 }
488                                                 else if ((nz < 0 || nz >= lZ) && ld->PeriodicZ) {
489                                                         ++nBounceBackPdfs; // Compiler bug --> see above
490                                                 }
491                                                 else if (nx < 0 || ny < 0 || nz < 0 || nx >= lX || ny >= lY || nz >= lZ) {
492                                                         continue;
493                                                 }
494                                                 else if (ld->Lattice[L_INDEX_4(lDims, nx, ny, nz)] == LAT_CELL_OBSTACLE) {
495                                                         ++nBounceBackPdfs; // Compiler bug --> see above
496                                                 }
497                                         }
498                                 }
499                         }
500                 }
501         }
502
503         printf("# allocating %d indices for bounce back pdfs (%s for source and destination array)\n", nBounceBackPdfs, ByteToHuman(sizeof(int) * nBounceBackPdfs * 2));
504
505         MemAlloc((void **) & (kd->BounceBackPdfsSrc), sizeof(int) * nBounceBackPdfs + 100);
506         MemAlloc((void **) & (kd->BounceBackPdfsDst), sizeof(int) * nBounceBackPdfs + 100);
507
508         kd->nBounceBackPdfs = nBounceBackPdfs;
509         nBounceBackPdfs = 0;
510
511         int srcIndex;
512         int dstIndex;
513
514         // TODO: currently this is not NUMA-aware
515     //       - maybe use the same blocking as for lattice initialization?
516         //       - do place the bounce back index vector parallel?
517
518         for (int x = 0; x < lX; ++x) {
519                 for (int y = 0; y < lY; ++y) {
520                         for (int z = 0; z < lZ; ++z) {
521
522                                 if (ld->Lattice[L_INDEX_4(ld->Dims, x, y, z)] != LAT_CELL_OBSTACLE) {
523                                         for (int d = 0; d < N_D3Q19; ++d) {
524                                                 nx = x + D3Q19_X[d];
525                                                 ny = y + D3Q19_Y[d];
526                                                 nz = z + D3Q19_Z[d];
527
528                                                 if (    ((nx < 0 || nx >= lX) && ld->PeriodicX) ||
529                                                                 ((ny < 0 || ny >= lY) && ld->PeriodicY) ||
530                                                                 ((nz < 0 || nz >= lZ) && ld->PeriodicZ)
531                                                 ){
532                                                         // For periodicity:
533
534                                                         // We assume we have finished odd time step (accessing neighbor PDFs) and are
535                                                         // before executing the even time step (accessing local PDFs only).
536
537                                                         // Assuming we are at the most east position of the lattice. Through the odd
538                                                         // time step propagation has put a PDF in the east slot of the ghost cell east
539                                                         // of us, i.e. nx, ny, nz.  We copy it to the east slot of the most west node.
540
541                                                         // In case of transition from even to odd time step , src and dest must be
542                                                         // exchanged.
543
544
545                                                         // x periodic
546                                                         if (nx < 0) {
547                                                                 px = lX - 1;
548                                                         }
549                                                         else if (nx >= lX) {
550                                                                 px = 0;
551                                                         } else {
552                                                                 px = nx;
553                                                         }
554
555                                                         // y periodic
556                                                         if (ny < 0) {
557                                                                 py = lY - 1;
558                                                         }
559                                                         else if (ny >= lY) {
560                                                                 py = 0;
561                                                         } else {
562                                                                 py = ny;
563                                                         }
564
565                                                         // z periodic
566                                                         if (nz < 0) {
567                                                                 pz = lZ - 1;
568                                                         }
569                                                         else if (nz >= lZ) {
570                                                                 pz = 0;
571                                                         } else {
572                                                                 pz = nz;
573                                                         }
574
575                                                         if (ld->Lattice[L_INDEX_4(lDims, px, py, pz)] == LAT_CELL_OBSTACLE) {
576                                                                 // See description of bounce back handling below.
577                                                                 srcIndex = P_INDEX_5(gDims, nx + oX, ny + oY, nz + oZ, d);
578                                                                 dstIndex = P_INDEX_5(gDims,  x + oX,  y + oY,  z + oZ, D3Q19_INV[d]);
579                                                         }
580                                                         else {
581
582                                                                 srcIndex = P_INDEX_5(gDims, nx + oX, ny + oY, nz + oZ, d);
583                                                                 // Put it on the other side back into the domain.
584                                                                 dstIndex = P_INDEX_5(gDims, px + oX, py + oY, pz + oZ, d);
585
586                                                                 VerifyMsg(nBounceBackPdfs < kd->nBounceBackPdfs, "nBBPdfs %d < kd->nBBPdfs %d  xyz: %d %d %d d: %d\n", nBounceBackPdfs, kd->nBounceBackPdfs, x, y, z, d);
587
588                                                         }
589
590                                                         kd->BounceBackPdfsSrc[nBounceBackPdfs] = srcIndex;
591                                                         kd->BounceBackPdfsDst[nBounceBackPdfs] = dstIndex;
592
593                                                         ++nBounceBackPdfs;
594
595                                                 }
596                                                 else if (nx < 0 || ny < 0 || nz < 0 || nx >= lX || ny >= lY || nz >= lZ) {
597                                                         continue;
598                                                 }
599                                                 else if (ld->Lattice[L_INDEX_4(lDims, nx, ny, nz)] == LAT_CELL_OBSTACLE) {
600                                                         // Depending on the time step we are in we have to exchange src and dst index.
601
602                                                         // We build the list for the case, when we have finished odd time step
603                                                         // (accessing neighbor PDFs) and before we start with the even time step
604                                                         // (accessing local PDFs only).
605
606                                                         // Assume our neighbor east of us, i.e. nx, ny, nz, is an obstacle cell.
607                                                         // Then we have to move the east PDF from the obstacle to our west position,
608                                                         // i.e. the inverse of east.
609
610                                                         // In case of transition from even to odd time step src and dest just
611                                                         // have to be exchanged.
612
613                                                         srcIndex = P_INDEX_5(gDims, nx + oX, ny + oY, nz + oZ, d);
614                                                         dstIndex = P_INDEX_5(gDims,  x + oX,  y + oY,  z + oZ, D3Q19_INV[d]);
615
616                                                         VerifyMsg(nBounceBackPdfs < kd->nBounceBackPdfs, "nBBPdfs %d < kd->nBBPdfs %d  xyz: %d %d %d d: %d\n", nBounceBackPdfs, kd->nBounceBackPdfs, x, y, z, d);
617
618                                                         kd->BounceBackPdfsSrc[nBounceBackPdfs] = srcIndex;
619                                                         kd->BounceBackPdfsDst[nBounceBackPdfs] = dstIndex;
620
621                                                         ++nBounceBackPdfs;
622                                                 }
623                                         }
624                                 }
625                         }
626                 }
627         }
628
629
630         // Fill remaining KernelData structures
631         kd->GetNode = FNAME(GetNode);
632         kd->SetNode = FNAME(SetNode);
633
634         kd->BoundaryConditionsGetPdf = FNAME(BcGetPdf);
635         kd->BoundaryConditionsSetPdf = FNAME(BcSetPdf);
636
637         kd->Kernel = FNAME(D3Q19AaVecKernel);
638
639         kd->DstPdfs = NULL;
640         kd->PdfsActive = kd->Pdfs[0];
641
642         return;
643 }
644
645 void FNAME(D3Q19AaVecDeinit)(LatticeDesc * ld, KernelData ** kernelData)
646 {
647         MemFree((void **) & ((*kernelData)->Pdfs[0]));
648
649         MemFree((void **) & ((*kernelData)->BounceBackPdfsSrc));
650         MemFree((void **) & ((*kernelData)->BounceBackPdfsDst));
651
652         MemFree((void **)kernelData);
653
654         return;
655 }
656
This page took 0.086977 seconds and 4 git commands to generate.