Commit | Line | Data |
---|---|---|
10988083 MW |
1 | // -------------------------------------------------------------------------- |
2 | // | |
3 | // Copyright | |
4 | // Markus Wittmann, 2016-2017 | |
5 | // RRZE, University of Erlangen-Nuremberg, Germany | |
6 | // markus.wittmann -at- fau.de or hpc -at- rrze.fau.de | |
7 | // | |
8 | // Viktor Haag, 2016 | |
9 | // LSS, University of Erlangen-Nuremberg, Germany | |
10 | // | |
11 | // This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels). | |
12 | // | |
13 | // LbmBenchKernels is free software: you can redistribute it and/or modify | |
14 | // it under the terms of the GNU General Public License as published by | |
15 | // the Free Software Foundation, either version 3 of the License, or | |
16 | // (at your option) any later version. | |
17 | // | |
18 | // LbmBenchKernels is distributed in the hope that it will be useful, | |
19 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
20 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
21 | // GNU General Public License for more details. | |
22 | // | |
23 | // You should have received a copy of the GNU General Public License | |
24 | // along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>. | |
25 | // | |
26 | // -------------------------------------------------------------------------- | |
27 | #include "BenchKernelD3Q19Common.h" | |
28 | ||
29 | #include "Memory.h" | |
30 | #include "Vtk.h" | |
31 | ||
32 | #include <inttypes.h> | |
33 | #include <math.h> | |
34 | ||
e3f82424 MW |
35 | #ifdef _OPENMP |
36 | #include <omp.h> | |
37 | #endif | |
10988083 MW |
38 | |
39 | // Forward definition. | |
40 | void FNAME(D3Q19Kernel)(LatticeDesc * ld, struct KernelData_ * kd, CaseData * cd); | |
41 | ||
42 | void FNAME(D3Q19BlkKernel)(LatticeDesc * ld, struct KernelData_ * kd, CaseData * cd); | |
43 | ||
44 | ||
45 | ||
46 | static void FNAME(BcGetPdf)(KernelData * kd, int x, int y, int z, int dir, PdfT * pdf) | |
47 | { | |
48 | Assert(kd != NULL); | |
49 | Assert(kd->PdfsActive != NULL); | |
50 | Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]); | |
51 | Assert(pdf != NULL); | |
52 | ||
53 | Assert(x >= 0); | |
54 | Assert(y >= 0); | |
55 | Assert(z >= 0); | |
56 | Assert(x < kd->Dims[0]); | |
57 | Assert(y < kd->Dims[1]); | |
58 | Assert(z < kd->Dims[2]); | |
59 | Assert(dir >= 0); | |
60 | Assert(dir < N_D3Q19); | |
61 | ||
62 | int oX = kd->Offsets[0]; | |
63 | int oY = kd->Offsets[1]; | |
64 | int oZ = kd->Offsets[2]; | |
65 | ||
66 | #ifdef PROP_MODEL_PUSH | |
67 | int nx = x; | |
68 | int ny = y; | |
69 | int nz = z; | |
70 | #elif PROP_MODEL_PULL | |
71 | int nx = x - D3Q19_X[dir]; | |
72 | int ny = y - D3Q19_Y[dir]; | |
73 | int nz = z - D3Q19_Z[dir]; | |
74 | #endif | |
75 | ||
76 | #define I(x, y, z, dir) P_INDEX_5(kd->GlobalDims, (x), (y), (z), (dir)) | |
77 | *pdf = kd->PdfsActive[I(nx + oX, ny + oY, nz + oZ, dir)]; | |
78 | #undef I | |
79 | ||
80 | return; | |
81 | } | |
82 | ||
83 | static void FNAME(BcSetPdf)(KernelData * kd, int x, int y, int z, int dir, PdfT pdf) | |
84 | { | |
85 | Assert(kd != NULL); | |
86 | Assert(kd->PdfsActive != NULL); | |
87 | Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]); | |
88 | Assert(x >= 0); | |
89 | Assert(y >= 0); | |
90 | Assert(z >= 0); | |
91 | Assert(x < kd->Dims[0]); | |
92 | Assert(y < kd->Dims[1]); | |
93 | Assert(z < kd->Dims[2]); | |
94 | Assert(dir >= 0); | |
95 | Assert(dir < N_D3Q19); | |
96 | ||
97 | int oX = kd->Offsets[0]; | |
98 | int oY = kd->Offsets[1]; | |
99 | int oZ = kd->Offsets[2]; | |
100 | ||
101 | #ifdef PROP_MODEL_PUSH | |
102 | int nx = x; | |
103 | int ny = y; | |
104 | int nz = z; | |
105 | #elif PROP_MODEL_PULL | |
106 | int nx = x - D3Q19_X[dir]; | |
107 | int ny = y - D3Q19_Y[dir]; | |
108 | int nz = z - D3Q19_Z[dir]; | |
109 | #endif | |
110 | ||
111 | #define I(x, y, z, dir) P_INDEX_5(kd->GlobalDims, (x), (y), (z), (dir)) | |
112 | kd->PdfsActive[I(nx + oX, ny + oY, nz + oZ, dir)] = pdf; | |
113 | #undef I | |
114 | ||
115 | ||
116 | return; | |
117 | } | |
118 | ||
119 | ||
120 | static void FNAME(GetNode)(KernelData * kd, int x, int y, int z, PdfT * pdfs) | |
121 | { | |
122 | Assert(kd != NULL); | |
123 | Assert(kd->PdfsActive != NULL); | |
124 | Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]); | |
125 | Assert(pdfs != NULL); | |
126 | Assert(x >= 0); | |
127 | Assert(y >= 0); | |
128 | Assert(z >= 0); | |
129 | Assert(x < kd->Dims[0]); | |
130 | Assert(y < kd->Dims[1]); | |
131 | Assert(z < kd->Dims[2]); | |
132 | ||
133 | int oX = kd->Offsets[0]; | |
134 | int oY = kd->Offsets[1]; | |
135 | int oZ = kd->Offsets[2]; | |
136 | ||
137 | ||
138 | #define I(x, y, z, dir) P_INDEX_5(kd->GlobalDims, (x), (y), (z), (dir)) | |
139 | #ifdef PROP_MODEL_PUSH | |
140 | #define X(name, idx, idxinv, _x, _y, _z) pdfs[idx] = kd->PdfsActive[I(x + oX, y + oY, z + oZ, idx)]; | |
141 | #elif PROP_MODEL_PULL | |
142 | #define X(name, idx, idxinv, _x, _y, _z) pdfs[idx] = kd->PdfsActive[I(x + oX - (_x), y + oY - (_y), z + oZ - (_z), idx)]; | |
143 | #endif | |
144 | D3Q19_LIST | |
145 | #undef X | |
146 | #undef I | |
147 | ||
148 | #if 0 // DETECT NANs | |
149 | ||
150 | for (int d = 0; d < 19; ++d) { | |
151 | if (isnan(pdfs[d])) { | |
152 | printf("%d %d %d %d nan! get node\n", x, y, z, d); | |
153 | ||
154 | for (int d2 = 0; d2 < 19; ++d2) { | |
155 | printf("%d: %e\n", d2, pdfs[d2]); | |
156 | } | |
157 | ||
158 | exit(1); | |
159 | } | |
160 | } | |
161 | ||
162 | #endif | |
163 | ||
164 | return; | |
165 | } | |
166 | ||
167 | ||
168 | static void FNAME(SetNode)(KernelData * kd, int x, int y, int z, PdfT * pdfs) | |
169 | { | |
170 | Assert(kd != NULL); | |
171 | Assert(kd->PdfsActive != NULL); | |
172 | Assert(kd->PdfsActive == kd->Pdfs[0] || kd->PdfsActive == kd->Pdfs[1]); | |
173 | Assert(pdfs != NULL); | |
174 | ||
175 | Assert(x >= 0); | |
176 | Assert(y >= 0); | |
177 | Assert(z >= 0); | |
178 | Assert(x < kd->Dims[0]); | |
179 | Assert(y < kd->Dims[1]); | |
180 | Assert(z < kd->Dims[2]); | |
181 | ||
182 | int oX = kd->Offsets[0]; | |
183 | int oY = kd->Offsets[1]; | |
184 | int oZ = kd->Offsets[2]; | |
185 | ||
186 | #define I(x, y, z, dir) P_INDEX_5(kd->GlobalDims, (x), (y), (z), (dir)) | |
187 | #ifdef PROP_MODEL_PUSH | |
188 | #define X(name, idx, idxinv, _x, _y, _z) kd->PdfsActive[I(x + oX, y + oY, z + oZ, idx)] = pdfs[idx]; | |
189 | #elif PROP_MODEL_PULL | |
190 | #define X(name, idx, idxinv, _x, _y, _z) kd->PdfsActive[I(x + oX - (_x), y + oY - (_y), z + oZ - (_z), idx)] = pdfs[idx]; | |
191 | #endif | |
192 | D3Q19_LIST | |
193 | #undef X | |
194 | #undef I | |
195 | ||
196 | return; | |
197 | } | |
198 | ||
199 | ||
200 | static void ParameterUsage() | |
201 | { | |
202 | printf("Kernel parameters:\n"); | |
203 | printf(" [-blk <n>] [-blk-[xyz] <n>]\n"); | |
204 | ||
205 | return; | |
206 | } | |
207 | ||
208 | static void ParseParameters(Parameters * params, int * blk) | |
209 | { | |
210 | Assert(blk != NULL); | |
211 | ||
212 | blk[0] = 0; blk[1] = 0; blk[2] = 0; | |
213 | ||
214 | #define ARG_IS(param) (!strcmp(params->KernelArgs[i], param)) | |
215 | #define NEXT_ARG_PRESENT() \ | |
216 | do { \ | |
217 | if (i + 1 >= params->nKernelArgs) { \ | |
218 | printf("ERROR: argument %s requires a parameter.\n", params->KernelArgs[i]); \ | |
219 | exit(1); \ | |
220 | } \ | |
221 | } while (0) | |
222 | ||
223 | ||
224 | for (int i = 0; i < params->nKernelArgs; ++i) { | |
225 | if (ARG_IS("-blk") || ARG_IS("--blk")) { | |
226 | NEXT_ARG_PRESENT(); | |
227 | ||
228 | int tmp = strtol(params->KernelArgs[++i], NULL, 0); | |
229 | ||
e3f82424 MW |
230 | if (tmp < 0) { |
231 | printf("ERROR: blocking parameter must be >= 0.\n"); | |
10988083 MW |
232 | exit(1); |
233 | } | |
234 | ||
235 | blk[0] = blk[1] = blk[2] = tmp; | |
236 | } | |
237 | else if (ARG_IS("-blk-x") || ARG_IS("--blk-x")) { | |
238 | NEXT_ARG_PRESENT(); | |
239 | ||
240 | int tmp = strtol(params->KernelArgs[++i], NULL, 0); | |
241 | ||
e3f82424 MW |
242 | if (tmp < 0) { |
243 | printf("ERROR: blocking parameter must be >= 0.\n"); | |
10988083 MW |
244 | exit(1); |
245 | } | |
246 | ||
247 | blk[0] = tmp; | |
248 | } | |
249 | else if (ARG_IS("-blk-y") || ARG_IS("--blk-y")) { | |
250 | NEXT_ARG_PRESENT(); | |
251 | ||
252 | int tmp = strtol(params->KernelArgs[++i], NULL, 0); | |
253 | ||
e3f82424 MW |
254 | if (tmp < 0) { |
255 | printf("ERROR: blocking parameter must be >= 0.\n"); | |
10988083 MW |
256 | exit(1); |
257 | } | |
258 | ||
259 | blk[1] = tmp; | |
260 | } | |
261 | else if (ARG_IS("-blk-z") || ARG_IS("--blk-z")) { | |
262 | NEXT_ARG_PRESENT(); | |
263 | ||
264 | int tmp = strtol(params->KernelArgs[++i], NULL, 0); | |
265 | ||
e3f82424 MW |
266 | if (tmp < 0) { |
267 | printf("ERROR: blocking parameter must be >= 0.\n"); | |
10988083 MW |
268 | exit(1); |
269 | } | |
270 | ||
271 | blk[2] = tmp; | |
272 | } | |
273 | else if (ARG_IS("-h") || ARG_IS("-help") || ARG_IS("--help")) { | |
274 | ParameterUsage(); | |
275 | exit(1); | |
276 | } | |
277 | else { | |
278 | printf("ERROR: unknown kernel parameter.\n"); | |
279 | ParameterUsage(); | |
280 | exit(1); | |
281 | } | |
282 | } | |
283 | ||
284 | #undef ARG_IS | |
285 | #undef NEXT_ARG_PRESENT | |
286 | ||
287 | return; | |
288 | } | |
289 | ||
290 | ||
e3f82424 | 291 | static void D3Q19InternalInit(int blocking, LatticeDesc * ld, KernelData ** kernelData, Parameters * params) |
10988083 MW |
292 | { |
293 | KernelDataEx * kdex = NULL; | |
294 | MemAlloc((void **)&kdex, sizeof(KernelDataEx)); | |
295 | ||
296 | kdex->Blk[0] = 0; kdex->Blk[1] = 0; kdex->Blk[2] = 0; | |
297 | ||
298 | KernelData * kd = &kdex->kd; | |
299 | *kernelData = kd; | |
300 | ||
301 | kd->nObstIndices = ld->nObst; | |
302 | ||
303 | // Ajust the dimensions according to padding, if used. | |
304 | kd->Dims[0] = ld->Dims[0]; | |
305 | kd->Dims[1] = ld->Dims[1]; | |
306 | kd->Dims[2] = ld->Dims[2]; | |
307 | ||
308 | ||
309 | int * lDims = ld->Dims; | |
310 | int * gDims = kd->GlobalDims; | |
311 | ||
312 | gDims[0] = lDims[0] + 2; | |
313 | gDims[1] = lDims[1] + 2; | |
314 | gDims[2] = lDims[2] + 2; | |
315 | ||
316 | kd->Offsets[0] = 1; | |
317 | kd->Offsets[1] = 1; | |
318 | kd->Offsets[2] = 1; | |
319 | ||
320 | int lX = lDims[0]; | |
321 | int lY = lDims[1]; | |
322 | int lZ = lDims[2]; | |
323 | ||
324 | int gX = gDims[0]; | |
325 | int gY = gDims[1]; | |
326 | int gZ = gDims[2]; | |
327 | ||
328 | int oX = kd->Offsets[0]; | |
329 | int oY = kd->Offsets[1]; | |
330 | int oZ = kd->Offsets[2]; | |
331 | ||
332 | int blk[3] = { 0 }; | |
333 | ||
334 | int nCells = gX * gY * gZ; | |
335 | ||
336 | PdfT * pdfs[2]; | |
337 | ||
e3f82424 MW |
338 | if (blocking) { |
339 | ParseParameters(params, blk); | |
340 | } | |
341 | else { | |
342 | if (params->nKernelArgs != 0) { | |
343 | printf("ERROR: unknown kernel parameter.\n"); | |
344 | printf("This kernels accepts no parameters.\n"); | |
345 | exit(1); | |
346 | } | |
347 | } | |
348 | ||
10988083 MW |
349 | |
350 | if (blk[0] == 0) blk[0] = gX; | |
351 | if (blk[1] == 0) blk[1] = gY; | |
352 | if (blk[2] == 0) blk[2] = gZ; | |
353 | ||
354 | printf("# blocking x: %3d y: %3d z: %3d\n", blk[0], blk[1], blk[2]); | |
355 | ||
356 | ||
357 | kdex->Blk[0] = blk[0]; kdex->Blk[1] = blk[1]; kdex->Blk[2] = blk[2]; | |
358 | ||
359 | ||
360 | printf("# allocating data for %d LB nodes with padding (%lu bytes = %f MiB for both lattices)\n", | |
361 | nCells, 2 * sizeof(PdfT) * nCells * N_D3Q19, | |
362 | 2 * sizeof(PdfT) * nCells * N_D3Q19 / 1024.0 / 1024.0); | |
363 | ||
e3f82424 MW |
364 | // MemAlloc((void **)&pdfs[0], sizeof(PdfT) * nCells * N_D3Q19); |
365 | // MemAlloc((void **)&pdfs[1], sizeof(PdfT) * nCells * N_D3Q19); | |
366 | MemAllocAligned((void **)&pdfs[0], sizeof(PdfT) * nCells * N_D3Q19, 2 * 1024 * 1024); | |
367 | MemAllocAligned((void **)&pdfs[1], sizeof(PdfT) * nCells * N_D3Q19, 2 * 1024 * 1024); | |
368 | ||
369 | printf("# pdfs[0] = %p\n", pdfs[0]); | |
370 | printf("# pdfs[1] = %p\n", pdfs[1]); | |
10988083 MW |
371 | |
372 | kd->Pdfs[0] = pdfs[0]; | |
373 | kd->Pdfs[1] = pdfs[1]; | |
374 | ||
375 | // Initialize PDFs with some (arbitrary) data for correct NUMA placement. | |
376 | // This depends on the chosen data layout. | |
377 | // The structure of the loop should resemble the same "execution layout" | |
378 | // as in the kernel! | |
10988083 | 379 | |
e3f82424 MW |
380 | if (blocking) { |
381 | int nX = ld->Dims[0]; | |
382 | int nY = ld->Dims[1]; | |
383 | int nZ = ld->Dims[2]; | |
10988083 | 384 | |
e3f82424 | 385 | int nThreads = 1; |
10988083 | 386 | |
e3f82424 MW |
387 | #ifdef _OPENMP |
388 | nThreads = omp_get_max_threads(); | |
389 | #endif | |
10988083 | 390 | |
e3f82424 MW |
391 | #ifdef _OPENMP |
392 | #pragma omp parallel for default(none) \ | |
393 | shared(pdfs, gDims, nX, nY, nZ, oX, oY, oZ, blk, nThreads) | |
394 | #endif | |
395 | for (int i = 0; i < nThreads; ++i) { | |
396 | ||
397 | int threadStartX = nX / nThreads * i; | |
398 | int threadEndX = nX / nThreads * (i + 1); | |
10988083 | 399 | |
e3f82424 MW |
400 | if (nX % nThreads > 0) { |
401 | if (nX % nThreads > i) { | |
402 | threadStartX += i; | |
403 | threadEndX += i + 1; | |
404 | } | |
405 | else { | |
406 | threadStartX += nX % nThreads; | |
407 | threadEndX += nX % nThreads; | |
408 | } | |
409 | } | |
410 | ||
411 | for (int bX = oX + threadStartX; bX < threadEndX + oX; bX += blk[0]) { | |
412 | for (int bY = oY; bY < nY + oY; bY += blk[1]) { | |
413 | for (int bZ = oZ; bZ < nZ + oZ; bZ += blk[2]) { | |
414 | ||
415 | int eZ = MIN(bZ + blk[2], nZ + oZ); | |
416 | int eY = MIN(bY + blk[1], nY + oY); | |
417 | int eX = MIN(bX + blk[0], threadEndX + oX); | |
418 | ||
419 | for (int x = bX; x < eX; ++x) { | |
420 | for (int y = bY; y < eY; ++y) { | |
421 | for (int z = bZ; z < eZ; ++z) { | |
422 | for (int d = 0; d < N_D3Q19; ++d) { | |
423 | pdfs[0][P_INDEX_5(gDims, x, y, z, d)] = 1.0; | |
424 | pdfs[1][P_INDEX_5(gDims, x, y, z, d)] = 1.0; | |
425 | } | |
426 | } | |
427 | } | |
10988083 MW |
428 | } |
429 | } | |
430 | } | |
431 | } | |
432 | } | |
e3f82424 MW |
433 | |
434 | } | |
435 | else { | |
436 | #ifdef _OPENMP | |
437 | #pragma omp parallel for collapse(3) | |
438 | #endif | |
439 | for (int x = 0; x < gX; ++x) { | |
440 | for (int y = 0; y < gY; ++y) { | |
441 | for (int z = 0; z < gZ; ++z) { | |
442 | for (int d = 0; d < N_D3Q19; ++d) { | |
443 | pdfs[0][P_INDEX_5(gDims, x, y, z, d)] = 1.0; | |
444 | pdfs[1][P_INDEX_5(gDims, x, y, z, d)] = 1.0; | |
445 | } | |
446 | } | |
447 | } | |
448 | } | |
10988083 MW |
449 | } |
450 | ||
451 | // Initialize all PDFs to some standard value. | |
e3f82424 | 452 | for (int x = 0; x < gX; ++x) { |
10988083 | 453 | for (int y = 0; y < gY; ++y) { |
e3f82424 | 454 | for (int z = 0; z < gZ; ++z) { |
10988083 MW |
455 | for (int d = 0; d < N_D3Q19; ++d) { |
456 | pdfs[0][P_INDEX_5(gDims, x, y, z, d)] = 0.0; | |
457 | pdfs[1][P_INDEX_5(gDims, x, y, z, d)] = 0.0; | |
458 | } | |
459 | } | |
460 | } | |
461 | } | |
462 | ||
463 | ||
464 | // Count how many *PDFs* need bounce back treatment. | |
465 | ||
466 | uint64_t nPdfs = ((uint64_t)19) * gX * gY * gZ; | |
467 | ||
468 | if (nPdfs > ((2LU << 31) - 1)) { | |
469 | printf("ERROR: number of PDFs exceed 2^31.\n"); | |
470 | exit(1); | |
471 | } | |
472 | ||
473 | // Compiler bug? Incorrect computation of nBounceBackPdfs when using icc 15.0.2. | |
474 | // Works when declaring nBounceBackPdfs as int64_t or using volatile. | |
475 | volatile int nBounceBackPdfs = 0; | |
476 | // int64_t nBounceBackPdfs = 0; | |
477 | int nx, ny, nz, px, py, pz; | |
478 | ||
479 | // TODO: apply blocking? | |
480 | ||
e3f82424 | 481 | for (int x = 0; x < lX; ++x) { |
10988083 | 482 | for (int y = 0; y < lY; ++y) { |
e3f82424 | 483 | for (int z = 0; z < lZ; ++z) { |
10988083 MW |
484 | |
485 | if (ld->Lattice[L_INDEX_4(ld->Dims, x, y, z)] != LAT_CELL_OBSTACLE) { | |
486 | for (int d = 0; d < N_D3Q19; ++d) { | |
487 | #ifdef PROP_MODEL_PUSH | |
488 | nx = x + D3Q19_X[d]; | |
489 | ny = y + D3Q19_Y[d]; | |
490 | nz = z + D3Q19_Z[d]; | |
491 | #elif PROP_MODEL_PULL | |
492 | nx = x - D3Q19_X[d]; | |
493 | ny = y - D3Q19_Y[d]; | |
494 | nz = z - D3Q19_Z[d]; | |
495 | #else | |
496 | #error PROP_MODEL_NAME unknown. | |
497 | #endif | |
498 | // Check if neighbor is inside the lattice. | |
499 | // if(nx < 0 || ny < 0 || nz < 0 || nx >= lX || ny >= lY || nz >= lZ) { | |
500 | // continue; | |
501 | // } | |
502 | if ((nx < 0 || nx >= lX) && ld->PeriodicX) { | |
503 | ++nBounceBackPdfs; // Compiler bug --> see above | |
504 | } | |
505 | else if ((ny < 0 || ny >= lY) && ld->PeriodicY) { | |
506 | ++nBounceBackPdfs; // Compiler bug --> see above | |
507 | } | |
508 | else if ((nz < 0 || nz >= lZ) && ld->PeriodicZ) { | |
509 | ++nBounceBackPdfs; // Compiler bug --> see above | |
510 | } | |
511 | else if (nx < 0 || ny < 0 || nz < 0 || nx >= lX || ny >= lY || nz >= lZ) { | |
512 | continue; | |
513 | } | |
514 | else if (ld->Lattice[L_INDEX_4(lDims, nx, ny, nz)] == LAT_CELL_OBSTACLE) { | |
515 | ++nBounceBackPdfs; // Compiler bug --> see above | |
516 | } | |
517 | } | |
518 | } | |
519 | } | |
520 | } | |
521 | } | |
522 | ||
523 | ||
524 | printf("# allocating %d indices for bounce back pdfs (%s for source and destination array)\n", nBounceBackPdfs, ByteToHuman(sizeof(int) * nBounceBackPdfs * 2)); | |
525 | ||
526 | MemAlloc((void **) & (kd->BounceBackPdfsSrc), sizeof(int) * nBounceBackPdfs + 100); | |
527 | MemAlloc((void **) & (kd->BounceBackPdfsDst), sizeof(int) * nBounceBackPdfs + 100); | |
528 | ||
529 | kd->nBounceBackPdfs = nBounceBackPdfs; | |
530 | nBounceBackPdfs = 0; | |
531 | ||
532 | int srcIndex; | |
533 | int dstIndex; | |
534 | ||
e3f82424 | 535 | for (int x = 0; x < lX; ++x) { |
10988083 | 536 | for (int y = 0; y < lY; ++y) { |
e3f82424 | 537 | for (int z = 0; z < lZ; ++z) { |
10988083 MW |
538 | |
539 | if (ld->Lattice[L_INDEX_4(ld->Dims, x, y, z)] != LAT_CELL_OBSTACLE) { | |
540 | for (int d = 0; d < N_D3Q19; ++d) { | |
541 | #ifdef PROP_MODEL_PUSH | |
542 | nx = x + D3Q19_X[d]; | |
543 | ny = y + D3Q19_Y[d]; | |
544 | nz = z + D3Q19_Z[d]; | |
545 | #elif PROP_MODEL_PULL | |
546 | nx = x - D3Q19_X[d]; | |
547 | ny = y - D3Q19_Y[d]; | |
548 | nz = z - D3Q19_Z[d]; | |
549 | #else | |
550 | #error PROP_MODEL_NAME unknown. | |
551 | #endif | |
552 | ||
553 | if ( ((nx < 0 || nx >= lX) && ld->PeriodicX) || | |
554 | ((ny < 0 || ny >= lY) && ld->PeriodicY) || | |
555 | ((nz < 0 || nz >= lZ) && ld->PeriodicZ) | |
556 | ){ | |
557 | // Implement periodic boundary in X direction. | |
558 | ||
559 | // If the target node reached through propagation is outside the lattice | |
560 | // the kernel stores it in some buffer around the domain. | |
561 | // From this position the PDF must be transported to the other side of the | |
562 | // geometry. | |
563 | ||
564 | // Take PDF from outside the domain. | |
565 | ||
566 | // x periodic | |
567 | if (nx < 0) { | |
568 | px = lX - 1; | |
569 | } | |
570 | else if (nx >= lX) { | |
571 | px = 0; | |
572 | } else { | |
573 | px = nx; | |
574 | } | |
575 | ||
576 | // y periodic | |
577 | if (ny < 0) { | |
578 | py = lY - 1; | |
579 | } | |
580 | else if (ny >= lY) { | |
581 | py = 0; | |
582 | } else { | |
583 | py = ny; | |
584 | } | |
585 | ||
586 | // z periodic | |
587 | if (nz < 0) { | |
588 | pz = lZ - 1; | |
589 | } | |
590 | else if (nz >= lZ) { | |
591 | pz = 0; | |
592 | } else { | |
593 | pz = nz; | |
594 | } | |
595 | ||
596 | if (ld->Lattice[L_INDEX_4(lDims, px, py, pz)] == LAT_CELL_OBSTACLE) { | |
597 | #ifdef PROP_MODEL_PUSH | |
598 | srcIndex = P_INDEX_5(gDims, nx + oX, ny + oY, nz + oZ, d); | |
599 | dstIndex = P_INDEX_5(gDims, x + oX, y + oY, z + oZ, D3Q19_INV[d]); | |
600 | #elif PROP_MODEL_PULL | |
601 | srcIndex = P_INDEX_5(gDims, x + oX, y + oY, z + oZ, D3Q19_INV[d]); | |
602 | dstIndex = P_INDEX_5(gDims, nx + oX, ny + oY, nz + oZ, d); | |
603 | #endif | |
604 | } | |
605 | else { | |
606 | ||
607 | #ifdef PROP_MODEL_PUSH | |
608 | srcIndex = P_INDEX_5(gDims, nx + oX, ny + oY, nz + oZ, d); | |
609 | // Put it on the other side back into the domain. | |
610 | dstIndex = P_INDEX_5(gDims, px + oX, py + oY, pz + oZ, d); | |
611 | #elif PROP_MODEL_PULL | |
612 | srcIndex = P_INDEX_5(gDims, px + oX, py + oY, pz + oZ, d); | |
613 | // Put it on the other side back into the ghost layer. | |
614 | dstIndex = P_INDEX_5(gDims, nx + oX, ny + oY, nz + oZ, d); | |
615 | #endif | |
616 | ||
617 | VerifyMsg(nBounceBackPdfs < kd->nBounceBackPdfs, "nBBPdfs %d < kd->nBBPdfs %d xyz: %d %d %d d: %d\n", nBounceBackPdfs, kd->nBounceBackPdfs, x, y, z, d); | |
618 | ||
619 | } | |
620 | ||
621 | kd->BounceBackPdfsSrc[nBounceBackPdfs] = srcIndex; | |
622 | kd->BounceBackPdfsDst[nBounceBackPdfs] = dstIndex; | |
623 | ||
624 | ++nBounceBackPdfs; | |
625 | ||
626 | } | |
627 | else if (nx < 0 || ny < 0 || nz < 0 || nx >= lX || ny >= lY || nz >= lZ) { | |
628 | continue; | |
629 | } | |
630 | else if (ld->Lattice[L_INDEX_4(lDims, nx, ny, nz)] == LAT_CELL_OBSTACLE) { | |
631 | #ifdef PROP_MODEL_PUSH | |
632 | srcIndex = P_INDEX_5(gDims, nx + oX, ny + oY, nz + oZ, d); | |
633 | dstIndex = P_INDEX_5(gDims, x + oX, y + oY, z + oZ, D3Q19_INV[d]); | |
634 | #elif PROP_MODEL_PULL | |
635 | srcIndex = P_INDEX_5(gDims, x + oX, y + oY, z + oZ, D3Q19_INV[d]); | |
636 | dstIndex = P_INDEX_5(gDims, nx + oX, ny + oY, nz + oZ, d); | |
637 | // srcIndex = P_INDEX_5(gDims, x + oX, y + oY, z + oZ, d); | |
638 | // dstIndex = P_INDEX_5(gDims, nx + oX, ny + oY, nz + oZ, D3Q19_INV[d]); | |
639 | #endif | |
640 | ||
641 | VerifyMsg(nBounceBackPdfs < kd->nBounceBackPdfs, "nBBPdfs %d < kd->nBBPdfs %d xyz: %d %d %d d: %d\n", nBounceBackPdfs, kd->nBounceBackPdfs, x, y, z, d); | |
642 | ||
643 | kd->BounceBackPdfsSrc[nBounceBackPdfs] = srcIndex; | |
644 | kd->BounceBackPdfsDst[nBounceBackPdfs] = dstIndex; | |
645 | ||
646 | ++nBounceBackPdfs; | |
647 | } | |
648 | } | |
649 | } | |
650 | } | |
651 | } | |
652 | } | |
653 | ||
654 | ||
655 | // Fill remaining KernelData structures | |
656 | kd->GetNode = FNAME(GetNode); | |
657 | kd->SetNode = FNAME(SetNode); | |
658 | ||
659 | kd->BoundaryConditionsGetPdf = FNAME(BcGetPdf); | |
660 | kd->BoundaryConditionsSetPdf = FNAME(BcSetPdf); | |
661 | ||
e3f82424 MW |
662 | if (blocking) { |
663 | kd->Kernel = FNAME(D3Q19BlkKernel); | |
664 | } | |
665 | else { | |
666 | kd->Kernel = FNAME(D3Q19Kernel); | |
667 | } | |
10988083 MW |
668 | |
669 | kd->DstPdfs = NULL; | |
670 | kd->PdfsActive = kd->Pdfs[0]; | |
671 | ||
672 | return; | |
673 | } | |
674 | ||
e3f82424 MW |
675 | |
676 | void FNAME(D3Q19BlkInit)(LatticeDesc * ld, KernelData ** kernelData, Parameters * params) | |
677 | { | |
678 | D3Q19InternalInit(1, ld, kernelData, params); | |
679 | } | |
680 | ||
681 | ||
10988083 MW |
682 | void FNAME(D3Q19BlkDeinit)(LatticeDesc * ld, KernelData ** kernelData) |
683 | { | |
684 | MemFree((void **) & ((*kernelData)->Pdfs[0])); | |
685 | MemFree((void **) & ((*kernelData)->Pdfs[1])); | |
686 | ||
687 | MemFree((void **) & ((*kernelData)->BounceBackPdfsSrc)); | |
688 | MemFree((void **) & ((*kernelData)->BounceBackPdfsDst)); | |
689 | ||
690 | MemFree((void **)kernelData); | |
691 | ||
692 | return; | |
693 | } | |
694 | ||
695 | // Kernels without blocking perform the same initialization/deinitialization as with | |
696 | // blocking, except that a different kernel is called. Hence, no arguments are allowed. | |
697 | ||
698 | void FNAME(D3Q19Init)(LatticeDesc * ld, KernelData ** kernelData, Parameters * params) | |
699 | { | |
e3f82424 | 700 | D3Q19InternalInit(0, ld, kernelData, params); |
10988083 MW |
701 | |
702 | } | |
703 | ||
704 | void FNAME(D3Q19Deinit)(LatticeDesc * ld, KernelData ** kernelData) | |
705 | { | |
706 | FNAME(D3Q19BlkDeinit)(ld, kernelData); | |
707 | return; | |
708 | } |