merge with kernels from MH's master thesis
[LbmBenchmarkKernelsPublic.git] / src / BenchKernelD3Q19Aa.c
CommitLineData
e3f82424
MW
1// --------------------------------------------------------------------------
2//
3// Copyright
4// Markus Wittmann, 2016-2017
5// RRZE, University of Erlangen-Nuremberg, Germany
6// markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
7//
8// Viktor Haag, 2016
9// LSS, University of Erlangen-Nuremberg, Germany
10//
11// This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
12//
13// LbmBenchKernels is free software: you can redistribute it and/or modify
14// it under the terms of the GNU General Public License as published by
15// the Free Software Foundation, either version 3 of the License, or
16// (at your option) any later version.
17//
18// LbmBenchKernels is distributed in the hope that it will be useful,
19// but WITHOUT ANY WARRANTY; without even the implied warranty of
20// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21// GNU General Public License for more details.
22//
23// You should have received a copy of the GNU General Public License
24// along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>.
25//
26// --------------------------------------------------------------------------
27#include "BenchKernelD3Q19AaCommon.h"
28
29#include "Memory.h"
30#include "Vtk.h"
31#include "LikwidIf.h"
32
33#include <inttypes.h>
34#include <math.h>
35
36#ifdef _OPENMP
37 #include <omp.h>
38#endif
39
40void FNAME(D3Q19AaKernel)(LatticeDesc * ld, KernelData * kernelData, CaseData * cd)
41{
42 Assert(ld != NULL);
43 Assert(kernelData != NULL);
44 Assert(cd != NULL);
45
0fde6e45
MW
46 Assert(cd->Omega > F(0.0));
47 Assert(cd->Omega < F(2.0));
e3f82424
MW
48
49 KernelData * kd = (KernelData *)kernelData;
50
51
52 int nX = ld->Dims[0];
53 int nY = ld->Dims[1];
54 int nZ = ld->Dims[2];
55
56 int * gDims = kd->GlobalDims;
57
58 int oX = kd->Offsets[0];
59 int oY = kd->Offsets[1];
60 int oZ = kd->Offsets[2];
61
62 KernelDataAa * kda = KDA(kd);
63
64 int blk[3];
65 blk[0] = kda->Blk[0];
66 blk[1] = kda->Blk[1];
67 blk[2] = kda->Blk[2];
68
69 PdfT omega = cd->Omega;
70 PdfT omegaEven = omega;
0fde6e45 71 PdfT magicParam = F(1.0) / F(12.0);
e3f82424
MW
72 // 1/4: best stability;
73 // 1/12: removes third-order advection error (best advection);
74 // 1/6: removes fourth-order diffusion error (best diffusion);
75 // 3/16: exact location of bounce back for poiseuille flow
76
0fde6e45 77 PdfT omegaOdd = F(1.0)/( F(0.5) + magicParam/(F(1.0)/omega - F(0.5)) );
e3f82424 78
0fde6e45
MW
79 PdfT evenPart = F(0.0);
80 PdfT oddPart = F(0.0);
81 PdfT dir_indep_trm = F(0.0);
e3f82424 82
0fde6e45
MW
83 PdfT w_0 = F(1.0) / F(3.0);
84 PdfT w_1 = F(1.0) / F(18.0);
85 PdfT w_2 = F(1.0) / F(36.0);
e3f82424 86
0fde6e45
MW
87 PdfT w_1_x3 = w_1 * F(3.0); PdfT w_1_nine_half = w_1 * F(9.0)/F(2.0); PdfT w_1_indep = F(0.0);
88 PdfT w_2_x3 = w_2 * F(3.0); PdfT w_2_nine_half = w_2 * F(9.0)/F(2.0); PdfT w_2_indep = F(0.0);
e3f82424
MW
89
90 PdfT ux, uy, uz, ui;
91 PdfT dens;
92
93 // Declare pdf_N, pdf_E, pdf_S, pdf_W, ...
94 #define X(name, idx, idxinv, x, y, z) PdfT JOIN(pdf_,name);
95 D3Q19_LIST
96 #undef X
97
98 PdfT * src = kd->Pdfs[0];
99
100 int maxIterations = cd->MaxIterations;
101
102 #ifdef VTK_OUTPUT
103 if (cd->VtkOutput) {
104 kd->PdfsActive = src;
105 VtkWrite(ld, kd, cd, -1);
106 }
107 #endif
108
109 #ifdef STATISTICS
110 kd->PdfsActive = src;
111 KernelStatistics(kd, ld, cd, 0);
112 #endif
113
114
115 int nThreads = 1;
116
117 #ifdef _OPENMP
118 nThreads = omp_get_max_threads();
119 #endif
120
8cafd9ea
MW
121 X_KERNEL_START(kernelData);
122
e3f82424
MW
123 for (int iter = 0; iter < maxIterations; iter += 2) {
124
125 // --------------------------------------------------------------------
126 // even time step
127
128 X_LIKWID_START("aa-even");
129
130 // {{{
131
132 #ifdef _OPENMP
133 #pragma omp parallel for default(none) \
134 shared(gDims,src, w_0, w_1, w_2, omegaEven, omegaOdd, \
135 w_1_x3, w_2_x3, w_1_nine_half, w_2_nine_half, cd, \
136 oX, oY, oZ, nX, nY, nZ, blk, nThreads, ld) \
137 private(ux, uy, uz, ui, dens, dir_indep_trm, \
138 pdf_C, \
139 pdf_N, pdf_E, pdf_S, pdf_W, \
140 pdf_NE, pdf_SE, pdf_SW, pdf_NW, \
141 pdf_T, pdf_TN, pdf_TE, pdf_TS, pdf_TW, \
142 pdf_B, pdf_BN, pdf_BE, pdf_BS, pdf_BW, \
143 evenPart, oddPart, w_1_indep, w_2_indep)
144 #endif
145
146 for (int i = 0; i < nThreads; ++i) {
147
148 int threadStartX = nX / nThreads * i;
149 int threadEndX = nX / nThreads * (i + 1);
150
151 if (nX % nThreads > 0) {
152 if (nX % nThreads > i) {
153 threadStartX += i;
154 threadEndX += i + 1;
155 }
156 else {
157 threadStartX += nX % nThreads;
158 threadEndX += nX % nThreads;
159 }
160 }
161
162 for (int bX = oX + threadStartX; bX < threadEndX + oX; bX += blk[0]) {
163 for (int bY = oY; bY < nY + oY; bY += blk[1]) {
164 for (int bZ = oZ; bZ < nZ + oZ; bZ += blk[2]) {
165
166 int eX = MIN(bX + blk[0], threadEndX + oX);
167 int eY = MIN(bY + blk[1], nY + oY);
168 int eZ = MIN(bZ + blk[2], nZ + oZ);
169
170 // printf("%d: %d-%d %d-%d %d-%d %d - %d\n", omp_get_thread_num(), bZ, eZ, bY, eY, bX, eX, threadStartX, threadEndX);
171
172 for (int x = bX; x < eX; ++x) {
173 for (int y = bY; y < eY; ++y) {
0fde6e45
MW
174 #ifdef INTEL_OPT_DIRECTIVES
175 #pragma ivdep
176 #pragma vector always
177 #pragma simd
178 #endif
8cafd9ea 179 for (int z = bZ; z < eZ; ++z) { // LOOP aa-even
e3f82424
MW
180
181
182 if (ld->Lattice[L_INDEX_4(ld->Dims, x - oX, y - oY, z - oZ)] == LAT_CELL_OBSTACLE) {
183 continue;
184 }
185
186 #define I(x, y, z, dir) P_INDEX_5(gDims, (x), (y), (z), (dir))
187
188
189 // Load PDFs of local cell: pdf_N = src[I(x, y, z, D3Q19_N)]; ...
190 #define X(name, idx, idxinv, _x, _y, _z) JOIN(pdf_,name) = src[I(x, y, z, idx)];
191 D3Q19_LIST
192 #undef X
193
194// #define LID_DRIVEN_CAVITY
195
196#ifdef LID_DRIVEN_CAVITY
197
198 if (z == nZ - 4 + oZ && x > 3 + oX && x < (nX - 4 + oX) && y > 3 + oY && y < (nY - 4 + oY)) {
0fde6e45
MW
199 ux = F(0.1) * F(0.5)77;
200 uy = F(0.0);
201 uz = F(0.0);
e3f82424
MW
202
203 } else {
204 #endif
205 ux = pdf_E + pdf_NE + pdf_SE + pdf_TE + pdf_BE -
206 pdf_W - pdf_NW - pdf_SW - pdf_TW - pdf_BW;
207 uy = pdf_N + pdf_NE + pdf_NW + pdf_TN + pdf_BN -
208 pdf_S - pdf_SE - pdf_SW - pdf_TS - pdf_BS;
209 uz = pdf_T + pdf_TE + pdf_TW + pdf_TN + pdf_TS -
210 pdf_B - pdf_BE - pdf_BW - pdf_BN - pdf_BS;
211 #ifdef LID_DRIVEN_CAVITY
212 }
213 #endif
214
215 dens = pdf_C +
216 pdf_N + pdf_E + pdf_S + pdf_W +
217 pdf_NE + pdf_SE + pdf_SW + pdf_NW +
218 pdf_T + pdf_TN + pdf_TE + pdf_TS + pdf_TW +
219 pdf_B + pdf_BN + pdf_BE + pdf_BS + pdf_BW;
220
0fde6e45 221 dir_indep_trm = dens - (ux * ux + uy * uy + uz * uz)*F(3.0)/F(2.0);
e3f82424
MW
222
223 // direction: w_0
224 src[I(x, y, z, D3Q19_C)] = pdf_C - omegaEven*(pdf_C - w_0*dir_indep_trm);
225
226 // direction: w_1
227 w_1_indep = w_1*dir_indep_trm;
228
229 ui = uy;
0fde6e45
MW
230 evenPart = omegaEven*( F(0.5)*(pdf_N + pdf_S) - ui*ui*w_1_nine_half - w_1_indep );
231 oddPart = omegaOdd*(F(0.5)*(pdf_N - pdf_S) - ui*w_1_x3 );
e3f82424
MW
232 src[I(x, y, z, D3Q19_S)] = pdf_N - evenPart - oddPart;
233 src[I(x, y, z, D3Q19_N)] = pdf_S - evenPart + oddPart;
234
235 ui = ux;
0fde6e45
MW
236 evenPart = omegaEven*( F(0.5)*(pdf_E + pdf_W) - ui*ui*w_1_nine_half - w_1_indep );
237 oddPart = omegaOdd*(F(0.5)*(pdf_E - pdf_W) - ui*w_1_x3 );
e3f82424
MW
238 src[I(x, y, z, D3Q19_W)] = pdf_E - evenPart - oddPart;
239 src[I(x, y, z, D3Q19_E)] = pdf_W - evenPart + oddPart;
240
241 ui = uz;
0fde6e45
MW
242 evenPart = omegaEven*( F(0.5)*(pdf_T + pdf_B) - ui*ui*w_1_nine_half - w_1_indep );
243 oddPart = omegaOdd*(F(0.5)*(pdf_T - pdf_B) - ui*w_1_x3 );
e3f82424
MW
244 src[I(x, y, z, D3Q19_B)] = pdf_T - evenPart - oddPart;
245 src[I(x, y, z, D3Q19_T)] = pdf_B - evenPart + oddPart;
246
247 // direction: w_2
248 w_2_indep = w_2*dir_indep_trm;
249
250 ui = -ux + uy;
0fde6e45
MW
251 evenPart = omegaEven*( F(0.5)*(pdf_NW + pdf_SE) - ui*ui*w_2_nine_half - w_2_indep );
252 oddPart = omegaOdd*(F(0.5)*(pdf_NW - pdf_SE) - ui*w_2_x3 );
e3f82424
MW
253 src[I(x, y, z, D3Q19_SE)] = pdf_NW - evenPart - oddPart;
254 src[I(x, y, z, D3Q19_NW)] = pdf_SE - evenPart + oddPart;
255
256 ui = ux + uy;
0fde6e45
MW
257 evenPart = omegaEven*( F(0.5)*(pdf_NE + pdf_SW) - ui*ui*w_2_nine_half - w_2_indep );
258 oddPart = omegaOdd*(F(0.5)*(pdf_NE - pdf_SW) - ui*w_2_x3 );
e3f82424
MW
259 src[I(x, y, z, D3Q19_SW)] = pdf_NE - evenPart - oddPart;
260 src[I(x, y, z, D3Q19_NE)] = pdf_SW - evenPart + oddPart;
261
262 ui = -ux + uz;
0fde6e45
MW
263 evenPart = omegaEven*( F(0.5)*(pdf_TW + pdf_BE) - ui*ui*w_2_nine_half - w_2_indep );
264 oddPart = omegaOdd*(F(0.5)*(pdf_TW - pdf_BE) - ui*w_2_x3 );
e3f82424
MW
265 src[I(x, y, z, D3Q19_BE)] = pdf_TW - evenPart - oddPart;
266 src[I(x, y, z, D3Q19_TW)] = pdf_BE - evenPart + oddPart;
267
268 ui = ux + uz;
0fde6e45
MW
269 evenPart = omegaEven*( F(0.5)*(pdf_TE + pdf_BW) - ui*ui*w_2_nine_half - w_2_indep );
270 oddPart = omegaOdd*(F(0.5)*(pdf_TE - pdf_BW) - ui*w_2_x3 );
e3f82424
MW
271 src[I(x, y, z, D3Q19_BW)] = pdf_TE - evenPart - oddPart;
272 src[I(x, y, z, D3Q19_TE)] = pdf_BW - evenPart + oddPart;
273
274 ui = -uy + uz;
0fde6e45
MW
275 evenPart = omegaEven*( F(0.5)*(pdf_TS + pdf_BN) - ui*ui*w_2_nine_half - w_2_indep );
276 oddPart = omegaOdd*(F(0.5)*(pdf_TS - pdf_BN) - ui*w_2_x3 );
e3f82424
MW
277 src[I(x, y, z, D3Q19_BN)] = pdf_TS - evenPart - oddPart;
278 src[I(x, y, z, D3Q19_TS)] = pdf_BN - evenPart + oddPart;
279
280 ui = uy + uz;
0fde6e45
MW
281 evenPart = omegaEven*( F(0.5)*(pdf_TN + pdf_BS) - ui*ui*w_2_nine_half - w_2_indep );
282 oddPart = omegaOdd*(F(0.5)*(pdf_TN - pdf_BS) - ui*w_2_x3 );
e3f82424
MW
283 src[I(x, y, z, D3Q19_BS)] = pdf_TN - evenPart - oddPart;
284 src[I(x, y, z, D3Q19_TN)] = pdf_BS - evenPart + oddPart;
285
286 #undef I
287 } } } // z, y, x (from inner to outer)
288 } } } // z, y, x (from inner to outer)
289
290 } // loop over threads
291
292 // }}}
293
294 X_LIKWID_STOP("aa-even");
295
296 #ifdef STATISTICS
297 kd->PdfsActive = src;
298 KernelStatistics(kd, ld, cd, iter);
299 #endif
300
301 // Fixup bounce back PDFs.
302 #ifdef _OPENMP
303 #pragma omp parallel for default(none) \
304 shared(kd, src)
305 #endif
0fde6e45
MW
306 #ifdef INTEL_OPT_DIRECTIVES
307 #pragma ivdep
308 #endif
e3f82424
MW
309 for (int i = 0; i < kd->nBounceBackPdfs; ++i) {
310 src[kd->BounceBackPdfsSrc[i]] = src[kd->BounceBackPdfsDst[i]];
311 }
312
313 // save current iteration
314 kda->Iteration = iter;
315
316 #ifdef VERIFICATION
317 kd->PdfsActive = src;
318 KernelAddBodyForce(kd, ld, cd);
319 #endif
320
321 #ifdef VTK_OUTPUT
322 if (cd->VtkOutput && (iter % cd->VtkModulus) == 0) {
323 kd->PdfsActive = src;
324 VtkWrite(ld, kd, cd, iter);
325 }
326 #endif
327
328 #ifdef STATISTICS
329 kd->PdfsActive = src;
330 KernelStatistics(kd, ld, cd, iter);
331 #endif
332
333 // --------------------------------------------------------------------
334 // odd time step
335
336
337 X_LIKWID_START("aa-odd");
338
339 // {{{
340
341 #ifdef _OPENMP
342 #pragma omp parallel for default(none) \
343 shared(gDims,src, w_0, w_1, w_2, omegaEven, omegaOdd, \
344 w_1_x3, w_2_x3, w_1_nine_half, w_2_nine_half, cd, \
345 oX, oY, oZ, nX, nY, nZ, blk, nThreads) \
346 private(ux, uy, uz, ui, dens, dir_indep_trm, \
347 pdf_C, \
348 pdf_N, pdf_E, pdf_S, pdf_W, \
349 pdf_NE, pdf_SE, pdf_SW, pdf_NW, \
350 pdf_T, pdf_TN, pdf_TE, pdf_TS, pdf_TW, \
351 pdf_B, pdf_BN, pdf_BE, pdf_BS, pdf_BW, \
352 evenPart, oddPart, w_1_indep, w_2_indep)
353 #endif
354
355 for (int i = 0; i < nThreads; ++i) {
356
357 int threadStartX = nX / nThreads * i;
358 int threadEndX = nX / nThreads * (i + 1);
359
360 if (nX % nThreads > 0) {
361 if (nX % nThreads > i) {
362 threadStartX += i;
363 threadEndX += i + 1;
364 }
365 else {
366 threadStartX += nX % nThreads;
367 threadEndX += nX % nThreads;
368 }
369 }
370
371 for (int bX = oX + threadStartX; bX < threadEndX + oX; bX += blk[0]) {
372 for (int bY = oY; bY < nY + oY; bY += blk[1]) {
373 for (int bZ = oZ; bZ < nZ + oZ; bZ += blk[2]) {
374
375 // Must do everything here, else it would break collapse.
376 int eZ = MIN(bZ + blk[2], nZ + oZ);
377 int eY = MIN(bY + blk[1], nY + oY);
378 int eX = MIN(bX + blk[0], threadEndX + oX);
379
380 for (int x = bX; x < eX; ++x) {
381 for (int y = bY; y < eY; ++y) {
0fde6e45
MW
382 #ifdef INTEL_OPT_DIRECTIVES
383 #pragma ivdep
384 #pragma vector always
385 #pragma simd
386 #endif
8cafd9ea 387 for (int z = bZ; z < eZ; ++z) { // LOOP aa-odd
e3f82424
MW
388
389 #define I(x, y, z, dir) P_INDEX_5(gDims, (x), (y), (z), (dir))
390
391 // Load PDFs of local cell: pdf_N = src[I(x, y, z, D3Q19_N)]; ...
392 #define X(name, idx, idxinv, _x, _y, _z) JOIN(pdf_,name) = src[I(x - _x, y - _y, z - _z, idxinv)];
393 D3Q19_LIST
394 #undef X
395
396
397// #define LID_DRIVEN_CAVITY
398
399#ifdef LID_DRIVEN_CAVITY
400
401 if (z == nZ - 4 + oZ && x > 3 + oX && x < (nX - 4 + oX) && y > 3 + oY && y < (nY - 4 + oY)) {
0fde6e45
MW
402 ux = F(0.1) * F(0.5)77;
403 uy = F(0.0);
404 uz = F(0.0);
e3f82424
MW
405
406 } else {
407#endif
408 ux = pdf_E + pdf_NE + pdf_SE + pdf_TE + pdf_BE -
409 pdf_W - pdf_NW - pdf_SW - pdf_TW - pdf_BW;
410 uy = pdf_N + pdf_NE + pdf_NW + pdf_TN + pdf_BN -
411 pdf_S - pdf_SE - pdf_SW - pdf_TS - pdf_BS;
412 uz = pdf_T + pdf_TE + pdf_TW + pdf_TN + pdf_TS -
413 pdf_B - pdf_BE - pdf_BW - pdf_BN - pdf_BS;
414#ifdef LID_DRIVEN_CAVITY
415 }
416#endif
417
418 dens = pdf_C +
419 pdf_N + pdf_E + pdf_S + pdf_W +
420 pdf_NE + pdf_SE + pdf_SW + pdf_NW +
421 pdf_T + pdf_TN + pdf_TE + pdf_TS + pdf_TW +
422 pdf_B + pdf_BN + pdf_BE + pdf_BS + pdf_BW;
423
0fde6e45 424 dir_indep_trm = dens - (ux * ux + uy * uy + uz * uz)*F(3.0)/F(2.0);
e3f82424
MW
425
426 // direction: w_0
427 src[I(x, y, z, D3Q19_C)] = pdf_C - omegaEven*(pdf_C - w_0*dir_indep_trm);
428
429 // direction: w_1
430 w_1_indep = w_1*dir_indep_trm;
431
432 ui = uy;
0fde6e45
MW
433 evenPart = omegaEven*( F(0.5)*(pdf_N + pdf_S) - ui*ui*w_1_nine_half - w_1_indep );
434 oddPart = omegaOdd*(F(0.5)*(pdf_N - pdf_S) - ui*w_1_x3 );
e3f82424
MW
435 src[I(x, y + 1, z, D3Q19_N)] = pdf_N - evenPart - oddPart;
436 src[I(x, y - 1, z, D3Q19_S)] = pdf_S - evenPart + oddPart;
437
438 ui = ux;
0fde6e45
MW
439 evenPart = omegaEven*( F(0.5)*(pdf_E + pdf_W) - ui*ui*w_1_nine_half - w_1_indep );
440 oddPart = omegaOdd*(F(0.5)*(pdf_E - pdf_W) - ui*w_1_x3 );
e3f82424
MW
441 src[I(x + 1, y, z, D3Q19_E)] = pdf_E - evenPart - oddPart;
442 src[I(x - 1, y, z, D3Q19_W)] = pdf_W - evenPart + oddPart;
443
444 ui = uz;
0fde6e45
MW
445 evenPart = omegaEven*( F(0.5)*(pdf_T + pdf_B) - ui*ui*w_1_nine_half - w_1_indep );
446 oddPart = omegaOdd*(F(0.5)*(pdf_T - pdf_B) - ui*w_1_x3 );
e3f82424
MW
447 src[I(x, y, z + 1, D3Q19_T)] = pdf_T - evenPart - oddPart;
448 src[I(x, y, z - 1, D3Q19_B)] = pdf_B - evenPart + oddPart;
449
450 // direction: w_2
451 w_2_indep = w_2*dir_indep_trm;
452
453 ui = -ux + uy;
0fde6e45
MW
454 evenPart = omegaEven*( F(0.5)*(pdf_NW + pdf_SE) - ui*ui*w_2_nine_half - w_2_indep );
455 oddPart = omegaOdd*(F(0.5)*(pdf_NW - pdf_SE) - ui*w_2_x3 );
e3f82424
MW
456 src[I(x - 1, y + 1, z, D3Q19_NW)] = pdf_NW - evenPart - oddPart;
457 src[I(x + 1, y - 1, z, D3Q19_SE)] = pdf_SE - evenPart + oddPart;
458
459 ui = ux + uy;
0fde6e45
MW
460 evenPart = omegaEven*( F(0.5)*(pdf_NE + pdf_SW) - ui*ui*w_2_nine_half - w_2_indep );
461 oddPart = omegaOdd*(F(0.5)*(pdf_NE - pdf_SW) - ui*w_2_x3 );
e3f82424
MW
462 src[I(x + 1, y + 1, z, D3Q19_NE)] = pdf_NE - evenPart - oddPart;
463 src[I(x - 1, y - 1, z, D3Q19_SW)] = pdf_SW - evenPart + oddPart;
464
465 ui = -ux + uz;
0fde6e45
MW
466 evenPart = omegaEven*( F(0.5)*(pdf_TW + pdf_BE) - ui*ui*w_2_nine_half - w_2_indep );
467 oddPart = omegaOdd*(F(0.5)*(pdf_TW - pdf_BE) - ui*w_2_x3 );
e3f82424
MW
468 src[I(x - 1, y, z + 1, D3Q19_TW)] = pdf_TW - evenPart - oddPart;
469 src[I(x + 1, y, z - 1, D3Q19_BE)] = pdf_BE - evenPart + oddPart;
470
471 ui = ux + uz;
0fde6e45
MW
472 evenPart = omegaEven*( F(0.5)*(pdf_TE + pdf_BW) - ui*ui*w_2_nine_half - w_2_indep );
473 oddPart = omegaOdd*(F(0.5)*(pdf_TE - pdf_BW) - ui*w_2_x3 );
e3f82424
MW
474 src[I(x + 1, y, z + 1, D3Q19_TE)] = pdf_TE - evenPart - oddPart;
475 src[I(x - 1, y, z - 1, D3Q19_BW)] = pdf_BW - evenPart + oddPart;
476
477 ui = -uy + uz;
0fde6e45
MW
478 evenPart = omegaEven*( F(0.5)*(pdf_TS + pdf_BN) - ui*ui*w_2_nine_half - w_2_indep );
479 oddPart = omegaOdd*(F(0.5)*(pdf_TS - pdf_BN) - ui*w_2_x3 );
e3f82424
MW
480 src[I(x, y - 1, z + 1, D3Q19_TS)] = pdf_TS - evenPart - oddPart;
481 src[I(x, y + 1, z - 1, D3Q19_BN)] = pdf_BN - evenPart + oddPart;
482
483 ui = uy + uz;
0fde6e45
MW
484 evenPart = omegaEven*( F(0.5)*(pdf_TN + pdf_BS) - ui*ui*w_2_nine_half - w_2_indep );
485 oddPart = omegaOdd*(F(0.5)*(pdf_TN - pdf_BS) - ui*w_2_x3 );
e3f82424
MW
486 src[I(x, y + 1, z + 1, D3Q19_TN)] = pdf_TN - evenPart - oddPart;
487 src[I(x, y - 1, z - 1, D3Q19_BS)] = pdf_BS - evenPart + oddPart;
488
489
490 #undef I
491 } } } // z, y, x (from inner to outer)
492 } } } // z, y, x (from inner to outer)
493 } // loop over threads
494
495 // }}}
496
497 // Stop counters before bounce back. Else computing loop balance will be incorrect.
498
499 X_LIKWID_STOP("aa-odd");
500
501 // Fixup bounce back PDFs.
502 #ifdef _OPENMP
503 #pragma omp parallel for default(none) \
504 shared(kd, src)
505 #endif
0fde6e45
MW
506 #ifdef INTEL_OPT_DIRECTIVES
507 #pragma ivdep
508 #endif
e3f82424
MW
509 for (int i = 0; i < kd->nBounceBackPdfs; ++i) {
510 src[kd->BounceBackPdfsDst[i]] = src[kd->BounceBackPdfsSrc[i]];
511 }
512
513 // save current iteration
514 kda->Iteration = iter + 1;
515
516 #ifdef VERIFICATION
517 kd->PdfsActive = src;
518 KernelAddBodyForce(kd, ld, cd);
519 #endif
520
521 #ifdef VTK_OUTPUT
522 if (cd->VtkOutput && (iter + 1 % cd->VtkModulus) == 0) {
523 kd->PdfsActive = src;
524 VtkWrite(ld, kd, cd, iter + 1);
525 }
526 #endif
527
528 #ifdef STATISTICS
529 kd->PdfsActive = src;
530 KernelStatistics(kd, ld, cd, iter + 1);
531 #endif // }}}
532
533
534 } // for (int iter = 0; ...
535
8cafd9ea
MW
536 X_KERNEL_END(kernelData);
537
e3f82424
MW
538 #ifdef VTK_OUTPUT
539
540 if (cd->VtkOutput) {
541 kd->PdfsActive = src;
542 VtkWrite(ld, kd, cd, maxIterations);
543 }
544
545 #endif
546
547 return;
548}
549
This page took 0.089427 seconds and 5 git commands to generate.