1 // --------------------------------------------------------------------------
4 // Markus Wittmann, 2016-2017
5 // RRZE, University of Erlangen-Nuremberg, Germany
6 // markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
9 // LSS, University of Erlangen-Nuremberg, Germany
11 // This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
13 // LbmBenchKernels is free software: you can redistribute it and/or modify
14 // it under the terms of the GNU General Public License as published by
15 // the Free Software Foundation, either version 3 of the License, or
16 // (at your option) any later version.
18 // LbmBenchKernels is distributed in the hope that it will be useful,
19 // but WITHOUT ANY WARRANTY; without even the implied warranty of
20 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 // GNU General Public License for more details.
23 // You should have received a copy of the GNU General Public License
24 // along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>.
26 // --------------------------------------------------------------------------
27 #include "BenchKernelD3Q19ListAaRiaCommon.h"
40 void FNAME(D3Q19ListAaRiaKernel)(LatticeDesc * ld, KernelData * kernelData, CaseData * cd)
43 Assert(kernelData != NULL);
46 Assert(cd->Omega > 0.0);
47 Assert(cd->Omega < 2.0);
49 KernelData * kd = (KernelData *)kernelData;
50 KernelDataList * kdl = KDL(kernelData);
51 KernelDataListRia * kdlr = KDLR(kernelData);
52 PdfT omega = cd->Omega;
53 PdfT omegaEven = omega;
54 // 1/ 4: best stability;
55 // 1/12: removes third-order advection error (best advection);
56 // 1/ 6: removes fourth-order diffusion error (best diffusion);
57 // 3/16: exact location of bounce back for Poiseuille flow
58 PdfT magicParam = F(1.0) / F(12.0);
59 PdfT omegaOdd = F(1.0) / (F(0.5) + magicParam / (F(1.0) / omega - F(0.5)));
61 PdfT evenPart = F(0.0);
62 PdfT oddPart = F(0.0);
63 PdfT dir_indep_trm = F(0.0);
65 const PdfT w_0 = F(1.0) / F(3.0);
66 const PdfT w_1 = F(1.0) / F(18.0);
67 const PdfT w_2 = F(1.0) / F(36.0);
69 const PdfT w_1_x3 = w_1 * F(3.0); const PdfT w_1_nine_half = w_1 * F(9.0) / F(2.0); PdfT w_1_indep = F(0.0);
70 const PdfT w_2_x3 = w_2 * F(3.0); const PdfT w_2_nine_half = w_2 * F(9.0) / F(2.0); PdfT w_2_indep = F(0.0);
77 // Declare pdf_N, pdf_E, pdf_S, pdf_W, ...
78 #define X(name, idx, idxinv, x, y, z) PdfT JOIN(pdf_,name);
82 // Declare pointers to pdfs ppdf_N, ppdf_E, ppdf_S, ppdf_W, ...
83 #define X(name, idx, idxinv, x, y, z) PdfT * JOIN(ppdf_,name) = NULL;
87 uint32_t nConsecNodes = kdlr->nConsecNodes;
88 uint32_t * consecNodes = kdlr->ConsecNodes;
89 uint32_t consecIndex = 0;
90 uint32_t consecValue = 0;
96 PdfT * src = kd->Pdfs[0];
98 int maxIterations = cd->MaxIterations;
99 int nFluid = kdl->nFluid;
100 int nCells = kdl->nCells;
102 uint32_t adjListIndex;
103 uint32_t * adjList = kdl->AdjList;
109 kd->PdfsActive = src;
110 VtkWrite(ld, kd, cd, -1);
115 kd->PdfsActive = src;
116 KernelStatistics(kd, ld, cd, 0);
119 X_KERNEL_START(kernelData);
121 // TODO: use an outer OpenMP parallel region
122 for(int iter = 0; iter < maxIterations; iter += 2) {
124 X_LIKWID_START("list-aa-ria-even");
126 // --------------------------------------------------------------------
128 // --------------------------------------------------------------------
130 #pragma omp parallel for default(none) \
131 shared(stderr, nFluid, nCells, kd, kdl, adjList, omegaOdd, omegaEven, src) \
132 private(ux, uy, uz, dens, adjListIndex, evenPart, oddPart, dir_indep_trm, w_1_indep, w_2_indep, ui,\
134 pdf_N, pdf_E, pdf_S, pdf_W, \
135 pdf_NE, pdf_SE, pdf_SW, pdf_NW, \
136 pdf_T, pdf_TN, pdf_TE, pdf_TS, pdf_TW, \
137 pdf_B, pdf_BN, pdf_BE, pdf_BS, pdf_BW)
139 #ifdef INTEL_OPT_DIRECTIVES
141 #pragma vector always
144 for (int index = 0; index < nFluid; ++index) { // LOOP list-aa-ria-even
146 #define I(index, dir) P_INDEX_3((nCells), (index), (dir))
148 #define X(name, idx, idxinv, _x, _y, _z) JOIN(pdf_,name) = src[I(index, idx)];
152 // #define LID_DRIVEN_CAVITY
154 #ifdef LID_DRIVEN_CAVITY
155 int nX = kd->Dims[0];
156 int nY = kd->Dims[1];
157 int nZ = kd->Dims[2];
159 int x = kdl->Coords[C_INDEX_X(index)];
160 int y = kdl->Coords[C_INDEX_Y(index)];
161 int z = kdl->Coords[C_INDEX_Z(index)];
163 if (z == nZ - 4 && x > 3 && x < (nX - 4) && y > 3 && y < (nY - 4)) {
164 ux = F(0.1) * F(0.5)77;
169 ux = pdf_E + pdf_NE + pdf_SE + pdf_TE + pdf_BE -
170 pdf_W - pdf_NW - pdf_SW - pdf_TW - pdf_BW;
171 uy = pdf_N + pdf_NE + pdf_NW + pdf_TN + pdf_BN -
172 pdf_S - pdf_SE - pdf_SW - pdf_TS - pdf_BS;
173 uz = pdf_T + pdf_TE + pdf_TW + pdf_TN + pdf_TS -
174 pdf_B - pdf_BE - pdf_BW - pdf_BN - pdf_BS;
175 #ifdef LID_DRIVEN_CAVITY
180 pdf_N + pdf_E + pdf_S + pdf_W +
181 pdf_NE + pdf_SE + pdf_SW + pdf_NW +
182 pdf_T + pdf_TN + pdf_TE + pdf_TS + pdf_TW +
183 pdf_B + pdf_BN + pdf_BE + pdf_BS + pdf_BW;
185 dir_indep_trm = dens - (ux * ux + uy * uy + uz * uz)*F(3.0)/F(2.0);
188 src[I(index, D3Q19_C) ] = pdf_C - omegaEven*(pdf_C - w_0*dir_indep_trm);
191 w_1_indep = w_1*dir_indep_trm;
194 evenPart = omegaEven*( F(0.5)*(pdf_N + pdf_S) - ui*ui*w_1_nine_half - w_1_indep );
195 oddPart = omegaOdd*(F(0.5)*(pdf_N - pdf_S) - ui*w_1_x3 );
196 src[I(index, D3Q19_S)] = pdf_N - evenPart - oddPart;
197 src[I(index, D3Q19_N)] = pdf_S - evenPart + oddPart;
200 evenPart = omegaEven*( F(0.5)*(pdf_E + pdf_W) - ui*ui*w_1_nine_half - w_1_indep );
201 oddPart = omegaOdd*(F(0.5)*(pdf_E - pdf_W) - ui*w_1_x3 );
202 src[I(index, D3Q19_W)] = pdf_E - evenPart - oddPart;
203 src[I(index, D3Q19_E)] = pdf_W - evenPart + oddPart;
206 evenPart = omegaEven*( F(0.5)*(pdf_T + pdf_B) - ui*ui*w_1_nine_half - w_1_indep );
207 oddPart = omegaOdd*(F(0.5)*(pdf_T - pdf_B) - ui*w_1_x3 );
208 src[I(index, D3Q19_B)] = pdf_T - evenPart - oddPart;
209 src[I(index, D3Q19_T)] = pdf_B - evenPart + oddPart;
212 w_2_indep = w_2*dir_indep_trm;
215 evenPart = omegaEven*( F(0.5)*(pdf_NW + pdf_SE) - ui*ui*w_2_nine_half - w_2_indep );
216 oddPart = omegaOdd*(F(0.5)*(pdf_NW - pdf_SE) - ui*w_2_x3 );
217 src[I(index, D3Q19_SE)] = pdf_NW - evenPart - oddPart;
218 src[I(index, D3Q19_NW)] = pdf_SE - evenPart + oddPart;
221 evenPart = omegaEven*( F(0.5)*(pdf_NE + pdf_SW) - ui*ui*w_2_nine_half - w_2_indep );
222 oddPart = omegaOdd*(F(0.5)*(pdf_NE - pdf_SW) - ui*w_2_x3 );
223 src[I(index, D3Q19_SW)] = pdf_NE - evenPart - oddPart;
224 src[I(index, D3Q19_NE)] = pdf_SW - evenPart + oddPart;
227 evenPart = omegaEven*( F(0.5)*(pdf_TW + pdf_BE) - ui*ui*w_2_nine_half - w_2_indep );
228 oddPart = omegaOdd*(F(0.5)*(pdf_TW - pdf_BE) - ui*w_2_x3 );
229 src[I(index, D3Q19_BE)] = pdf_TW - evenPart - oddPart;
230 src[I(index, D3Q19_TW)] = pdf_BE - evenPart + oddPart;
233 evenPart = omegaEven*( F(0.5)*(pdf_TE + pdf_BW) - ui*ui*w_2_nine_half - w_2_indep );
234 oddPart = omegaOdd*(F(0.5)*(pdf_TE - pdf_BW) - ui*w_2_x3 );
235 src[I(index, D3Q19_BW)] = pdf_TE - evenPart - oddPart;
236 src[I(index, D3Q19_TE)] = pdf_BW - evenPart + oddPart;
239 evenPart = omegaEven*( F(0.5)*(pdf_TS + pdf_BN) - ui*ui*w_2_nine_half - w_2_indep );
240 oddPart = omegaOdd*(F(0.5)*(pdf_TS - pdf_BN) - ui*w_2_x3 );
241 src[I(index, D3Q19_BN)] = pdf_TS - evenPart - oddPart;
242 src[I(index, D3Q19_TS)] = pdf_BN - evenPart + oddPart;
245 evenPart = omegaEven*( F(0.5)*(pdf_TN + pdf_BS) - ui*ui*w_2_nine_half - w_2_indep );
246 oddPart = omegaOdd*(F(0.5)*(pdf_TN - pdf_BS) - ui*w_2_x3 );
247 src[I(index, D3Q19_BS)] = pdf_TN - evenPart - oddPart;
248 src[I(index, D3Q19_TN)] = pdf_BS - evenPart + oddPart;
250 } // (parallel) loop over fluid nodes
252 X_LIKWID_STOP("list-aa-ria-even");
254 // save current iteration
255 kdl->Iteration = iter;
257 kd->PdfsActive = src;
258 KernelAddBodyForce(kd, ld, cd);
261 // --------------------------------------------------------------------
263 // --------------------------------------------------------------------
265 X_LIKWID_START("list-aa-ria-odd");
268 #pragma omp parallel default(none) \
269 shared(stderr, nFluid, nCells, kd, kdl, kdlr, adjList, omegaOdd, omegaEven, src, consecNodes, nConsecNodes) \
270 private(ux, uy, uz, dens, adjListIndex, evenPart, oddPart, dir_indep_trm, w_1_indep, w_2_indep, ui,\
272 pdf_N, pdf_E, pdf_S, pdf_W, \
273 pdf_NE, pdf_SE, pdf_SW, pdf_NW, \
274 pdf_T, pdf_TN, pdf_TE, pdf_TS, pdf_TW, \
275 pdf_B, pdf_BN, pdf_BE, pdf_BS, pdf_BW, \
277 ppdf_N, ppdf_E, ppdf_S, ppdf_W, \
278 ppdf_NE, ppdf_SE, ppdf_SW, ppdf_NW, \
279 ppdf_T, ppdf_TN, ppdf_TE, ppdf_TS, ppdf_TW, \
280 ppdf_B, ppdf_BN, ppdf_BE, ppdf_BS, ppdf_BW, \
281 consecValue, consecIndex)
287 threadId = omp_get_thread_num();
290 consecIndex = kdlr->ConsecThreadIndices[threadId];
293 int * threadIndices = kdlr->FluidNodeThreadIndices;
295 int nFluidThread = threadIndices[threadId + 1] - threadIndices[threadId];
297 int indexStart = threadIndices[threadId];
298 int indexStop = threadIndices[threadId] + nFluidThread;
300 // Because of run-length coding, iterations are not independent.
301 for (int index = indexStart; index < indexStop; ++index) { // LOOP list-aa-ria-odd
303 #define I(index, dir) P_INDEX_3((nCells), (index), (dir))
306 if (consecValue > 0) {
308 // Increment all pdf pointers.
309 #define X(name, idx, idxinv, _x, _y, _z) ++JOIN(ppdf_,name);
314 Assert(consecIndex < nConsecNodes);
316 consecValue = consecNodes[consecIndex] - 1;
317 // Load new pointers to PDFs of local cell:
319 adjListIndex = index * N_D3Q19_IDX;
321 #define X(name, idx, idxinv, _x, _y, _z) JOIN(ppdf_,name) = &(src[adjList[adjListIndex + idxinv]]);
325 ppdf_C = &(src[P_INDEX_3(nCells, index, D3Q19_C)]);
329 #define X(name, idx, idxinv, _x, _y, _z) JOIN(pdf_,name) = *JOIN(ppdf_,name);
333 adjListIndex = index * N_D3Q19_IDX;
335 // Load PDFs of local cell: pdf_N = src[adjList[adjListIndex + D3Q19_S]]; ...
336 pdf_C = src[P_INDEX_3(nCells, index, D3Q19_C)];
338 #define X(name, idx, idxinv, _x, _y, _z) JOIN(ppdf_,name) = &(src[adjList[adjListIndex + idxinv]]);
342 #define X(name, idx, idxinv, _x, _y, _z) JOIN(pdf_,name) = src[adjList[adjListIndex + idxinv]];
347 #ifdef LID_DRIVEN_CAVITY
348 int nX = kd->Dims[0];
349 int nY = kd->Dims[1];
350 int nZ = kd->Dims[2];
352 int x = kdl->Coords[C_INDEX_X(index)];
353 int y = kdl->Coords[C_INDEX_Y(index)];
354 int z = kdl->Coords[C_INDEX_Z(index)];
356 if (z == nZ - 4 && x > 3 && x < (nX - 4) && y > 3 && y < (nY - 4)) {
357 ux = F(0.1) * F(0.5)77;
362 ux = pdf_E + pdf_NE + pdf_SE + pdf_TE + pdf_BE -
363 pdf_W - pdf_NW - pdf_SW - pdf_TW - pdf_BW;
364 uy = pdf_N + pdf_NE + pdf_NW + pdf_TN + pdf_BN -
365 pdf_S - pdf_SE - pdf_SW - pdf_TS - pdf_BS;
366 uz = pdf_T + pdf_TE + pdf_TW + pdf_TN + pdf_TS -
367 pdf_B - pdf_BE - pdf_BW - pdf_BN - pdf_BS;
368 #ifdef LID_DRIVEN_CAVITY
373 pdf_N + pdf_E + pdf_S + pdf_W +
374 pdf_NE + pdf_SE + pdf_SW + pdf_NW +
375 pdf_T + pdf_TN + pdf_TE + pdf_TS + pdf_TW +
376 pdf_B + pdf_BN + pdf_BE + pdf_BS + pdf_BW;
378 dir_indep_trm = dens - (ux * ux + uy * uy + uz * uz)*F(3.0)/F(2.0);
380 adjListIndex = index * N_D3Q19_IDX;
383 src[I(index, D3Q19_C) ] = pdf_C - omegaEven*(pdf_C - w_0*dir_indep_trm);
386 w_1_indep = w_1*dir_indep_trm;
389 evenPart = omegaEven*( F(0.5)*(pdf_N + pdf_S) - ui*ui*w_1_nine_half - w_1_indep );
390 oddPart = omegaOdd*(F(0.5)*(pdf_N - pdf_S) - ui*w_1_x3 );
391 *ppdf_S = pdf_N - evenPart - oddPart;
392 *ppdf_N = pdf_S - evenPart + oddPart;
395 evenPart = omegaEven*( F(0.5)*(pdf_E + pdf_W) - ui*ui*w_1_nine_half - w_1_indep );
396 oddPart = omegaOdd*(F(0.5)*(pdf_E - pdf_W) - ui*w_1_x3 );
397 *ppdf_W = pdf_E - evenPart - oddPart;
398 *ppdf_E = pdf_W - evenPart + oddPart;
401 evenPart = omegaEven*( F(0.5)*(pdf_T + pdf_B) - ui*ui*w_1_nine_half - w_1_indep );
402 oddPart = omegaOdd*(F(0.5)*(pdf_T - pdf_B) - ui*w_1_x3 );
403 *ppdf_B = pdf_T - evenPart - oddPart;
404 *ppdf_T = pdf_B - evenPart + oddPart;
407 w_2_indep = w_2*dir_indep_trm;
410 evenPart = omegaEven*( F(0.5)*(pdf_NW + pdf_SE) - ui*ui*w_2_nine_half - w_2_indep );
411 oddPart = omegaOdd*(F(0.5)*(pdf_NW - pdf_SE) - ui*w_2_x3 );
412 *ppdf_SE = pdf_NW - evenPart - oddPart;
413 *ppdf_NW = pdf_SE - evenPart + oddPart;
416 evenPart = omegaEven*( F(0.5)*(pdf_NE + pdf_SW) - ui*ui*w_2_nine_half - w_2_indep );
417 oddPart = omegaOdd*(F(0.5)*(pdf_NE - pdf_SW) - ui*w_2_x3 );
418 *ppdf_SW = pdf_NE - evenPart - oddPart;
419 *ppdf_NE = pdf_SW - evenPart + oddPart;
422 evenPart = omegaEven*( F(0.5)*(pdf_TW + pdf_BE) - ui*ui*w_2_nine_half - w_2_indep );
423 oddPart = omegaOdd*(F(0.5)*(pdf_TW - pdf_BE) - ui*w_2_x3 );
424 *ppdf_BE = pdf_TW - evenPart - oddPart;
425 *ppdf_TW = pdf_BE - evenPart + oddPart;
428 evenPart = omegaEven*( F(0.5)*(pdf_TE + pdf_BW) - ui*ui*w_2_nine_half - w_2_indep );
429 oddPart = omegaOdd*(F(0.5)*(pdf_TE - pdf_BW) - ui*w_2_x3 );
430 *ppdf_BW = pdf_TE - evenPart - oddPart;
431 *ppdf_TE = pdf_BW - evenPart + oddPart;
434 evenPart = omegaEven*( F(0.5)*(pdf_TS + pdf_BN) - ui*ui*w_2_nine_half - w_2_indep );
435 oddPart = omegaOdd*(F(0.5)*(pdf_TS - pdf_BN) - ui*w_2_x3 );
436 *ppdf_BN = pdf_TS - evenPart - oddPart;
437 *ppdf_TS = pdf_BN - evenPart + oddPart;
440 evenPart = omegaEven*( F(0.5)*(pdf_TN + pdf_BS) - ui*ui*w_2_nine_half - w_2_indep );
441 oddPart = omegaOdd*(F(0.5)*(pdf_TN - pdf_BS) - ui*w_2_x3 );
442 *ppdf_BS = pdf_TN - evenPart - oddPart;
443 *ppdf_TN = pdf_BS - evenPart + oddPart;
446 } // loop over fluid nodes
447 } // end pragma omp parallel
449 X_LIKWID_STOP("list-aa-ria-odd");
451 // save current iteration
452 kdl->Iteration = iter + 1;
455 kd->PdfsActive = src;
456 KernelAddBodyForce(kd, ld, cd);
460 if (cd->VtkOutput && (iter % cd->VtkModulus) == 0) {
461 kd->PdfsActive = src;
462 VtkWrite(ld, kd, cd, iter);
467 kd->PdfsActive = src;
468 KernelStatistics(kd, ld, cd, iter);
472 } // for (int iter = 0; ...
474 X_KERNEL_END(kernelData);
478 kd->PdfsActive = src;
479 VtkWrite(ld, kd, cd, maxIterations);
484 kd->PdfsActive = src;
485 KernelStatistics(kd, ld, cd, maxIterations);