version 0.1
[LbmBenchmarkKernelsPublic.git] / src / BenchKernelD3Q19ListAaRia.c
CommitLineData
10988083
MW
1// --------------------------------------------------------------------------
2//
3// Copyright
4// Markus Wittmann, 2016-2017
5// RRZE, University of Erlangen-Nuremberg, Germany
6// markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
7//
8// Viktor Haag, 2016
9// LSS, University of Erlangen-Nuremberg, Germany
10//
11// This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
12//
13// LbmBenchKernels is free software: you can redistribute it and/or modify
14// it under the terms of the GNU General Public License as published by
15// the Free Software Foundation, either version 3 of the License, or
16// (at your option) any later version.
17//
18// LbmBenchKernels is distributed in the hope that it will be useful,
19// but WITHOUT ANY WARRANTY; without even the implied warranty of
20// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21// GNU General Public License for more details.
22//
23// You should have received a copy of the GNU General Public License
24// along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>.
25//
26// --------------------------------------------------------------------------
27#include "BenchKernelD3Q19ListAaRiaCommon.h"
28
29#include "Memory.h"
30#include "Vtk.h"
31#include "LikwidIf.h"
32
33#include <inttypes.h>
34#include <math.h>
35
36#ifdef _OPENMP
37 #include <omp.h>
38#endif
39
// Runs the time-step loop of the D3Q19 list-based kernel, updating the single
// PDF array kd->Pdfs[0] in place (the same array is read and written).
//
// One pass of the outer loop performs two time steps:
//  - even step: for every fluid node the 19 PDFs are loaded from the node's
//    own slots, relaxed, and each result is stored into the slot of the
//    opposite direction of the same node (the center PDF stays in its slot).
//  - odd step: the PDFs are read through pointers taken from the adjacency
//    list (entry idxinv of the node's row) and each result is written back
//    through the pointer of the opposite direction. The pointers are only
//    reloaded from the adjacency list when the counter consecValue has run
//    down to zero; otherwise all 19 pointers are simply incremented.
//
// The collision uses two relaxation rates: omegaEven (= cd->Omega) scales the
// symmetric part 0.5*(f_a + f_b) of each pair of opposite directions and
// omegaOdd (derived from omega and magicParam) scales the antisymmetric part
// 0.5*(f_a - f_b).
//
// Parameters:
//  ld          lattice description; must not be NULL. In this function it is
//              only forwarded to VtkWrite, KernelStatistics and
//              KernelAddBodyForce (all in conditionally compiled sections).
//  kernelData  kernel data; must not be NULL. Accessed directly as KernelData
//              and through KDL()/KDLR() as list / RIA specific data.
//  cd          case data; must not be NULL. Supplies Omega (asserted to lie
//              in (0, 2)), MaxIterations, VtkOutput and VtkModulus.
//
// Side effects: overwrites kd->Pdfs[0], sets kdl->Iteration after every time
// step, and sets kd->PdfsActive in the VTK_OUTPUT / STATISTICS / VERIFICATION
// sections.
//
// NOTE(review): the outer loop advances by 2 and always executes both steps,
// so an odd MaxIterations results in MaxIterations + 1 executed time steps.
40void FNAME(D3Q19ListAaRiaKernel)(LatticeDesc * ld, KernelData * kernelData, CaseData * cd)
41{
42 Assert(ld != NULL);
43 Assert(kernelData != NULL);
44 Assert(cd != NULL);
45
46 Assert(cd->Omega > 0.0);
47 Assert(cd->Omega < 2.0);
48
 // Three views onto the same kernel data object: base data, list data
 // (KDL) and RIA data (KDLR).
49 KernelData * kd = (KernelData *)kernelData;
50 KernelDataList * kdl = KDL(kernelData);
51 KernelDataListRia * kdlr = KDLR(kernelData);
 // omegaEven relaxes the symmetric parts, omegaOdd the antisymmetric parts;
 // omegaOdd follows from omega and the chosen magicParam.
52 PdfT omega = cd->Omega;
53 PdfT omegaEven = omega;
 // Possible choices for magicParam:
54 // 1/ 4: best stability;
55 // 1/12: removes third-order advection error (best advection);
56 // 1/ 6: removes fourth-order diffusion error (best diffusion);
57 // 3/16: exact location of bounce back for Poiseuille flow
58 PdfT magicParam = 1.0 / 12.0;
59 PdfT omegaOdd = 1.0 / (0.5 + magicParam / (1.0 / omega - 0.5));
60
 // Per-node scratch values; they are listed as private in the OpenMP
 // clauses below.
61 PdfT evenPart = 0.0;
62 PdfT oddPart = 0.0;
63 PdfT dir_indep_trm = 0.0;
64
 // Lattice weights: w_0 is used for the center direction, w_1 for the six
 // axis directions (N, S, E, W, T, B) and w_2 for the twelve diagonal ones.
65 const PdfT w_0 = 1.0 / 3.0;
66 const PdfT w_1 = 1.0 / 18.0;
67 const PdfT w_2 = 1.0 / 36.0;
68
 // Weights premultiplied by 3 and by 9/2; w_*_indep later holds the weight
 // times the direction independent term of the current node.
69 const PdfT w_1_x3 = w_1 * 3.0; const PdfT w_1_nine_half = w_1 * 9.0 / 2.0; PdfT w_1_indep = 0.0;
70 const PdfT w_2_x3 = w_2 * 3.0; const PdfT w_2_nine_half = w_2 * 9.0 / 2.0; PdfT w_2_indep = 0.0;
71
 // Velocity component along the direction pair currently processed.
72 PdfT ui;
73
74 PdfT ux, uy, uz;
75 PdfT dens;
76
77 // Declare pdf_N, pdf_E, pdf_S, pdf_W, ...
78 #define X(name, idx, idxinv, x, y, z) PdfT JOIN(pdf_,name);
79 D3Q19_LIST
80 #undef X
81
82 // Declare pointers to pdfs ppdf_N, ppdf_E, ppdf_S, ppdf_W, ...
83 #define X(name, idx, idxinv, x, y, z) PdfT * JOIN(ppdf_,name) = NULL;
84 D3Q19_LIST
85 #undef X
86
 // consecNodes is read in the odd step: entry consecIndex, minus one, is
 // the number of following nodes for which the PDF pointers are only
 // incremented instead of being reloaded from the adjacency list.
 // presumably each entry is the length of a run of consecutive nodes - confirm
87 uint32_t nConsecNodes = kdlr->nConsecNodes;
88 uint32_t * consecNodes = kdlr->ConsecNodes;
89 uint32_t consecIndex = 0;
90 uint32_t consecValue = 0;
91
 // nConsecNodes is otherwise only referenced inside an Assert and in an
 // OpenMP shared clause.
92#ifndef DEBUG
93 UNUSED(nConsecNodes);
94#endif
95
 // Single PDF array used for reading and writing in both time steps.
96 PdfT * src = kd->Pdfs[0];
97
98 int maxIterations = cd->MaxIterations;
99 int nFluid = kdl->nFluid;
100 int nCells = kdl->nCells;
101
 // Start of a node's row in adjList: index * N_D3Q19_IDX.
102 uint32_t adjListIndex;
103 uint32_t * adjList = kdl->AdjList;
104
105
106
 // Optional output of the initial state (tagged with iteration -1).
107 #ifdef VTK_OUTPUT
108 if (cd->VtkOutput) {
109 kd->PdfsActive = src;
110 VtkWrite(ld, kd, cd, -1);
111 }
112 #endif
113
114 #ifdef STATISTICS
115 kd->PdfsActive = src;
116 KernelStatistics(kd, ld, cd, 0);
117 #endif
118
 // Every pass of this loop executes an even and an odd time step.
119 // TODO: outer openmp parallel
120 for(int iter = 0; iter < maxIterations; iter += 2) {
121
122 X_LIKWID_START("list-aa-ria-even");
123
124 // --------------------------------------------------------------------
125 // even time step
126 // --------------------------------------------------------------------
 // Only slots of the node itself are read and written here; the
 // adjacency list is not used in this step.
127 #ifdef _OPENMP
128 #pragma omp parallel for default(none) \
129 shared(stderr, nFluid, nCells, kd, kdl, adjList, omegaOdd, omegaEven, src) \
130 private(ux, uy, uz, dens, adjListIndex, evenPart, oddPart, dir_indep_trm, w_1_indep, w_2_indep, ui,\
131 pdf_C, \
132 pdf_N, pdf_E, pdf_S, pdf_W, \
133 pdf_NE, pdf_SE, pdf_SW, pdf_NW, \
134 pdf_T, pdf_TN, pdf_TE, pdf_TS, pdf_TW, \
135 pdf_B, pdf_BN, pdf_BE, pdf_BS, pdf_BW)
136 #endif
137 for (int index = 0; index < nFluid; ++index) {
138
 // Position of PDF 'dir' of node 'index' inside src.
139 #define I(index, dir) P_INDEX_3((nCells), (index), (dir))
140
 // Load all 19 PDFs of this node from its own slots.
141 #define X(name, idx, idxinv, _x, _y, _z) JOIN(pdf_,name) = src[I(index, idx)];
142 D3Q19_LIST
143 #undef X
144
145// #define LID_DRIVEN_CAVITY
146
 // With LID_DRIVEN_CAVITY defined, nodes in the plane z == nZ - 4 whose
 // x/y lie strictly between 3 and nX - 4 / nY - 4 get a fixed velocity
 // instead of the one computed from the PDFs.
147#ifdef LID_DRIVEN_CAVITY
148 int nX = kd->Dims[0];
149 int nY = kd->Dims[1];
150 int nZ = kd->Dims[2];
151
152 int x = kdl->Coords[C_INDEX_X(index)];
153 int y = kdl->Coords[C_INDEX_Y(index)];
154 int z = kdl->Coords[C_INDEX_Z(index)];
155
156 if (z == nZ - 4 && x > 3 && x < (nX - 4) && y > 3 && y < (nY - 4)) {
157 ux = 0.1 * 0.577;
158 uy = 0.0;
159 uz = 0.0;
160 } else {
161#endif
 // Per axis: sum of the PDFs pointing in the positive direction minus
 // the sum of those pointing in the negative direction (not divided by
 // dens).
162 ux = pdf_E + pdf_NE + pdf_SE + pdf_TE + pdf_BE -
163 pdf_W - pdf_NW - pdf_SW - pdf_TW - pdf_BW;
164 uy = pdf_N + pdf_NE + pdf_NW + pdf_TN + pdf_BN -
165 pdf_S - pdf_SE - pdf_SW - pdf_TS - pdf_BS;
166 uz = pdf_T + pdf_TE + pdf_TW + pdf_TN + pdf_TS -
167 pdf_B - pdf_BE - pdf_BW - pdf_BN - pdf_BS;
168#ifdef LID_DRIVEN_CAVITY
169 }
170#endif
171
 // Sum over all 19 PDFs of the node.
172 dens = pdf_C +
173 pdf_N + pdf_E + pdf_S + pdf_W +
174 pdf_NE + pdf_SE + pdf_SW + pdf_NW +
175 pdf_T + pdf_TN + pdf_TE + pdf_TS + pdf_TW +
176 pdf_B + pdf_BN + pdf_BE + pdf_BS + pdf_BW;
177
 // Term shared by all directions: dens - 3/2 * |u|^2.
178 dir_indep_trm = dens - (ux * ux + uy * uy + uz * uz)*3.0/2.0;
179
180 // direction: w_0
181 src[I(index, D3Q19_C) ] = pdf_C - omegaEven*(pdf_C - w_0*dir_indep_trm);
182
183 // direction: w_1
184 w_1_indep = w_1*dir_indep_trm;
185
 // Pattern for every pair of opposite directions (a, b): relax the
 // symmetric and antisymmetric parts separately, then store the result
 // for a in slot b and the result for b in slot a.
186 ui = uy;
187 evenPart = omegaEven*( 0.5*(pdf_N + pdf_S) - ui*ui*w_1_nine_half - w_1_indep );
188 oddPart = omegaOdd*(0.5*(pdf_N - pdf_S) - ui*w_1_x3 );
189 src[I(index, D3Q19_S)] = pdf_N - evenPart - oddPart;
190 src[I(index, D3Q19_N)] = pdf_S - evenPart + oddPart;
191
192 ui = ux;
193 evenPart = omegaEven*( 0.5*(pdf_E + pdf_W) - ui*ui*w_1_nine_half - w_1_indep );
194 oddPart = omegaOdd*(0.5*(pdf_E - pdf_W) - ui*w_1_x3 );
195 src[I(index, D3Q19_W)] = pdf_E - evenPart - oddPart;
196 src[I(index, D3Q19_E)] = pdf_W - evenPart + oddPart;
197
198 ui = uz;
199 evenPart = omegaEven*( 0.5*(pdf_T + pdf_B) - ui*ui*w_1_nine_half - w_1_indep );
200 oddPart = omegaOdd*(0.5*(pdf_T - pdf_B) - ui*w_1_x3 );
201 src[I(index, D3Q19_B)] = pdf_T - evenPart - oddPart;
202 src[I(index, D3Q19_T)] = pdf_B - evenPart + oddPart;
203
204 // direction: w_2
205 w_2_indep = w_2*dir_indep_trm;
206
207 ui = -ux + uy;
208 evenPart = omegaEven*( 0.5*(pdf_NW + pdf_SE) - ui*ui*w_2_nine_half - w_2_indep );
209 oddPart = omegaOdd*(0.5*(pdf_NW - pdf_SE) - ui*w_2_x3 );
210 src[I(index, D3Q19_SE)] = pdf_NW - evenPart - oddPart;
211 src[I(index, D3Q19_NW)] = pdf_SE - evenPart + oddPart;
212
213 ui = ux + uy;
214 evenPart = omegaEven*( 0.5*(pdf_NE + pdf_SW) - ui*ui*w_2_nine_half - w_2_indep );
215 oddPart = omegaOdd*(0.5*(pdf_NE - pdf_SW) - ui*w_2_x3 );
216 src[I(index, D3Q19_SW)] = pdf_NE - evenPart - oddPart;
217 src[I(index, D3Q19_NE)] = pdf_SW - evenPart + oddPart;
218
219 ui = -ux + uz;
220 evenPart = omegaEven*( 0.5*(pdf_TW + pdf_BE) - ui*ui*w_2_nine_half - w_2_indep );
221 oddPart = omegaOdd*(0.5*(pdf_TW - pdf_BE) - ui*w_2_x3 );
222 src[I(index, D3Q19_BE)] = pdf_TW - evenPart - oddPart;
223 src[I(index, D3Q19_TW)] = pdf_BE - evenPart + oddPart;
224
225 ui = ux + uz;
226 evenPart = omegaEven*( 0.5*(pdf_TE + pdf_BW) - ui*ui*w_2_nine_half - w_2_indep );
227 oddPart = omegaOdd*(0.5*(pdf_TE - pdf_BW) - ui*w_2_x3 );
228 src[I(index, D3Q19_BW)] = pdf_TE - evenPart - oddPart;
229 src[I(index, D3Q19_TE)] = pdf_BW - evenPart + oddPart;
230
231 ui = -uy + uz;
232 evenPart = omegaEven*( 0.5*(pdf_TS + pdf_BN) - ui*ui*w_2_nine_half - w_2_indep );
233 oddPart = omegaOdd*(0.5*(pdf_TS - pdf_BN) - ui*w_2_x3 );
234 src[I(index, D3Q19_BN)] = pdf_TS - evenPart - oddPart;
235 src[I(index, D3Q19_TS)] = pdf_BN - evenPart + oddPart;
236
237 ui = uy + uz;
238 evenPart = omegaEven*( 0.5*(pdf_TN + pdf_BS) - ui*ui*w_2_nine_half - w_2_indep );
239 oddPart = omegaOdd*(0.5*(pdf_TN - pdf_BS) - ui*w_2_x3 );
240 src[I(index, D3Q19_BS)] = pdf_TN - evenPart - oddPart;
241 src[I(index, D3Q19_TN)] = pdf_BS - evenPart + oddPart;
242
243 } // (parallel) loop over fluid nodes
 // The macro I is not undefined here; the odd step below defines it again
 // with the same replacement text.
244
245 X_LIKWID_STOP("list-aa-ria-even");
246
247 // save current iteration
248 kdl->Iteration = iter;
249 #ifdef VERIFICATION
250 kd->PdfsActive = src;
251 KernelAddBodyForce(kd, ld, cd);
252 #endif
253
254 // --------------------------------------------------------------------
255 // odd time step
256 // --------------------------------------------------------------------
257
258 X_LIKWID_START("list-aa-ria-odd");
259
 // The node range is split manually: every thread processes the range
 // given by kdlr->FluidNodeThreadIndices for its thread id.
 // NOTE(review): presumably both per-thread index arrays were built for
 // the same number of threads this region runs with - confirm.
260 #ifdef _OPENMP
261 #pragma omp parallel default(none) \
262 shared(stderr, nFluid, nCells, kd, kdl, kdlr, adjList, omegaOdd, omegaEven, src, consecNodes, nConsecNodes) \
263 private(ux, uy, uz, dens, adjListIndex, evenPart, oddPart, dir_indep_trm, w_1_indep, w_2_indep, ui,\
264 pdf_C, \
265 pdf_N, pdf_E, pdf_S, pdf_W, \
266 pdf_NE, pdf_SE, pdf_SW, pdf_NW, \
267 pdf_T, pdf_TN, pdf_TE, pdf_TS, pdf_TW, \
268 pdf_B, pdf_BN, pdf_BE, pdf_BS, pdf_BW, \
269 ppdf_C, \
270 ppdf_N, ppdf_E, ppdf_S, ppdf_W, \
271 ppdf_NE, ppdf_SE, ppdf_SW, ppdf_NW, \
272 ppdf_T, ppdf_TN, ppdf_TE, ppdf_TS, ppdf_TW, \
273 ppdf_B, ppdf_BN, ppdf_BE, ppdf_BS, ppdf_BW, \
274 consecValue, consecIndex)
275 #endif
276 {
 // Without OpenMP the thread id stays 0, so only the range described by
 // entries 0 and 1 of FluidNodeThreadIndices is processed.
277 int threadId = 0;
278
279 #ifdef _OPENMP
280 threadId = omp_get_thread_num();
281 #endif
282
 // Start position of this thread inside consecNodes; consecValue == 0
 // forces a pointer reload at the first node of the thread's range.
283 consecIndex = kdlr->ConsecThreadIndices[threadId];
284 consecValue = 0;
285
286 int * threadIndices = kdlr->FluidNodeThreadIndices;
287
288 int nFluidThread = threadIndices[threadId + 1] - threadIndices[threadId];
289
 // Half-open node range [indexStart, indexStop) of this thread.
290 int indexStart = threadIndices[threadId];
291 int indexStop = threadIndices[threadId] + nFluidThread;
292
293 for (int index = indexStart; index < indexStop; ++index) {
294
295 #define I(index, dir) P_INDEX_3((nCells), (index), (dir))
296
 // Active path: reuse the pointers of the previous node while
 // consecValue is positive, otherwise reload them.
297#if 1
298 if (consecValue > 0) {
299 --consecValue;
300 // Increment all pdf pointers.
301 #define X(name, idx, idxinv, _x, _y, _z) ++JOIN(ppdf_,name);
302 D3Q19_LIST
303 #undef X
304 }
305 else {
306 Assert(consecIndex < nConsecNodes);
307
 // Number of following nodes that only need pointer increments.
308 consecValue = consecNodes[consecIndex] - 1;
309 // Load new pointers to PDFs of local cell:
310
311 adjListIndex = index * N_D3Q19_IDX;
312
 // Pointer for direction 'name' is taken from adjacency entry idxinv.
313 #define X(name, idx, idxinv, _x, _y, _z) JOIN(ppdf_,name) = &(src[adjList[adjListIndex + idxinv]]);
314 D3Q19_LIST_WO_C
315 #undef X
316
 // The center PDF is addressed directly, not via the adjacency list.
317 ppdf_C = &(src[P_INDEX_3(nCells, index, D3Q19_C)]);
318 ++consecIndex;
319 }
320
 // Load all 19 PDFs through the pointers.
321 #define X(name, idx, idxinv, _x, _y, _z) JOIN(pdf_,name) = *JOIN(ppdf_,name);
322 D3Q19_LIST
323 #undef X
324#else
 // Disabled path: look up pointers and PDFs in the adjacency list for
 // every node.
325 adjListIndex = index * N_D3Q19_IDX;
326
327 // Load PDFs of local cell: pdf_N = src[adjList[adjListIndex + D3Q19_S]]; ...
328 pdf_C = src[P_INDEX_3(nCells, index, D3Q19_C)];
329
330 #define X(name, idx, idxinv, _x, _y, _z) JOIN(ppdf_,name) = &(src[adjList[adjListIndex + idxinv]]);
331 D3Q19_LIST_WO_C
332 #undef X
333
334 #define X(name, idx, idxinv, _x, _y, _z) JOIN(pdf_,name) = src[adjList[adjListIndex + idxinv]];
335 D3Q19_LIST_WO_C
336 #undef X
337#endif
338
 // Same optional fixed-velocity override as in the even step.
339#ifdef LID_DRIVEN_CAVITY
340 int nX = kd->Dims[0];
341 int nY = kd->Dims[1];
342 int nZ = kd->Dims[2];
343
344 int x = kdl->Coords[C_INDEX_X(index)];
345 int y = kdl->Coords[C_INDEX_Y(index)];
346 int z = kdl->Coords[C_INDEX_Z(index)];
347
348 if (z == nZ - 4 && x > 3 && x < (nX - 4) && y > 3 && y < (nY - 4)) {
349 ux = 0.1 * 0.577;
350 uy = 0.0;
351 uz = 0.0;
352 } else {
353#endif
 // Same moment computation as in the even step.
354 ux = pdf_E + pdf_NE + pdf_SE + pdf_TE + pdf_BE -
355 pdf_W - pdf_NW - pdf_SW - pdf_TW - pdf_BW;
356 uy = pdf_N + pdf_NE + pdf_NW + pdf_TN + pdf_BN -
357 pdf_S - pdf_SE - pdf_SW - pdf_TS - pdf_BS;
358 uz = pdf_T + pdf_TE + pdf_TW + pdf_TN + pdf_TS -
359 pdf_B - pdf_BE - pdf_BW - pdf_BN - pdf_BS;
360#ifdef LID_DRIVEN_CAVITY
361 }
362#endif
363
364 dens = pdf_C +
365 pdf_N + pdf_E + pdf_S + pdf_W +
366 pdf_NE + pdf_SE + pdf_SW + pdf_NW +
367 pdf_T + pdf_TN + pdf_TE + pdf_TS + pdf_TW +
368 pdf_B + pdf_BN + pdf_BE + pdf_BS + pdf_BW;
369
370 dir_indep_trm = dens - (ux * ux + uy * uy + uz * uz)*3.0/2.0;
371
 // NOTE(review): no statement below reads adjListIndex in this
 // iteration (all stores go through ppdf_* or I()); this assignment
 // looks like a leftover - confirm before removing.
372 adjListIndex = index * N_D3Q19_IDX;
373
374 // direction: w_0
375 src[I(index, D3Q19_C) ] = pdf_C - omegaEven*(pdf_C - w_0*dir_indep_trm);
376
377 // direction: w_1
378 w_1_indep = w_1*dir_indep_trm;
379
 // Same relaxation as in the even step, but the results are written
 // through the pointers: the result for a goes to *ppdf_b and the
 // result for b to *ppdf_a.
380 ui = uy;
381 evenPart = omegaEven*( 0.5*(pdf_N + pdf_S) - ui*ui*w_1_nine_half - w_1_indep );
382 oddPart = omegaOdd*(0.5*(pdf_N - pdf_S) - ui*w_1_x3 );
383 *ppdf_S = pdf_N - evenPart - oddPart;
384 *ppdf_N = pdf_S - evenPart + oddPart;
385
386 ui = ux;
387 evenPart = omegaEven*( 0.5*(pdf_E + pdf_W) - ui*ui*w_1_nine_half - w_1_indep );
388 oddPart = omegaOdd*(0.5*(pdf_E - pdf_W) - ui*w_1_x3 );
389 *ppdf_W = pdf_E - evenPart - oddPart;
390 *ppdf_E = pdf_W - evenPart + oddPart;
391
392 ui = uz;
393 evenPart = omegaEven*( 0.5*(pdf_T + pdf_B) - ui*ui*w_1_nine_half - w_1_indep );
394 oddPart = omegaOdd*(0.5*(pdf_T - pdf_B) - ui*w_1_x3 );
395 *ppdf_B = pdf_T - evenPart - oddPart;
396 *ppdf_T = pdf_B - evenPart + oddPart;
397
398 // direction: w_2
399 w_2_indep = w_2*dir_indep_trm;
400
401 ui = -ux + uy;
402 evenPart = omegaEven*( 0.5*(pdf_NW + pdf_SE) - ui*ui*w_2_nine_half - w_2_indep );
403 oddPart = omegaOdd*(0.5*(pdf_NW - pdf_SE) - ui*w_2_x3 );
404 *ppdf_SE = pdf_NW - evenPart - oddPart;
405 *ppdf_NW = pdf_SE - evenPart + oddPart;
406
407 ui = ux + uy;
408 evenPart = omegaEven*( 0.5*(pdf_NE + pdf_SW) - ui*ui*w_2_nine_half - w_2_indep );
409 oddPart = omegaOdd*(0.5*(pdf_NE - pdf_SW) - ui*w_2_x3 );
410 *ppdf_SW = pdf_NE - evenPart - oddPart;
411 *ppdf_NE = pdf_SW - evenPart + oddPart;
412
413 ui = -ux + uz;
414 evenPart = omegaEven*( 0.5*(pdf_TW + pdf_BE) - ui*ui*w_2_nine_half - w_2_indep );
415 oddPart = omegaOdd*(0.5*(pdf_TW - pdf_BE) - ui*w_2_x3 );
416 *ppdf_BE = pdf_TW - evenPart - oddPart;
417 *ppdf_TW = pdf_BE - evenPart + oddPart;
418
419 ui = ux + uz;
420 evenPart = omegaEven*( 0.5*(pdf_TE + pdf_BW) - ui*ui*w_2_nine_half - w_2_indep );
421 oddPart = omegaOdd*(0.5*(pdf_TE - pdf_BW) - ui*w_2_x3 );
422 *ppdf_BW = pdf_TE - evenPart - oddPart;
423 *ppdf_TE = pdf_BW - evenPart + oddPart;
424
425 ui = -uy + uz;
426 evenPart = omegaEven*( 0.5*(pdf_TS + pdf_BN) - ui*ui*w_2_nine_half - w_2_indep );
427 oddPart = omegaOdd*(0.5*(pdf_TS - pdf_BN) - ui*w_2_x3 );
428 *ppdf_BN = pdf_TS - evenPart - oddPart;
429 *ppdf_TS = pdf_BN - evenPart + oddPart;
430
431 ui = uy + uz;
432 evenPart = omegaEven*( 0.5*(pdf_TN + pdf_BS) - ui*ui*w_2_nine_half - w_2_indep );
433 oddPart = omegaOdd*(0.5*(pdf_TN - pdf_BS) - ui*w_2_x3 );
434 *ppdf_BS = pdf_TN - evenPart - oddPart;
435 *ppdf_TN = pdf_BS - evenPart + oddPart;
436
437 #undef I
438 } // loop over fluid nodes
439 } // end pragma omp parallel
440
441 X_LIKWID_STOP("list-aa-ria-odd");
442
443 // save current iteration
444 kdl->Iteration = iter + 1;
445
446 #ifdef VERIFICATION
447 kd->PdfsActive = src;
448 KernelAddBodyForce(kd, ld, cd);
449 #endif
450
 // Periodic output after both steps; note that iter (not iter + 1) is
 // tested and passed on.
 // NOTE(review): assumes cd->VtkModulus is non-zero - confirm against the
 // code that fills CaseData.
451 #ifdef VTK_OUTPUT
452 if (cd->VtkOutput && (iter % cd->VtkModulus) == 0) {
453 kd->PdfsActive = src;
454 VtkWrite(ld, kd, cd, iter);
455 }
456 #endif
457
458 #ifdef STATISTICS
459 kd->PdfsActive = src;
460 KernelStatistics(kd, ld, cd, iter);
461 #endif
462
463
464 } // for (int iter = 0; ...
465
 // Optional output of the final state (tagged with maxIterations).
466#ifdef VTK_OUTPUT
467 if (cd->VtkOutput) {
468 kd->PdfsActive = src;
469 VtkWrite(ld, kd, cd, maxIterations);
470 }
471#endif
472
473#ifdef STATISTICS
474 kd->PdfsActive = src;
475 KernelStatistics(kd, ld, cd, maxIterations);
476#endif
477
478 return;
479}
This page took 0.100195 seconds and 5 git commands to generate.