Commit | Line | Data |
---|---|---|
10988083 MW |
1 | // -------------------------------------------------------------------------- |
2 | // | |
3 | // Copyright | |
4 | // Markus Wittmann, 2016-2017 | |
5 | // RRZE, University of Erlangen-Nuremberg, Germany | |
6 | // markus.wittmann -at- fau.de or hpc -at- rrze.fau.de | |
7 | // | |
8 | // Viktor Haag, 2016 | |
9 | // LSS, University of Erlangen-Nuremberg, Germany | |
10 | // | |
11 | // This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels). | |
12 | // | |
13 | // LbmBenchKernels is free software: you can redistribute it and/or modify | |
14 | // it under the terms of the GNU General Public License as published by | |
15 | // the Free Software Foundation, either version 3 of the License, or | |
16 | // (at your option) any later version. | |
17 | // | |
18 | // LbmBenchKernels is distributed in the hope that it will be useful, | |
19 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
20 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
21 | // GNU General Public License for more details. | |
22 | // | |
23 | // You should have received a copy of the GNU General Public License | |
24 | // along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>. | |
25 | // | |
26 | // -------------------------------------------------------------------------- | |
27 | ||
28 | #ifndef INDEX_START | |
29 | #error INDEX_START must be defined | |
30 | #endif | |
31 | ||
32 | #ifndef INDEX_STOP | |
33 | #error INDEX_STOP must be defined | |
34 | #endif | |
35 | ||
36 | #define I(index, dir) P_INDEX_3((nCells), (index), (dir)) | |
37 | ||
38 | for (int blockedIndex = (INDEX_START); blockedIndex < (INDEX_STOP); blockedIndex += nTmpArray) { | |
39 | ||
40 | indexMax = MinI(nTmpArray, (INDEX_STOP) - blockedIndex); | |
41 | #ifdef DEBUG | |
42 | memset(tmpArray, -1, sizeof(PdfT) * nTmpArray * N_TMP); | |
43 | #endif | |
44 | for (int index = 0; index < indexMax; ++index) { | |
45 | ||
46 | ||
47 | adjListIndex = (index + blockedIndex) * N_D3Q19_IDX; | |
48 | ||
49 | pdf_C = src[I(index + blockedIndex, D3Q19_C)]; | |
50 | ||
51 | #define X(name, idx, idxinv, _x, _y, _z) JOIN(pdf_,name) = src[adjList[adjListIndex + idx]]; tmpArray[TMP_INDEX(index, idx)] = JOIN(pdf_,name); | |
52 | D3Q19_LIST_WO_C | |
53 | #undef X | |
54 | ||
55 | ux = pdf_E + pdf_NE + pdf_SE + pdf_TE + pdf_BE - | |
56 | pdf_W - pdf_NW - pdf_SW - pdf_TW - pdf_BW; | |
57 | uy = pdf_N + pdf_NE + pdf_NW + pdf_TN + pdf_BN - | |
58 | pdf_S - pdf_SE - pdf_SW - pdf_TS - pdf_BS; | |
59 | uz = pdf_T + pdf_TE + pdf_TW + pdf_TN + pdf_TS - | |
60 | pdf_B - pdf_BE - pdf_BW - pdf_BN - pdf_BS; | |
61 | ||
62 | tmpArray[TMP_INDEX(index, TMP_UX)] = ux; | |
63 | tmpArray[TMP_INDEX(index, TMP_UY)] = uy; | |
64 | tmpArray[TMP_INDEX(index, TMP_UZ)] = uz; | |
65 | ||
66 | dens = pdf_C + | |
67 | pdf_N + pdf_E + pdf_S + pdf_W + | |
68 | pdf_NE + pdf_SE + pdf_SW + pdf_NW + | |
69 | pdf_T + pdf_TN + pdf_TE + pdf_TS + pdf_TW + | |
70 | pdf_B + pdf_BN + pdf_BE + pdf_BS + pdf_BW; | |
71 | ||
72 | dir_indep_trm = dens - (ux * ux + uy * uy + uz * uz) * 3.0 / 2.0; | |
73 | ||
74 | w_1_indep = w_1 * dir_indep_trm; | |
75 | w_2_indep = w_2 * dir_indep_trm; | |
76 | ||
77 | tmpArray[TMP_INDEX(index, TMP_W1)] = w_1_indep; | |
78 | tmpArray[TMP_INDEX(index, TMP_W2)] = w_2_indep; | |
79 | ||
80 | dst[I(index + blockedIndex, D3Q19_C )] = pdf_C - omegaEven * (pdf_C - w_0 * dir_indep_trm); | |
81 | } | |
82 | ||
83 | ||
84 | #define LOOP_1(_dir1, _dir2, _vel, _vel_tmp) \ | |
85 | for (int index = 0; index < indexMax; ++index) { \ | |
86 | _vel = tmpArray[TMP_INDEX(index, JOIN(TMP_,_vel_tmp))]; \ | |
87 | JOIN(pdf_,_dir1) = tmpArray[TMP_INDEX(index, JOIN(D3Q19_,_dir1))]; \ | |
88 | JOIN(pdf_,_dir2) = tmpArray[TMP_INDEX(index, JOIN(D3Q19_,_dir2))]; \ | |
89 | w_1_indep = tmpArray[TMP_INDEX(index, TMP_W1)]; \ | |
90 | \ | |
91 | ui = _vel; \ | |
92 | evenPart = omegaEven * (0.5 * (JOIN(pdf_,_dir1) + JOIN(pdf_,_dir2)) - ui * ui * w_1_nine_half - w_1_indep); \ | |
93 | oddPart = omegaOdd * (0.5 * (JOIN(pdf_,_dir1) - JOIN(pdf_,_dir2)) - ui * w_1_x3); \ | |
94 | dst[I(index + blockedIndex, JOIN(D3Q19_,_dir1) )] = JOIN(pdf_,_dir1) - evenPart - oddPart; \ | |
95 | tmpArray[TMP_INDEX(index, JOIN(D3Q19_,_dir2))] = JOIN(pdf_,_dir2) - evenPart + oddPart; \ | |
96 | } \ | |
97 | for (int index = 0; index < indexMax; ++index) { \ | |
98 | dst[I(index + blockedIndex, JOIN(D3Q19_,_dir2) )] = tmpArray[TMP_INDEX(index, JOIN(D3Q19_,_dir2))]; \ | |
99 | } | |
100 | ||
101 | #define LOOP_2(_dir1, _dir2, _v1, _v2, _v1_tmp, _v2_tmp, _expr) \ | |
102 | for (int index = 0; index < indexMax; ++index) { \ | |
103 | _v1 = tmpArray[TMP_INDEX(index, JOIN(TMP_,_v1_tmp))]; \ | |
104 | _v2 = tmpArray[TMP_INDEX(index, JOIN(TMP_,_v2_tmp))]; \ | |
105 | JOIN(pdf_,_dir1) = tmpArray[TMP_INDEX(index, JOIN(D3Q19_,_dir1))]; \ | |
106 | JOIN(pdf_,_dir2) = tmpArray[TMP_INDEX(index, JOIN(D3Q19_,_dir2))]; \ | |
107 | w_2_indep = tmpArray[TMP_INDEX(index, TMP_W2)]; \ | |
108 | \ | |
109 | ui = _expr; \ | |
110 | evenPart = omegaEven * (0.5 * (JOIN(pdf_,_dir1) + JOIN(pdf_,_dir2)) - ui * ui * w_2_nine_half - w_2_indep); \ | |
111 | oddPart = omegaOdd * (0.5 * (JOIN(pdf_,_dir1) - JOIN(pdf_,_dir2)) - ui * w_2_x3); \ | |
112 | dst[I(index + blockedIndex, JOIN(D3Q19_,_dir1))] = JOIN(pdf_,_dir1) - evenPart - oddPart; \ | |
113 | tmpArray[TMP_INDEX(index, JOIN(D3Q19_,_dir2))] = JOIN(pdf_,_dir2) - evenPart + oddPart; \ | |
114 | } \ | |
115 | for (int index = 0; index < indexMax; ++index) { \ | |
116 | dst[I(index + blockedIndex, JOIN(D3Q19_,_dir2) )] = tmpArray[TMP_INDEX(index, JOIN(D3Q19_,_dir2))]; \ | |
117 | } | |
118 | ||
119 | LOOP_1(N, S, uy, UY); | |
120 | LOOP_1(E, W, ux, UX); | |
121 | LOOP_1(T, B, uz, UZ); | |
122 | ||
123 | LOOP_2(NW, SE, uy, ux, UY, UX, uy - ux); | |
124 | LOOP_2(NE, SW, uy, ux, UY, UX, uy + ux); | |
125 | LOOP_2(TW, BE, ux, uz, UX, UZ, uz - ux); | |
126 | LOOP_2(TE, BW, ux, uz, UX, UZ, uz + ux); | |
127 | LOOP_2(TS, BN, uy, uz, UY, UZ, uz - uy); | |
128 | LOOP_2(TN, BS, uy, uz, UY, UZ, uz + uy); | |
129 | ||
130 | #undef LOOP_1 | |
131 | #undef LOOP_2 | |
132 | ||
133 | ||
134 | } | |
135 | ||
136 | #undef I | |
137 | ||
138 | #undef INDEX_START | |
139 | #undef INDEX_STOP |