Commit | Line | Data |
---|---|---|
8cafd9ea MW |
1 | // -------------------------------------------------------------------------- |
2 | // | |
3 | // Copyright | |
4 | // Markus Wittmann, 2016-2017 | |
5 | // RRZE, University of Erlangen-Nuremberg, Germany | |
6 | // markus.wittmann -at- fau.de or hpc -at- rrze.fau.de | |
7 | // | |
8 | // Viktor Haag, 2016 | |
9 | // LSS, University of Erlangen-Nuremberg, Germany | |
10 | // | |
11 | // Michael Hussnaetter, 2017-2018 | |
12 | // University of Erlangen-Nuremberg, Germany | |
13 | // michael.hussnaetter -at- fau.de | |
14 | // | |
15 | // This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels). | |
16 | // | |
17 | // LbmBenchKernels is free software: you can redistribute it and/or modify | |
18 | // it under the terms of the GNU General Public License as published by | |
19 | // the Free Software Foundation, either version 3 of the License, or | |
20 | // (at your option) any later version. | |
21 | // | |
22 | // LbmBenchKernels is distributed in the hope that it will be useful, | |
23 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
24 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
25 | // GNU General Public License for more details. | |
26 | // | |
27 | // You should have received a copy of the GNU General Public License | |
28 | // along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>. | |
29 | // | |
30 | // -------------------------------------------------------------------------- | |
31 | #ifndef __BENCH_KERNEL_D3Q19_LIST_AA_PV_GATHER_HYBRID_COMMON_H__ | |
32 | #define __BENCH_KERNEL_D3Q19_LIST_AA_PV_GATHER_HYBRID_COMMON_H__ | |
33 | ||
34 | #if !defined(DATA_LAYOUT_SOA) && !defined(DATA_LAYOUT_AOSOA) | |
35 | #error List Gather Hybrid works only with DATA_LAYOUT_SOA or DATA_LAYOUT_AOSOA | |
36 | #endif | |
37 | ||
38 | #include "BenchKernelD3Q19ListAaCommon.h" | |
39 | ||
40 | typedef struct KernelDataListRia_ { | |
41 | KernelDataList kdl; | |
42 | ||
43 | // Loop start indices, stored consecutively for every thread using the following scheme: | |
44 | // scalar peel start | (vectorized load store | vectorized gather scatter) ... | scalar remainder. | |
45 | // Example for 3 threads, with ! indicating thread boundaries: | |
46 | // [sp, vls, vgs, ..., vls, vgs, sr ! sp, vls, vgs, ..., vls, vgs, sr ! sp, vls, vgs, ..., vls, vgs, sr] | |
47 | int * loopStartIndices; | |
48 | int nLoopStartIndices; // Number of entries in loopStartIndices array. | |
49 | ||
50 | // Array contains (for each thread) an index into loopStartIndices. | |
51 | int * oddKernelThreadStartIndices; | |
52 | // Number of entries in oddKernelThreadStartIndices. | |
53 | int nOddKernelThreadStartIndices; | |
54 | | |
55 | } KernelDataListRia; | |
56 | ||
57 | // Macro for casting KernelData * to KernelDataListRia *. | |
58 | #define KDLR(_x_) ((KernelDataListRia *)(_x_)) | |
59 | ||
60 | ||
61 | #endif // __BENCH_KERNEL_D3Q19_LIST_AA_PV_GATHER_HYBRID_COMMON_H__ | |
62 |