| 1 | // -------------------------------------------------------------------------- |
| 2 | // |
| 3 | // Copyright |
| 4 | // Markus Wittmann, 2016-2017 |
| 5 | // RRZE, University of Erlangen-Nuremberg, Germany |
| 6 | // markus.wittmann -at- fau.de or hpc -at- rrze.fau.de |
| 7 | // |
| 8 | // Viktor Haag, 2016 |
| 9 | // LSS, University of Erlangen-Nuremberg, Germany |
| 10 | // |
| 11 | // Michael Hussnaetter, 2017-2018 |
| 12 | // University of Erlangen-Nuremberg, Germany |
| 13 | // michael.hussnaetter -at- fau.de |
| 14 | // |
| 15 | // This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels). |
| 16 | // |
| 17 | // LbmBenchKernels is free software: you can redistribute it and/or modify |
| 18 | // it under the terms of the GNU General Public License as published by |
| 19 | // the Free Software Foundation, either version 3 of the License, or |
| 20 | // (at your option) any later version. |
| 21 | // |
| 22 | // LbmBenchKernels is distributed in the hope that it will be useful, |
| 23 | // but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 24 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 25 | // GNU General Public License for more details. |
| 26 | // |
| 27 | // You should have received a copy of the GNU General Public License |
| 28 | // along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>. |
| 29 | // |
| 30 | // -------------------------------------------------------------------------- |
| 31 | #ifndef __BENCH_KERNEL_D3Q19_LIST_AA_PV_GATHER_HYBRID_COMMON_H__ |
| 32 | #define __BENCH_KERNEL_D3Q19_LIST_AA_PV_GATHER_HYBRID_COMMON_H__ |
| 33 | |
| 34 | #if !defined(DATA_LAYOUT_SOA) && !defined(DATA_LAYOUT_AOSOA) |
| 35 | #error List Gather Hybrid works only with DATA_LAYOUT_SOA or DATA_LAYOUT_AOSOA |
| 36 | #endif |
| 37 | |
| 38 | #include "BenchKernelD3Q19ListAaCommon.h" |
| 39 | |
| 40 | typedef struct KernelDataListRia_ { |
| 41 | KernelDataList kdl; |
| 42 | |
| 43 | // Array contains information for loop start indices with the following scheme for every thread: |
| 44 | // scalar peel start | (vectorized load store | vectorized gather scatter) ... | scalar remainder. |
| 45 | // Example for 3 threads with ! indicating thread boundaries |
| 46 | // [sp,vls,vgs,...,vls,vgs,sr ! sp, vls, vgs, ..., vls, vgs, sr ! sp, vls, vgs, ..., vls, vgs, sr] |
| 47 | int * loopStartIndices; |
| 48 | int nLoopStartIndices; // Number of entries in loopStartIndices array. |
| 49 | |
| 50 | // Array contains (for each thread) an index into loopStartIndices. |
| 51 | int * oddKernelThreadStartIndices; |
| 52 | // Number of entries in threadStartIndices |
| 53 | int nOddKernelThreadStartIndices; |
| 54 | |
| 55 | } KernelDataListRia; |
| 56 | |
// Convenience cast: reinterprets the given pointer (e.g. a KernelData *)
// as a KernelDataListRia *. No type or validity checking is performed.
#define KDLR(kd_) ((KernelDataListRia *)(kd_))
| 59 | |
| 60 | |
| 61 | #endif // __BENCH_KERNEL_D3Q19_LIST_AA_PV_GATHER_HYBRID_COMMON_H__ |
| 62 | |