1 // --------------------------------------------------------------------------
4 // Markus Wittmann, 2016-2017
5 // RRZE, University of Erlangen-Nuremberg, Germany
6 // markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
9 // LSS, University of Erlangen-Nuremberg, Germany
11 // Michael Hussnaetter, 2017-2018
12 // University of Erlangen-Nuremberg, Germany
13 // michael.hussnaetter -at- fau.de
15 // This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
17 // LbmBenchKernels is free software: you can redistribute it and/or modify
18 // it under the terms of the GNU General Public License as published by
19 // the Free Software Foundation, either version 3 of the License, or
20 // (at your option) any later version.
22 // LbmBenchKernels is distributed in the hope that it will be useful,
23 // but WITHOUT ANY WARRANTY; without even the implied warranty of
24 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 // GNU General Public License for more details.
27 // You should have received a copy of the GNU General Public License
28 // along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>.
30 // --------------------------------------------------------------------------
31 #ifndef __BENCH_KERNEL_D3Q19_LIST_AA_PV_GATHER_HYBRID_COMMON_H__
32 #define __BENCH_KERNEL_D3Q19_LIST_AA_PV_GATHER_HYBRID_COMMON_H__
34 #if !defined(DATA_LAYOUT_SOA) && !defined(DATA_LAYOUT_AOSOA)
35 #error List Gather Hybrid works only with DATA_LAYOUT_SOA or DATA_LAYOUT_AOSOA
38 #include "BenchKernelD3Q19ListAaCommon.h"
40 typedef struct KernelDataListRia_ {
43 // Array contains information for loop start indices with the following scheme for every thread:
44 // scalar peel start | (vectorized load store | vectorized gather scatter) ... | scalar remainder.
45 // Example for 3 threads with ! indicating thread boundaries
46 // [sp,vls,vgs,...,vls,vgs,sr ! sp, vls, vgs, ..., vls, vgs, sr ! sp, vls, vgs, ..., vls, vgs, sr]
47 int * loopStartIndices;
48 int nLoopStartIndices; // Number of entries in loopStartIndices array.
50 // Array contains (for each thread) an index into loopStartIndices.
51 int * oddKernelThreadStartIndices;
// Number of entries in the oddKernelThreadStartIndices array.
53 int nOddKernelThreadStartIndices;
// Convenience cast: views a generic kernel data pointer (KernelData *) as a
// KernelDataListRia *. The argument is parenthesized in the expansion, so an
// arbitrary pointer expression may be passed.
#define KDLR(kd) ((KernelDataListRia *)(kd))
61 #endif // __BENCH_KERNEL_D3Q19_LIST_AA_PV_GATHER_HYBRID_COMMON_H__