Commit | Line | Data |
---|---|---|
10988083 MW |
1 | // -------------------------------------------------------------------------- |
2 | // | |
3 | // Copyright | |
4 | // Markus Wittmann, 2016-2017 | |
5 | // RRZE, University of Erlangen-Nuremberg, Germany | |
6 | // markus.wittmann -at- fau.de or hpc -at- rrze.fau.de | |
7 | // | |
8 | // Viktor Haag, 2016 | |
9 | // LSS, University of Erlangen-Nuremberg, Germany | |
10 | // | |
11 | // This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels). | |
12 | // | |
13 | // LbmBenchKernels is free software: you can redistribute it and/or modify | |
14 | // it under the terms of the GNU General Public License as published by | |
15 | // the Free Software Foundation, either version 3 of the License, or | |
16 | // (at your option) any later version. | |
17 | // | |
18 | // LbmBenchKernels is distributed in the hope that it will be useful, | |
19 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
20 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
21 | // GNU General Public License for more details. | |
22 | // | |
23 | // You should have received a copy of the GNU General Public License | |
24 | // along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>. | |
25 | // | |
26 | // -------------------------------------------------------------------------- | |
#ifndef LBMBENCHKERNELS_VECTOR_H
#define LBMBENCHKERNELS_VECTOR_H

// ISA-neutral SIMD wrappers for double-precision data.
//
// Exactly one of VECTOR_AVX or VECTOR_SSE selects the instruction set the
// macros below expand to. The selector is expected to be defined before this
// header is included (e.g. via -DVECTOR_SSE); if none is given, VECTOR_AVX is
// chosen and a compile-time warning is emitted.
//
// Provided macros (identical names and arity for both instruction sets):
//
//   VSIZE            number of doubles held by one vector
//   VPDFT            vector type holding VSIZE doubles
//   VSET(scalar)     vector with all lanes set to scalar
//   VLD(expr)        load from expr;  expr must be aligned to the vector size
//   VLDU(expr)       load from expr;  no alignment requirement
//   VST(dst, src)    store src to dst; dst must be aligned to the vector size
//   VSTU(dst, src)   store src to dst; no alignment requirement
//   VSTNT(dst, src)  non-temporal (streaming) store; dst must be aligned to
//                    the vector size
//   VMUL/VADD/VSUB   lane-wise a * b, a + b, a - b
//
// "Aligned to the vector size" means VSIZE * sizeof(double) bytes, i.e.
// 32 bytes for AVX and 16 bytes for SSE.

// Selecting both instruction sets at once is a configuration error.
#if defined(VECTOR_AVX) && defined(VECTOR_SSE)
#error Only VECTOR_AVX or VECTOR_SSE can be defined at the same time.
#endif

// Fall back to AVX when the build did not select an instruction set.
#if !defined(VECTOR_AVX) && !defined(VECTOR_SSE)
#warning Defining VECTOR_AVX as no ISA extension was selected.
#define VECTOR_AVX
#endif

#if defined(VECTOR_AVX)

#include <immintrin.h>

// Vector size in double-precision floating-point numbers.
#define VSIZE 4

#define VPDFT __m256d

#define VSET(scalar)    _mm256_set1_pd(scalar)

#define VLD(expr)       _mm256_load_pd(expr)
#define VLDU(expr)      _mm256_loadu_pd(expr)

#define VST(dst, src)   _mm256_store_pd(dst, src)
#define VSTU(dst, src)  _mm256_storeu_pd(dst, src)
#define VSTNT(dst, src) _mm256_stream_pd(dst, src)

#define VMUL(a, b)      _mm256_mul_pd(a, b)
#define VADD(a, b)      _mm256_add_pd(a, b)
#define VSUB(a, b)      _mm256_sub_pd(a, b)

#elif defined(VECTOR_SSE)

#include <emmintrin.h>

// Vector size in double-precision floating-point numbers.
#define VSIZE 2

#define VPDFT __m128d

#define VSET(scalar)    _mm_set1_pd(scalar)

#define VLD(expr)       _mm_load_pd(expr)
#define VLDU(expr)      _mm_loadu_pd(expr)

#define VST(dst, src)   _mm_store_pd(dst, src)
#define VSTU(dst, src)  _mm_storeu_pd(dst, src)
#define VSTNT(dst, src) _mm_stream_pd(dst, src)

#define VMUL(a, b)      _mm_mul_pd(a, b)
#define VADD(a, b)      _mm_add_pd(a, b)
#define VSUB(a, b)      _mm_sub_pd(a, b)

#endif // VECTOR_AVX / VECTOR_SSE

#endif // LBMBENCHKERNELS_VECTOR_H