Commit | Line | Data |
---|---|---|
10988083 MW |
1 | // -------------------------------------------------------------------------- |
2 | // | |
3 | // Copyright | |
4 | // Markus Wittmann, 2016-2017 | |
5 | // RRZE, University of Erlangen-Nuremberg, Germany | |
6 | // markus.wittmann -at- fau.de or hpc -at- rrze.fau.de | |
7 | // | |
8 | // Viktor Haag, 2016 | |
9 | // LSS, University of Erlangen-Nuremberg, Germany | |
10 | // | |
11 | // This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels). | |
12 | // | |
13 | // LbmBenchKernels is free software: you can redistribute it and/or modify | |
14 | // it under the terms of the GNU General Public License as published by | |
15 | // the Free Software Foundation, either version 3 of the License, or | |
16 | // (at your option) any later version. | |
17 | // | |
18 | // LbmBenchKernels is distributed in the hope that it will be useful, | |
19 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
20 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
21 | // GNU General Public License for more details. | |
22 | // | |
23 | // You should have received a copy of the GNU General Public License | |
24 | // along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>. | |
25 | // | |
26 | // -------------------------------------------------------------------------- | |
27 | #ifndef __VECTOR_H__ | |
28 | #define __VECTOR_H__ | |
29 | ||
30 | #if !defined(VECTOR_AVX) && !defined(VECTOR_SSE) | |
31 | #warning Defining VECTOR_AVX as no ISA extension was selected. | |
32 | #define VECTOR_AVX | |
33 | #endif | |
34 | ||
35 | #if defined(VECTOR_AVX) && defined(VECTOR_SSE) | |
36 | #error Only VECTOR_AVX or VECTOR_SSE can be defined at the same time. | |
37 | #endif | |
38 | ||
0fde6e45 MW |
39 | #if !defined(PRECISION_DP) && !defined(PRECISION_SP) |
40 | #error PRECISION_DP or PRECISION_SP must be defined. | |
41 | #endif | |
10988083 | 42 | |
0fde6e45 MW |
43 | #if defined(PRECISION_DP) && defined(PRECISION_SP) |
44 | #error Only PRECISION_DP or PRECISION_SP can be defined at the same time. | |
45 | #endif | |
10988083 | 46 | |
0fde6e45 | 47 | #ifdef PRECISION_DP |
10988083 | 48 | |
0fde6e45 | 49 | #ifdef VECTOR_AVX |
10988083 | 50 | |
0fde6e45 MW |
51 | #include <immintrin.h> |
52 | // Vector size in double-precision floating-point numbers. | |
53 | #define VSIZE 4 | |
10988083 | 54 | |
0fde6e45 | 55 | #define VPDFT __m256d |
10988083 | 56 | |
0fde6e45 | 57 | #define VSET(scalar) _mm256_set1_pd(scalar) |
10988083 | 58 | |
0fde6e45 MW |
59 | #define VLD(expr) _mm256_load_pd(expr) |
60 | #define VLDU(expr) _mm256_loadu_pd(expr) | |
10988083 | 61 | |
0fde6e45 MW |
62 | #define VST(dst, src) _mm256_store_pd(dst, src) |
63 | #define VSTU(dst, src) _mm256_storeu_pd(dst, src) | |
64 | #define VSTNT(dst, src) _mm256_stream_pd(dst, src) | |
10988083 | 65 | |
0fde6e45 MW |
66 | #define VMUL(a, b) _mm256_mul_pd(a, b) |
67 | #define VADD(a, b) _mm256_add_pd(a, b) | |
68 | #define VSUB(a, b) _mm256_sub_pd(a, b) | |
69 | #endif | |
10988083 | 70 | |
0fde6e45 MW |
71 | #ifdef VECTOR_SSE |
72 | #include <emmintrin.h> | |
73 | // Vector size in double-precision floating-point numbers. | |
74 | #define VSIZE 2 | |
10988083 | 75 | |
0fde6e45 | 76 | #define VPDFT __m128d |
10988083 | 77 | |
0fde6e45 MW |
78 | #define VSET(scalar) _mm_set1_pd(scalar) |
79 | ||
80 | #define VLD(expr) _mm_load_pd(expr) | |
81 | #define VLDU(expr) _mm_loadu_pd(expr) | |
82 | ||
83 | #define VST(dst, src) _mm_store_pd(dst, src) | |
84 | #define VSTU(dst, src) _mm_storeu_pd(dst, src) | |
85 | #define VSTNT(dst, src) _mm_stream_pd(dst, src) | |
86 | ||
87 | #define VMUL(a, b) _mm_mul_pd(a, b) | |
88 | #define VADD(a, b) _mm_add_pd(a, b) | |
89 | #define VSUB(a, b) _mm_sub_pd(a, b) | |
90 | #endif | |
91 | ||
92 | #elif defined(PRECISION_SP) | |
93 | ||
94 | #ifdef VECTOR_AVX | |
95 | ||
96 | #include <immintrin.h> | |
97 | // Vector size in single-precision floating-point numbers. | |
98 | #define VSIZE 8 | |
99 | ||
100 | #define VPDFT __m256 | |
101 | ||
102 | #define VSET(scalar) _mm256_set1_ps(scalar) | |
103 | ||
104 | #define VLD(expr) _mm256_load_ps(expr) | |
105 | #define VLDU(expr) _mm256_loadu_ps(expr) | |
106 | ||
107 | #define VST(dst, src) _mm256_store_ps(dst, src) | |
108 | #define VSTU(dst, src) _mm256_storeu_ps(dst, src) | |
109 | #define VSTNT(dst, src) _mm256_stream_ps(dst, src) | |
110 | ||
111 | #define VMUL(a, b) _mm256_mul_ps(a, b) | |
112 | #define VADD(a, b) _mm256_add_ps(a, b) | |
113 | #define VSUB(a, b) _mm256_sub_ps(a, b) | |
114 | #endif | |
115 | ||
116 | #ifdef VECTOR_SSE | |
117 | #include <emmintrin.h> | |
118 | // Vector size in single-precision floating-point numbers. | |
119 | #define VSIZE 4 | |
120 | ||
121 | #define VPDFT __m128 | |
122 | ||
123 | #define VSET(scalar) _mm_set1_ps(scalar) | |
124 | ||
125 | #define VLD(expr) _mm_load_ps(expr) | |
126 | #define VLDU(expr) _mm_loadu_ps(expr) | |
127 | ||
128 | #define VST(dst, src) _mm_store_ps(dst, src) | |
129 | #define VSTU(dst, src) _mm_storeu_ps(dst, src) | |
130 | #define VSTNT(dst, src) _mm_stream_ps(dst, src) | |
131 | ||
132 | #define VMUL(a, b) _mm_mul_ps(a, b) | |
133 | #define VADD(a, b) _mm_add_ps(a, b) | |
134 | #define VSUB(a, b) _mm_sub_ps(a, b) | |
135 | #endif | |
10988083 | 136 | |
0fde6e45 | 137 | #endif // PRECISION |
10988083 MW |
138 | |
139 | #endif // __VECTOR_H__ |