add citation information
[LbmBenchmarkKernelsPublic.git] / src / Vector.h
CommitLineData
10988083
MW
1// --------------------------------------------------------------------------
2//
3// Copyright
8cafd9ea 4// Markus Wittmann, 2016-2018
10988083
MW
5// RRZE, University of Erlangen-Nuremberg, Germany
6// markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
7//
8// Viktor Haag, 2016
9// LSS, University of Erlangen-Nuremberg, Germany
10//
8cafd9ea
MW
11// Michael Hussnaetter, 2017-2018
12// University of Erlangen-Nuremberg, Germany
13// michael.hussnaetter -at- fau.de
14//
10988083
MW
15// This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
16//
17// LbmBenchKernels is free software: you can redistribute it and/or modify
18// it under the terms of the GNU General Public License as published by
19// the Free Software Foundation, either version 3 of the License, or
20// (at your option) any later version.
21//
22// LbmBenchKernels is distributed in the hope that it will be useful,
23// but WITHOUT ANY WARRANTY; without even the implied warranty of
24// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25// GNU General Public License for more details.
26//
27// You should have received a copy of the GNU General Public License
28// along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>.
29//
30// --------------------------------------------------------------------------
31#ifndef __VECTOR_H__
32#define __VECTOR_H__
33
8cafd9ea 34#if !defined(VECTOR_AVX512) && !defined(VECTOR_AVX) && !defined(VECTOR_SSE)
10988083
MW
35 #warning Defining VECTOR_AVX as no ISA extension was selected.
36 #define VECTOR_AVX
37#endif
38
8cafd9ea
MW
39#if (defined(VECTOR_AVX512) && defined(VECTOR_AVX)) || \
40 (defined(VECTOR_AVX512) && defined(VECTOR_SSE)) || \
41 (defined(VECTOR_AVX) && defined(VECTOR_SSE))
42 #error Only VECTOR_AVX512 or VECTOR_AVX or VECTOR_SSE can be defined at the same time.
10988083
MW
43#endif // Reject any pairwise combination of the three VECTOR_* ISA selectors.
44
0fde6e45
MW
45#if !defined(PRECISION_DP) && !defined(PRECISION_SP)
46 #error PRECISION_DP or PRECISION_SP must be defined.
47#endif
10988083 48
0fde6e45
MW
49#if defined(PRECISION_DP) && defined(PRECISION_SP)
50 #error Only PRECISION_DP or PRECISION_SP can be defined at the same time.
51#endif
10988083 52
0fde6e45 53#ifdef PRECISION_DP
10988083 54
8cafd9ea
MW
55 #ifdef VECTOR_AVX512
56
57 #include <immintrin.h>
58 // Vector size in double-precision floating-point numbers.
59 #define VSIZE 8
60
61 #define VPDFT __m512d
62
63 #define VSET(scalar) _mm512_set1_pd(scalar)
64 #define VSETI32(scalar) _mm256_set1_epi32(scalar)
65
66 #define VLD(expr) _mm512_load_pd(expr)
67 #define VLDU(expr) _mm512_loadu_pd(expr)
68 #define VLIU(expr) _mm256_loadu_si256((__m256i const *)(expr)) // Unaligned 256-bit integer load; expr is parenthesized so the cast applies to the whole argument expression.
69 #define VLI64(expr) _mm512_load_epi64(expr)
70
71 #define VST(dst, src) _mm512_store_pd(dst, src)
72 #define VSTU(dst, src) _mm512_storeu_pd(dst, src)
73 #define VSTNT(dst, src) _mm512_stream_pd(dst, src)
74
75 #define VG32(offsets, base, scale) _mm512_i32gather_pd(offsets, base, scale)
76 #define VG64(offsets, base, scale) _mm512_i64gather_pd(offsets, base, scale)
77
78 #define VPG32(offsets, base, scale, hint) _mm512_prefetch_i32gather_pd(offsets, base, scale, hint)
79
80 #define VS32(dst_base, dst_offsets, src, scale) _mm512_i32scatter_pd(dst_base, dst_offsets, src, scale)
81 #define VS64(dst_base, dst_offsets, src, scale) _mm512_i64scatter_pd(dst_base, dst_offsets, src, scale)
82
83 #define VPS32(dst_base, dst_offsets, scale, hint) _mm512_prefetch_i32scatter_pd(dst_base, dst_offsets, scale, hint)
84
85 #define VMUL(a, b) _mm512_mul_pd(a, b)
86 #define VADD(a, b) _mm512_add_pd(a, b)
87 #define VADDI32(a,b) _mm256_add_epi32(a,b)
88 #define VMULI32(a,b) _mm256_mul_epi32(a,b) // NOTE(review): _mm256_mul_epi32 multiplies only the low 32-bit half of each 64-bit element and yields 64-bit products; if an element-wise 32-bit product is intended, _mm256_mullo_epi32 is the likely candidate -- confirm against callers.
89 #define VSUB(a, b) _mm512_sub_pd(a, b)
90 #endif
91
0fde6e45 92 #ifdef VECTOR_AVX
10988083 93
0fde6e45
MW
94 #include <immintrin.h>
95 // Vector size in double-precision floating-point numbers.
96 #define VSIZE 4
10988083 97
0fde6e45 98 #define VPDFT __m256d
10988083 99
0fde6e45 100 #define VSET(scalar) _mm256_set1_pd(scalar)
10988083 101
0fde6e45
MW
102 #define VLD(expr) _mm256_load_pd(expr)
103 #define VLDU(expr) _mm256_loadu_pd(expr)
10988083 104
0fde6e45
MW
105 #define VST(dst, src) _mm256_store_pd(dst, src)
106 #define VSTU(dst, src) _mm256_storeu_pd(dst, src)
107 #define VSTNT(dst, src) _mm256_stream_pd(dst, src)
10988083 108
0fde6e45
MW
109 #define VMUL(a, b) _mm256_mul_pd(a, b)
110 #define VADD(a, b) _mm256_add_pd(a, b)
111 #define VSUB(a, b) _mm256_sub_pd(a, b)
112 #endif
10988083 113
0fde6e45
MW
114 #ifdef VECTOR_SSE
115 #include <emmintrin.h>
116 // Vector size in double-precision floating-point numbers.
117 #define VSIZE 2
10988083 118
0fde6e45 119 #define VPDFT __m128d
10988083 120
0fde6e45
MW
121 #define VSET(scalar) _mm_set1_pd(scalar)
122
123 #define VLD(expr) _mm_load_pd(expr)
124 #define VLDU(expr) _mm_loadu_pd(expr)
125
126 #define VST(dst, src) _mm_store_pd(dst, src)
127 #define VSTU(dst, src) _mm_storeu_pd(dst, src)
128 #define VSTNT(dst, src) _mm_stream_pd(dst, src)
129
130 #define VMUL(a, b) _mm_mul_pd(a, b)
131 #define VADD(a, b) _mm_add_pd(a, b)
132 #define VSUB(a, b) _mm_sub_pd(a, b)
133 #endif
134
135#elif defined(PRECISION_SP)
136
8cafd9ea 137 #ifdef VECTOR_AVX512
9e0051cb
MW
138
139 #include <immintrin.h>
140 // Vector size in single-precision floating-point numbers.
141 #define VSIZE 16
142
143 #define VPDFT __m512
144
145 #define VSET(scalar) _mm512_set1_ps(scalar)
146 // #define VSETI32(scalar) _mm256_set1_epi32(scalar)
147
148 #define VLD(expr) _mm512_load_ps(expr)
149 #define VLDU(expr) _mm512_loadu_ps(expr)
150 #define VLIU(expr) _mm256_loadu_si256((__m256i const *)(expr)) // Unaligned 256-bit integer load; expr is parenthesized so the cast applies to the whole argument expression.
151 // #define VLI64(expr) _mm512_load_epi64(expr)
152
153 #define VST(dst, src) _mm512_store_ps(dst, src)
154 #define VSTU(dst, src) _mm512_storeu_ps(dst, src)
155 #define VSTNT(dst, src) _mm512_stream_ps(dst, src)
156
157 // #define VG32(offsets, base, scale) _mm512_i32gather_pd(offsets, base, scale)
158 // #define VG64(offsets, base, scale) _mm512_i64gather_pd(offsets, base, scale)
159
160 // #define VPG32(offsets, base, scale, hint) _mm512_prefetch_i32gather_pd(offsets, base, scale, hint)
161
162 // #define VS32(dst_base, dst_offsets, src, scale) _mm512_i32scatter_pd(dst_base, dst_offsets, src, scale)
163 // #define VS64(dst_base, dst_offsets, src, scale) _mm512_i64scatter_pd(dst_base, dst_offsets, src, scale)
164
165 // #define VPS32(dst_base, dst_offsets, scale, hint) _mm512_prefetch_i32scatter_pd(dst_base, dst_offsets, scale, hint)
166
167 #define VMUL(a, b) _mm512_mul_ps(a, b)
168 #define VADD(a, b) _mm512_add_ps(a, b)
169 // #define VADDI32(a,b) _mm256_add_epi32(a,b)
170 // #define VMULI32(a,b) _mm256_mul_epi32(a,b)
171 #define VSUB(a, b) _mm512_sub_ps(a, b)
8cafd9ea
MW
172 #endif
173
0fde6e45
MW
174 #ifdef VECTOR_AVX
175
176 #include <immintrin.h>
177 // Vector size in single-precision floating-point numbers.
178 #define VSIZE 8
179
180 #define VPDFT __m256
181
182 #define VSET(scalar) _mm256_set1_ps(scalar)
183
184 #define VLD(expr) _mm256_load_ps(expr)
185 #define VLDU(expr) _mm256_loadu_ps(expr)
186
187 #define VST(dst, src) _mm256_store_ps(dst, src)
188 #define VSTU(dst, src) _mm256_storeu_ps(dst, src)
189 #define VSTNT(dst, src) _mm256_stream_ps(dst, src)
190
191 #define VMUL(a, b) _mm256_mul_ps(a, b)
192 #define VADD(a, b) _mm256_add_ps(a, b)
193 #define VSUB(a, b) _mm256_sub_ps(a, b)
194 #endif
195
196 #ifdef VECTOR_SSE
197 #include <emmintrin.h>
198 // Vector size in single-precision floating-point numbers.
199 #define VSIZE 4
200
201 #define VPDFT __m128
202
203 #define VSET(scalar) _mm_set1_ps(scalar)
204
205 #define VLD(expr) _mm_load_ps(expr)
206 #define VLDU(expr) _mm_loadu_ps(expr)
207
208 #define VST(dst, src) _mm_store_ps(dst, src)
209 #define VSTU(dst, src) _mm_storeu_ps(dst, src)
210 #define VSTNT(dst, src) _mm_stream_ps(dst, src)
211
212 #define VMUL(a, b) _mm_mul_ps(a, b)
213 #define VADD(a, b) _mm_add_ps(a, b)
214 #define VSUB(a, b) _mm_sub_ps(a, b)
215 #endif
10988083 216
0fde6e45 217#endif // PRECISION
10988083
MW
218
219#endif // __VECTOR_H__
This page took 0.133387 seconds and 5 git commands to generate.