af12f77868f6da4d3796cdf1f4e8654e3fcfdb3b
[LbmBenchmarkKernelsPublic.git] / src / Vector.h
1 // --------------------------------------------------------------------------
2 //
3 // Copyright
4 //   Markus Wittmann, 2016-2017
5 //   RRZE, University of Erlangen-Nuremberg, Germany
6 //   markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
7 //
8 //   Viktor Haag, 2016
9 //   LSS, University of Erlangen-Nuremberg, Germany
10 //
11 //  This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
12 //
13 //  LbmBenchKernels is free software: you can redistribute it and/or modify
14 //  it under the terms of the GNU General Public License as published by
15 //  the Free Software Foundation, either version 3 of the License, or
16 //  (at your option) any later version.
17 //
18 //  LbmBenchKernels is distributed in the hope that it will be useful,
19 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
20 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21 //  GNU General Public License for more details.
22 //
23 //  You should have received a copy of the GNU General Public License
24 //  along with LbmBenchKernels.  If not, see <http://www.gnu.org/licenses/>.
25 //
26 // --------------------------------------------------------------------------
27 #ifndef __VECTOR_H__
28 #define __VECTOR_H__
29
30 #if !defined(VECTOR_AVX) && !defined(VECTOR_SSE)  // No ISA extension selected: warn and fall back to AVX.
31         #warning Defining VECTOR_AVX as no ISA extension was selected.
32         #define VECTOR_AVX
33 #endif
34
35 #if defined(VECTOR_AVX) && defined(VECTOR_SSE)  // The two ISA selections are mutually exclusive.
36         #error Only VECTOR_AVX or VECTOR_SSE can be defined at the same time.
37 #endif
38
39 #if !defined(PRECISION_DP) && !defined(PRECISION_SP)  // Unlike the ISA, the precision has no default here.
40         #error PRECISION_DP or PRECISION_SP must be defined.
41 #endif
42
43 #if defined(PRECISION_DP) && defined(PRECISION_SP)  // The two precision selections are mutually exclusive.
44         #error Only PRECISION_DP or PRECISION_SP can be defined at the same time.
45 #endif
46
47 #ifdef PRECISION_DP
48
49         #ifdef VECTOR_AVX  // AVX, double precision: every macro below maps to one _mm256_*_pd intrinsic.
50
51                 #include <immintrin.h>
52                 // Vector size in double-precision floating-point numbers.
53                 #define VSIZE   4
54
55                 #define VPDFT                           __m256d  // Vector type operated on by the macros below.
56
57                 #define VSET(scalar)            _mm256_set1_pd(scalar)  // Broadcast scalar to all lanes.
58
59                 #define VLD(expr)                       _mm256_load_pd(expr)  // Load; address must be 32-byte aligned.
60                 #define VLDU(expr)                      _mm256_loadu_pd(expr)  // Load; no alignment requirement.
61
62                 #define VST(dst, src)           _mm256_store_pd(dst, src)  // Store; dst must be 32-byte aligned.
63                 #define VSTU(dst, src)          _mm256_storeu_pd(dst, src)  // Store; no alignment requirement.
64                 #define VSTNT(dst, src)         _mm256_stream_pd(dst, src)  // Non-temporal store; dst must be 32-byte aligned.
65
66                 #define VMUL(a, b)                      _mm256_mul_pd(a, b)  // Lane-wise a * b.
67                 #define VADD(a, b)                      _mm256_add_pd(a, b)  // Lane-wise a + b.
68                 #define VSUB(a, b)                      _mm256_sub_pd(a, b)  // Lane-wise a - b.
69         #endif
70
71         #ifdef VECTOR_SSE  // SSE2, double precision: every macro below maps to one _mm_*_pd intrinsic.
72                 #include <emmintrin.h>
73                 // Vector size in double-precision floating-point numbers.
74                 #define VSIZE 2
75
76                 #define VPDFT                           __m128d  // Vector type operated on by the macros below.
77
78                 #define VSET(scalar)            _mm_set1_pd(scalar)  // Broadcast scalar to all lanes.
79
80                 #define VLD(expr)                       _mm_load_pd(expr)  // Load; address must be 16-byte aligned.
81                 #define VLDU(expr)                      _mm_loadu_pd(expr)  // Load; no alignment requirement.
82
83                 #define VST(dst, src)           _mm_store_pd(dst, src)  // Store; dst must be 16-byte aligned.
84                 #define VSTU(dst, src)          _mm_storeu_pd(dst, src)  // Store; no alignment requirement.
85                 #define VSTNT(dst, src)         _mm_stream_pd(dst, src)  // Non-temporal store; dst must be 16-byte aligned.
86
87                 #define VMUL(a, b)                      _mm_mul_pd(a, b)  // Lane-wise a * b.
88                 #define VADD(a, b)                      _mm_add_pd(a, b)  // Lane-wise a + b.
89                 #define VSUB(a, b)                      _mm_sub_pd(a, b)  // Lane-wise a - b.
90         #endif
91
92 #elif defined(PRECISION_SP)
93
94         #ifdef VECTOR_AVX  // AVX, single precision: every macro below maps to one _mm256_*_ps intrinsic.
95
96                 #include <immintrin.h>
97                 // Vector size in single-precision floating-point numbers.
98                 #define VSIZE   8
99
100                 #define VPDFT                           __m256  // Vector type operated on by the macros below.
101
102                 #define VSET(scalar)            _mm256_set1_ps(scalar)  // Broadcast scalar to all lanes.
103
104                 #define VLD(expr)                       _mm256_load_ps(expr)  // Load; address must be 32-byte aligned.
105                 #define VLDU(expr)                      _mm256_loadu_ps(expr)  // Load; no alignment requirement.
106
107                 #define VST(dst, src)           _mm256_store_ps(dst, src)  // Store; dst must be 32-byte aligned.
108                 #define VSTU(dst, src)          _mm256_storeu_ps(dst, src)  // Store; no alignment requirement.
109                 #define VSTNT(dst, src)         _mm256_stream_ps(dst, src)  // Non-temporal store; dst must be 32-byte aligned.
110
111                 #define VMUL(a, b)                      _mm256_mul_ps(a, b)  // Lane-wise a * b.
112                 #define VADD(a, b)                      _mm256_add_ps(a, b)  // Lane-wise a + b.
113                 #define VSUB(a, b)                      _mm256_sub_ps(a, b)  // Lane-wise a - b.
114         #endif
115
116         #ifdef VECTOR_SSE  // SSE, single precision: every macro below maps to one _mm_*_ps intrinsic.
117                 #include <emmintrin.h>  // NOTE(review): the _ps intrinsics are declared in <xmmintrin.h>; this presumably relies on <emmintrin.h> including it - confirm for the target compilers.
118                 // Vector size in single-precision floating-point numbers.
119                 #define VSIZE   4
120
121                 #define VPDFT                           __m128  // Vector type operated on by the macros below.
122
123                 #define VSET(scalar)            _mm_set1_ps(scalar)  // Broadcast scalar to all lanes.
124
125                 #define VLD(expr)                       _mm_load_ps(expr)  // Load; address must be 16-byte aligned.
126                 #define VLDU(expr)                      _mm_loadu_ps(expr)  // Load; no alignment requirement.
127
128                 #define VST(dst, src)           _mm_store_ps(dst, src)  // Store; dst must be 16-byte aligned.
129                 #define VSTU(dst, src)          _mm_storeu_ps(dst, src)  // Store; no alignment requirement.
130                 #define VSTNT(dst, src)         _mm_stream_ps(dst, src)  // Non-temporal store; dst must be 16-byte aligned.
131
132                 #define VMUL(a, b)                      _mm_mul_ps(a, b)  // Lane-wise a * b.
133                 #define VADD(a, b)                      _mm_add_ps(a, b)  // Lane-wise a + b.
134                 #define VSUB(a, b)                      _mm_sub_ps(a, b)  // Lane-wise a - b.
135         #endif
136
137 #endif  // PRECISION
138
139 #endif // __VECTOR_H__
This page took 0.057991 seconds and 3 git commands to generate.