add single precision, add aa-vec-sl-soa kernel, updated doc
[LbmBenchmarkKernelsPublic.git] / src / Vector.h
CommitLineData
10988083
MW
1// --------------------------------------------------------------------------
2//
3// Copyright
4// Markus Wittmann, 2016-2017
5// RRZE, University of Erlangen-Nuremberg, Germany
6// markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
7//
8// Viktor Haag, 2016
9// LSS, University of Erlangen-Nuremberg, Germany
10//
11// This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
12//
13// LbmBenchKernels is free software: you can redistribute it and/or modify
14// it under the terms of the GNU General Public License as published by
15// the Free Software Foundation, either version 3 of the License, or
16// (at your option) any later version.
17//
18// LbmBenchKernels is distributed in the hope that it will be useful,
19// but WITHOUT ANY WARRANTY; without even the implied warranty of
20// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21// GNU General Public License for more details.
22//
23// You should have received a copy of the GNU General Public License
24// along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>.
25//
26// --------------------------------------------------------------------------
27#ifndef __VECTOR_H__
28#define __VECTOR_H__
29
// ISA selection: when neither VECTOR_AVX nor VECTOR_SSE was defined by the
// build, fall back to VECTOR_AVX and emit a compile-time warning about it.
30#if !defined(VECTOR_AVX) && !defined(VECTOR_SSE)
31 #warning Defining VECTOR_AVX as no ISA extension was selected.
32 #define VECTOR_AVX
33#endif
34
// The two ISA macros are mutually exclusive: defining both stops compilation.
35#if defined(VECTOR_AVX) && defined(VECTOR_SSE)
36 #error Only VECTOR_AVX or VECTOR_SSE can be defined at the same time.
37#endif
38
0fde6e45
MW
// Precision selection: unlike the ISA macros there is no default here; the
// build must define exactly one of PRECISION_DP or PRECISION_SP.
39#if !defined(PRECISION_DP) && !defined(PRECISION_SP)
40 #error PRECISION_DP or PRECISION_SP must be defined.
41#endif
10988083 42
0fde6e45
MW
// Defining both precision macros at once is rejected as well.
43#if defined(PRECISION_DP) && defined(PRECISION_SP)
44 #error Only PRECISION_DP or PRECISION_SP can be defined at the same time.
45#endif
10988083 46
0fde6e45 47#ifdef PRECISION_DP
10988083 48
// Double precision + AVX: each generic V* macro in this branch forwards its
// arguments unchanged to the corresponding _mm256_*_pd intrinsic.
0fde6e45 49 #ifdef VECTOR_AVX
10988083 50
0fde6e45
MW
51 #include <immintrin.h>
52 // Number of double-precision floating-point elements per vector.
53 #define VSIZE 4
10988083 54
// Vector data type the V* macros below operate on.
0fde6e45 55 #define VPDFT __m256d
10988083 56
// VSET: maps to the set1 intrinsic (replicates one scalar into every element).
0fde6e45 57 #define VSET(scalar) _mm256_set1_pd(scalar)
10988083 58
0fde6e45
MW
// Loads: VLD maps to the aligned-load intrinsic, VLDU to the unaligned one.
59 #define VLD(expr) _mm256_load_pd(expr)
60 #define VLDU(expr) _mm256_loadu_pd(expr)
10988083 61
0fde6e45
MW
// Stores: VST maps to the aligned store, VSTU to the unaligned store and
// VSTNT to the streaming (non-temporal) store intrinsic.
62 #define VST(dst, src) _mm256_store_pd(dst, src)
63 #define VSTU(dst, src) _mm256_storeu_pd(dst, src)
64 #define VSTNT(dst, src) _mm256_stream_pd(dst, src)
10988083 65
0fde6e45
MW
// Arithmetic: multiply, add and subtract intrinsics.
66 #define VMUL(a, b) _mm256_mul_pd(a, b)
67 #define VADD(a, b) _mm256_add_pd(a, b)
68 #define VSUB(a, b) _mm256_sub_pd(a, b)
69 #endif
10988083 70
0fde6e45
MW
// Double precision + SSE: each generic V* macro in this branch forwards its
// arguments unchanged to the corresponding _mm_*_pd intrinsic.
71 #ifdef VECTOR_SSE
72 #include <emmintrin.h>
73 // Number of double-precision floating-point elements per vector.
74 #define VSIZE 2
10988083 75
// Vector data type the V* macros below operate on.
0fde6e45 76 #define VPDFT __m128d
10988083 77
0fde6e45
MW
// VSET: maps to the set1 intrinsic (replicates one scalar into every element).
78 #define VSET(scalar) _mm_set1_pd(scalar)
79
// Loads: VLD maps to the aligned-load intrinsic, VLDU to the unaligned one.
80 #define VLD(expr) _mm_load_pd(expr)
81 #define VLDU(expr) _mm_loadu_pd(expr)
82
// Stores: VST maps to the aligned store, VSTU to the unaligned store and
// VSTNT to the streaming (non-temporal) store intrinsic.
83 #define VST(dst, src) _mm_store_pd(dst, src)
84 #define VSTU(dst, src) _mm_storeu_pd(dst, src)
85 #define VSTNT(dst, src) _mm_stream_pd(dst, src)
86
// Arithmetic: multiply, add and subtract intrinsics.
87 #define VMUL(a, b) _mm_mul_pd(a, b)
88 #define VADD(a, b) _mm_add_pd(a, b)
89 #define VSUB(a, b) _mm_sub_pd(a, b)
90 #endif
91
92#elif defined(PRECISION_SP)
93
// Single precision + AVX: each generic V* macro in this branch forwards its
// arguments unchanged to the corresponding _mm256_*_ps intrinsic.
94 #ifdef VECTOR_AVX
95
96 #include <immintrin.h>
97 // Number of single-precision floating-point elements per vector.
98 #define VSIZE 8
99
// Vector data type the V* macros below operate on.
100 #define VPDFT __m256
101
// VSET: maps to the set1 intrinsic (replicates one scalar into every element).
102 #define VSET(scalar) _mm256_set1_ps(scalar)
103
// Loads: VLD maps to the aligned-load intrinsic, VLDU to the unaligned one.
104 #define VLD(expr) _mm256_load_ps(expr)
105 #define VLDU(expr) _mm256_loadu_ps(expr)
106
// Stores: VST maps to the aligned store, VSTU to the unaligned store and
// VSTNT to the streaming (non-temporal) store intrinsic.
107 #define VST(dst, src) _mm256_store_ps(dst, src)
108 #define VSTU(dst, src) _mm256_storeu_ps(dst, src)
109 #define VSTNT(dst, src) _mm256_stream_ps(dst, src)
110
// Arithmetic: multiply, add and subtract intrinsics.
111 #define VMUL(a, b) _mm256_mul_ps(a, b)
112 #define VADD(a, b) _mm256_add_ps(a, b)
113 #define VSUB(a, b) _mm256_sub_ps(a, b)
114 #endif
115
// Single precision + SSE: each generic V* macro in this branch forwards its
// arguments unchanged to the corresponding _mm_*_ps intrinsic.
// NOTE(review): only <emmintrin.h> is included here; this presumably relies on
// that header also declaring the _ps intrinsics and __m128 — confirm for the
// compilers this project targets.
116 #ifdef VECTOR_SSE
117 #include <emmintrin.h>
118 // Number of single-precision floating-point elements per vector.
119 #define VSIZE 4
120
// Vector data type the V* macros below operate on.
121 #define VPDFT __m128
122
// VSET: maps to the set1 intrinsic (replicates one scalar into every element).
123 #define VSET(scalar) _mm_set1_ps(scalar)
124
// Loads: VLD maps to the aligned-load intrinsic, VLDU to the unaligned one.
125 #define VLD(expr) _mm_load_ps(expr)
126 #define VLDU(expr) _mm_loadu_ps(expr)
127
// Stores: VST maps to the aligned store, VSTU to the unaligned store and
// VSTNT to the streaming (non-temporal) store intrinsic.
128 #define VST(dst, src) _mm_store_ps(dst, src)
129 #define VSTU(dst, src) _mm_storeu_ps(dst, src)
130 #define VSTNT(dst, src) _mm_stream_ps(dst, src)
131
// Arithmetic: multiply, add and subtract intrinsics.
132 #define VMUL(a, b) _mm_mul_ps(a, b)
133 #define VADD(a, b) _mm_add_ps(a, b)
134 #define VSUB(a, b) _mm_sub_ps(a, b)
135 #endif
10988083 136
0fde6e45 137#endif // PRECISION
10988083
MW
138
139#endif // __VECTOR_H__
This page took 0.146744 seconds and 5 git commands to generate.