1 // --------------------------------------------------------------------------
4 // Markus Wittmann, 2016-2017
5 // RRZE, University of Erlangen-Nuremberg, Germany
6 // markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
9 // LSS, University of Erlangen-Nuremberg, Germany
11 // This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
13 // LbmBenchKernels is free software: you can redistribute it and/or modify
14 // it under the terms of the GNU General Public License as published by
15 // the Free Software Foundation, either version 3 of the License, or
16 // (at your option) any later version.
18 // LbmBenchKernels is distributed in the hope that it will be useful,
19 // but WITHOUT ANY WARRANTY; without even the implied warranty of
20 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 // GNU General Public License for more details.
23 // You should have received a copy of the GNU General Public License
24 // along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>.
26 // --------------------------------------------------------------------------
// ISA selection check: when the build defines neither VECTOR_AVX nor
// VECTOR_SSE, a warning announces that VECTOR_AVX is used as the default.
// NOTE(review): the #define and the closing #endif that belong to this
// check are not visible in this excerpt -- confirm against the full file.
30 #if !defined(VECTOR_AVX) && !defined(VECTOR_SSE)
31 #warning Defining VECTOR_AVX as no ISA extension was selected.
// Abort compilation when both variants are requested: the AVX and the SSE
// macro sets in this header use the same names (VSET, VLD, VST, ...).
35 #if defined(VECTOR_AVX) && defined(VECTOR_SSE)
36 #error Only VECTOR_AVX or VECTOR_SSE can be defined at the same time.
41 #include <immintrin.h>
42 // Vector size in double-precision floating-point numbers.
// AVX variant of the vector helper macros. Each macro is a direct alias for
// the double-precision _mm256_*_pd intrinsic of the same purpose from
// <immintrin.h>; arguments are passed through unchanged.
//
// VSET: broadcast one scalar double to every element of a vector.
47 #define VSET(scalar) _mm256_set1_pd(scalar)
// VLD / VLDU: load a vector from memory. VLD uses the aligned load (the
// intrinsic requires a 32-byte aligned address), VLDU the unaligned load.
49 #define VLD(expr) _mm256_load_pd(expr)
50 #define VLDU(expr) _mm256_loadu_pd(expr)
// VST / VSTU / VSTNT: store vector src to memory at dst. VST is the aligned
// store, VSTU the unaligned store, and VSTNT the non-temporal (streaming)
// store, which like VST requires a 32-byte aligned destination.
52 #define VST(dst, src) _mm256_store_pd(dst, src)
53 #define VSTU(dst, src) _mm256_storeu_pd(dst, src)
54 #define VSTNT(dst, src) _mm256_stream_pd(dst, src)
// VMUL / VADD / VSUB: element-wise a * b, a + b and a - b.
56 #define VMUL(a, b) _mm256_mul_pd(a, b)
57 #define VADD(a, b) _mm256_add_pd(a, b)
58 #define VSUB(a, b) _mm256_sub_pd(a, b)
62 #include <emmintrin.h>
63 // Vector size in double-precision floating-point numbers.
// SSE2 variant of the vector helper macros. Each macro is a direct alias for
// the double-precision _mm_*_pd intrinsic of the same purpose from
// <emmintrin.h>; arguments are passed through unchanged.
//
// VSET: broadcast one scalar double to every element of a vector.
68 #define VSET(scalar) _mm_set1_pd(scalar)
// VLD / VLDU: load a vector from memory. VLD uses the aligned load (the
// intrinsic requires a 16-byte aligned address), VLDU the unaligned load.
70 #define VLD(expr) _mm_load_pd(expr)
71 #define VLDU(expr) _mm_loadu_pd(expr)
// VST / VSTU / VSTNT: store vector src to memory at dst. VST is the aligned
// store, VSTU the unaligned store, and VSTNT the non-temporal (streaming)
// store, which like VST requires a 16-byte aligned destination.
73 #define VST(dst, src) _mm_store_pd(dst, src)
74 #define VSTU(dst, src) _mm_storeu_pd(dst, src)
75 #define VSTNT(dst, src) _mm_stream_pd(dst, src)
// VMUL / VADD / VSUB: element-wise a * b, a + b and a - b.
77 #define VMUL(a, b) _mm_mul_pd(a, b)
78 #define VADD(a, b) _mm_add_pd(a, b)
79 #define VSUB(a, b) _mm_sub_pd(a, b)
83 #endif // __VECTOR_H__