version 0.1
[LbmBenchmarkKernelsPublic.git] / src / Vector.h
CommitLineData
10988083
MW
// --------------------------------------------------------------------------
//
// Copyright
// Markus Wittmann, 2016-2017
// RRZE, University of Erlangen-Nuremberg, Germany
// markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
//
// Viktor Haag, 2016
// LSS, University of Erlangen-Nuremberg, Germany
//
// This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
//
// LbmBenchKernels is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// LbmBenchKernels is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>.
//
// --------------------------------------------------------------------------
#ifndef __VECTOR_H__
#define __VECTOR_H__

// Thin macro layer over the x86 SIMD intrinsics for packed double-precision
// values. Exactly one of VECTOR_AVX (256-bit vectors) or VECTOR_SSE (128-bit
// vectors) selects the instruction set; both variants expose the same macro
// names so code written against them is independent of the vector width.
//
//   VSIZE            number of doubles held by one vector
//   VPDFT            vector type
//   VSET(scalar)     vector with every lane set to scalar
//   VLD(expr)        load from an address aligned to the vector size
//   VLDU(expr)       load from an address of any alignment
//   VST(dst, src)    store to an address aligned to the vector size
//   VSTU(dst, src)   store to an address of any alignment
//   VSTNT(dst, src)  non-temporal (streaming) store, aligned address required
//   VMUL/VADD/VSUB   lane-wise a * b, a + b, a - b

// Fall back to AVX when the build did not choose an ISA extension.
#if !defined(VECTOR_AVX) && !defined(VECTOR_SSE)
    #warning Defining VECTOR_AVX as no ISA extension was selected.
    #define VECTOR_AVX
#endif

// The two macro sets below use the same names, so they are mutually exclusive.
#if defined(VECTOR_AVX) && defined(VECTOR_SSE)
    #error Only VECTOR_AVX or VECTOR_SSE can be defined at the same time.
#endif

#ifdef VECTOR_AVX

    #include <immintrin.h>
    // Vector size in double-precision floating-point numbers.
    #define VSIZE 4

    #define VPDFT __m256d

    #define VSET(scalar)    _mm256_set1_pd(scalar)

    // VLD requires a 32-byte aligned address, VLDU does not.
    #define VLD(expr)       _mm256_load_pd(expr)
    #define VLDU(expr)      _mm256_loadu_pd(expr)

    // VST and VSTNT require a 32-byte aligned address, VSTU does not.
    #define VST(dst, src)   _mm256_store_pd(dst, src)
    #define VSTU(dst, src)  _mm256_storeu_pd(dst, src)
    #define VSTNT(dst, src) _mm256_stream_pd(dst, src)

    #define VMUL(a, b)      _mm256_mul_pd(a, b)
    #define VADD(a, b)      _mm256_add_pd(a, b)
    #define VSUB(a, b)      _mm256_sub_pd(a, b)
#endif

#ifdef VECTOR_SSE
    #include <emmintrin.h>
    // Vector size in double-precision floating-point numbers.
    #define VSIZE 2

    #define VPDFT __m128d

    #define VSET(scalar)    _mm_set1_pd(scalar)

    // VLD requires a 16-byte aligned address, VLDU does not.
    #define VLD(expr)       _mm_load_pd(expr)
    #define VLDU(expr)      _mm_loadu_pd(expr)

    // VST and VSTNT require a 16-byte aligned address, VSTU does not.
    #define VST(dst, src)   _mm_store_pd(dst, src)
    #define VSTU(dst, src)  _mm_storeu_pd(dst, src)
    #define VSTNT(dst, src) _mm_stream_pd(dst, src)

    #define VMUL(a, b)      _mm_mul_pd(a, b)
    #define VADD(a, b)      _mm_add_pd(a, b)
    #define VSUB(a, b)      _mm_sub_pd(a, b)
#endif


#endif // __VECTOR_H__
This page took 0.071488 seconds and 5 git commands to generate.