src/Vector.h

   1 // --------------------------------------------------------------------------
   2 //
   3 // Copyright
   4 //   Markus Wittmann, 2016-2017
   5 //   RRZE, University of Erlangen-Nuremberg, Germany
   6 //   markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
   7 //
   8 //   Viktor Haag, 2016
   9 //   LSS, University of Erlangen-Nuremberg, Germany
  10 //
  11 //  This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
  12 //
  13 //  LbmBenchKernels is free software: you can redistribute it and/or modify
  14 //  it under the terms of the GNU General Public License as published by
  15 //  the Free Software Foundation, either version 3 of the License, or
  16 //  (at your option) any later version.
  17 //
  18 //  LbmBenchKernels is distributed in the hope that it will be useful,
  19 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
  20 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  21 //  GNU General Public License for more details.
  22 //
  23 //  You should have received a copy of the GNU General Public License
  24 //  along with LbmBenchKernels.  If not, see <http://www.gnu.org/licenses/>.
  25 //
  26 // --------------------------------------------------------------------------
  27 #ifndef __VECTOR_H__
  28 #define __VECTOR_H__
  29
  30 #if !defined(VECTOR_AVX) && !defined(VECTOR_SSE)
  31         #warning Defining VECTOR_AVX as no ISA extension was selected.
  32         #define VECTOR_AVX
  33 #endif
  34
  35 #if defined(VECTOR_AVX) && defined(VECTOR_SSE)
  36         #error Only VECTOR_AVX or VECTOR_SSE can be defined at the same time.
  37 #endif
  38
  39 #ifdef VECTOR_AVX
  40
  41         #include <immintrin.h>
  42         // Vector size in double-precision floatin-point numbers.
  43         #define VSIZE   4
  44
  45         #define VPDFT                           __m256d
  46
  47         #define VSET(scalar)            _mm256_set1_pd(scalar)
  48
  49         #define VLD(expr)                       _mm256_load_pd(expr)
  50         #define VLDU(expr)                      _mm256_loadu_pd(expr)
  51
  52         #define VST(dst, src)           _mm256_store_pd(dst, src)
  53         #define VSTU(dst, src)          _mm256_storeu_pd(dst, src)
  54         #define VSTNT(dst, src)         _mm256_stream_pd(dst, src)
  55
  56         #define VMUL(a, b)                      _mm256_mul_pd(a, b)
  57         #define VADD(a, b)                      _mm256_add_pd(a, b)
  58         #define VSUB(a, b)                      _mm256_sub_pd(a, b)
  59 #endif
  60
  61 #ifdef VECTOR_SSE
  62         #include <emmintrin.h>
  63         // Vector size in double-precision floatin-point numbers.
  64         #define VSIZE 2
  65
  66         #define VPDFT                           __m128d
  67
  68         #define VSET(scalar)            _mm_set1_pd(scalar)
  69
  70         #define VLD(expr)                       _mm_load_pd(expr)
  71         #define VLDU(expr)                      _mm_loadu_pd(expr)
  72
  73         #define VST(dst, src)           _mm_store_pd(dst, src)
  74         #define VSTU(dst, src)          _mm_storeu_pd(dst, src)
  75         #define VSTNT(dst, src)         _mm_stream_pd(dst, src)
  76
  77         #define VMUL(a, b)                      _mm_mul_pd(a, b)
  78         #define VADD(a, b)                      _mm_add_pd(a, b)
  79         #define VSUB(a, b)                      _mm_sub_pd(a, b)
  80 #endif
  81
  82
  83 #endif // __VECTOR_H__