src/Vector.h

   1 // --------------------------------------------------------------------------
   2 //
   3 // Copyright
   4 //   Markus Wittmann, 2016-2017
   5 //   RRZE, University of Erlangen-Nuremberg, Germany
   6 //   markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
   7 //
   8 //   Viktor Haag, 2016
   9 //   LSS, University of Erlangen-Nuremberg, Germany
  10 //
  11 //  This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
  12 //
  13 //  LbmBenchKernels is free software: you can redistribute it and/or modify
  14 //  it under the terms of the GNU General Public License as published by
  15 //  the Free Software Foundation, either version 3 of the License, or
  16 //  (at your option) any later version.
  17 //
  18 //  LbmBenchKernels is distributed in the hope that it will be useful,
  19 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
  20 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  21 //  GNU General Public License for more details.
  22 //
  23 //  You should have received a copy of the GNU General Public License
  24 //  along with LbmBenchKernels.  If not, see <http://www.gnu.org/licenses/>.
  25 //
  26 // --------------------------------------------------------------------------
  27 #ifndef __VECTOR_H__
  28 #define __VECTOR_H__
  29
  30 #if !defined(VECTOR_AVX) && !defined(VECTOR_SSE)  // No ISA selected: warn and fall back to AVX.
  31         #warning Defining VECTOR_AVX as no ISA extension was selected.
  32         #define VECTOR_AVX
  33 #endif
  34
  35 #if defined(VECTOR_AVX) && defined(VECTOR_SSE)  // The two ISA selections are mutually exclusive.
  36         #error Only VECTOR_AVX or VECTOR_SSE can be defined at the same time.
  37 #endif
  38
  39 #if !defined(PRECISION_DP) && !defined(PRECISION_SP)  // Unlike the ISA, the precision has no default.
  40         #error PRECISION_DP or PRECISION_SP must be defined.
  41 #endif
  42
  43 #if defined(PRECISION_DP) && defined(PRECISION_SP)  // The two precisions are mutually exclusive.
  44         #error Only PRECISION_DP or PRECISION_SP can be defined at the same time.
  45 #endif
  46
  47 #ifdef PRECISION_DP
  48
  49         #ifdef VECTOR_AVX  // Double precision on AVX: the V* macros wrap 256-bit _mm256_*_pd intrinsics.
  50
  51                 #include <immintrin.h>
  52                 // Number of double-precision elements held by one vector (VPDFT).
  53                 #define VSIZE   4
  54                 // Vector data type the macros in this branch operate on.
  55                 #define VPDFT                           __m256d
  56                 // Broadcast one scalar into every element of a vector.
  57                 #define VSET(scalar)            _mm256_set1_pd(scalar)
  58                 // Loads: VLD expects a 32-byte aligned address, VLDU accepts any address.
  59                 #define VLD(expr)                       _mm256_load_pd(expr)
  60                 #define VLDU(expr)                      _mm256_loadu_pd(expr)
  61                 // Stores: VST aligned, VSTU unaligned, VSTNT non-temporal (streaming, aligned address).
  62                 #define VST(dst, src)           _mm256_store_pd(dst, src)
  63                 #define VSTU(dst, src)          _mm256_storeu_pd(dst, src)
  64                 #define VSTNT(dst, src)         _mm256_stream_pd(dst, src)
  65                 // Element-wise arithmetic on two vectors.
  66                 #define VMUL(a, b)                      _mm256_mul_pd(a, b)
  67                 #define VADD(a, b)                      _mm256_add_pd(a, b)
  68                 #define VSUB(a, b)                      _mm256_sub_pd(a, b)
  69         #endif
  70
  71         #ifdef VECTOR_SSE  // Double precision on SSE2: the V* macros wrap 128-bit _mm_*_pd intrinsics.
  72                 #include <emmintrin.h>
  73                 // Number of double-precision elements held by one vector (VPDFT).
  74                 #define VSIZE 2
  75                 // Vector data type the macros in this branch operate on.
  76                 #define VPDFT                           __m128d
  77                 // Broadcast one scalar into every element of a vector.
  78                 #define VSET(scalar)            _mm_set1_pd(scalar)
  79                 // Loads: VLD expects a 16-byte aligned address, VLDU accepts any address.
  80                 #define VLD(expr)                       _mm_load_pd(expr)
  81                 #define VLDU(expr)                      _mm_loadu_pd(expr)
  82                 // Stores: VST aligned, VSTU unaligned, VSTNT non-temporal (streaming, aligned address).
  83                 #define VST(dst, src)           _mm_store_pd(dst, src)
  84                 #define VSTU(dst, src)          _mm_storeu_pd(dst, src)
  85                 #define VSTNT(dst, src)         _mm_stream_pd(dst, src)
  86                 // Element-wise arithmetic on two vectors.
  87                 #define VMUL(a, b)                      _mm_mul_pd(a, b)
  88                 #define VADD(a, b)                      _mm_add_pd(a, b)
  89                 #define VSUB(a, b)                      _mm_sub_pd(a, b)
  90         #endif
  91
  92 #elif defined(PRECISION_SP)
  93
  94         #ifdef VECTOR_AVX  // Single precision on AVX: the V* macros wrap 256-bit _mm256_*_ps intrinsics.
  95
  96                 #include <immintrin.h>
  97                 // Number of single-precision elements held by one vector (VPDFT).
  98                 #define VSIZE   8
  99                 // Vector data type the macros in this branch operate on.
 100                 #define VPDFT                           __m256
 101                 // Broadcast one scalar into every element of a vector.
 102                 #define VSET(scalar)            _mm256_set1_ps(scalar)
 103                 // Loads: VLD expects a 32-byte aligned address, VLDU accepts any address.
 104                 #define VLD(expr)                       _mm256_load_ps(expr)
 105                 #define VLDU(expr)                      _mm256_loadu_ps(expr)
 106                 // Stores: VST aligned, VSTU unaligned, VSTNT non-temporal (streaming, aligned address).
 107                 #define VST(dst, src)           _mm256_store_ps(dst, src)
 108                 #define VSTU(dst, src)          _mm256_storeu_ps(dst, src)
 109                 #define VSTNT(dst, src)         _mm256_stream_ps(dst, src)
 110                 // Element-wise arithmetic on two vectors.
 111                 #define VMUL(a, b)                      _mm256_mul_ps(a, b)
 112                 #define VADD(a, b)                      _mm256_add_ps(a, b)
 113                 #define VSUB(a, b)                      _mm256_sub_ps(a, b)
 114         #endif
 115
 116         #ifdef VECTOR_SSE  // Single precision on SSE: the V* macros wrap 128-bit _mm_*_ps intrinsics.
 117                 #include <emmintrin.h>
 118                 // Number of single-precision elements held by one vector (VPDFT).
 119                 #define VSIZE   4
 120                 // Vector data type the macros in this branch operate on.
 121                 #define VPDFT                           __m128
 122                 // Broadcast one scalar into every element of a vector.
 123                 #define VSET(scalar)            _mm_set1_ps(scalar)
 124                 // Loads: VLD expects a 16-byte aligned address, VLDU accepts any address.
 125                 #define VLD(expr)                       _mm_load_ps(expr)
 126                 #define VLDU(expr)                      _mm_loadu_ps(expr)
 127                 // Stores: VST aligned, VSTU unaligned, VSTNT non-temporal (streaming, aligned address).
 128                 #define VST(dst, src)           _mm_store_ps(dst, src)
 129                 #define VSTU(dst, src)          _mm_storeu_ps(dst, src)
 130                 #define VSTNT(dst, src)         _mm_stream_ps(dst, src)
 131                 // Element-wise arithmetic on two vectors.
 132                 #define VMUL(a, b)                      _mm_mul_ps(a, b)
 133                 #define VADD(a, b)                      _mm_add_ps(a, b)
 134                 #define VSUB(a, b)                      _mm_sub_ps(a, b)
 135         #endif
 136
 137 #endif  // PRECISION
 138
 139 #endif // __VECTOR_H__