merge with kernels from MH's master thesis
[LbmBenchmarkKernelsPublic.git] / src / Vector.h
index af12f77868f6da4d3796cdf1f4e8654e3fcfdb3b..cc9b7d7a3533a6fee1989b23f581e478a576d173 100644 (file)
@@ -1,13 +1,17 @@
 // --------------------------------------------------------------------------
 //
 // Copyright
-//   Markus Wittmann, 2016-2017
+//   Markus Wittmann, 2016-2018
 //   RRZE, University of Erlangen-Nuremberg, Germany
 //   markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
 //
 //   Viktor Haag, 2016
 //   LSS, University of Erlangen-Nuremberg, Germany
 //
+//   Michael Hussnaetter, 2017-2018
+//   University of Erlangen-Nuremberg, Germany
+//   michael.hussnaetter -at- fau.de
+//
 //  This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
 //
 //  LbmBenchKernels is free software: you can redistribute it and/or modify
 #ifndef __VECTOR_H__
 #define __VECTOR_H__
 
-#if !defined(VECTOR_AVX) && !defined(VECTOR_SSE)
+#if !defined(VECTOR_AVX512) && !defined(VECTOR_AVX) && !defined(VECTOR_SSE) // no ISA macro selected: warn and fall back to VECTOR_AVX
        #warning Defining VECTOR_AVX as no ISA extension was selected.
        #define VECTOR_AVX
 #endif
 
-#if defined(VECTOR_AVX) && defined(VECTOR_SSE)
-       #error Only VECTOR_AVX or VECTOR_SSE can be defined at the same time.
+// Reject ambiguous configurations: every pair of ISA selection macros is
+// checked, so at most one of VECTOR_AVX512, VECTOR_AVX, VECTOR_SSE may be set.
+#if (defined(VECTOR_AVX512) && defined(VECTOR_AVX)) || \
+       (defined(VECTOR_AVX512) && defined(VECTOR_SSE)) || \
+       (defined(VECTOR_AVX) && defined(VECTOR_SSE))
+       #error Only VECTOR_AVX512 or VECTOR_AVX or VECTOR_SSE can be defined at the same time.
 #endif
 
 #if !defined(PRECISION_DP) && !defined(PRECISION_SP)
 
 #ifdef PRECISION_DP
 
+       // Double-precision vector abstraction mapped onto AVX-512 intrinsics.
+       // Floating-point data uses 512-bit vectors (__m512d); the 32-bit integer
+       // helpers (VSETI32, VLIU, VADDI32, VMULI32) use 256-bit vectors (__m256i).
+       #ifdef VECTOR_AVX512
+
+               #include <immintrin.h>
+               // Vector size in double-precision floating-point numbers.
+               #define VSIZE   8
+
+               // Vector type holding VSIZE doubles.
+               #define VPDFT                           __m512d
+
+               // Broadcast a scalar to all lanes.
+               #define VSET(scalar)            _mm512_set1_pd(scalar)
+               #define VSETI32(scalar)     _mm256_set1_epi32(scalar)
+
+               // Loads: VLD aligned, VLDU unaligned, VLIU unaligned 256-bit integer,
+               // VLI64 aligned 512-bit vector of 64-bit integers.
+               #define VLD(expr)                       _mm512_load_pd(expr)
+               #define VLDU(expr)                      _mm512_loadu_pd(expr)
+               // The argument is parenthesized so the cast applies to the whole
+               // expression (e.g. base + offset) and not only to its first operand.
+               #define VLIU(expr)                      _mm256_loadu_si256((__m256i const *)(expr))
+               #define VLI64(expr)                     _mm512_load_epi64(expr)
+
+               // Stores: VST aligned, VSTU unaligned, VSTNT non-temporal (streaming).
+               #define VST(dst, src)           _mm512_store_pd(dst, src)
+               #define VSTU(dst, src)          _mm512_storeu_pd(dst, src)
+               #define VSTNT(dst, src)         _mm512_stream_pd(dst, src)
+
+               // Gathers of doubles using 32-bit (VG32) or 64-bit (VG64) offsets.
+               #define VG32(offsets, base, scale)      _mm512_i32gather_pd(offsets, base, scale)
+               #define VG64(offsets, base, scale)      _mm512_i64gather_pd(offsets, base, scale)
+
+               // Prefetch for a subsequent gather with 32-bit offsets.
+               #define VPG32(offsets, base, scale, hint) _mm512_prefetch_i32gather_pd(offsets, base, scale, hint)
+
+               // Scatters of doubles using 32-bit (VS32) or 64-bit (VS64) offsets.
+               #define VS32(dst_base, dst_offsets, src, scale) _mm512_i32scatter_pd(dst_base, dst_offsets, src, scale)
+               #define VS64(dst_base, dst_offsets, src, scale) _mm512_i64scatter_pd(dst_base, dst_offsets, src, scale)
+
+               // Prefetch for a subsequent scatter with 32-bit offsets.
+               #define VPS32(dst_base, dst_offsets, scale, hint) _mm512_prefetch_i32scatter_pd(dst_base, dst_offsets, scale, hint)
+
+               // Lane-wise arithmetic.
+               #define VMUL(a, b)                      _mm512_mul_pd(a, b)
+               #define VADD(a, b)                      _mm512_add_pd(a, b)
+               #define VADDI32(a,b)            _mm256_add_epi32(a,b)
+               // Lane-wise 32-bit multiply keeping the low 32 bits of each product.
+               // _mm256_mul_epi32 would multiply only the even 32-bit lanes and
+               // widen them to 64-bit results, which does not match VADDI32/VSETI32.
+               #define VMULI32(a,b)            _mm256_mullo_epi32(a,b)
+               #define VSUB(a, b)                      _mm512_sub_pd(a, b)
+       #endif
+
        #ifdef VECTOR_AVX
 
                #include <immintrin.h>
 
 #elif defined(PRECISION_SP)
 
+       // Single precision combined with AVX512 is rejected at compile time.
+       #if defined(VECTOR_AVX512)
+               #error Single precision intrinsic kernels for AVX512 are currently not implemented.
+       #endif
+
        #ifdef VECTOR_AVX
 
                #include <immintrin.h>
This page took 0.076018 seconds and 5 git commands to generate.