X-Git-Url: http://git.rrze.uni-erlangen.de/gitweb/?p=LbmBenchmarkKernelsPublic.git;a=blobdiff_plain;f=src%2FVector.h;fp=src%2FVector.h;h=cc9b7d7a3533a6fee1989b23f581e478a576d173;hp=af12f77868f6da4d3796cdf1f4e8654e3fcfdb3b;hb=8cafd9ea08a6b1103eab29811227a7ae536dffa6;hpb=0fde6e45e9be83893afae896cf49a799777f6d7c

diff --git a/src/Vector.h b/src/Vector.h
index af12f77..cc9b7d7 100644
--- a/src/Vector.h
+++ b/src/Vector.h
@@ -1,13 +1,17 @@
 // --------------------------------------------------------------------------
 //
 // Copyright
-//   Markus Wittmann, 2016-2017
+//   Markus Wittmann, 2016-2018
 //   RRZE, University of Erlangen-Nuremberg, Germany
 //   markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
 //
 //   Viktor Haag, 2016
 //   LSS, University of Erlangen-Nuremberg, Germany
 //
+//   Michael Hussnaetter, 2017-2018
+//   University of Erlangen-Nuremberg, Germany
+//   michael.hussnaetter -at- fau.de
+//
 //  This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
 //
 //  LbmBenchKernels is free software: you can redistribute it and/or modify
@@ -27,13 +31,15 @@
 #ifndef __VECTOR_H__
 #define __VECTOR_H__
 
-#if !defined(VECTOR_AVX) && !defined(VECTOR_SSE)
+#if !defined(VECTOR_AVX512) && !defined(VECTOR_AVX) && !defined(VECTOR_SSE)
 	#warning Defining VECTOR_AVX as no ISA extension was selected.
 	#define VECTOR_AVX
 #endif
 
-#if defined(VECTOR_AVX) && defined(VECTOR_SSE)
-	#error Only VECTOR_AVX or VECTOR_SSE can be defined at the same time.
+#if (defined(VECTOR_AVX512) && defined(VECTOR_AVX)) || \
+    (defined(VECTOR_AVX512) && defined(VECTOR_SSE)) || \
+    (defined(VECTOR_AVX)    && defined(VECTOR_SSE))
+	#error Only VECTOR_AVX512 or VECTOR_AVX or VECTOR_SSE can be defined at the same time.
 #endif
 
 #if !defined(PRECISION_DP) && !defined(PRECISION_SP)
@@ -46,6 +52,55 @@
 
 #ifdef PRECISION_DP
 
+	#ifdef VECTOR_AVX512
+
+		#include <immintrin.h>
+		// Vector size in double-precision floating-point numbers.
+		#define VSIZE 8
+
+		// Vector type holding VSIZE double-precision values.
+		#define VPDFT __m512d
+
+		// Broadcast a scalar to all lanes; the 32-bit integer variant yields a 256-bit vector.
+		#define VSET(scalar) _mm512_set1_pd(scalar)
+		#define VSETI32(scalar) _mm256_set1_epi32(scalar)
+
+		// Loads: VLD and VLI64 use the aligned load intrinsics, VLDU and VLIU the unaligned ones.
+		#define VLD(expr) _mm512_load_pd(expr)
+		#define VLDU(expr) _mm512_loadu_pd(expr)
+		// The argument is parenthesized so the cast applies to the whole expression, e.g. VLIU(p + 1).
+		#define VLIU(expr) _mm256_loadu_si256((__m256i const *)(expr))
+		#define VLI64(expr) _mm512_load_epi64(expr)
+
+		// Stores: aligned, unaligned, and streaming (non-temporal).
+		#define VST(dst, src) _mm512_store_pd(dst, src)
+		#define VSTU(dst, src) _mm512_storeu_pd(dst, src)
+		#define VSTNT(dst, src) _mm512_stream_pd(dst, src)
+
+		// Gathers of double-precision values addressed through 32-bit or 64-bit offsets.
+		#define VG32(offsets, base, scale) _mm512_i32gather_pd(offsets, base, scale)
+		#define VG64(offsets, base, scale) _mm512_i64gather_pd(offsets, base, scale)
+
+		// Prefetch of the elements a gather with 32-bit offsets would access.
+		#define VPG32(offsets, base, scale, hint) _mm512_prefetch_i32gather_pd(offsets, base, scale, hint)
+
+		// Scatters of double-precision values addressed through 32-bit or 64-bit offsets.
+		#define VS32(dst_base, dst_offsets, src, scale) _mm512_i32scatter_pd(dst_base, dst_offsets, src, scale)
+		#define VS64(dst_base, dst_offsets, src, scale) _mm512_i64scatter_pd(dst_base, dst_offsets, src, scale)
+
+		// Prefetch of the elements a scatter with 32-bit offsets would access.
+		#define VPS32(dst_base, dst_offsets, scale, hint) _mm512_prefetch_i32scatter_pd(dst_base, dst_offsets, scale, hint)
+
+		// Arithmetic on double-precision vectors (VMUL, VADD, VSUB) and on 256-bit integer vectors (VADDI32, VMULI32).
+		#define VMUL(a, b) _mm512_mul_pd(a, b)
+		#define VADD(a, b) _mm512_add_pd(a, b)
+		#define VADDI32(a,b) _mm256_add_epi32(a,b)
+		// NOTE(review): _mm256_mul_epi32 multiplies only the low 32 bits of each 64-bit element and
+		// produces 64-bit results; if an element-wise 32-bit product is intended, confirm whether _mm256_mullo_epi32 is meant.
+		#define VMULI32(a,b) _mm256_mul_epi32(a,b)
+		#define VSUB(a, b) _mm512_sub_pd(a, b)
+	#endif
+
 	#ifdef VECTOR_AVX
 
 		#include <immintrin.h>
@@ -91,6 +146,10 @@
 
 #elif defined(PRECISION_SP)
 
+	#ifdef VECTOR_AVX512
+		#error Single precision intrinsic kernels for AVX512 are currently not implemented.
+	#endif
+
 	#ifdef VECTOR_AVX
 
 		#include <immintrin.h>