X-Git-Url: http://git.rrze.uni-erlangen.de/gitweb/?p=LbmBenchmarkKernelsPublic.git;a=blobdiff_plain;f=src%2FVector.h;fp=src%2FVector.h;h=490fbd1f9c46a5b94c2973ca2e5cca6626959918;hp=cc9b7d7a3533a6fee1989b23f581e478a576d173;hb=9e0051cb083e4d8575cbd9f4a41d11552358e151;hpb=8cafd9ea08a6b1103eab29811227a7ae536dffa6 diff --git a/src/Vector.h b/src/Vector.h index cc9b7d7..490fbd1 100644 --- a/src/Vector.h +++ b/src/Vector.h @@ -135,7 +135,40 @@ #elif defined(PRECISION_SP) #ifdef VECTOR_AVX512 - #error Single precision intrinsic kernels for AVX512 are currently not implemented. + + #include <immintrin.h> + // Vector size in single-precision floating-point numbers. + #define VSIZE 16 + + #define VPDFT __m512 + + #define VSET(scalar) _mm512_set1_ps(scalar) + // #define VSETI32(scalar) _mm256_set1_epi32(scalar) + + #define VLD(expr) _mm512_load_ps(expr) + #define VLDU(expr) _mm512_loadu_ps(expr) + #define VLIU(expr) _mm256_loadu_si256((__m256i const *)expr) + // #define VLI64(expr) _mm512_load_epi64(expr) + + #define VST(dst, src) _mm512_store_ps(dst, src) + #define VSTU(dst, src) _mm512_storeu_ps(dst, src) + #define VSTNT(dst, src) _mm512_stream_ps(dst, src) + + // #define VG32(offsets, base, scale) _mm512_i32gather_pd(offsets, base, scale) + // #define VG64(offsets, base, scale) _mm512_i64gather_pd(offsets, base, scale) + + // #define VPG32(offsets, base, scale, hint) _mm512_prefetch_i32gather_pd(offsets, base, scale, hint) + + // #define VS32(dst_base, dst_offsets, src, scale) _mm512_i32scatter_pd(dst_base, dst_offsets, src, scale) + // #define VS64(dst_base, dst_offsets, src, scale) _mm512_i64scatter_pd(dst_base, dst_offsets, src, scale) + + // #define VPS32(dst_base, dst_offsets, scale, hint) _mm512_prefetch_i32scatter_pd(dst_base, dst_offsets, scale, hint) + + #define VMUL(a, b) _mm512_mul_ps(a, b) + #define VADD(a, b) _mm512_add_ps(a, b) + // #define VADDI32(a,b) _mm256_add_epi32(a,b) + // #define VMULI32(a,b) _mm256_mul_epi32(a,b) + #define VSUB(a, b) _mm512_sub_ps(a, b) #endif 
#ifdef VECTOR_AVX