+ #ifdef VECTOR_AVX512
+
+ // AVX-512 backend for the vector abstraction macros. Double-precision data
+ // lives in 512-bit registers (__m512d); 32-bit integer vectors (e.g. gather /
+ // scatter offsets) live in 256-bit registers (__m256i), so both hold VSIZE
+ // lanes.
+
+ #include <immintrin.h>
+ // Vector size in double-precision floating-point numbers.
+ #define VSIZE 8
+
+ // Vector type holding VSIZE doubles.
+ #define VPDFT __m512d
+
+ // Broadcast a scalar to every lane (double / 32-bit integer).
+ #define VSET(scalar) _mm512_set1_pd(scalar)
+ #define VSETI32(scalar) _mm256_set1_epi32(scalar)
+
+ // Loads. VLD and VLI64 are the aligned forms (64-byte aligned address
+ // required by the underlying intrinsics); VLDU and VLIU accept unaligned
+ // addresses.
+ #define VLD(expr) _mm512_load_pd(expr)
+ #define VLDU(expr) _mm512_loadu_pd(expr)
+ // The argument is parenthesized so the cast applies to the whole expression:
+ // without it VLIU(p + 8) would cast p first and then advance by 8 __m256i.
+ #define VLIU(expr) _mm256_loadu_si256((__m256i const *)(expr))
+ #define VLI64(expr) _mm512_load_epi64(expr)
+
+ // Stores: aligned, unaligned, and non-temporal (streaming, aligned).
+ #define VST(dst, src) _mm512_store_pd(dst, src)
+ #define VSTU(dst, src) _mm512_storeu_pd(dst, src)
+ #define VSTNT(dst, src) _mm512_stream_pd(dst, src)
+
+ // Gather VSIZE doubles from base + offsets[i] * scale, using 32-bit or
+ // 64-bit offsets. scale must be a compile-time constant (1, 2, 4 or 8).
+ #define VG32(offsets, base, scale) _mm512_i32gather_pd(offsets, base, scale)
+ #define VG64(offsets, base, scale) _mm512_i64gather_pd(offsets, base, scale)
+
+ // Prefetch the locations a VG32 with the same arguments would read.
+ // NOTE: the underlying intrinsic belongs to the AVX512PF extension.
+ #define VPG32(offsets, base, scale, hint) _mm512_prefetch_i32gather_pd(offsets, base, scale, hint)
+
+ // Scatter the lanes of src to dst_base + dst_offsets[i] * scale, using
+ // 32-bit or 64-bit offsets.
+ #define VS32(dst_base, dst_offsets, src, scale) _mm512_i32scatter_pd(dst_base, dst_offsets, src, scale)
+ #define VS64(dst_base, dst_offsets, src, scale) _mm512_i64scatter_pd(dst_base, dst_offsets, src, scale)
+
+ // Prefetch the locations a VS32 with the same arguments would write.
+ // NOTE: the underlying intrinsic belongs to the AVX512PF extension.
+ #define VPS32(dst_base, dst_offsets, scale, hint) _mm512_prefetch_i32scatter_pd(dst_base, dst_offsets, scale, hint)
+
+ // Lane-wise arithmetic on doubles (VMUL / VADD / VSUB) and on 32-bit
+ // integers (VADDI32 / VMULI32).
+ #define VMUL(a, b) _mm512_mul_pd(a, b)
+ #define VADD(a, b) _mm512_add_pd(a, b)
+ #define VADDI32(a, b) _mm256_add_epi32(a, b)
+ // mullo keeps the low 32 bits of each of the 8 lane products. The similarly
+ // named _mm256_mul_epi32 only multiplies the even lanes and returns four
+ // 64-bit results, which is not a lane-wise 32-bit multiply.
+ #define VMULI32(a, b) _mm256_mullo_epi32(a, b)
+ #define VSUB(a, b) _mm512_sub_pd(a, b)
+ #endif
+