#error Only VECTOR_AVX or VECTOR_SSE can be defined at the same time.
#endif
-#ifdef VECTOR_AVX
+#if !defined(PRECISION_DP) && !defined(PRECISION_SP)
+ #error PRECISION_DP or PRECISION_SP must be defined.
+#endif
- #include <immintrin.h>
- // Vector size in double-precision floatin-point numbers.
- #define VSIZE 4
+#if defined(PRECISION_DP) && defined(PRECISION_SP)
+ #error Only PRECISION_DP or PRECISION_SP can be defined at the same time.
+#endif
- #define VPDFT __m256d
+#ifdef PRECISION_DP
- #define VSET(scalar) _mm256_set1_pd(scalar)
+ #ifdef VECTOR_AVX
- #define VLD(expr) _mm256_load_pd(expr)
- #define VLDU(expr) _mm256_loadu_pd(expr)
+ #include <immintrin.h>
+ // Vector size in double-precision floating-point numbers.
+ #define VSIZE 4
- #define VST(dst, src) _mm256_store_pd(dst, src)
- #define VSTU(dst, src) _mm256_storeu_pd(dst, src)
- #define VSTNT(dst, src) _mm256_stream_pd(dst, src)
+ #define VPDFT __m256d
- #define VMUL(a, b) _mm256_mul_pd(a, b)
- #define VADD(a, b) _mm256_add_pd(a, b)
- #define VSUB(a, b) _mm256_sub_pd(a, b)
-#endif
+ #define VSET(scalar) _mm256_set1_pd(scalar)
-#ifdef VECTOR_SSE
- #include <emmintrin.h>
- // Vector size in double-precision floatin-point numbers.
- #define VSIZE 2
+ #define VLD(expr) _mm256_load_pd(expr)
+ #define VLDU(expr) _mm256_loadu_pd(expr)
- #define VPDFT __m128d
+ #define VST(dst, src) _mm256_store_pd(dst, src)
+ #define VSTU(dst, src) _mm256_storeu_pd(dst, src)
+ #define VSTNT(dst, src) _mm256_stream_pd(dst, src)
- #define VSET(scalar) _mm_set1_pd(scalar)
+ #define VMUL(a, b) _mm256_mul_pd(a, b)
+ #define VADD(a, b) _mm256_add_pd(a, b)
+ #define VSUB(a, b) _mm256_sub_pd(a, b)
+ #endif
- #define VLD(expr) _mm_load_pd(expr)
- #define VLDU(expr) _mm_loadu_pd(expr)
+ #ifdef VECTOR_SSE
+ #include <emmintrin.h>
+ // Vector size in double-precision floating-point numbers.
+ #define VSIZE 2
- #define VST(dst, src) _mm_store_pd(dst, src)
- #define VSTU(dst, src) _mm_storeu_pd(dst, src)
- #define VSTNT(dst, src) _mm_stream_pd(dst, src)
+ #define VPDFT __m128d
- #define VMUL(a, b) _mm_mul_pd(a, b)
- #define VADD(a, b) _mm_add_pd(a, b)
- #define VSUB(a, b) _mm_sub_pd(a, b)
-#endif
+ #define VSET(scalar) _mm_set1_pd(scalar)
+
+ #define VLD(expr) _mm_load_pd(expr)
+ #define VLDU(expr) _mm_loadu_pd(expr)
+
+ #define VST(dst, src) _mm_store_pd(dst, src)
+ #define VSTU(dst, src) _mm_storeu_pd(dst, src)
+ #define VSTNT(dst, src) _mm_stream_pd(dst, src)
+
+ #define VMUL(a, b) _mm_mul_pd(a, b)
+ #define VADD(a, b) _mm_add_pd(a, b)
+ #define VSUB(a, b) _mm_sub_pd(a, b)
+ #endif
+
+#elif defined(PRECISION_SP)
+
+ #ifdef VECTOR_AVX
+
+ #include <immintrin.h>
+ // Vector size in single-precision floating-point numbers.
+ #define VSIZE 8
+
+ #define VPDFT __m256
+
+ #define VSET(scalar) _mm256_set1_ps(scalar)
+
+ #define VLD(expr) _mm256_load_ps(expr)
+ #define VLDU(expr) _mm256_loadu_ps(expr)
+
+ #define VST(dst, src) _mm256_store_ps(dst, src)
+ #define VSTU(dst, src) _mm256_storeu_ps(dst, src)
+ #define VSTNT(dst, src) _mm256_stream_ps(dst, src)
+
+ #define VMUL(a, b) _mm256_mul_ps(a, b)
+ #define VADD(a, b) _mm256_add_ps(a, b)
+ #define VSUB(a, b) _mm256_sub_ps(a, b)
+ #endif
+
+ #ifdef VECTOR_SSE
+ #include <emmintrin.h>
+ // Vector size in single-precision floating-point numbers.
+ #define VSIZE 4
+
+ #define VPDFT __m128
+
+ #define VSET(scalar) _mm_set1_ps(scalar)
+
+ #define VLD(expr) _mm_load_ps(expr)
+ #define VLDU(expr) _mm_loadu_ps(expr)
+
+ #define VST(dst, src) _mm_store_ps(dst, src)
+ #define VSTU(dst, src) _mm_storeu_ps(dst, src)
+ #define VSTNT(dst, src) _mm_stream_ps(dst, src)
+
+ #define VMUL(a, b) _mm_mul_ps(a, b)
+ #define VADD(a, b) _mm_add_ps(a, b)
+ #define VSUB(a, b) _mm_sub_ps(a, b)
+ #endif
+#endif // PRECISION
#endif // __VECTOR_H__