X-Git-Url: http://git.rrze.uni-erlangen.de/gitweb/?p=LbmBenchmarkKernelsPublic.git;a=blobdiff_plain;f=src%2FVector.h;fp=src%2FVector.h;h=af12f77868f6da4d3796cdf1f4e8654e3fcfdb3b;hp=41b9a7984f6e31c704f1089797bcb1c616e5b1a1;hb=0fde6e45e9be83893afae896cf49a799777f6d7c;hpb=712d0b8fc4a382e1cfe4edef8b0ade11b0a2ce25 diff --git a/src/Vector.h b/src/Vector.h index 41b9a79..af12f77 100644 --- a/src/Vector.h +++ b/src/Vector.h @@ -36,48 +36,104 @@ #error Only VECTOR_AVX or VECTOR_SSE can be defined at the same time. #endif -#ifdef VECTOR_AVX +#if !defined(PRECISION_DP) && !defined(PRECISION_SP) + #error PRECISION_DP or PRECISION_SP must be defined. +#endif - #include - // Vector size in double-precision floatin-point numbers. - #define VSIZE 4 +#if defined(PRECISION_DP) && defined(PRECISION_SP) + #error Only PRECISION_DP or PRECISION_SP can be defined at the same time. +#endif - #define VPDFT __m256d +#ifdef PRECISION_DP - #define VSET(scalar) _mm256_set1_pd(scalar) + #ifdef VECTOR_AVX - #define VLD(expr) _mm256_load_pd(expr) - #define VLDU(expr) _mm256_loadu_pd(expr) + #include + // Vector size in double-precision floating-point numbers. + #define VSIZE 4 - #define VST(dst, src) _mm256_store_pd(dst, src) - #define VSTU(dst, src) _mm256_storeu_pd(dst, src) - #define VSTNT(dst, src) _mm256_stream_pd(dst, src) + #define VPDFT __m256d - #define VMUL(a, b) _mm256_mul_pd(a, b) - #define VADD(a, b) _mm256_add_pd(a, b) - #define VSUB(a, b) _mm256_sub_pd(a, b) -#endif + #define VSET(scalar) _mm256_set1_pd(scalar) -#ifdef VECTOR_SSE - #include - // Vector size in double-precision floatin-point numbers. 
- #define VSIZE 2 + #define VLD(expr) _mm256_load_pd(expr) + #define VLDU(expr) _mm256_loadu_pd(expr) - #define VPDFT __m128d + #define VST(dst, src) _mm256_store_pd(dst, src) + #define VSTU(dst, src) _mm256_storeu_pd(dst, src) + #define VSTNT(dst, src) _mm256_stream_pd(dst, src) - #define VSET(scalar) _mm_set1_pd(scalar) + #define VMUL(a, b) _mm256_mul_pd(a, b) + #define VADD(a, b) _mm256_add_pd(a, b) + #define VSUB(a, b) _mm256_sub_pd(a, b) + #endif - #define VLD(expr) _mm_load_pd(expr) - #define VLDU(expr) _mm_loadu_pd(expr) + #ifdef VECTOR_SSE + #include + // Vector size in double-precision floating-point numbers. + #define VSIZE 2 - #define VST(dst, src) _mm_store_pd(dst, src) - #define VSTU(dst, src) _mm_storeu_pd(dst, src) - #define VSTNT(dst, src) _mm_stream_pd(dst, src) + #define VPDFT __m128d - #define VMUL(a, b) _mm_mul_pd(a, b) - #define VADD(a, b) _mm_add_pd(a, b) - #define VSUB(a, b) _mm_sub_pd(a, b) -#endif + #define VSET(scalar) _mm_set1_pd(scalar) + + #define VLD(expr) _mm_load_pd(expr) + #define VLDU(expr) _mm_loadu_pd(expr) + + #define VST(dst, src) _mm_store_pd(dst, src) + #define VSTU(dst, src) _mm_storeu_pd(dst, src) + #define VSTNT(dst, src) _mm_stream_pd(dst, src) + + #define VMUL(a, b) _mm_mul_pd(a, b) + #define VADD(a, b) _mm_add_pd(a, b) + #define VSUB(a, b) _mm_sub_pd(a, b) + #endif + +#elif defined(PRECISION_SP) + + #ifdef VECTOR_AVX + + #include + // Vector size in single-precision floating-point numbers. 
+ #define VSIZE 8 + + #define VPDFT __m256 + + #define VSET(scalar) _mm256_set1_ps(scalar) + + #define VLD(expr) _mm256_load_ps(expr) + #define VLDU(expr) _mm256_loadu_ps(expr) + + #define VST(dst, src) _mm256_store_ps(dst, src) + #define VSTU(dst, src) _mm256_storeu_ps(dst, src) + #define VSTNT(dst, src) _mm256_stream_ps(dst, src) + + #define VMUL(a, b) _mm256_mul_ps(a, b) + #define VADD(a, b) _mm256_add_ps(a, b) + #define VSUB(a, b) _mm256_sub_ps(a, b) + #endif + + #ifdef VECTOR_SSE + #include + // Vector size in single-precision floating-point numbers. + #define VSIZE 4 + + #define VPDFT __m128 + + #define VSET(scalar) _mm_set1_ps(scalar) + + #define VLD(expr) _mm_load_ps(expr) + #define VLDU(expr) _mm_loadu_ps(expr) + + #define VST(dst, src) _mm_store_ps(dst, src) + #define VSTU(dst, src) _mm_storeu_ps(dst, src) + #define VSTNT(dst, src) _mm_stream_ps(dst, src) + + #define VMUL(a, b) _mm_mul_ps(a, b) + #define VADD(a, b) _mm_add_ps(a, b) + #define VSUB(a, b) _mm_sub_ps(a, b) + #endif +#endif // PRECISION #endif // __VECTOR_H__