X-Git-Url: http://git.rrze.uni-erlangen.de/gitweb/?p=LbmBenchmarkKernelsPublic.git;a=blobdiff_plain;f=src%2FVector.h;fp=src%2FVector.h;h=af12f77868f6da4d3796cdf1f4e8654e3fcfdb3b;hp=41b9a7984f6e31c704f1089797bcb1c616e5b1a1;hb=0fde6e45e9be83893afae896cf49a799777f6d7c;hpb=712d0b8fc4a382e1cfe4edef8b0ade11b0a2ce25

diff --git a/src/Vector.h b/src/Vector.h
index 41b9a79..af12f77 100644
--- a/src/Vector.h
+++ b/src/Vector.h
@@ -36,48 +36,104 @@
 	#error Only VECTOR_AVX or VECTOR_SSE can be defined at the same time.
 #endif
 
-#ifdef VECTOR_AVX
+#if !defined(PRECISION_DP) && !defined(PRECISION_SP)
+	#error PRECISION_DP or PRECISION_SP must be defined.
+#endif
 
-	#include <immintrin.h>
-	// Vector size in double-precision floatin-point numbers.
-	#define VSIZE	4
+#if defined(PRECISION_DP) && defined(PRECISION_SP)
+	#error Only PRECISION_DP or PRECISION_SP can be defined at the same time.
+#endif
 
-	#define VPDFT				__m256d
+#ifdef PRECISION_DP
 
-	#define VSET(scalar)		_mm256_set1_pd(scalar)
+	#ifdef VECTOR_AVX
 
-	#define VLD(expr)			_mm256_load_pd(expr)
-	#define VLDU(expr)			_mm256_loadu_pd(expr)
+		#include <immintrin.h>
+		// Vector size in double-precision floating-point numbers.
+		#define VSIZE	4
 
-	#define VST(dst, src)		_mm256_store_pd(dst, src)
-	#define VSTU(dst, src)		_mm256_storeu_pd(dst, src)
-	#define VSTNT(dst, src)		_mm256_stream_pd(dst, src)
+		#define VPDFT				__m256d
 
-	#define VMUL(a, b)			_mm256_mul_pd(a, b)
-	#define VADD(a, b)			_mm256_add_pd(a, b)
-	#define VSUB(a, b)			_mm256_sub_pd(a, b)
-#endif
+		#define VSET(scalar)		_mm256_set1_pd(scalar)
 
-#ifdef VECTOR_SSE
-	#include <emmintrin.h>
-	// Vector size in double-precision floatin-point numbers.
-	#define VSIZE 2
+		#define VLD(expr)			_mm256_load_pd(expr)
+		#define VLDU(expr)			_mm256_loadu_pd(expr)
 
-	#define VPDFT				__m128d
+		#define VST(dst, src)		_mm256_store_pd(dst, src)
+		#define VSTU(dst, src)		_mm256_storeu_pd(dst, src)
+		#define VSTNT(dst, src)		_mm256_stream_pd(dst, src)
 
-	#define VSET(scalar)		_mm_set1_pd(scalar)
+		#define VMUL(a, b)			_mm256_mul_pd(a, b)
+		#define VADD(a, b)			_mm256_add_pd(a, b)
+		#define VSUB(a, b)			_mm256_sub_pd(a, b)
+	#endif
 
-	#define VLD(expr)			_mm_load_pd(expr)
-	#define VLDU(expr)			_mm_loadu_pd(expr)
+	#ifdef VECTOR_SSE
+		#include <emmintrin.h>
+		// Vector size in double-precision floating-point numbers.
+		#define VSIZE 2
 
-	#define VST(dst, src)		_mm_store_pd(dst, src)
-	#define VSTU(dst, src)		_mm_storeu_pd(dst, src)
-	#define VSTNT(dst, src)		_mm_stream_pd(dst, src)
+		#define VPDFT				__m128d
 
-	#define VMUL(a, b)			_mm_mul_pd(a, b)
-	#define VADD(a, b)			_mm_add_pd(a, b)
-	#define VSUB(a, b)			_mm_sub_pd(a, b)
-#endif
+		#define VSET(scalar)		_mm_set1_pd(scalar)
+
+		#define VLD(expr)			_mm_load_pd(expr)
+		#define VLDU(expr)			_mm_loadu_pd(expr)
+
+		#define VST(dst, src)		_mm_store_pd(dst, src)
+		#define VSTU(dst, src)		_mm_storeu_pd(dst, src)
+		#define VSTNT(dst, src)		_mm_stream_pd(dst, src)
+
+		#define VMUL(a, b)			_mm_mul_pd(a, b)
+		#define VADD(a, b)			_mm_add_pd(a, b)
+		#define VSUB(a, b)			_mm_sub_pd(a, b)
+	#endif
+
+#elif defined(PRECISION_SP)
+
+	#ifdef VECTOR_AVX
+
+		#include <immintrin.h>
+		// Vector size in single-precision floating-point numbers.
+		#define VSIZE	8
+
+		#define VPDFT				__m256
+
+		#define VSET(scalar)		_mm256_set1_ps(scalar)
+
+		#define VLD(expr)			_mm256_load_ps(expr)
+		#define VLDU(expr)			_mm256_loadu_ps(expr)
+
+		#define VST(dst, src)		_mm256_store_ps(dst, src)
+		#define VSTU(dst, src)		_mm256_storeu_ps(dst, src)
+		#define VSTNT(dst, src)		_mm256_stream_ps(dst, src)
+
+		#define VMUL(a, b)			_mm256_mul_ps(a, b)
+		#define VADD(a, b)			_mm256_add_ps(a, b)
+		#define VSUB(a, b)			_mm256_sub_ps(a, b)
+	#endif
+
+	#ifdef VECTOR_SSE
+		#include <emmintrin.h>
+		// Vector size in single-precision floating-point numbers.
+		#define VSIZE	4
+
+		#define VPDFT				__m128
+
+		#define VSET(scalar)		_mm_set1_ps(scalar)
+
+		#define VLD(expr)			_mm_load_ps(expr)
+		#define VLDU(expr)			_mm_loadu_ps(expr)
+
+		#define VST(dst, src)		_mm_store_ps(dst, src)
+		#define VSTU(dst, src)		_mm_storeu_ps(dst, src)
+		#define VSTNT(dst, src)		_mm_stream_ps(dst, src)
+
+		#define VMUL(a, b)			_mm_mul_ps(a, b)
+		#define VADD(a, b)			_mm_add_ps(a, b)
+		#define VSUB(a, b)			_mm_sub_ps(a, b)
+	#endif
 
+#endif  // PRECISION
 
 #endif // __VECTOR_H__