[LbmBenchmarkKernelsPublic.git] / src / Vector.h

// --------------------------------------------------------------------------
//
// Copyright
//   Markus Wittmann, 2016-2017
//   RRZE, University of Erlangen-Nuremberg, Germany
//   markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
//
//   Viktor Haag, 2016
//   LSS, University of Erlangen-Nuremberg, Germany
//
//  This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
//
//  LbmBenchKernels is free software: you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation, either version 3 of the License, or
//  (at your option) any later version.
//
//  LbmBenchKernels is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with LbmBenchKernels.  If not, see <http://www.gnu.org/licenses/>.
//
// --------------------------------------------------------------------------
#ifndef __VECTOR_H__
#define __VECTOR_H__

// Instruction-set selection: when the build chose neither VECTOR_AVX nor
// VECTOR_SSE, warn and fall back to VECTOR_AVX.
#if !(defined(VECTOR_AVX) || defined(VECTOR_SSE))
	#warning Defining VECTOR_AVX as no ISA extension was selected.
	#define VECTOR_AVX
#endif

// The two selections are mutually exclusive; having both is a hard error.
#ifdef VECTOR_AVX
	#ifdef VECTOR_SSE
		#error Only VECTOR_AVX or VECTOR_SSE can be defined at the same time.
	#endif
#endif

// Precision selection: exactly one of PRECISION_DP and PRECISION_SP has to be
// provided by the build. Both or neither is a hard error.
#if defined(PRECISION_DP)
	#if defined(PRECISION_SP)
		#error Only PRECISION_DP or PRECISION_SP can be defined at the same time.
	#endif
#elif !defined(PRECISION_SP)
	#error PRECISION_DP or PRECISION_SP must be defined.
#endif

#ifdef PRECISION_DP

	// Double precision on AVX: every macro below maps to a 256-bit _pd intrinsic.
	#if defined(VECTOR_AVX)

		#include <immintrin.h>

		// Number of double-precision elements in one vector.
		#define VSIZE					4

		// Vector type the macros below operate on.
		#define VPDFT					__m256d

		// Element-wise arithmetic on two vectors.
		#define VADD(lhs, rhs)			_mm256_add_pd(lhs, rhs)
		#define VSUB(lhs, rhs)			_mm256_sub_pd(lhs, rhs)
		#define VMUL(lhs, rhs)			_mm256_mul_pd(lhs, rhs)

		// Broadcast one scalar into all elements of a vector.
		#define VSET(value)				_mm256_set1_pd(value)

		// Loads: VLD expects a 32-byte aligned address, VLDU has no alignment requirement.
		#define VLD(addr)				_mm256_load_pd(addr)
		#define VLDU(addr)				_mm256_loadu_pd(addr)

		// Stores: VST and VSTNT expect a 32-byte aligned address, VSTU does not.
		// VSTNT additionally stores with a non-temporal hint.
		#define VST(addr, vec)			_mm256_store_pd(addr, vec)
		#define VSTU(addr, vec)			_mm256_storeu_pd(addr, vec)
		#define VSTNT(addr, vec)		_mm256_stream_pd(addr, vec)

	#endif // VECTOR_AVX

	// Double precision on SSE2: every macro below maps to a 128-bit _pd intrinsic.
	#if defined(VECTOR_SSE)

		#include <emmintrin.h>

		// Number of double-precision elements in one vector.
		#define VSIZE					2

		// Vector type the macros below operate on.
		#define VPDFT					__m128d

		// Element-wise arithmetic on two vectors.
		#define VADD(lhs, rhs)			_mm_add_pd(lhs, rhs)
		#define VSUB(lhs, rhs)			_mm_sub_pd(lhs, rhs)
		#define VMUL(lhs, rhs)			_mm_mul_pd(lhs, rhs)

		// Broadcast one scalar into all elements of a vector.
		#define VSET(value)				_mm_set1_pd(value)

		// Loads: VLD expects a 16-byte aligned address, VLDU has no alignment requirement.
		#define VLD(addr)				_mm_load_pd(addr)
		#define VLDU(addr)				_mm_loadu_pd(addr)

		// Stores: VST and VSTNT expect a 16-byte aligned address, VSTU does not.
		// VSTNT additionally stores with a non-temporal hint.
		#define VST(addr, vec)			_mm_store_pd(addr, vec)
		#define VSTU(addr, vec)			_mm_storeu_pd(addr, vec)
		#define VSTNT(addr, vec)		_mm_stream_pd(addr, vec)

	#endif // VECTOR_SSE

#elif defined(PRECISION_SP)

	// Single precision on AVX: every macro below maps to a 256-bit _ps intrinsic.
	#ifdef VECTOR_AVX

		#include <immintrin.h>
		// Vector size in single-precision floating-point numbers.
		#define VSIZE	8

		// Vector type the macros below operate on.
		#define VPDFT				__m256

		// Broadcast one scalar into all elements of a vector.
		#define VSET(scalar)		_mm256_set1_ps(scalar)

		// Loads: VLD expects a 32-byte aligned address, VLDU has no alignment requirement.
		#define VLD(expr)			_mm256_load_ps(expr)
		#define VLDU(expr)			_mm256_loadu_ps(expr)

		// Stores: VST and VSTNT expect a 32-byte aligned address, VSTU does not.
		// VSTNT additionally stores with a non-temporal hint.
		#define VST(dst, src)		_mm256_store_ps(dst, src)
		#define VSTU(dst, src)		_mm256_storeu_ps(dst, src)
		#define VSTNT(dst, src)		_mm256_stream_ps(dst, src)

		// Element-wise arithmetic on two vectors.
		#define VMUL(a, b)			_mm256_mul_ps(a, b)
		#define VADD(a, b)			_mm256_add_ps(a, b)
		#define VSUB(a, b)			_mm256_sub_ps(a, b)
	#endif

	// Single precision on SSE: every macro below maps to a 128-bit _ps intrinsic.
	#ifdef VECTOR_SSE
		#include <emmintrin.h>
		// Vector size in single-precision floating-point numbers.
		#define VSIZE	4

		// Vector type the macros below operate on.
		#define VPDFT				__m128

		// Broadcast one scalar into all elements of a vector.
		#define VSET(scalar)		_mm_set1_ps(scalar)

		// Loads: VLD expects a 16-byte aligned address, VLDU has no alignment requirement.
		#define VLD(expr)			_mm_load_ps(expr)
		#define VLDU(expr)			_mm_loadu_ps(expr)

		// Stores: VST and VSTNT expect a 16-byte aligned address, VSTU does not.
		// VSTNT additionally stores with a non-temporal hint.
		#define VST(dst, src)		_mm_store_ps(dst, src)
		#define VSTU(dst, src)		_mm_storeu_ps(dst, src)
		#define VSTNT(dst, src)		_mm_stream_ps(dst, src)

		// Element-wise arithmetic on two vectors.
		#define VMUL(a, b)			_mm_mul_ps(a, b)
		#define VADD(a, b)			_mm_add_ps(a, b)
		#define VSUB(a, b)			_mm_sub_ps(a, b)
	#endif

#endif  // PRECISION

#endif // __VECTOR_H__
Commit	Line	Data
	1	// --------------------------------------------------------------------------
	2	//
	3	// Copyright
	4	// Markus Wittmann, 2016-2017
	5	// RRZE, University of Erlangen-Nuremberg, Germany
	6	// markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
	7	//
	8	// Viktor Haag, 2016
	9	// LSS, University of Erlangen-Nuremberg, Germany
	10	//
	11	// This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
	12	//
	13	// LbmBenchKernels is free software: you can redistribute it and/or modify
	14	// it under the terms of the GNU General Public License as published by
	15	// the Free Software Foundation, either version 3 of the License, or
	16	// (at your option) any later version.
	17	//
	18	// LbmBenchKernels is distributed in the hope that it will be useful,
	19	// but WITHOUT ANY WARRANTY; without even the implied warranty of
	20	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	21	// GNU General Public License for more details.
	22	//
	23	// You should have received a copy of the GNU General Public License
	24	// along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>.
	25	//
	26	// --------------------------------------------------------------------------
	27	#ifndef __VECTOR_H__
	28	#define __VECTOR_H__
	29
	30	#if !defined(VECTOR_AVX) && !defined(VECTOR_SSE)
	31	#warning Defining VECTOR_AVX as no ISA extension was selected.
	32	#define VECTOR_AVX
	33	#endif
	34
	35	#if defined(VECTOR_AVX) && defined(VECTOR_SSE)
	36	#error Only VECTOR_AVX or VECTOR_SSE can be defined at the same time.
	37	#endif
	38
	39	#if !defined(PRECISION_DP) && !defined(PRECISION_SP)
	40	#error PRECISION_DP or PRECISION_SP must be defined.
	41	#endif
	42
	43	#if defined(PRECISION_DP) && defined(PRECISION_SP)
	44	#error Only PRECISION_DP or PRECISION_SP can be defined at the same time.
	45	#endif
	46
	47	#ifdef PRECISION_DP
	48
	49	#ifdef VECTOR_AVX
	50
	51	#include <immintrin.h>
	52	// Vector size in double-precision floating-point numbers.
	53	#define VSIZE 4
	54
	55	#define VPDFT __m256d
	56
	57	#define VSET(scalar) _mm256_set1_pd(scalar)
	58
	59	#define VLD(expr) _mm256_load_pd(expr)
	60	#define VLDU(expr) _mm256_loadu_pd(expr)
	61
	62	#define VST(dst, src) _mm256_store_pd(dst, src)
	63	#define VSTU(dst, src) _mm256_storeu_pd(dst, src)
	64	#define VSTNT(dst, src) _mm256_stream_pd(dst, src)
	65
	66	#define VMUL(a, b) _mm256_mul_pd(a, b)
	67	#define VADD(a, b) _mm256_add_pd(a, b)
	68	#define VSUB(a, b) _mm256_sub_pd(a, b)
	69	#endif
	70
	71	#ifdef VECTOR_SSE
	72	#include <emmintrin.h>
	73	// Vector size in double-precision floating-point numbers.
	74	#define VSIZE 2
	75
	76	#define VPDFT __m128d
	77
	78	#define VSET(scalar) _mm_set1_pd(scalar)
	79
	80	#define VLD(expr) _mm_load_pd(expr)
	81	#define VLDU(expr) _mm_loadu_pd(expr)
	82
	83	#define VST(dst, src) _mm_store_pd(dst, src)
	84	#define VSTU(dst, src) _mm_storeu_pd(dst, src)
	85	#define VSTNT(dst, src) _mm_stream_pd(dst, src)
	86
	87	#define VMUL(a, b) _mm_mul_pd(a, b)
	88	#define VADD(a, b) _mm_add_pd(a, b)
	89	#define VSUB(a, b) _mm_sub_pd(a, b)
	90	#endif
	91
	92	#elif defined(PRECISION_SP)
	93
	94	#ifdef VECTOR_AVX
	95
	96	#include <immintrin.h>
	97	// Vector size in single-precision floating-point numbers.
	98	#define VSIZE 8
	99
	100	#define VPDFT __m256
	101
	102	#define VSET(scalar) _mm256_set1_ps(scalar)
	103
	104	#define VLD(expr) _mm256_load_ps(expr)
	105	#define VLDU(expr) _mm256_loadu_ps(expr)
	106
	107	#define VST(dst, src) _mm256_store_ps(dst, src)
	108	#define VSTU(dst, src) _mm256_storeu_ps(dst, src)
	109	#define VSTNT(dst, src) _mm256_stream_ps(dst, src)
	110
	111	#define VMUL(a, b) _mm256_mul_ps(a, b)
	112	#define VADD(a, b) _mm256_add_ps(a, b)
	113	#define VSUB(a, b) _mm256_sub_ps(a, b)
	114	#endif
	115
	116	#ifdef VECTOR_SSE
	117	#include <emmintrin.h>
	118	// Vector size in single-precision floating-point numbers.
	119	#define VSIZE 4
	120
	121	#define VPDFT __m128
	122
	123	#define VSET(scalar) _mm_set1_ps(scalar)
	124
	125	#define VLD(expr) _mm_load_ps(expr)
	126	#define VLDU(expr) _mm_loadu_ps(expr)
	127
	128	#define VST(dst, src) _mm_store_ps(dst, src)
	129	#define VSTU(dst, src) _mm_storeu_ps(dst, src)
	130	#define VSTNT(dst, src) _mm_stream_ps(dst, src)
	131
	132	#define VMUL(a, b) _mm_mul_ps(a, b)
	133	#define VADD(a, b) _mm_add_ps(a, b)
	134	#define VSUB(a, b) _mm_sub_ps(a, b)
	135	#endif
	136
	137	#endif // PRECISION
	138
	139	#endif // __VECTOR_H__