[LbmBenchmarkKernelsPublic.git] / src / Vector.h

// --------------------------------------------------------------------------
//
// Copyright
//   Markus Wittmann, 2016-2017
//   RRZE, University of Erlangen-Nuremberg, Germany
//   markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
//
//   Viktor Haag, 2016
//   LSS, University of Erlangen-Nuremberg, Germany
//
//  This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
//
//  LbmBenchKernels is free software: you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation, either version 3 of the License, or
//  (at your option) any later version.
//
//  LbmBenchKernels is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with LbmBenchKernels.  If not, see <http://www.gnu.org/licenses/>.
//
// --------------------------------------------------------------------------
#ifndef __VECTOR_H__
#define __VECTOR_H__

// Fall back to the AVX implementation, with a build-time warning, when the
// build selected neither VECTOR_AVX nor VECTOR_SSE.
#if !(defined(VECTOR_AVX) || defined(VECTOR_SSE))
	#define VECTOR_AVX
	#warning Defining VECTOR_AVX as no ISA extension was selected.
#endif

// VECTOR_AVX and VECTOR_SSE each define the same set of macro names
// (VSIZE, VPDFT, VSET, ...), so selecting both at once is rejected at
// compile time.
#if defined(VECTOR_AVX) && defined(VECTOR_SSE)
	#error VECTOR_AVX and VECTOR_SSE are mutually exclusive; define only one of them.
#endif

#if defined(VECTOR_AVX)

	// AVX variant: every macro below maps onto a 256-bit double-precision
	// intrinsic declared in <immintrin.h>.
	#include <immintrin.h>

	// Number of double-precision floating-point values per vector.
	#define VSIZE				4

	// Vector type holding VSIZE doubles.
	#define VPDFT				__m256d

	// Fill every element of a vector with the same scalar.
	#define VSET(x)				_mm256_set1_pd(x)

	// Loads: VLD expects a 32-byte aligned address, VLDU accepts any address.
	#define VLD(ptr)			_mm256_load_pd(ptr)
	#define VLDU(ptr)			_mm256_loadu_pd(ptr)

	// Stores: VST (aligned), VSTU (unaligned), and VSTNT (aligned,
	// non-temporal hint).
	#define VST(ptr, vec)		_mm256_store_pd(ptr, vec)
	#define VSTU(ptr, vec)		_mm256_storeu_pd(ptr, vec)
	#define VSTNT(ptr, vec)		_mm256_stream_pd(ptr, vec)

	// Element-wise arithmetic.
	#define VMUL(lhs, rhs)		_mm256_mul_pd(lhs, rhs)
	#define VADD(lhs, rhs)		_mm256_add_pd(lhs, rhs)
	#define VSUB(lhs, rhs)		_mm256_sub_pd(lhs, rhs)

#endif // VECTOR_AVX

#if defined(VECTOR_SSE)

	// SSE2 variant: every macro below maps onto a 128-bit double-precision
	// intrinsic declared in <emmintrin.h>.
	#include <emmintrin.h>

	// Number of double-precision floating-point values per vector.
	#define VSIZE				2

	// Vector type holding VSIZE doubles.
	#define VPDFT				__m128d

	// Fill every element of a vector with the same scalar.
	#define VSET(x)				_mm_set1_pd(x)

	// Loads: VLD expects a 16-byte aligned address, VLDU accepts any address.
	#define VLD(ptr)			_mm_load_pd(ptr)
	#define VLDU(ptr)			_mm_loadu_pd(ptr)

	// Stores: VST (aligned), VSTU (unaligned), and VSTNT (aligned,
	// non-temporal hint).
	#define VST(ptr, vec)		_mm_store_pd(ptr, vec)
	#define VSTU(ptr, vec)		_mm_storeu_pd(ptr, vec)
	#define VSTNT(ptr, vec)		_mm_stream_pd(ptr, vec)

	// Element-wise arithmetic.
	#define VMUL(lhs, rhs)		_mm_mul_pd(lhs, rhs)
	#define VADD(lhs, rhs)		_mm_add_pd(lhs, rhs)
	#define VSUB(lhs, rhs)		_mm_sub_pd(lhs, rhs)

#endif // VECTOR_SSE


#endif // __VECTOR_H__
Commit	Line	Data
10988083 MW	1	// --------------------------------------------------------------------------
	2	//
	3	// Copyright
	4	// Markus Wittmann, 2016-2017
	5	// RRZE, University of Erlangen-Nuremberg, Germany
	6	// markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
	7	//
	8	// Viktor Haag, 2016
	9	// LSS, University of Erlangen-Nuremberg, Germany
	10	//
	11	// This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
	12	//
	13	// LbmBenchKernels is free software: you can redistribute it and/or modify
	14	// it under the terms of the GNU General Public License as published by
	15	// the Free Software Foundation, either version 3 of the License, or
	16	// (at your option) any later version.
	17	//
	18	// LbmBenchKernels is distributed in the hope that it will be useful,
	19	// but WITHOUT ANY WARRANTY; without even the implied warranty of
	20	// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	21	// GNU General Public License for more details.
	22	//
	23	// You should have received a copy of the GNU General Public License
	24	// along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>.
	25	//
	26	// --------------------------------------------------------------------------
	27	#ifndef __VECTOR_H__
	28	#define __VECTOR_H__
	29
	30	#if !defined(VECTOR_AVX) && !defined(VECTOR_SSE)
	31	#warning Defining VECTOR_AVX as no ISA extension was selected.
	32	#define VECTOR_AVX
	33	#endif
	34
	35	#if defined(VECTOR_AVX) && defined(VECTOR_SSE)
	36	#error Only VECTOR_AVX or VECTOR_SSE can be defined at the same time.
	37	#endif
	38
	39	#ifdef VECTOR_AVX
	40
	41	#include <immintrin.h>
	42	// Vector size in double-precision floatin-point numbers.
	43	#define VSIZE 4
	44
	45	#define VPDFT __m256d
	46
	47	#define VSET(scalar) _mm256_set1_pd(scalar)
	48
	49	#define VLD(expr) _mm256_load_pd(expr)
	50	#define VLDU(expr) _mm256_loadu_pd(expr)
	51
	52	#define VST(dst, src) _mm256_store_pd(dst, src)
	53	#define VSTU(dst, src) _mm256_storeu_pd(dst, src)
	54	#define VSTNT(dst, src) _mm256_stream_pd(dst, src)
	55
	56	#define VMUL(a, b) _mm256_mul_pd(a, b)
	57	#define VADD(a, b) _mm256_add_pd(a, b)
	58	#define VSUB(a, b) _mm256_sub_pd(a, b)
	59	#endif
	60
	61	#ifdef VECTOR_SSE
	62	#include <emmintrin.h>
	63	// Vector size in double-precision floatin-point numbers.
	64	#define VSIZE 2
65
66	#define VPDFT __m128d
67
68	#define VSET(scalar) _mm_set1_pd(scalar)
69
70	#define VLD(expr) _mm_load_pd(expr)
71	#define VLDU(expr) _mm_loadu_pd(expr)
72
73	#define VST(dst, src) _mm_store_pd(dst, src)
74	#define VSTU(dst, src) _mm_storeu_pd(dst, src)
75	#define VSTNT(dst, src) _mm_stream_pd(dst, src)
76
77	#define VMUL(a, b) _mm_mul_pd(a, b)
78	#define VADD(a, b) _mm_add_pd(a, b)
79	#define VSUB(a, b) _mm_sub_pd(a, b)
80	#endif
81
82
83	#endif // __VECTOR_H__