// --------------------------------------------------------------------------
//
// Copyright
//   Markus Wittmann, 2016-2017
//   RRZE, University of Erlangen-Nuremberg, Germany
//   markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
//
//   Viktor Haag, 2016
//   LSS, University of Erlangen-Nuremberg, Germany
//
//  This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
//
//  LbmBenchKernels is free software: you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation, either version 3 of the License, or
//  (at your option) any later version.
//
//  LbmBenchKernels is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with LbmBenchKernels.  If not, see <http://www.gnu.org/licenses/>.
//
// --------------------------------------------------------------------------
#ifndef __VECTOR_H__
#define __VECTOR_H__

// Thin, ISA-neutral wrappers around double-precision SIMD intrinsics.
//
// Define exactly one of VECTOR_AVX or VECTOR_SSE before including this
// header to choose the instruction set:
//   - neither defined: a warning is emitted and VECTOR_AVX is selected,
//   - both defined:    compilation stops with an error.
//
// Every V* macro expands directly to the matching intrinsic, so the
// restrictions of that intrinsic apply unchanged. In particular the aligned
// and non-temporal memory macros (VLD, VST, VSTNT) require an address that
// is aligned to the vector width (VSIZE * sizeof(double) bytes).

#if !defined(VECTOR_AVX) && !defined(VECTOR_SSE)
    #warning Defining VECTOR_AVX as no ISA extension was selected.
    #define VECTOR_AVX
#endif

#if defined(VECTOR_AVX) && defined(VECTOR_SSE)
    #error Only VECTOR_AVX or VECTOR_SSE can be defined at the same time.
#endif

#ifdef VECTOR_AVX

    // Declares __m256d and the _mm256_*_pd intrinsics.
    #include <immintrin.h>

    // Vector size in double-precision floating-point numbers.
    #define VSIZE   4

    // Vector type holding VSIZE doubles.
    #define VPDFT   __m256d

    // Broadcast one scalar double to all elements of a vector.
    #define VSET(scalar)        _mm256_set1_pd(scalar)

    // Loads: VLD needs a 32-byte aligned address, VLDU accepts any address.
    #define VLD(expr)           _mm256_load_pd(expr)
    #define VLDU(expr)          _mm256_loadu_pd(expr)

    // Stores: VST (aligned), VSTU (unaligned), VSTNT (non-temporal, aligned).
    #define VST(dst, src)       _mm256_store_pd(dst, src)
    #define VSTU(dst, src)      _mm256_storeu_pd(dst, src)
    #define VSTNT(dst, src)     _mm256_stream_pd(dst, src)

    // Element-wise arithmetic.
    #define VMUL(a, b)          _mm256_mul_pd(a, b)
    #define VADD(a, b)          _mm256_add_pd(a, b)
    #define VSUB(a, b)          _mm256_sub_pd(a, b)
#endif

#ifdef VECTOR_SSE

    // Declares __m128d and the SSE2 _mm_*_pd intrinsics.
    #include <emmintrin.h>

    // Vector size in double-precision floating-point numbers.
    #define VSIZE   2

    // Vector type holding VSIZE doubles.
    #define VPDFT   __m128d

    // Broadcast one scalar double to all elements of a vector.
    #define VSET(scalar)        _mm_set1_pd(scalar)

    // Loads: VLD needs a 16-byte aligned address, VLDU accepts any address.
    #define VLD(expr)           _mm_load_pd(expr)
    #define VLDU(expr)          _mm_loadu_pd(expr)

    // Stores: VST (aligned), VSTU (unaligned), VSTNT (non-temporal, aligned).
    #define VST(dst, src)       _mm_store_pd(dst, src)
    #define VSTU(dst, src)      _mm_storeu_pd(dst, src)
    #define VSTNT(dst, src)     _mm_stream_pd(dst, src)

    // Element-wise arithmetic.
    #define VMUL(a, b)          _mm_mul_pd(a, b)
    #define VADD(a, b)          _mm_add_pd(a, b)
    #define VSUB(a, b)          _mm_sub_pd(a, b)
#endif


#endif // __VECTOR_H__