projects
/
LbmBenchmarkKernelsPublic.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
merge with kernels from MH's master thesis
[LbmBenchmarkKernelsPublic.git]
/
src
/
BenchKernelD3Q19.c
diff --git a/src/BenchKernelD3Q19.c b/src/BenchKernelD3Q19.c
index 48529483ca31c1d3ffc52249ba2c7b76d125e6fa..62989c89e747ff32c74fee3906a507c32ce4efa1 100644
(file)
--- a/src/BenchKernelD3Q19.c
+++ b/src/BenchKernelD3Q19.c
@@ -101,6 +101,8 @@ void FNAME(D3Q19Kernel)(LatticeDesc * ld, KernelData * kernelData, CaseData * cd
}
#endif
}
#endif
+ X_KERNEL_START(kernelData);
+
for (int iter = 0; iter < maxIterations; ++iter) {
X_LIKWID_START("os");
for (int iter = 0; iter < maxIterations; ++iter) {
X_LIKWID_START("os");
@@ -125,7 +127,7 @@ void FNAME(D3Q19Kernel)(LatticeDesc * ld, KernelData * kernelData, CaseData * cd
#pragma vector always
#pragma simd
#endif
#pragma vector always
#pragma simd
#endif
- for (int z = oZ; z < nZ + oZ; ++z) {
+ for (int z = oZ; z < nZ + oZ; ++z) {
// LOOP os
#define I(x, y, z, dir) P_INDEX_5(gDims, (x), (y), (z), (dir))
#ifdef PROP_MODEL_PUSH
#define I(x, y, z, dir) P_INDEX_5(gDims, (x), (y), (z), (dir))
#ifdef PROP_MODEL_PUSH
@@ -320,6 +322,7 @@ void FNAME(D3Q19Kernel)(LatticeDesc * ld, KernelData * kernelData, CaseData * cd
// Stop counters before bounce back. Else computing loop balance will be incorrect.
X_LIKWID_STOP("os");
// Stop counters before bounce back. Else computing loop balance will be incorrect.
X_LIKWID_STOP("os");
+
// Fixup bounce back PDFs.
#ifdef _OPENMP
#pragma omp parallel for default(none) \
// Fixup bounce back PDFs.
#ifdef _OPENMP
#pragma omp parallel for default(none) \
@@ -355,6 +358,8 @@ void FNAME(D3Q19Kernel)(LatticeDesc * ld, KernelData * kernelData, CaseData * cd
} // for (int iter = 0; ...
} // for (int iter = 0; ...
+ X_KERNEL_END(kernelData);
+
#ifdef VTK_OUTPUT
if (cd->VtkOutput) {
#ifdef VTK_OUTPUT
if (cd->VtkOutput) {
@@ -445,6 +450,8 @@ void FNAME(D3Q19BlkKernel)(LatticeDesc * ld, KernelData * kernelData, CaseData *
nThreads = omp_get_max_threads();
#endif
nThreads = omp_get_max_threads();
#endif
+ X_KERNEL_START(kernelData);
+
for (int iter = 0; iter < maxIterations; ++iter) {
#ifdef _OPENMP
for (int iter = 0; iter < maxIterations; ++iter) {
#ifdef _OPENMP
@@ -729,6 +736,8 @@ void FNAME(D3Q19BlkKernel)(LatticeDesc * ld, KernelData * kernelData, CaseData *
} // for (int iter = 0; ...
} // for (int iter = 0; ...
+ X_KERNEL_END(kernelData);
+
#ifdef VTK_OUTPUT
if (cd->VtkOutput) {
#ifdef VTK_OUTPUT
if (cd->VtkOutput) {
This page took 0.067073 seconds and 5 git commands to generate.