projects
/
LbmBenchmarkKernelsPublic.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
merge with kernels from MH's master thesis
[LbmBenchmarkKernelsPublic.git]
/
src
/
BenchKernelD3Q19List.c
diff --git
a/src/BenchKernelD3Q19List.c
b/src/BenchKernelD3Q19List.c
index 4adb858b04784a17bf9535c555614647673e2432..0cb98f8b227d2b99ecf5acb6c463dd0aff21851a 100644
(file)
--- a/
src/BenchKernelD3Q19List.c
+++ b/
src/BenchKernelD3Q19List.c
@@
-93,16
+93,15
@@
void FNAME(D3Q19ListKernel)(LatticeDesc * ld, KernelData * kernelData, CaseData
KernelStatistics(kd, ld, cd, 0);
#endif
KernelStatistics(kd, ld, cd, 0);
#endif
- // TODO: outer openmp parallel
- for(int iter = 0; iter < maxIterations; ++iter) {
-
+ X_KERNEL_START(kernelData);
X_LIKWID_START("list-os");
X_LIKWID_START("list-os");
+ // TODO: outer openmp parallel
#ifdef _OPENMP
#ifdef _OPENMP
- #pragma omp parallel
for
default(none) \
+ #pragma omp parallel default(none) \
shared(nFluid, nCells, kd, kdl, adjList, src, dst, w_0, w_1, w_2, omegaEven, omegaOdd, \
shared(nFluid, nCells, kd, kdl, adjList, src, dst, w_0, w_1, w_2, omegaEven, omegaOdd, \
- w_1_x3, w_2_x3, w_1_nine_half, w_2_nine_half, cd) \
+ w_1_x3, w_2_x3, w_1_nine_half, w_2_nine_half, cd
, ld, tmp, maxIterations
) \
private(ux, uy, uz, ui, dens, dir_indep_trm, adjListIndex, \
pdf_C, \
pdf_N, pdf_E, pdf_S, pdf_W, \
private(ux, uy, uz, ui, dens, dir_indep_trm, adjListIndex, \
pdf_C, \
pdf_N, pdf_E, pdf_S, pdf_W, \
@@
-111,10
+110,16
@@
void FNAME(D3Q19ListKernel)(LatticeDesc * ld, KernelData * kernelData, CaseData
pdf_B, pdf_BN, pdf_BE, pdf_BS, pdf_BW, \
evenPart, oddPart, w_1_indep, w_2_indep)
#endif
pdf_B, pdf_BN, pdf_BE, pdf_BS, pdf_BW, \
evenPart, oddPart, w_1_indep, w_2_indep)
#endif
+{
+ for(int iter = 0; iter < maxIterations; ++iter) {
+
+
+
#ifdef INTEL_OPT_DIRECTIVES
#ifdef INTEL_OPT_DIRECTIVES
- #pragma ivdep
#endif
#endif
- for (int index = 0; index < nFluid; ++index) {
+ #pragma omp for
+ #pragma novector
+ for (int index = 0; index < nFluid; ++index) { // LOOP list-os
#define I(index, dir) P_INDEX_3((nCells), (index), (dir))
#define I(index, dir) P_INDEX_3((nCells), (index), (dir))
@@
-311,31
+316,36
@@
void FNAME(D3Q19ListKernel)(LatticeDesc * ld, KernelData * kernelData, CaseData
#undef I
} // loop over fluid nodes
#undef I
} // loop over fluid nodes
- X_LIKWID_STOP("list-os");
+ #pragma omp single
+ {
+ #ifdef VERIFICATION
+ kd->PdfsActive = dst;
+ KernelAddBodyForce(kd, ld, cd);
+ #endif
- #ifdef VERIFICATION
- kd->PdfsActive = dst;
- KernelAddBodyForce(kd, ld, cd);
- #endif
+ #ifdef VTK_OUTPUT
+ if (cd->VtkOutput && (iter % cd->VtkModulus) == 0) {
+ kd->PdfsActive = dst;
+ VtkWrite(ld, kd, cd, iter);
+ }
+ #endif
- #ifdef VTK_OUTPUT
- if (cd->VtkOutput && (iter % cd->VtkModulus) == 0) {
+ #ifdef STATISTICS
kd->PdfsActive = dst;
kd->PdfsActive = dst;
- VtkWrite(ld, kd, cd, iter);
- }
- #endif
+ KernelStatistics(kd, ld, cd, iter);
+ #endif
- #ifdef STATISTICS
- kd->PdfsActive = dst;
- KernelStatistics(kd, ld, cd, iter);
- #endif
+ // swap grids
+ tmp = src;
+ src = dst;
+ dst = tmp;
+ }
+ }
+ } // for (int iter = 0; ...
- // swap grids
- tmp = src;
- src = dst;
- dst = tmp;
+ X_LIKWID_STOP("list-os");
- } // for (int iter = 0; ...
+ X_KERNEL_END(kernelData);
#ifdef VTK_OUTPUT
if (cd->VtkOutput) {
#ifdef VTK_OUTPUT
if (cd->VtkOutput) {
This page took
0.046449 seconds
and
5
git commands to generate.