Commit | Line | Data |
---|---|---|
10988083 MW |
1 | // -------------------------------------------------------------------------- |
2 | // | |
3 | // Copyright | |
4 | // Markus Wittmann, 2016-2017 | |
5 | // RRZE, University of Erlangen-Nuremberg, Germany | |
6 | // markus.wittmann -at- fau.de or hpc -at- rrze.fau.de | |
7 | // | |
8 | // Viktor Haag, 2016 | |
9 | // LSS, University of Erlangen-Nuremberg, Germany | |
10 | // | |
8cafd9ea MW |
11 | // Michael Hussnaetter, 2017-2018 |
12 | // University of Erlangen-Nuremberg, Germany | |
13 | // michael.hussnaetter -at- fau.de | |
14 | // | |
10988083 MW |
15 | // This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels). |
16 | // | |
17 | // LbmBenchKernels is free software: you can redistribute it and/or modify | |
18 | // it under the terms of the GNU General Public License as published by | |
19 | // the Free Software Foundation, either version 3 of the License, or | |
20 | // (at your option) any later version. | |
21 | // | |
22 | // LbmBenchKernels is distributed in the hope that it will be useful, | |
23 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
24 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
25 | // GNU General Public License for more details. | |
26 | // | |
27 | // You should have received a copy of the GNU General Public License | |
28 | // along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>. | |
29 | // | |
30 | // -------------------------------------------------------------------------- | |
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h> // strcasecmp

#include <inttypes.h>
37 | ||
38 | #ifdef _OPENMP | |
39 | #include <omp.h> | |
40 | #endif | |
41 | ||
42 | #include "Base.h" | |
43 | #include "Kernel.h" | |
44 | #include "Memory.h" | |
45 | ||
46 | #include "Lattice.h" | |
47 | #include "Geometry.h" | |
48 | #include "Pinning.h" | |
49 | #include "LikwidIf.h" | |
50 | ||
51 | #include "KernelFunctions.h" | |
52 | ||
53 | #ifdef __x86_64__ | |
54 | #include <xmmintrin.h> | |
55 | ||
56 | ||
57 | #define MXCSR_DAZ 6 | |
58 | #define MXCSR_FTZ 15 | |
59 | ||
60 | ||
// Tests whether all bits given in `mask` are set in the MXCSR register
// value returned by _mm_getcsr().
//
// mask: bit mask to test against the register value.
//
// Returns 1 if every bit of `mask` is set in the register, 0 otherwise.
// A mask of 0 therefore always yields 1.
int FpIsMxCsrMaskSet(unsigned int mask)
{
    const unsigned int selectedBits = _mm_getcsr() & mask;

    if (selectedBits != mask) {
        return 0;
    }

    return 1;
}
72 | ||
73 | int FpGetFtz() | |
74 | { | |
75 | return FpIsMxCsrMaskSet(1 << MXCSR_FTZ); | |
76 | } | |
77 | ||
78 | int FpGetDaz() | |
79 | { | |
80 | return FpIsMxCsrMaskSet(1 << MXCSR_DAZ); | |
81 | } | |
82 | #endif | |
83 | ||
84 | ||
// Parses one dimension value located at the start of `text` (leading
// whitespace is skipped by strtol) and reports problems for `axis` on stdout.
//
// Returns 1 and stores the value in *value if it is in the range
// [1, INT_MAX]; returns 0 and leaves *value untouched otherwise.
static int ParseDimensionValue(const char * text, char axis, int * value)
{
    char * end = NULL;
    long parsed;

    errno = 0;
    parsed = strtol(text, &end, 10);

    // No digits at all yields 0 and is reported like any non-positive value.
    if (end == text || parsed <= 0) {
        printf("ERROR: parameter for %c dimension must be > 0.\n", axis);
        return 0;
    }

    // atoi() would silently truncate such values; reject them instead.
    if (errno == ERANGE || parsed > INT_MAX) {
        printf("ERROR: parameter for %c dimension is out of range.\n", axis);
        return 0;
    }

    *value = (int)parsed;

    return 1;
}

// Parses a dimension specification of the form "<X>x<Y>x<Z>".
//
// parameter: string to parse; the three values are separated by 'x'.
// nX/nY/nZ:  receive the parsed dimensions; each is only written when its
//            value has been parsed successfully.
//
// Returns 1 on success. Returns 0 after printing an error message to stdout
// if a value is missing, not positive, or does not fit into an int.
int ParseDimensions(const char * parameter, int * nX, int * nY, int * nZ)
{
    const char * separator;

    if (!ParseDimensionValue(parameter, 'X', nX)) {
        return 0;
    }

    separator = strchr(parameter, 'x');

    if (separator == NULL) {
        printf("ERROR: parameter for Y dimension is missing.\n");
        return 0;
    }

    if (!ParseDimensionValue(separator + 1, 'Y', nY)) {
        return 0;
    }

    separator = strchr(separator + 1, 'x');

    if (separator == NULL) {
        printf("ERROR: parameter for Z dimension is missing.\n");
        return 0;
    }

    if (!ParseDimensionValue(separator + 1, 'Z', nZ)) {
        return 0;
    }

    return 1;
}
126 | ||
127 | int main(int argc, char * argv[]) | |
128 | { | |
129 | int dims[3] = { 20, 20, 20 }; // Dimensions in x, y, and z direction | |
130 | const char * geometryType = "channel"; | |
131 | // int latticeDumpAscii = 0; | |
132 | int verify = 0; UNUSED(verify); | |
133 | char * kernelToUse = NULL; | |
134 | int nThreads = 1; | |
135 | const char * pinString = NULL; | |
136 | int periodic[3] = { 0 }; | |
137 | ||
138 | CaseData cd; | |
139 | ||
0fde6e45 MW |
140 | cd.MaxIterations = 10; |
141 | cd.RhoIn = F(1.0); | |
142 | cd.RhoOut = F(1.0); | |
143 | cd.Omega = F(1.0); | |
10988083 MW |
144 | cd.VtkOutput = 0; |
145 | cd.VtkModulus = 100; | |
146 | cd.StatisticsModulus = 100; | |
0fde6e45 | 147 | cd.XForce = F(0.00001); |
10988083 MW |
148 | kernelToUse = "push-soa"; |
149 | ||
150 | Parameters p; | |
151 | p.nArgs = argc; | |
152 | p.Args = argv; | |
153 | p.nKernelArgs = 0; | |
154 | p.KernelArgs = NULL; | |
155 | ||
156 | #define LBM_BENCH_KERNELS_VERSION_MAJOR 0 | |
157 | #define LBM_BENCH_KERNELS_VERSION_MINOR 1 | |
158 | ||
8cafd9ea | 159 | printf("Lattice Boltzmann Benchmark Kernels (LbmBenchKernels) Copyright (C) 2016, 2017, 2018 LSS, RRZE\n"); |
10988083 MW |
160 | printf("This program comes with ABSOLUTELY NO WARRANTY; for details see LICENSE.\n"); |
161 | printf("This is free software, and you are welcome to redistribute it under certain conditions.\n"); | |
162 | printf("\n"); | |
0fde6e45 | 163 | printf("# LBM Benchmark Kernels %d.%d, compiled %s %s, type: %s\n", |
10988083 MW |
164 | LBM_BENCH_KERNELS_VERSION_MAJOR, LBM_BENCH_KERNELS_VERSION_MINOR, __DATE__, __TIME__, |
165 | #ifdef VERIFICATION | |
166 | "verification" | |
167 | #else | |
168 | "benchmark" | |
169 | #endif | |
170 | ); | |
171 | ||
172 | // ---------------------------------------------------------------------- | |
173 | // Parse command line arguments | |
174 | ||
175 | #define ARG_IS(param) (!strcmp(argv[i], param)) | |
176 | #define NEXT_ARG_PRESENT() \ | |
177 | do { \ | |
178 | if (i + 1 >= argc) { \ | |
179 | printf("ERROR: argument %s requires a parameter.\n", argv[i]); \ | |
180 | return 1; \ | |
181 | } \ | |
182 | } while (0) | |
183 | ||
184 | for (int i = 1; i < argc; ++i) { | |
185 | ||
186 | if (ARG_IS("-dims") || ARG_IS("--dims")) { | |
187 | NEXT_ARG_PRESENT(); | |
188 | ||
189 | ||
190 | if (!ParseDimensions(argv[++i], &dims[0], &dims[1], &dims[2])) { | |
191 | return 1; | |
192 | } | |
193 | } | |
194 | // else if (ARG_IS("-lattice-dump-ascii") || ARG_IS("--lattice-dump-ascii")) { | |
195 | // latticeDumpAscii = 1; | |
196 | // } | |
197 | else if (ARG_IS("-geometry") || ARG_IS("--geometry")) { | |
198 | NEXT_ARG_PRESENT(); | |
199 | ||
200 | geometryType = argv[++i]; | |
201 | } | |
202 | else if (ARG_IS("-iterations") ||ARG_IS("--iterations")) { | |
203 | NEXT_ARG_PRESENT(); | |
204 | ||
205 | cd.MaxIterations = strtol(argv[++i], NULL, 0); | |
206 | ||
207 | if (cd.MaxIterations <= 0) { | |
208 | printf("ERROR: number of iterations must be > 0.\n"); | |
209 | return 1; | |
210 | } | |
211 | } | |
212 | else if (ARG_IS("-rho-in") ||ARG_IS("--rho-in")) { | |
213 | NEXT_ARG_PRESENT(); | |
214 | ||
0fde6e45 | 215 | cd.RhoIn = F(strtod(argv[++i], NULL)); |
10988083 MW |
216 | } |
217 | else if (ARG_IS("-rho-out") ||ARG_IS("--rho-out")) { | |
218 | NEXT_ARG_PRESENT(); | |
219 | ||
0fde6e45 | 220 | cd.RhoOut = F(strtod(argv[++i], NULL)); |
10988083 MW |
221 | } |
222 | else if (ARG_IS("-omega") ||ARG_IS("--omega")) { | |
223 | NEXT_ARG_PRESENT(); | |
224 | ||
0fde6e45 | 225 | cd.Omega = F(strtod(argv[++i], NULL)); |
10988083 MW |
226 | } |
227 | else if (ARG_IS("-x-force") ||ARG_IS("--x-force")) { | |
228 | NEXT_ARG_PRESENT(); | |
229 | ||
0fde6e45 | 230 | cd.XForce = F(strtod(argv[++i], NULL)); |
10988083 MW |
231 | } |
232 | else if (ARG_IS("-verify") || ARG_IS("--verify")) { | |
233 | #ifdef VERIFICATION | |
234 | ||
235 | // Choose this preset for verification. As geometry type "box" is | |
0fde6e45 | 236 | // used but x and y direction are made periodic. |
10988083 MW |
237 | // Everything else can be altered, but enough iterations should be |
238 | // performed in order to receive a fully developed flow field. | |
239 | verify = 1; | |
240 | ||
0fde6e45 MW |
241 | cd.Omega = F(1.0); |
242 | cd.RhoIn = F(1.0); | |
243 | cd.RhoOut = F(1.0); | |
10988083 MW |
244 | geometryType = "box"; |
245 | dims[0] = 16; | |
246 | dims[1] = 16; | |
247 | dims[2] = 16; | |
0fde6e45 | 248 | cd.XForce = F(0.00001); |
10988083 MW |
249 | cd.MaxIterations = 1000; |
250 | periodic[0] = 1; | |
251 | periodic[1] = 1; | |
252 | periodic[2] = 0; | |
253 | ||
254 | printf("#\n"); | |
255 | printf("# VERIFICATION: verifying flow profile of channel flow.\n"); | |
256 | printf("#\n"); | |
257 | ||
258 | // TODO: this is not a good idea as we ignore all other options... | |
259 | ||
260 | #else | |
261 | printf("ERROR: in order to use -verify VERIFICATION must be defined during compilation.\n"); | |
262 | printf(" Recompile with VERIFICATION=on.\n"); | |
263 | return 1; | |
264 | #endif | |
265 | } | |
266 | else if (ARG_IS("-vtk") || ARG_IS("--vtk")) { | |
267 | #ifdef VTK_OUTPUT | |
268 | ||
269 | cd.VtkOutput = 1; | |
270 | ||
271 | // If the next parameter is a number it is used as the itartion count, | |
272 | // if not it is probably another parameter. | |
273 | if (i + 1 < argc) { | |
274 | ||
275 | int vtkModulus = atoi(argv[i+1]); | |
276 | ||
277 | if (vtkModulus > 0) { | |
278 | cd.VtkModulus = vtkModulus; | |
279 | ++i; | |
280 | } | |
281 | } | |
282 | #else | |
283 | printf("ERROR: in order to use -vtk VTK_OUTPUT must be defined during compilation.\n"); | |
284 | printf(" Recompile with VTK_OUTPUT=on.\n"); | |
285 | return 1; | |
286 | #endif | |
287 | } | |
288 | else if (ARG_IS("-statistics") || ARG_IS("--statistics")) { | |
289 | #ifdef STATISTICS | |
290 | NEXT_ARG_PRESENT(); | |
291 | ||
292 | cd.StatisticsModulus = atoi(argv[++i]); | |
293 | ||
294 | if (cd.StatisticsModulus <= 0) { | |
295 | printf("ERROR: the iteration count for -statistics must be > 0.\n"); | |
296 | return 1; | |
297 | } | |
298 | #else | |
299 | printf("ERROR: in order to use -statistics STATISTICS must be defined during compilation.\n"); | |
300 | printf(" Recompile with STATISTICS=on.\n"); | |
301 | return 1; | |
302 | #endif | |
303 | } | |
304 | else if (ARG_IS("-kernel") || ARG_IS("--kernel")) { | |
305 | NEXT_ARG_PRESENT(); | |
306 | ||
307 | kernelToUse = argv[++i]; | |
308 | } | |
309 | else if (ARG_IS("-list") || ARG_IS("--list")) { | |
310 | printf("Available kernels to benchmark:\n"); | |
311 | ||
312 | for (int j = 0; j < N_ELEMS(g_kernels); ++j) { | |
313 | printf(" %s\n", g_kernels[j].Name); | |
314 | } | |
315 | ||
316 | return 0; | |
317 | } | |
318 | else if (ARG_IS("-pin") || ARG_IS("--pin")) { | |
319 | NEXT_ARG_PRESENT(); | |
320 | ||
321 | pinString = argv[++i]; | |
322 | } | |
323 | else if (ARG_IS("-t") || ARG_IS("-threads") || ARG_IS("--threads")) { | |
324 | #ifdef _OPENMP | |
325 | NEXT_ARG_PRESENT(); | |
326 | ||
327 | nThreads = atoi(argv[++i]); | |
328 | ||
329 | if (nThreads <= 0) { | |
330 | printf("ERROR: number of threads must be > 0.\n"); | |
331 | return 1; | |
332 | } | |
333 | #else | |
334 | printf("ERROR: specifying number of threads is only available when compiled with OpenMP support.\n"); | |
335 | return 1; | |
336 | #endif | |
337 | } | |
338 | else if (ARG_IS("-periodic-x") || ARG_IS("--periodic-x")) { | |
339 | periodic[0] = 1; | |
340 | } | |
341 | else if (ARG_IS("-periodic-y") || ARG_IS("--periodic-y")) { | |
342 | periodic[1] = 1; | |
343 | } | |
344 | else if (ARG_IS("-periodic-z") || ARG_IS("--periodic-z")) { | |
345 | periodic[2] = 1; | |
346 | } | |
347 | else if (ARG_IS("-h") || ARG_IS("-help") || ARG_IS("--help")) { | |
348 | printf("ERROR: unknown argument: %s\n", argv[i]); | |
349 | printf("\n"); | |
350 | printf("Usage:\n"); | |
351 | printf("./lbmbenchk -list\n"); | |
352 | printf("./lbmbenchk \n"); | |
8cafd9ea | 353 | printf(" [-dims XxYyZ] [-geometry box|channel|pipe|blocks[-value]] [-iterations <iterations>] [-lattice-dump-ascii]\n"); |
10988083 MW |
354 | printf(" [-rho-in <density>] [-rho-out <density] [-omega <omega>] [-kernel <kernel>]\n"); |
355 | printf(" [-periodic-x]\n"); | |
356 | #ifdef STATISTICS | |
357 | printf(" [-statistics <every-n-iteration>]\n"); | |
358 | #endif | |
359 | #ifdef VTK_OUTPUT | |
360 | printf(" [-vtk [<every-n-iteration>]]\n"); | |
361 | #endif | |
362 | #ifdef _OPENMP | |
363 | printf(" [-t <number of threads>]\n"); | |
364 | #endif | |
365 | printf(" [-pin core{,core}*]\n"); | |
366 | #ifdef VERIFICATION | |
367 | printf(" [-verify]\n"); | |
368 | #endif | |
369 | printf(" -- <kernel specific parameters>\n"); | |
370 | printf("\n"); | |
371 | printf("-list List available kernels.\n"); | |
372 | printf("\n"); | |
373 | printf("-dims XxYxZ Specify geometry dimensions.\n"); | |
374 | printf("\n"); | |
8cafd9ea | 375 | printf("-geometry blocks-<value>\n"); |
10988083 MW |
376 | printf(" Geometetry with blocks of size <value> regularily layout out.\n"); |
377 | printf("\n"); | |
378 | return 1; | |
379 | } | |
380 | else if (ARG_IS("--")) { | |
381 | // printf("# kernel args start with %s these are %d args.\n", argv[i + 1], argc - i - 1); | |
382 | p.KernelArgs = &argv[++i]; | |
383 | p.nKernelArgs = argc - i; | |
384 | break; | |
385 | } | |
386 | else { | |
387 | printf("ERROR: unknown parameter: %s.\n", argv[i]); | |
388 | exit(1); | |
389 | } | |
390 | } | |
391 | ||
392 | #undef ARG_IS | |
393 | #undef NEXT_ARG_PRESENT | |
394 | ||
395 | ||
396 | // ---------------------------------------------------------------------- | |
397 | // Check if we exceed our index addressing PDFs. | |
398 | ||
399 | { | |
400 | uint64_t nPdfs = ((uint64_t)19) * dims[0] * dims[1] * dims[2]; | |
401 | ||
402 | if (nPdfs > ((2LU << 31) - 1)) { | |
403 | printf("ERROR: number of PDFs exceed 2^31.\n"); | |
404 | exit(1); | |
405 | } | |
406 | } | |
407 | ||
408 | // ---------------------------------------------------------------------- | |
409 | ||
410 | #ifdef _OPENMP | |
411 | omp_set_num_threads(nThreads); | |
412 | #endif | |
413 | ||
10988083 | 414 | const char * defines[] = { |
0fde6e45 MW |
415 | #ifdef DEBUG |
416 | "DEBUG", | |
417 | #endif | |
10988083 MW |
418 | #ifdef VTK_OUTPUT |
419 | "VTK_OUTPUT", | |
420 | #endif | |
421 | #ifdef STATISTICS | |
422 | "STATISTICS", | |
423 | #endif | |
424 | #ifdef VERIFICATION | |
425 | "VERIFICATION", | |
426 | #endif | |
427 | #ifdef _OPENMP | |
428 | "_OPENMP", | |
429 | #endif | |
430 | #ifdef HAVE_LIKWID | |
431 | "HAVE_LIKWID", | |
0fde6e45 MW |
432 | #endif |
433 | #ifdef INTEL_OPT_DIRECTIVES | |
434 | "INTEL_OPT_DIRECTIVES", | |
10988083 MW |
435 | #endif |
436 | }; | |
437 | ||
0fde6e45 MW |
438 | printf("#\n"); |
439 | ||
440 | #ifdef PRECISION_DP | |
441 | printf("# - floating point: double precision (%lu b, PRECISION_DP defined)\n", sizeof(PdfT)); | |
442 | #elif defined(PRECISION_SP) | |
443 | printf("# - floating point: single precision (%lu b, PRECISION_SP defined)\n", sizeof(PdfT)); | |
444 | #else | |
445 | printf("# - floating point: UNKNOWN (%lu b)\n", sizeof(PdfT)); | |
446 | #endif | |
447 | ||
8cafd9ea MW |
448 | #if defined(VECTOR_AVX512) |
449 | printf("# - intrinsics: AVX512 (VECTOR_AVX512 defined)\n"); | |
450 | #elif defined(VECTOR_AVX) | |
0fde6e45 MW |
451 | printf("# - intrinsics: AVX (VECTOR_AVX defined)\n"); |
452 | #elif defined(VECTOR_SSE) | |
453 | printf("# - intrinsics: SSE (VECTOR_SSE defined)\n"); | |
454 | #else | |
455 | printf("# - intrinsics: UNKNOWN\n"); | |
456 | #endif | |
457 | ||
9e0051cb MW |
458 | #if defined(VECTOR_AVX512_GATHER) |
459 | printf("# - intrinsics: AVX512 gather (VECTOR_AVX512_GATHER defined)\n"); | |
460 | #endif | |
461 | ||
0fde6e45 | 462 | printf("# - defines: "); |
10988083 MW |
463 | for (int j = 0; j < N_ELEMS(defines); ++j) { |
464 | printf("%s ", defines[j]); | |
465 | } | |
466 | printf("\n"); | |
467 | ||
0fde6e45 MW |
468 | #ifdef __x86_64__ |
469 | printf("# - fp status: DAZ: %d FTZ: %d\n", FpGetDaz(), FpGetFtz()); | |
470 | #endif | |
471 | ||
472 | printf("# - iterations: %d\n", cd.MaxIterations); | |
473 | ||
474 | LatticeDesc ld; | |
475 | ||
476 | GeoCreateByStr(geometryType, dims, periodic, &ld); | |
477 | ||
478 | printf("# - geometry:\n"); | |
479 | printf("# type: %s\n", ld.Name); | |
480 | printf("# dimensions: %d x %d x %d (x, y, z)\n", ld.Dims[0], ld.Dims[1], ld.Dims[2]); | |
481 | ||
8cafd9ea MW |
482 | printf("# nodes total: % 10d\n", ld.nObst + ld.nFluid); |
483 | printf("# nodes fluid: % 10d (including inlet & outlet)\n", ld.nFluid); | |
484 | printf("# nodes obstacles: % 10d\n", ld.nObst); | |
485 | printf("# nodes inlet: % 10d\n", ld.nInlet); | |
486 | printf("# nodes outlet: % 10d\n", ld.nOutlet); | |
0fde6e45 | 487 | printf("# periodicity: x: %d y: %d z: %d\n", ld.PeriodicX, ld.PeriodicY, ld.PeriodicZ); |
10988083 MW |
488 | |
489 | #ifdef VTK_OUTPUT | |
0fde6e45 | 490 | printf("# - VTK output: %d (every %d iteration)\n", cd.VtkOutput, cd.VtkModulus); |
10988083 MW |
491 | #endif |
492 | #ifdef STATISTICS | |
0fde6e45 | 493 | printf("# - statistics: every %d iteration\n", cd.StatisticsModulus); |
10988083 MW |
494 | #endif |
495 | ||
0fde6e45 MW |
496 | printf("# - flow:\n"); |
497 | printf("# omega: %f\n", cd.Omega); | |
498 | printf("# initial density at inlet/outlet:\n"); | |
499 | printf("# rho in: %e\n", cd.RhoIn); | |
500 | printf("# rho out: %e\n", cd.RhoOut); | |
10988083 MW |
501 | |
502 | #ifdef _OPENMP | |
0fde6e45 | 503 | printf("# - OpenMP threads: %d\n", omp_get_max_threads()); |
10988083 MW |
504 | |
505 | if (pinString != NULL) { | |
506 | #pragma omp parallel | |
507 | { | |
508 | int threadId = omp_get_thread_num(); | |
509 | int err; | |
510 | ||
e3f82424 | 511 | err = PinCurrentThreadByCpuList(pinString, threadId); |
10988083 MW |
512 | |
513 | if (err) { | |
514 | printf("ERROR [thread %d]: pinning failed.\n", threadId); | |
515 | exit(1); | |
516 | } | |
517 | ||
518 | const char * cpuList = PinCpuListAsString(); | |
519 | Assert(cpuList != NULL); | |
520 | ||
521 | // Not so nice hack to print the thread ids ordered. | |
522 | #pragma omp for ordered | |
523 | for (int i = 0; i < omp_get_num_threads(); ++i) { | |
524 | #pragma omp ordered | |
0fde6e45 | 525 | printf("# thread %2d pinned to core(s): %s\n", threadId, cpuList); |
10988083 MW |
526 | } |
527 | ||
528 | free((void *)cpuList); | |
529 | } | |
530 | } | |
531 | #endif | |
532 | ||
533 | KernelData * kd; | |
534 | ||
535 | KernelFunctions * kf = NULL; | |
536 | ||
537 | if (kernelToUse == NULL) { | |
538 | kf = &g_kernels[0]; | |
539 | } | |
540 | else { | |
541 | for (int j = 0; j < N_ELEMS(g_kernels); ++j) { | |
542 | ||
543 | if (!strcasecmp(kernelToUse, g_kernels[j].Name)) { | |
544 | kf = &g_kernels[j]; | |
545 | break; | |
546 | } | |
547 | } | |
548 | } | |
549 | ||
550 | if (kf == NULL) { | |
551 | printf("ERROR: requested kernel \"%s\" not found.\n", kernelToUse); | |
552 | exit(1); | |
553 | } | |
554 | ||
555 | printf("#\n"); | |
0fde6e45 | 556 | printf("# - kernel: %s\n", kf->Name); |
10988083 MW |
557 | printf("#\n"); |
558 | ||
559 | // Initialize kernel by calling its own initialization function | |
560 | kf->Init(&ld, &kd, &p); | |
561 | ||
562 | #ifdef VERIFICATION | |
563 | if (verify) { | |
564 | KernelSetInitialDensity( &ld, kd, &cd); | |
565 | KernelSetInitialVelocity(&ld, kd, &cd); | |
566 | } | |
567 | #endif | |
568 | ||
569 | printf("# starting kernel...\n"); | |
570 | ||
571 | X_LIKWID_INIT(); | |
572 | ||
10988083 MW |
573 | // Call the LBM kernel |
574 | kd->Kernel(&ld, kd, &cd); | |
575 | ||
10988083 MW |
576 | X_LIKWID_DEINIT(); |
577 | ||
578 | // Print some statistics... | |
579 | KernelStatisticsAdv(kd, &ld, &cd, cd.MaxIterations, 1 /* force output */); | |
580 | ||
581 | #ifdef VERIFICATION | |
582 | PdfT errorNorm = -1.0; | |
583 | KernelVerifiy(&ld, kd, &cd, &errorNorm); | |
584 | #endif | |
585 | ||
8cafd9ea MW |
586 | double duration = kd->Duration; |
587 | double loopBalance = kd->LoopBalance; | |
588 | double dataVolGByte = loopBalance * ld.nFluid * cd.MaxIterations / 1024. / 1024. / 1024.; | |
589 | double bandwidthGBytePerS = dataVolGByte / duration; | |
590 | ||
10988083 MW |
591 | // Deinitialize kernel by calling its own deinitialization function |
592 | kf->Deinit(&ld, &kd); | |
593 | ||
10988083 MW |
594 | double perf = (double)ld.nFluid * (double)cd.MaxIterations / duration / 1.e6; |
595 | ||
8cafd9ea MW |
596 | printf("#\n"); |
597 | printf("# Evaluation Stats\n"); | |
598 | #ifdef VERIFICATION | |
599 | printf("# runtype: \t%s \n", "verification"); | |
600 | #else | |
601 | printf("# runtype: \t%s \n", "benchmark"); | |
602 | #endif | |
603 | printf("# runtime: \t%.3f s\n", duration); | |
604 | printf("# iterations: \t%d \n", cd.MaxIterations); | |
605 | printf("# fluid cells: \t%d \n", ld.nFluid); | |
606 | printf("# Derived metrics\n"); | |
607 | printf("# MEM data vol.: \t%.2f GByte\n", dataVolGByte); | |
608 | printf("# MEM bandwidth: \t%.2f GByte/s\n", bandwidthGBytePerS); | |
609 | printf("# performance: \t%.3f MFLUP/s\n", perf); | |
610 | ||
611 | ||
0fde6e45 | 612 | printf("P: %f MFLUP/s t: %d d: %f s iter: %d fnodes: %f x1e6 geo: %s kernel: %s %s %s\n", |
10988083 MW |
613 | perf, nThreads, duration, cd.MaxIterations, ld.nFluid / 1e6, |
614 | geometryType, kernelToUse, | |
615 | #ifdef VERIFICATION | |
0fde6e45 MW |
616 | "VERIFICATION", |
617 | #else | |
618 | "B", | |
619 | #endif | |
620 | #ifdef PRECISION_DP | |
621 | "dp" | |
622 | #elif defined(PRECISION_SP) | |
623 | "sp" | |
10988083 | 624 | #else |
0fde6e45 | 625 | "unknown-precision" |
10988083 MW |
626 | #endif |
627 | ); | |
628 | ||
629 | int exitCode = 0; | |
630 | ||
631 | #ifdef VERIFICATION | |
632 | ||
633 | if (verify) { | |
634 | printf("# VERIFICATION: deviation from analytical solution: %e\n", errorNorm); | |
635 | ||
636 | if (errorNorm > 0.1) { | |
637 | printf("# VERIFICATION FAILED.\n"); | |
638 | exitCode = 1; | |
639 | } | |
640 | else { | |
641 | printf("# VERIFICATION SUCCEEDED.\n"); | |
642 | } | |
643 | } | |
644 | #else | |
645 | // printf("# VERIFICATION: deviation from analytical solution: %e\n", errorNorm); | |
646 | // printf("# VERIFICATION: this is only valid for pipe geometry with enough iterations performed.\n"); | |
647 | #endif | |
648 | ||
649 | MemFree((void **)&ld.Lattice); | |
650 | ||
651 | return exitCode; | |
652 | } |