merge with kernels from MH's master thesis
[LbmBenchmarkKernelsPublic.git] / src / Main.c
CommitLineData
10988083
MW
1// --------------------------------------------------------------------------
2//
3// Copyright
4// Markus Wittmann, 2016-2017
5// RRZE, University of Erlangen-Nuremberg, Germany
6// markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
7//
8// Viktor Haag, 2016
9// LSS, University of Erlangen-Nuremberg, Germany
10//
8cafd9ea
MW
11// Michael Hussnaetter, 2017-2018
12// University of Erlangen-Nuremberg, Germany
13// michael.hussnaetter -at- fau.de
14//
10988083
MW
15// This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
16//
17// LbmBenchKernels is free software: you can redistribute it and/or modify
18// it under the terms of the GNU General Public License as published by
19// the Free Software Foundation, either version 3 of the License, or
20// (at your option) any later version.
21//
22// LbmBenchKernels is distributed in the hope that it will be useful,
23// but WITHOUT ANY WARRANTY; without even the implied warranty of
24// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25// GNU General Public License for more details.
26//
27// You should have received a copy of the GNU General Public License
28// along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>.
29//
30// --------------------------------------------------------------------------
31#include <stdio.h>
32#include <stdlib.h>
33#include <string.h>
34#include <strings.h> // strcasecmp
35
36#include <inttypes.h>
37
38#ifdef _OPENMP
39#include <omp.h>
40#endif
41
42#include "Base.h"
43#include "Kernel.h"
44#include "Memory.h"
45
46#include "Lattice.h"
47#include "Geometry.h"
48#include "Pinning.h"
49#include "LikwidIf.h"
50
51#include "KernelFunctions.h"
52
53#ifdef __x86_64__
54 #include <xmmintrin.h>
55
56
57 #define MXCSR_DAZ 6
58 #define MXCSR_FTZ 15
59
60
61 int FpIsMxCsrMaskSet(unsigned int mask)
62 {
63 unsigned int mxcsr;
64 unsigned int mxcsrNew;
65
66 mxcsr = _mm_getcsr();
67
68 mxcsrNew = mxcsr & mask;
69
70 return (mxcsrNew == mask);
71 }
72
73 int FpGetFtz()
74 {
75 return FpIsMxCsrMaskSet(1 << MXCSR_FTZ);
76 }
77
78 int FpGetDaz()
79 {
80 return FpIsMxCsrMaskSet(1 << MXCSR_DAZ);
81 }
82#endif
83
84
85int ParseDimensions(const char * parameter, int * nX, int * nY, int * nZ)
86{
87 char * tmp;
88
89 *nX = atoi(parameter);
90
91 if (*nX <= 0) {
92 printf("ERROR: parameter for X dimension must be > 0.\n");
93 return 0;
94 }
95
96 tmp = strchr(parameter, 'x');
97
98 if (tmp == NULL) {
99 printf("ERROR: parameter for Y dimension is missing.\n");
100 return 0;
101 }
102
103 *nY = atoi(tmp + 1);
104
105 if (*nY <= 0) {
106 printf("ERROR: parameter for Y dimension must be > 0.\n");
107 return 0;
108 }
109
110 tmp = strchr(tmp + 1, 'x');
111
112 if (tmp == NULL) {
113 printf("ERROR: parameter for Z dimension is missing.\n");
114 return 0;
115 }
116
117 *nZ = atoi(tmp + 1);
118
119 if (*nZ <= 0) {
120 printf("ERROR: parameter for Z dimension must be > 0.\n");
121 return 0;
122 }
123
124 return 1;
125}
126
127int main(int argc, char * argv[])
128{
129 int dims[3] = { 20, 20, 20 }; // Dimensions in x, y, and z direction
130 const char * geometryType = "channel";
131 // int latticeDumpAscii = 0;
132 int verify = 0; UNUSED(verify);
133 char * kernelToUse = NULL;
134 int nThreads = 1;
135 const char * pinString = NULL;
136 int periodic[3] = { 0 };
137
138 CaseData cd;
139
0fde6e45
MW
140 cd.MaxIterations = 10;
141 cd.RhoIn = F(1.0);
142 cd.RhoOut = F(1.0);
143 cd.Omega = F(1.0);
10988083
MW
144 cd.VtkOutput = 0;
145 cd.VtkModulus = 100;
146 cd.StatisticsModulus = 100;
0fde6e45 147 cd.XForce = F(0.00001);
10988083
MW
148 kernelToUse = "push-soa";
149
150 Parameters p;
151 p.nArgs = argc;
152 p.Args = argv;
153 p.nKernelArgs = 0;
154 p.KernelArgs = NULL;
155
156#define LBM_BENCH_KERNELS_VERSION_MAJOR 0
157#define LBM_BENCH_KERNELS_VERSION_MINOR 1
158
8cafd9ea 159 printf("Lattice Boltzmann Benchmark Kernels (LbmBenchKernels) Copyright (C) 2016, 2017, 2018 LSS, RRZE\n");
10988083
MW
160 printf("This program comes with ABSOLUTELY NO WARRANTY; for details see LICENSE.\n");
161 printf("This is free software, and you are welcome to redistribute it under certain conditions.\n");
162 printf("\n");
0fde6e45 163 printf("# LBM Benchmark Kernels %d.%d, compiled %s %s, type: %s\n",
10988083
MW
164 LBM_BENCH_KERNELS_VERSION_MAJOR, LBM_BENCH_KERNELS_VERSION_MINOR, __DATE__, __TIME__,
165#ifdef VERIFICATION
166 "verification"
167#else
168 "benchmark"
169#endif
170 );
171
172 // ----------------------------------------------------------------------
173 // Parse command line arguments
174
175 #define ARG_IS(param) (!strcmp(argv[i], param))
176 #define NEXT_ARG_PRESENT() \
177 do { \
178 if (i + 1 >= argc) { \
179 printf("ERROR: argument %s requires a parameter.\n", argv[i]); \
180 return 1; \
181 } \
182 } while (0)
183
184 for (int i = 1; i < argc; ++i) {
185
186 if (ARG_IS("-dims") || ARG_IS("--dims")) {
187 NEXT_ARG_PRESENT();
188
189
190 if (!ParseDimensions(argv[++i], &dims[0], &dims[1], &dims[2])) {
191 return 1;
192 }
193 }
194 // else if (ARG_IS("-lattice-dump-ascii") || ARG_IS("--lattice-dump-ascii")) {
195 // latticeDumpAscii = 1;
196 // }
197 else if (ARG_IS("-geometry") || ARG_IS("--geometry")) {
198 NEXT_ARG_PRESENT();
199
200 geometryType = argv[++i];
201 }
202 else if (ARG_IS("-iterations") ||ARG_IS("--iterations")) {
203 NEXT_ARG_PRESENT();
204
205 cd.MaxIterations = strtol(argv[++i], NULL, 0);
206
207 if (cd.MaxIterations <= 0) {
208 printf("ERROR: number of iterations must be > 0.\n");
209 return 1;
210 }
211 }
212 else if (ARG_IS("-rho-in") ||ARG_IS("--rho-in")) {
213 NEXT_ARG_PRESENT();
214
0fde6e45 215 cd.RhoIn = F(strtod(argv[++i], NULL));
10988083
MW
216 }
217 else if (ARG_IS("-rho-out") ||ARG_IS("--rho-out")) {
218 NEXT_ARG_PRESENT();
219
0fde6e45 220 cd.RhoOut = F(strtod(argv[++i], NULL));
10988083
MW
221 }
222 else if (ARG_IS("-omega") ||ARG_IS("--omega")) {
223 NEXT_ARG_PRESENT();
224
0fde6e45 225 cd.Omega = F(strtod(argv[++i], NULL));
10988083
MW
226 }
227 else if (ARG_IS("-x-force") ||ARG_IS("--x-force")) {
228 NEXT_ARG_PRESENT();
229
0fde6e45 230 cd.XForce = F(strtod(argv[++i], NULL));
10988083
MW
231 }
232 else if (ARG_IS("-verify") || ARG_IS("--verify")) {
233#ifdef VERIFICATION
234
235 // Choose this preset for verification. As geometry type "box" is
0fde6e45 236 // used but x and y direction are made periodic.
10988083
MW
237 // Everything else can be altered, but enough iterations should be
238 // performed in order to receive a fully developed flow field.
239 verify = 1;
240
0fde6e45
MW
241 cd.Omega = F(1.0);
242 cd.RhoIn = F(1.0);
243 cd.RhoOut = F(1.0);
10988083
MW
244 geometryType = "box";
245 dims[0] = 16;
246 dims[1] = 16;
247 dims[2] = 16;
0fde6e45 248 cd.XForce = F(0.00001);
10988083
MW
249 cd.MaxIterations = 1000;
250 periodic[0] = 1;
251 periodic[1] = 1;
252 periodic[2] = 0;
253
254 printf("#\n");
255 printf("# VERIFICATION: verifying flow profile of channel flow.\n");
256 printf("#\n");
257
258 // TODO: this is not a good idea as we ignore all other options...
259
260#else
261 printf("ERROR: in order to use -verify VERIFICATION must be defined during compilation.\n");
262 printf(" Recompile with VERIFICATION=on.\n");
263 return 1;
264#endif
265 }
266 else if (ARG_IS("-vtk") || ARG_IS("--vtk")) {
267#ifdef VTK_OUTPUT
268
269 cd.VtkOutput = 1;
270
271 // If the next parameter is a number it is used as the itartion count,
272 // if not it is probably another parameter.
273 if (i + 1 < argc) {
274
275 int vtkModulus = atoi(argv[i+1]);
276
277 if (vtkModulus > 0) {
278 cd.VtkModulus = vtkModulus;
279 ++i;
280 }
281 }
282#else
283 printf("ERROR: in order to use -vtk VTK_OUTPUT must be defined during compilation.\n");
284 printf(" Recompile with VTK_OUTPUT=on.\n");
285 return 1;
286#endif
287 }
288 else if (ARG_IS("-statistics") || ARG_IS("--statistics")) {
289#ifdef STATISTICS
290 NEXT_ARG_PRESENT();
291
292 cd.StatisticsModulus = atoi(argv[++i]);
293
294 if (cd.StatisticsModulus <= 0) {
295 printf("ERROR: the iteration count for -statistics must be > 0.\n");
296 return 1;
297 }
298#else
299 printf("ERROR: in order to use -statistics STATISTICS must be defined during compilation.\n");
300 printf(" Recompile with STATISTICS=on.\n");
301 return 1;
302#endif
303 }
304 else if (ARG_IS("-kernel") || ARG_IS("--kernel")) {
305 NEXT_ARG_PRESENT();
306
307 kernelToUse = argv[++i];
308 }
309 else if (ARG_IS("-list") || ARG_IS("--list")) {
310 printf("Available kernels to benchmark:\n");
311
312 for (int j = 0; j < N_ELEMS(g_kernels); ++j) {
313 printf(" %s\n", g_kernels[j].Name);
314 }
315
316 return 0;
317 }
318 else if (ARG_IS("-pin") || ARG_IS("--pin")) {
319 NEXT_ARG_PRESENT();
320
321 pinString = argv[++i];
322 }
323 else if (ARG_IS("-t") || ARG_IS("-threads") || ARG_IS("--threads")) {
324#ifdef _OPENMP
325 NEXT_ARG_PRESENT();
326
327 nThreads = atoi(argv[++i]);
328
329 if (nThreads <= 0) {
330 printf("ERROR: number of threads must be > 0.\n");
331 return 1;
332 }
333#else
334 printf("ERROR: specifying number of threads is only available when compiled with OpenMP support.\n");
335 return 1;
336#endif
337 }
338 else if (ARG_IS("-periodic-x") || ARG_IS("--periodic-x")) {
339 periodic[0] = 1;
340 }
341 else if (ARG_IS("-periodic-y") || ARG_IS("--periodic-y")) {
342 periodic[1] = 1;
343 }
344 else if (ARG_IS("-periodic-z") || ARG_IS("--periodic-z")) {
345 periodic[2] = 1;
346 }
347 else if (ARG_IS("-h") || ARG_IS("-help") || ARG_IS("--help")) {
348 printf("ERROR: unknown argument: %s\n", argv[i]);
349 printf("\n");
350 printf("Usage:\n");
351 printf("./lbmbenchk -list\n");
352 printf("./lbmbenchk \n");
8cafd9ea 353 printf(" [-dims XxYyZ] [-geometry box|channel|pipe|blocks[-value]] [-iterations <iterations>] [-lattice-dump-ascii]\n");
10988083
MW
354 printf(" [-rho-in <density>] [-rho-out <density] [-omega <omega>] [-kernel <kernel>]\n");
355 printf(" [-periodic-x]\n");
356#ifdef STATISTICS
357 printf(" [-statistics <every-n-iteration>]\n");
358#endif
359#ifdef VTK_OUTPUT
360 printf(" [-vtk [<every-n-iteration>]]\n");
361#endif
362#ifdef _OPENMP
363 printf(" [-t <number of threads>]\n");
364#endif
365 printf(" [-pin core{,core}*]\n");
366#ifdef VERIFICATION
367 printf(" [-verify]\n");
368#endif
369 printf(" -- <kernel specific parameters>\n");
370 printf("\n");
371 printf("-list List available kernels.\n");
372 printf("\n");
373 printf("-dims XxYxZ Specify geometry dimensions.\n");
374 printf("\n");
8cafd9ea 375 printf("-geometry blocks-<value>\n");
10988083
MW
376 printf(" Geometetry with blocks of size <value> regularily layout out.\n");
377 printf("\n");
378 return 1;
379 }
380 else if (ARG_IS("--")) {
381 // printf("# kernel args start with %s these are %d args.\n", argv[i + 1], argc - i - 1);
382 p.KernelArgs = &argv[++i];
383 p.nKernelArgs = argc - i;
384 break;
385 }
386 else {
387 printf("ERROR: unknown parameter: %s.\n", argv[i]);
388 exit(1);
389 }
390 }
391
392 #undef ARG_IS
393 #undef NEXT_ARG_PRESENT
394
395
396 // ----------------------------------------------------------------------
397 // Check if we exceed our index addressing PDFs.
398
399 {
400 uint64_t nPdfs = ((uint64_t)19) * dims[0] * dims[1] * dims[2];
401
402 if (nPdfs > ((2LU << 31) - 1)) {
403 printf("ERROR: number of PDFs exceed 2^31.\n");
404 exit(1);
405 }
406 }
407
408 // ----------------------------------------------------------------------
409
410#ifdef _OPENMP
411 omp_set_num_threads(nThreads);
412#endif
413
10988083 414 const char * defines[] = {
0fde6e45
MW
415#ifdef DEBUG
416 "DEBUG",
417#endif
10988083
MW
418#ifdef VTK_OUTPUT
419 "VTK_OUTPUT",
420#endif
421#ifdef STATISTICS
422 "STATISTICS",
423#endif
424#ifdef VERIFICATION
425 "VERIFICATION",
426#endif
427#ifdef _OPENMP
428 "_OPENMP",
429#endif
430#ifdef HAVE_LIKWID
431 "HAVE_LIKWID",
0fde6e45
MW
432#endif
433#ifdef INTEL_OPT_DIRECTIVES
434 "INTEL_OPT_DIRECTIVES",
10988083
MW
435#endif
436 };
437
0fde6e45
MW
438 printf("#\n");
439
440#ifdef PRECISION_DP
441 printf("# - floating point: double precision (%lu b, PRECISION_DP defined)\n", sizeof(PdfT));
442#elif defined(PRECISION_SP)
443 printf("# - floating point: single precision (%lu b, PRECISION_SP defined)\n", sizeof(PdfT));
444#else
445 printf("# - floating point: UNKNOWN (%lu b)\n", sizeof(PdfT));
446#endif
447
8cafd9ea
MW
448#if defined(VECTOR_AVX512)
449 printf("# - intrinsics: AVX512 (VECTOR_AVX512 defined)\n");
450#elif defined(VECTOR_AVX)
0fde6e45
MW
451 printf("# - intrinsics: AVX (VECTOR_AVX defined)\n");
452#elif defined(VECTOR_SSE)
453 printf("# - intrinsics: SSE (VECTOR_SSE defined)\n");
454#else
455 printf("# - intrinsics: UNKNOWN\n");
456#endif
457
458 printf("# - defines: ");
10988083
MW
459 for (int j = 0; j < N_ELEMS(defines); ++j) {
460 printf("%s ", defines[j]);
461 }
462 printf("\n");
463
0fde6e45
MW
464#ifdef __x86_64__
465 printf("# - fp status: DAZ: %d FTZ: %d\n", FpGetDaz(), FpGetFtz());
466#endif
467
468 printf("# - iterations: %d\n", cd.MaxIterations);
469
470 LatticeDesc ld;
471
472 GeoCreateByStr(geometryType, dims, periodic, &ld);
473
474 printf("# - geometry:\n");
475 printf("# type: %s\n", ld.Name);
476 printf("# dimensions: %d x %d x %d (x, y, z)\n", ld.Dims[0], ld.Dims[1], ld.Dims[2]);
477
8cafd9ea
MW
478 printf("# nodes total: % 10d\n", ld.nObst + ld.nFluid);
479 printf("# nodes fluid: % 10d (including inlet & outlet)\n", ld.nFluid);
480 printf("# nodes obstacles: % 10d\n", ld.nObst);
481 printf("# nodes inlet: % 10d\n", ld.nInlet);
482 printf("# nodes outlet: % 10d\n", ld.nOutlet);
0fde6e45 483 printf("# periodicity: x: %d y: %d z: %d\n", ld.PeriodicX, ld.PeriodicY, ld.PeriodicZ);
10988083
MW
484
485#ifdef VTK_OUTPUT
0fde6e45 486 printf("# - VTK output: %d (every %d iteration)\n", cd.VtkOutput, cd.VtkModulus);
10988083
MW
487#endif
488#ifdef STATISTICS
0fde6e45 489 printf("# - statistics: every %d iteration\n", cd.StatisticsModulus);
10988083
MW
490#endif
491
0fde6e45
MW
492 printf("# - flow:\n");
493 printf("# omega: %f\n", cd.Omega);
494 printf("# initial density at inlet/outlet:\n");
495 printf("# rho in: %e\n", cd.RhoIn);
496 printf("# rho out: %e\n", cd.RhoOut);
10988083
MW
497
498#ifdef _OPENMP
0fde6e45 499 printf("# - OpenMP threads: %d\n", omp_get_max_threads());
10988083
MW
500
501 if (pinString != NULL) {
502 #pragma omp parallel
503 {
504 int threadId = omp_get_thread_num();
505 int err;
506
e3f82424 507 err = PinCurrentThreadByCpuList(pinString, threadId);
10988083
MW
508
509 if (err) {
510 printf("ERROR [thread %d]: pinning failed.\n", threadId);
511 exit(1);
512 }
513
514 const char * cpuList = PinCpuListAsString();
515 Assert(cpuList != NULL);
516
517 // Not so nice hack to print the thread ids ordered.
518 #pragma omp for ordered
519 for (int i = 0; i < omp_get_num_threads(); ++i) {
520 #pragma omp ordered
0fde6e45 521 printf("# thread %2d pinned to core(s): %s\n", threadId, cpuList);
10988083
MW
522 }
523
524 free((void *)cpuList);
525 }
526 }
527#endif
528
529 KernelData * kd;
530
531 KernelFunctions * kf = NULL;
532
533 if (kernelToUse == NULL) {
534 kf = &g_kernels[0];
535 }
536 else {
537 for (int j = 0; j < N_ELEMS(g_kernels); ++j) {
538
539 if (!strcasecmp(kernelToUse, g_kernels[j].Name)) {
540 kf = &g_kernels[j];
541 break;
542 }
543 }
544 }
545
546 if (kf == NULL) {
547 printf("ERROR: requested kernel \"%s\" not found.\n", kernelToUse);
548 exit(1);
549 }
550
551 printf("#\n");
0fde6e45 552 printf("# - kernel: %s\n", kf->Name);
10988083
MW
553 printf("#\n");
554
555 // Initialize kernel by calling its own initialization function
556 kf->Init(&ld, &kd, &p);
557
558#ifdef VERIFICATION
559 if (verify) {
560 KernelSetInitialDensity( &ld, kd, &cd);
561 KernelSetInitialVelocity(&ld, kd, &cd);
562 }
563#endif
564
565 printf("# starting kernel...\n");
566
567 X_LIKWID_INIT();
568
10988083
MW
569 // Call the LBM kernel
570 kd->Kernel(&ld, kd, &cd);
571
10988083
MW
572 X_LIKWID_DEINIT();
573
574 // Print some statistics...
575 KernelStatisticsAdv(kd, &ld, &cd, cd.MaxIterations, 1 /* force output */);
576
577#ifdef VERIFICATION
578 PdfT errorNorm = -1.0;
579 KernelVerifiy(&ld, kd, &cd, &errorNorm);
580#endif
581
8cafd9ea
MW
582 double duration = kd->Duration;
583 double loopBalance = kd->LoopBalance;
584 double dataVolGByte = loopBalance * ld.nFluid * cd.MaxIterations / 1024. / 1024. / 1024.;
585 double bandwidthGBytePerS = dataVolGByte / duration;
586
10988083
MW
587 // Deinitialize kernel by calling its own deinitialization function
588 kf->Deinit(&ld, &kd);
589
10988083
MW
590 double perf = (double)ld.nFluid * (double)cd.MaxIterations / duration / 1.e6;
591
8cafd9ea
MW
592 printf("#\n");
593 printf("# Evaluation Stats\n");
594#ifdef VERIFICATION
595 printf("# runtype: \t%s \n", "verification");
596#else
597 printf("# runtype: \t%s \n", "benchmark");
598#endif
599 printf("# runtime: \t%.3f s\n", duration);
600 printf("# iterations: \t%d \n", cd.MaxIterations);
601 printf("# fluid cells: \t%d \n", ld.nFluid);
602 printf("# Derived metrics\n");
603 printf("# MEM data vol.: \t%.2f GByte\n", dataVolGByte);
604 printf("# MEM bandwidth: \t%.2f GByte/s\n", bandwidthGBytePerS);
605 printf("# performance: \t%.3f MFLUP/s\n", perf);
606
607
0fde6e45 608 printf("P: %f MFLUP/s t: %d d: %f s iter: %d fnodes: %f x1e6 geo: %s kernel: %s %s %s\n",
10988083
MW
609 perf, nThreads, duration, cd.MaxIterations, ld.nFluid / 1e6,
610 geometryType, kernelToUse,
611#ifdef VERIFICATION
0fde6e45
MW
612 "VERIFICATION",
613#else
614 "B",
615#endif
616#ifdef PRECISION_DP
617 "dp"
618#elif defined(PRECISION_SP)
619 "sp"
10988083 620#else
0fde6e45 621 "unknown-precision"
10988083
MW
622#endif
623 );
624
625 int exitCode = 0;
626
627#ifdef VERIFICATION
628
629 if (verify) {
630 printf("# VERIFICATION: deviation from analytical solution: %e\n", errorNorm);
631
632 if (errorNorm > 0.1) {
633 printf("# VERIFICATION FAILED.\n");
634 exitCode = 1;
635 }
636 else {
637 printf("# VERIFICATION SUCCEEDED.\n");
638 }
639 }
640#else
641// printf("# VERIFICATION: deviation from analytical solution: %e\n", errorNorm);
642// printf("# VERIFICATION: this is only valid for pipe geometry with enough iterations performed.\n");
643#endif
644
645 MemFree((void **)&ld.Lattice);
646
647 return exitCode;
648}
This page took 0.096909 seconds and 5 git commands to generate.