add single precision, add aa-vec-sl-soa kernel, updated doc
[LbmBenchmarkKernelsPublic.git] / src / Main.c
CommitLineData
10988083
MW
1// --------------------------------------------------------------------------
2//
3// Copyright
4// Markus Wittmann, 2016-2017
5// RRZE, University of Erlangen-Nuremberg, Germany
6// markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
7//
8// Viktor Haag, 2016
9// LSS, University of Erlangen-Nuremberg, Germany
10//
11// This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
12//
13// LbmBenchKernels is free software: you can redistribute it and/or modify
14// it under the terms of the GNU General Public License as published by
15// the Free Software Foundation, either version 3 of the License, or
16// (at your option) any later version.
17//
18// LbmBenchKernels is distributed in the hope that it will be useful,
19// but WITHOUT ANY WARRANTY; without even the implied warranty of
20// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21// GNU General Public License for more details.
22//
23// You should have received a copy of the GNU General Public License
24// along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>.
25//
26// --------------------------------------------------------------------------
27#include <stdio.h>
28#include <stdlib.h>
29#include <string.h>
30#include <strings.h> // strcasecmp
31
32#include <inttypes.h>
33
34#ifdef _OPENMP
35#include <omp.h>
36#endif
37
38#include "Base.h"
39#include "Kernel.h"
40#include "Memory.h"
41
42#include "Lattice.h"
43#include "Geometry.h"
44#include "Pinning.h"
45#include "LikwidIf.h"
46
47#include "KernelFunctions.h"
48
49#ifdef __x86_64__
50 #include <xmmintrin.h>
51
52
53 #define MXCSR_DAZ 6
54 #define MXCSR_FTZ 15
55
56
57 int FpIsMxCsrMaskSet(unsigned int mask)
58 {
59 unsigned int mxcsr;
60 unsigned int mxcsrNew;
61
62 mxcsr = _mm_getcsr();
63
64 mxcsrNew = mxcsr & mask;
65
66 return (mxcsrNew == mask);
67 }
68
69 int FpGetFtz()
70 {
71 return FpIsMxCsrMaskSet(1 << MXCSR_FTZ);
72 }
73
74 int FpGetDaz()
75 {
76 return FpIsMxCsrMaskSet(1 << MXCSR_DAZ);
77 }
78#endif
79
80
81int ParseDimensions(const char * parameter, int * nX, int * nY, int * nZ)
82{
83 char * tmp;
84
85 *nX = atoi(parameter);
86
87 if (*nX <= 0) {
88 printf("ERROR: parameter for X dimension must be > 0.\n");
89 return 0;
90 }
91
92 tmp = strchr(parameter, 'x');
93
94 if (tmp == NULL) {
95 printf("ERROR: parameter for Y dimension is missing.\n");
96 return 0;
97 }
98
99 *nY = atoi(tmp + 1);
100
101 if (*nY <= 0) {
102 printf("ERROR: parameter for Y dimension must be > 0.\n");
103 return 0;
104 }
105
106 tmp = strchr(tmp + 1, 'x');
107
108 if (tmp == NULL) {
109 printf("ERROR: parameter for Z dimension is missing.\n");
110 return 0;
111 }
112
113 *nZ = atoi(tmp + 1);
114
115 if (*nZ <= 0) {
116 printf("ERROR: parameter for Z dimension must be > 0.\n");
117 return 0;
118 }
119
120 return 1;
121}
122
123int main(int argc, char * argv[])
124{
125 int dims[3] = { 20, 20, 20 }; // Dimensions in x, y, and z direction
126 const char * geometryType = "channel";
127 // int latticeDumpAscii = 0;
128 int verify = 0; UNUSED(verify);
129 char * kernelToUse = NULL;
130 int nThreads = 1;
131 const char * pinString = NULL;
132 int periodic[3] = { 0 };
133
134 CaseData cd;
135
0fde6e45
MW
136 cd.MaxIterations = 10;
137 cd.RhoIn = F(1.0);
138 cd.RhoOut = F(1.0);
139 cd.Omega = F(1.0);
10988083
MW
140 cd.VtkOutput = 0;
141 cd.VtkModulus = 100;
142 cd.StatisticsModulus = 100;
0fde6e45 143 cd.XForce = F(0.00001);
10988083
MW
144 kernelToUse = "push-soa";
145
146 Parameters p;
147 p.nArgs = argc;
148 p.Args = argv;
149 p.nKernelArgs = 0;
150 p.KernelArgs = NULL;
151
152#define LBM_BENCH_KERNELS_VERSION_MAJOR 0
153#define LBM_BENCH_KERNELS_VERSION_MINOR 1
154
155 printf("Lattice Boltzmann Benchmark Kernels (LbmBenchKernels) Copyright (C) 2016, 2017 LSS, RRZE\n");
156 printf("This program comes with ABSOLUTELY NO WARRANTY; for details see LICENSE.\n");
157 printf("This is free software, and you are welcome to redistribute it under certain conditions.\n");
158 printf("\n");
0fde6e45 159 printf("# LBM Benchmark Kernels %d.%d, compiled %s %s, type: %s\n",
10988083
MW
160 LBM_BENCH_KERNELS_VERSION_MAJOR, LBM_BENCH_KERNELS_VERSION_MINOR, __DATE__, __TIME__,
161#ifdef VERIFICATION
162 "verification"
163#else
164 "benchmark"
165#endif
166 );
167
168 // ----------------------------------------------------------------------
169 // Parse command line arguments
170
171 #define ARG_IS(param) (!strcmp(argv[i], param))
172 #define NEXT_ARG_PRESENT() \
173 do { \
174 if (i + 1 >= argc) { \
175 printf("ERROR: argument %s requires a parameter.\n", argv[i]); \
176 return 1; \
177 } \
178 } while (0)
179
180 for (int i = 1; i < argc; ++i) {
181
182 if (ARG_IS("-dims") || ARG_IS("--dims")) {
183 NEXT_ARG_PRESENT();
184
185
186 if (!ParseDimensions(argv[++i], &dims[0], &dims[1], &dims[2])) {
187 return 1;
188 }
189 }
190 // else if (ARG_IS("-lattice-dump-ascii") || ARG_IS("--lattice-dump-ascii")) {
191 // latticeDumpAscii = 1;
192 // }
193 else if (ARG_IS("-geometry") || ARG_IS("--geometry")) {
194 NEXT_ARG_PRESENT();
195
196 geometryType = argv[++i];
197 }
198 else if (ARG_IS("-iterations") ||ARG_IS("--iterations")) {
199 NEXT_ARG_PRESENT();
200
201 cd.MaxIterations = strtol(argv[++i], NULL, 0);
202
203 if (cd.MaxIterations <= 0) {
204 printf("ERROR: number of iterations must be > 0.\n");
205 return 1;
206 }
207 }
208 else if (ARG_IS("-rho-in") ||ARG_IS("--rho-in")) {
209 NEXT_ARG_PRESENT();
210
0fde6e45 211 cd.RhoIn = F(strtod(argv[++i], NULL));
10988083
MW
212 }
213 else if (ARG_IS("-rho-out") ||ARG_IS("--rho-out")) {
214 NEXT_ARG_PRESENT();
215
0fde6e45 216 cd.RhoOut = F(strtod(argv[++i], NULL));
10988083
MW
217 }
218 else if (ARG_IS("-omega") ||ARG_IS("--omega")) {
219 NEXT_ARG_PRESENT();
220
0fde6e45 221 cd.Omega = F(strtod(argv[++i], NULL));
10988083
MW
222 }
223 else if (ARG_IS("-x-force") ||ARG_IS("--x-force")) {
224 NEXT_ARG_PRESENT();
225
0fde6e45 226 cd.XForce = F(strtod(argv[++i], NULL));
10988083
MW
227 }
228 else if (ARG_IS("-verify") || ARG_IS("--verify")) {
229#ifdef VERIFICATION
230
231 // Choose this preset for verification. As geometry type "box" is
0fde6e45 232 // used but x and y direction are made periodic.
10988083
MW
233 // Everything else can be altered, but enough iterations should be
234 // performed in order to receive a fully developed flow field.
235 verify = 1;
236
0fde6e45
MW
237 cd.Omega = F(1.0);
238 cd.RhoIn = F(1.0);
239 cd.RhoOut = F(1.0);
10988083
MW
240 geometryType = "box";
241 dims[0] = 16;
242 dims[1] = 16;
243 dims[2] = 16;
0fde6e45 244 cd.XForce = F(0.00001);
10988083
MW
245 cd.MaxIterations = 1000;
246 periodic[0] = 1;
247 periodic[1] = 1;
248 periodic[2] = 0;
249
250 printf("#\n");
251 printf("# VERIFICATION: verifying flow profile of channel flow.\n");
252 printf("#\n");
253
254 // TODO: this is not a good idea as we ignore all other options...
255
256#else
257 printf("ERROR: in order to use -verify VERIFICATION must be defined during compilation.\n");
258 printf(" Recompile with VERIFICATION=on.\n");
259 return 1;
260#endif
261 }
262 else if (ARG_IS("-vtk") || ARG_IS("--vtk")) {
263#ifdef VTK_OUTPUT
264
265 cd.VtkOutput = 1;
266
267 // If the next parameter is a number it is used as the itartion count,
268 // if not it is probably another parameter.
269 if (i + 1 < argc) {
270
271 int vtkModulus = atoi(argv[i+1]);
272
273 if (vtkModulus > 0) {
274 cd.VtkModulus = vtkModulus;
275 ++i;
276 }
277 }
278#else
279 printf("ERROR: in order to use -vtk VTK_OUTPUT must be defined during compilation.\n");
280 printf(" Recompile with VTK_OUTPUT=on.\n");
281 return 1;
282#endif
283 }
284 else if (ARG_IS("-statistics") || ARG_IS("--statistics")) {
285#ifdef STATISTICS
286 NEXT_ARG_PRESENT();
287
288 cd.StatisticsModulus = atoi(argv[++i]);
289
290 if (cd.StatisticsModulus <= 0) {
291 printf("ERROR: the iteration count for -statistics must be > 0.\n");
292 return 1;
293 }
294#else
295 printf("ERROR: in order to use -statistics STATISTICS must be defined during compilation.\n");
296 printf(" Recompile with STATISTICS=on.\n");
297 return 1;
298#endif
299 }
300 else if (ARG_IS("-kernel") || ARG_IS("--kernel")) {
301 NEXT_ARG_PRESENT();
302
303 kernelToUse = argv[++i];
304 }
305 else if (ARG_IS("-list") || ARG_IS("--list")) {
306 printf("Available kernels to benchmark:\n");
307
308 for (int j = 0; j < N_ELEMS(g_kernels); ++j) {
309 printf(" %s\n", g_kernels[j].Name);
310 }
311
312 return 0;
313 }
314 else if (ARG_IS("-pin") || ARG_IS("--pin")) {
315 NEXT_ARG_PRESENT();
316
317 pinString = argv[++i];
318 }
319 else if (ARG_IS("-t") || ARG_IS("-threads") || ARG_IS("--threads")) {
320#ifdef _OPENMP
321 NEXT_ARG_PRESENT();
322
323 nThreads = atoi(argv[++i]);
324
325 if (nThreads <= 0) {
326 printf("ERROR: number of threads must be > 0.\n");
327 return 1;
328 }
329#else
330 printf("ERROR: specifying number of threads is only available when compiled with OpenMP support.\n");
331 return 1;
332#endif
333 }
334 else if (ARG_IS("-periodic-x") || ARG_IS("--periodic-x")) {
335 periodic[0] = 1;
336 }
337 else if (ARG_IS("-periodic-y") || ARG_IS("--periodic-y")) {
338 periodic[1] = 1;
339 }
340 else if (ARG_IS("-periodic-z") || ARG_IS("--periodic-z")) {
341 periodic[2] = 1;
342 }
343 else if (ARG_IS("-h") || ARG_IS("-help") || ARG_IS("--help")) {
344 printf("ERROR: unknown argument: %s\n", argv[i]);
345 printf("\n");
346 printf("Usage:\n");
347 printf("./lbmbenchk -list\n");
348 printf("./lbmbenchk \n");
349 printf(" [-dims XxYyZ] [-geometry box|channel|pipe|porosity[-value]] [-iterations <iterations>] [-lattice-dump-ascii]\n");
350 printf(" [-rho-in <density>] [-rho-out <density] [-omega <omega>] [-kernel <kernel>]\n");
351 printf(" [-periodic-x]\n");
352#ifdef STATISTICS
353 printf(" [-statistics <every-n-iteration>]\n");
354#endif
355#ifdef VTK_OUTPUT
356 printf(" [-vtk [<every-n-iteration>]]\n");
357#endif
358#ifdef _OPENMP
359 printf(" [-t <number of threads>]\n");
360#endif
361 printf(" [-pin core{,core}*]\n");
362#ifdef VERIFICATION
363 printf(" [-verify]\n");
364#endif
365 printf(" -- <kernel specific parameters>\n");
366 printf("\n");
367 printf("-list List available kernels.\n");
368 printf("\n");
369 printf("-dims XxYxZ Specify geometry dimensions.\n");
370 printf("\n");
371 printf("-geometry porosity-<value>\n");
372 printf(" Geometetry with blocks of size <value> regularily layout out.\n");
373 printf("\n");
374 return 1;
375 }
376 else if (ARG_IS("--")) {
377 // printf("# kernel args start with %s these are %d args.\n", argv[i + 1], argc - i - 1);
378 p.KernelArgs = &argv[++i];
379 p.nKernelArgs = argc - i;
380 break;
381 }
382 else {
383 printf("ERROR: unknown parameter: %s.\n", argv[i]);
384 exit(1);
385 }
386 }
387
388 #undef ARG_IS
389 #undef NEXT_ARG_PRESENT
390
391
392 // ----------------------------------------------------------------------
393 // Check if we exceed our index addressing PDFs.
394
395 {
396 uint64_t nPdfs = ((uint64_t)19) * dims[0] * dims[1] * dims[2];
397
398 if (nPdfs > ((2LU << 31) - 1)) {
399 printf("ERROR: number of PDFs exceed 2^31.\n");
400 exit(1);
401 }
402 }
403
404 // ----------------------------------------------------------------------
405
406#ifdef _OPENMP
407 omp_set_num_threads(nThreads);
408#endif
409
10988083 410 const char * defines[] = {
0fde6e45
MW
411#ifdef DEBUG
412 "DEBUG",
413#endif
10988083
MW
414#ifdef VTK_OUTPUT
415 "VTK_OUTPUT",
416#endif
417#ifdef STATISTICS
418 "STATISTICS",
419#endif
420#ifdef VERIFICATION
421 "VERIFICATION",
422#endif
423#ifdef _OPENMP
424 "_OPENMP",
425#endif
426#ifdef HAVE_LIKWID
427 "HAVE_LIKWID",
0fde6e45
MW
428#endif
429#ifdef INTEL_OPT_DIRECTIVES
430 "INTEL_OPT_DIRECTIVES",
10988083
MW
431#endif
432 };
433
0fde6e45
MW
434 printf("#\n");
435
436#ifdef PRECISION_DP
437 printf("# - floating point: double precision (%lu b, PRECISION_DP defined)\n", sizeof(PdfT));
438#elif defined(PRECISION_SP)
439 printf("# - floating point: single precision (%lu b, PRECISION_SP defined)\n", sizeof(PdfT));
440#else
441 printf("# - floating point: UNKNOWN (%lu b)\n", sizeof(PdfT));
442#endif
443
444#ifdef VECTOR_AVX
445 printf("# - intrinsics: AVX (VECTOR_AVX defined)\n");
446#elif defined(VECTOR_SSE)
447 printf("# - intrinsics: SSE (VECTOR_SSE defined)\n");
448#else
449 printf("# - intrinsics: UNKNOWN\n");
450#endif
451
452 printf("# - defines: ");
10988083
MW
453 for (int j = 0; j < N_ELEMS(defines); ++j) {
454 printf("%s ", defines[j]);
455 }
456 printf("\n");
457
0fde6e45
MW
458#ifdef __x86_64__
459 printf("# - fp status: DAZ: %d FTZ: %d\n", FpGetDaz(), FpGetFtz());
460#endif
461
462 printf("# - iterations: %d\n", cd.MaxIterations);
463
464 LatticeDesc ld;
465
466 GeoCreateByStr(geometryType, dims, periodic, &ld);
467
468 printf("# - geometry:\n");
469 printf("# type: %s\n", ld.Name);
470 printf("# dimensions: %d x %d x %d (x, y, z)\n", ld.Dims[0], ld.Dims[1], ld.Dims[2]);
471
472 printf("# nodes total: %d\n", ld.nObst + ld.nFluid);
473 printf("# nodes fluid: %d (including inlet & outlet)\n", ld.nFluid);
474 printf("# nodes obstacles: %d\n", ld.nObst);
475 printf("# nodes inlet: %d\n", ld.nInlet);
476 printf("# nodes outlet: %d\n", ld.nOutlet);
477 printf("# periodicity: x: %d y: %d z: %d\n", ld.PeriodicX, ld.PeriodicY, ld.PeriodicZ);
10988083
MW
478
479#ifdef VTK_OUTPUT
0fde6e45 480 printf("# - VTK output: %d (every %d iteration)\n", cd.VtkOutput, cd.VtkModulus);
10988083
MW
481#endif
482#ifdef STATISTICS
0fde6e45 483 printf("# - statistics: every %d iteration\n", cd.StatisticsModulus);
10988083
MW
484#endif
485
0fde6e45
MW
486 printf("# - flow:\n");
487 printf("# omega: %f\n", cd.Omega);
488 printf("# initial density at inlet/outlet:\n");
489 printf("# rho in: %e\n", cd.RhoIn);
490 printf("# rho out: %e\n", cd.RhoOut);
10988083
MW
491
492#ifdef _OPENMP
0fde6e45 493 printf("# - OpenMP threads: %d\n", omp_get_max_threads());
10988083
MW
494
495 if (pinString != NULL) {
496 #pragma omp parallel
497 {
498 int threadId = omp_get_thread_num();
499 int err;
500
e3f82424 501 err = PinCurrentThreadByCpuList(pinString, threadId);
10988083
MW
502
503 if (err) {
504 printf("ERROR [thread %d]: pinning failed.\n", threadId);
505 exit(1);
506 }
507
508 const char * cpuList = PinCpuListAsString();
509 Assert(cpuList != NULL);
510
511 // Not so nice hack to print the thread ids ordered.
512 #pragma omp for ordered
513 for (int i = 0; i < omp_get_num_threads(); ++i) {
514 #pragma omp ordered
0fde6e45 515 printf("# thread %2d pinned to core(s): %s\n", threadId, cpuList);
10988083
MW
516 }
517
518 free((void *)cpuList);
519 }
520 }
521#endif
522
523 KernelData * kd;
524
525 KernelFunctions * kf = NULL;
526
527 if (kernelToUse == NULL) {
528 kf = &g_kernels[0];
529 }
530 else {
531 for (int j = 0; j < N_ELEMS(g_kernels); ++j) {
532
533 if (!strcasecmp(kernelToUse, g_kernels[j].Name)) {
534 kf = &g_kernels[j];
535 break;
536 }
537 }
538 }
539
540 if (kf == NULL) {
541 printf("ERROR: requested kernel \"%s\" not found.\n", kernelToUse);
542 exit(1);
543 }
544
545 printf("#\n");
0fde6e45 546 printf("# - kernel: %s\n", kf->Name);
10988083
MW
547 printf("#\n");
548
549 // Initialize kernel by calling its own initialization function
550 kf->Init(&ld, &kd, &p);
551
552#ifdef VERIFICATION
553 if (verify) {
554 KernelSetInitialDensity( &ld, kd, &cd);
555 KernelSetInitialVelocity(&ld, kd, &cd);
556 }
557#endif
558
559 printf("# starting kernel...\n");
560
561 X_LIKWID_INIT();
562
563 double timeStart = Time();
564
565 // Call the LBM kernel
566 kd->Kernel(&ld, kd, &cd);
567
568 double duration = Time() - timeStart;
569
570 X_LIKWID_DEINIT();
571
572 // Print some statistics...
573 KernelStatisticsAdv(kd, &ld, &cd, cd.MaxIterations, 1 /* force output */);
574
575#ifdef VERIFICATION
576 PdfT errorNorm = -1.0;
577 KernelVerifiy(&ld, kd, &cd, &errorNorm);
578#endif
579
580 // Deinitialize kernel by calling its own deinitialization function
581 kf->Deinit(&ld, &kd);
582
583
584 double perf = (double)ld.nFluid * (double)cd.MaxIterations / duration / 1.e6;
585
0fde6e45 586 printf("P: %f MFLUP/s t: %d d: %f s iter: %d fnodes: %f x1e6 geo: %s kernel: %s %s %s\n",
10988083
MW
587 perf, nThreads, duration, cd.MaxIterations, ld.nFluid / 1e6,
588 geometryType, kernelToUse,
589#ifdef VERIFICATION
0fde6e45
MW
590 "VERIFICATION",
591#else
592 "B",
593#endif
594#ifdef PRECISION_DP
595 "dp"
596#elif defined(PRECISION_SP)
597 "sp"
10988083 598#else
0fde6e45 599 "unknown-precision"
10988083
MW
600#endif
601 );
602
603 int exitCode = 0;
604
605#ifdef VERIFICATION
606
607 if (verify) {
608 printf("# VERIFICATION: deviation from analytical solution: %e\n", errorNorm);
609
610 if (errorNorm > 0.1) {
611 printf("# VERIFICATION FAILED.\n");
612 exitCode = 1;
613 }
614 else {
615 printf("# VERIFICATION SUCCEEDED.\n");
616 }
617 }
618#else
619// printf("# VERIFICATION: deviation from analytical solution: %e\n", errorNorm);
620// printf("# VERIFICATION: this is only valid for pipe geometry with enough iterations performed.\n");
621#endif
622
623 MemFree((void **)&ld.Lattice);
624
625 return exitCode;
626}
This page took 0.101589 seconds and 5 git commands to generate.