merge with kernels from MH's master thesis
[LbmBenchmarkKernelsPublic.git] / src / Main.c
1 // --------------------------------------------------------------------------
2 //
3 // Copyright
4 //   Markus Wittmann, 2016-2017
5 //   RRZE, University of Erlangen-Nuremberg, Germany
6 //   markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
7 //
8 //   Viktor Haag, 2016
9 //   LSS, University of Erlangen-Nuremberg, Germany
10 //
11 //   Michael Hussnaetter, 2017-2018
12 //   University of Erlangen-Nuremberg, Germany
13 //   michael.hussnaetter -at- fau.de
14 //
15 //  This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
16 //
17 //  LbmBenchKernels is free software: you can redistribute it and/or modify
18 //  it under the terms of the GNU General Public License as published by
19 //  the Free Software Foundation, either version 3 of the License, or
20 //  (at your option) any later version.
21 //
22 //  LbmBenchKernels is distributed in the hope that it will be useful,
23 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
24 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
25 //  GNU General Public License for more details.
26 //
27 //  You should have received a copy of the GNU General Public License
28 //  along with LbmBenchKernels.  If not, see <http://www.gnu.org/licenses/>.
29 //
30 // --------------------------------------------------------------------------
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <strings.h>  // strcasecmp
35
36 #include <inttypes.h>
37
38 #ifdef _OPENMP
39 #include <omp.h>
40 #endif
41
42 #include "Base.h"
43 #include "Kernel.h"
44 #include "Memory.h"
45
46 #include "Lattice.h"
47 #include "Geometry.h"
48 #include "Pinning.h"
49 #include "LikwidIf.h"
50
51 #include "KernelFunctions.h"
52
53 #ifdef __x86_64__
54         #include <xmmintrin.h>
55
56
57         #define MXCSR_DAZ                                       6
58         #define MXCSR_FTZ                                       15
59
60
61         int FpIsMxCsrMaskSet(unsigned int mask)
62         {
63                 unsigned int mxcsr;
64                 unsigned int mxcsrNew;
65
66                 mxcsr = _mm_getcsr();
67
68                 mxcsrNew = mxcsr & mask;
69
70                 return (mxcsrNew == mask);
71         }
72
73         int FpGetFtz()
74         {
75                 return FpIsMxCsrMaskSet(1 << MXCSR_FTZ);
76         }
77
78         int FpGetDaz()
79         {
80                 return FpIsMxCsrMaskSet(1 << MXCSR_DAZ);
81         }
82 #endif
83
84
85 int ParseDimensions(const char * parameter, int * nX, int * nY, int * nZ)
86 {
87         char * tmp;
88
89         *nX = atoi(parameter);
90
91         if (*nX <= 0) {
92                 printf("ERROR: parameter for X dimension must be > 0.\n");
93                 return 0;
94         }
95
96         tmp = strchr(parameter, 'x');
97
98         if (tmp == NULL) {
99                 printf("ERROR: parameter for Y dimension is missing.\n");
100                 return 0;
101         }
102
103         *nY = atoi(tmp + 1);
104
105         if (*nY <= 0) {
106                 printf("ERROR: parameter for Y dimension must be > 0.\n");
107                 return 0;
108         }
109
110         tmp = strchr(tmp + 1, 'x');
111
112         if (tmp == NULL) {
113                 printf("ERROR: parameter for Z dimension is missing.\n");
114                 return 0;
115         }
116
117         *nZ = atoi(tmp + 1);
118
119         if (*nZ <= 0) {
120                 printf("ERROR: parameter for Z dimension must be > 0.\n");
121                 return 0;
122         }
123
124         return 1;
125 }
126
127 int main(int argc, char * argv[])
128 {
129         int dims[3] = { 20, 20, 20 };           // Dimensions in x, y, and z direction
130         const char * geometryType = "channel";
131         // int latticeDumpAscii = 0;
132         int verify = 0; UNUSED(verify);
133         char * kernelToUse = NULL;
134         int nThreads = 1;
135         const char * pinString = NULL;
136         int periodic[3] = { 0 };
137
138         CaseData cd;
139
140         cd.MaxIterations                = 10;
141         cd.RhoIn                                = F(1.0);
142         cd.RhoOut                               = F(1.0);
143         cd.Omega                                = F(1.0);
144         cd.VtkOutput                    = 0;
145         cd.VtkModulus                   = 100;
146         cd.StatisticsModulus    = 100;
147         cd.XForce                               = F(0.00001);
148         kernelToUse                             = "push-soa";
149
150         Parameters p;
151         p.nArgs        = argc;
152         p.Args         = argv;
153         p.nKernelArgs  = 0;
154         p.KernelArgs   = NULL;
155
156 #define LBM_BENCH_KERNELS_VERSION_MAJOR 0
157 #define LBM_BENCH_KERNELS_VERSION_MINOR 1
158
159     printf("Lattice Boltzmann Benchmark Kernels (LbmBenchKernels) Copyright (C) 2016, 2017, 2018 LSS, RRZE\n");
160     printf("This program comes with ABSOLUTELY NO WARRANTY; for details see LICENSE.\n");
161         printf("This is free software, and you are welcome to redistribute it under certain conditions.\n");
162         printf("\n");
163         printf("# LBM Benchmark Kernels %d.%d, compiled %s %s, type: %s\n",
164                 LBM_BENCH_KERNELS_VERSION_MAJOR, LBM_BENCH_KERNELS_VERSION_MINOR, __DATE__, __TIME__,
165 #ifdef VERIFICATION
166                 "verification"
167 #else
168                 "benchmark"
169 #endif
170         );
171
172         // ----------------------------------------------------------------------
173         // Parse command line arguments
174
175         #define ARG_IS(param)                   (!strcmp(argv[i], param))
176         #define NEXT_ARG_PRESENT() \
177                 do { \
178                         if (i + 1 >= argc) { \
179                                 printf("ERROR: argument %s requires a parameter.\n", argv[i]); \
180                                 return 1; \
181                         } \
182                 } while (0)
183
184     for (int i = 1; i < argc; ++i) {
185
186         if (ARG_IS("-dims") || ARG_IS("--dims")) {
187             NEXT_ARG_PRESENT();
188
189
190             if (!ParseDimensions(argv[++i], &dims[0], &dims[1], &dims[2])) {
191                 return 1;
192             }
193         }
194                 // else if (ARG_IS("-lattice-dump-ascii") || ARG_IS("--lattice-dump-ascii")) {
195                 //      latticeDumpAscii = 1;
196                 // }
197                 else if (ARG_IS("-geometry") || ARG_IS("--geometry")) {
198                         NEXT_ARG_PRESENT();
199
200                         geometryType = argv[++i];
201                 }
202                 else if (ARG_IS("-iterations") ||ARG_IS("--iterations")) {
203                         NEXT_ARG_PRESENT();
204
205                         cd.MaxIterations = strtol(argv[++i], NULL, 0);
206
207                         if (cd.MaxIterations <= 0) {
208                                 printf("ERROR: number of iterations must be > 0.\n");
209                                 return 1;
210                         }
211                 }
212                 else if (ARG_IS("-rho-in") ||ARG_IS("--rho-in")) {
213                         NEXT_ARG_PRESENT();
214
215                         cd.RhoIn = F(strtod(argv[++i], NULL));
216                 }
217                 else if (ARG_IS("-rho-out") ||ARG_IS("--rho-out")) {
218                         NEXT_ARG_PRESENT();
219
220                         cd.RhoOut = F(strtod(argv[++i], NULL));
221                 }
222                 else if (ARG_IS("-omega") ||ARG_IS("--omega")) {
223                         NEXT_ARG_PRESENT();
224
225                         cd.Omega = F(strtod(argv[++i], NULL));
226                 }
227                 else if (ARG_IS("-x-force") ||ARG_IS("--x-force")) {
228                         NEXT_ARG_PRESENT();
229
230                         cd.XForce = F(strtod(argv[++i], NULL));
231                 }
232                 else if (ARG_IS("-verify") || ARG_IS("--verify")) {
233 #ifdef VERIFICATION
234
235                         // Choose this preset for verification. As geometry type "box" is
236                         // used but x and y direction are made periodic.
237                         // Everything else can be altered, but enough iterations should be
238                         // performed in order to receive a fully developed flow field.
239                         verify = 1;
240
241                         cd.Omega  = F(1.0);
242                         cd.RhoIn  = F(1.0);
243                         cd.RhoOut = F(1.0);
244                         geometryType = "box";
245                         dims[0] = 16;
246                         dims[1] = 16;
247                         dims[2] = 16;
248                         cd.XForce = F(0.00001);
249                         cd.MaxIterations = 1000;
250                         periodic[0] = 1;
251                         periodic[1] = 1;
252                         periodic[2] = 0;
253
254                         printf("#\n");
255                         printf("# VERIFICATION: verifying flow profile of channel flow.\n");
256                         printf("#\n");
257
258                         // TODO: this is not a good idea as we ignore all other options...
259
260 #else
261                         printf("ERROR: in order to use -verify VERIFICATION must be defined during compilation.\n");
262                         printf("       Recompile with VERIFICATION=on.\n");
263                         return 1;
264 #endif
265                 }
266                 else if (ARG_IS("-vtk") || ARG_IS("--vtk")) {
267 #ifdef VTK_OUTPUT
268
269                         cd.VtkOutput = 1;
270
271                         // If the next parameter is a number it is used as the itartion count,
272                         // if not it is probably another parameter.
273                         if (i + 1 < argc) {
274
275                                 int vtkModulus = atoi(argv[i+1]);
276
277                                 if (vtkModulus > 0) {
278                                         cd.VtkModulus = vtkModulus;
279                                         ++i;
280                                 }
281                         }
282 #else
283                         printf("ERROR: in order to use -vtk VTK_OUTPUT must be defined during compilation.\n");
284                         printf("       Recompile with VTK_OUTPUT=on.\n");
285                         return 1;
286 #endif
287                 }
288                 else if (ARG_IS("-statistics") || ARG_IS("--statistics")) {
289 #ifdef STATISTICS
290                         NEXT_ARG_PRESENT();
291
292                         cd.StatisticsModulus = atoi(argv[++i]);
293
294                         if (cd.StatisticsModulus <= 0) {
295                                 printf("ERROR: the iteration count for -statistics must be > 0.\n");
296                                 return 1;
297                         }
298 #else
299                         printf("ERROR: in order to use -statistics STATISTICS must be defined during compilation.\n");
300                         printf("       Recompile with STATISTICS=on.\n");
301                         return 1;
302 #endif
303                 }
304                 else if (ARG_IS("-kernel") || ARG_IS("--kernel")) {
305                         NEXT_ARG_PRESENT();
306
307                         kernelToUse = argv[++i];
308                 }
309                 else if (ARG_IS("-list") || ARG_IS("--list")) {
310                         printf("Available kernels to benchmark:\n");
311
312                         for (int j = 0; j < N_ELEMS(g_kernels); ++j) {
313                                 printf("   %s\n", g_kernels[j].Name);
314                         }
315
316                         return 0;
317                 }
318                 else if (ARG_IS("-pin") || ARG_IS("--pin")) {
319                         NEXT_ARG_PRESENT();
320
321                         pinString = argv[++i];
322                 }
323                 else if (ARG_IS("-t") || ARG_IS("-threads") || ARG_IS("--threads")) {
324 #ifdef _OPENMP
325                         NEXT_ARG_PRESENT();
326
327                         nThreads = atoi(argv[++i]);
328
329                         if (nThreads <= 0) {
330                                 printf("ERROR: number of threads must be > 0.\n");
331                                 return 1;
332                         }
333 #else
334                         printf("ERROR: specifying number of threads is only available when compiled with OpenMP support.\n");
335                         return 1;
336 #endif
337                 }
338                 else if (ARG_IS("-periodic-x") || ARG_IS("--periodic-x")) {
339                         periodic[0] = 1;
340                 }
341                 else if (ARG_IS("-periodic-y") || ARG_IS("--periodic-y")) {
342                         periodic[1] = 1;
343                 }
344                 else if (ARG_IS("-periodic-z") || ARG_IS("--periodic-z")) {
345                         periodic[2] = 1;
346                 }
347                 else if (ARG_IS("-h") || ARG_IS("-help") || ARG_IS("--help")) {
348                         printf("ERROR: unknown argument: %s\n", argv[i]);
349                         printf("\n");
350                         printf("Usage:\n");
351                         printf("./lbmbenchk -list\n");
352                         printf("./lbmbenchk \n");
353                         printf("      [-dims XxYyZ] [-geometry box|channel|pipe|blocks[-value]] [-iterations <iterations>] [-lattice-dump-ascii]\n");
354                         printf("      [-rho-in <density>] [-rho-out <density] [-omega <omega>] [-kernel <kernel>]\n");
355                         printf("      [-periodic-x]\n");
356 #ifdef STATISTICS
357                         printf("      [-statistics <every-n-iteration>]\n");
358 #endif
359 #ifdef VTK_OUTPUT
360                         printf("      [-vtk [<every-n-iteration>]]\n");
361 #endif
362 #ifdef _OPENMP
363                         printf("      [-t <number of threads>]\n");
364 #endif
365                         printf("      [-pin core{,core}*]\n");
366 #ifdef VERIFICATION
367                         printf("      [-verify]\n");
368 #endif
369                         printf("      -- <kernel specific parameters>\n");
370                         printf("\n");
371                         printf("-list           List available kernels.\n");
372                         printf("\n");
373                         printf("-dims XxYxZ             Specify geometry dimensions.\n");
374                         printf("\n");
375                         printf("-geometry blocks-<value>\n");
376                         printf("                Geometetry with blocks of size <value> regularily layout out.\n");
377                         printf("\n");
378                         return 1;
379                 }
380                 else if (ARG_IS("--")) {
381                         // printf("# kernel args start with %s  these are %d args.\n", argv[i + 1], argc - i - 1);
382                         p.KernelArgs = &argv[++i];
383                         p.nKernelArgs = argc - i;
384                         break;
385                 }
386                 else {
387                         printf("ERROR: unknown parameter: %s.\n", argv[i]);
388                         exit(1);
389                 }
390         }
391
392         #undef ARG_IS
393         #undef NEXT_ARG_PRESENT
394
395
396         // ----------------------------------------------------------------------
397         // Check if we exceed our index addressing PDFs.
398
399         {
400                 uint64_t nPdfs = ((uint64_t)19) * dims[0] * dims[1] * dims[2];
401
402                 if (nPdfs > ((2LU << 31) - 1)) {
403                         printf("ERROR: number of PDFs exceed 2^31.\n");
404                         exit(1);
405                 }
406         }
407
408         // ----------------------------------------------------------------------
409
410 #ifdef _OPENMP
411         omp_set_num_threads(nThreads);
412 #endif
413
414         const char * defines[] = {
415 #ifdef DEBUG
416         "DEBUG",
417 #endif
418 #ifdef VTK_OUTPUT
419         "VTK_OUTPUT",
420 #endif
421 #ifdef STATISTICS
422         "STATISTICS",
423 #endif
424 #ifdef VERIFICATION
425         "VERIFICATION",
426 #endif
427 #ifdef _OPENMP
428         "_OPENMP",
429 #endif
430 #ifdef HAVE_LIKWID
431         "HAVE_LIKWID",
432 #endif
433 #ifdef INTEL_OPT_DIRECTIVES
434         "INTEL_OPT_DIRECTIVES",
435 #endif
436         };
437
438         printf("#\n");
439
440 #ifdef PRECISION_DP
441         printf("# - floating point:    double precision (%lu b, PRECISION_DP defined)\n", sizeof(PdfT));
442 #elif defined(PRECISION_SP)
443         printf("# - floating point:    single precision (%lu b, PRECISION_SP defined)\n", sizeof(PdfT));
444 #else
445         printf("# - floating point:    UNKNOWN (%lu b)\n", sizeof(PdfT));
446 #endif
447
448 #if defined(VECTOR_AVX512)
449         printf("# - intrinsics:        AVX512 (VECTOR_AVX512 defined)\n");
450 #elif defined(VECTOR_AVX)
451         printf("# - intrinsics:        AVX (VECTOR_AVX defined)\n");
452 #elif defined(VECTOR_SSE)
453         printf("# - intrinsics:        SSE (VECTOR_SSE defined)\n");
454 #else
455         printf("# - intrinsics:        UNKNOWN\n");
456 #endif
457
458         printf("# - defines:           ");
459         for (int j = 0; j < N_ELEMS(defines); ++j) {
460                 printf("%s ", defines[j]);
461         }
462         printf("\n");
463
464 #ifdef __x86_64__
465         printf("# - fp status:         DAZ: %d  FTZ: %d\n", FpGetDaz(), FpGetFtz());
466 #endif
467
468         printf("# - iterations:        %d\n", cd.MaxIterations);
469
470         LatticeDesc ld;
471
472         GeoCreateByStr(geometryType, dims, periodic, &ld);
473
474         printf("# - geometry:\n");
475         printf("#   type:              %s\n", ld.Name);
476         printf("#   dimensions:        %d x %d x %d (x, y, z)\n", ld.Dims[0], ld.Dims[1], ld.Dims[2]);
477
478         printf("#   nodes total:       % 10d\n", ld.nObst + ld.nFluid);
479         printf("#   nodes fluid:       % 10d (including inlet & outlet)\n", ld.nFluid);
480         printf("#   nodes obstacles:   % 10d\n", ld.nObst);
481         printf("#   nodes inlet:       % 10d\n", ld.nInlet);
482         printf("#   nodes outlet:      % 10d\n", ld.nOutlet);
483         printf("#   periodicity:       x: %d y: %d z: %d\n", ld.PeriodicX, ld.PeriodicY, ld.PeriodicZ);
484
485 #ifdef VTK_OUTPUT
486         printf("# - VTK output:        %d (every %d iteration)\n", cd.VtkOutput, cd.VtkModulus);
487 #endif
488 #ifdef STATISTICS
489         printf("# - statistics:        every %d iteration\n", cd.StatisticsModulus);
490 #endif
491
492         printf("# - flow:\n");
493         printf("#   omega:             %f\n", cd.Omega);
494         printf("#   initial density at inlet/outlet:\n");
495         printf("#     rho in:          %e\n", cd.RhoIn);
496     printf("#     rho out:         %e\n", cd.RhoOut);
497
498 #ifdef _OPENMP
499         printf("# - OpenMP threads:    %d\n", omp_get_max_threads());
500
501         if (pinString != NULL) {
502                 #pragma omp parallel
503                 {
504                         int threadId = omp_get_thread_num();
505                         int err;
506
507                         err = PinCurrentThreadByCpuList(pinString, threadId);
508
509                         if (err) {
510                                 printf("ERROR [thread %d]: pinning failed.\n", threadId);
511                                 exit(1);
512                         }
513
514                         const char * cpuList = PinCpuListAsString();
515                         Assert(cpuList != NULL);
516
517                         // Not so nice hack to print the thread ids ordered.
518                         #pragma omp for ordered
519                         for (int i = 0; i < omp_get_num_threads(); ++i) {
520                                 #pragma omp ordered
521                                 printf("#   thread %2d  pinned to core(s):  %s\n", threadId, cpuList);
522                         }
523
524                         free((void *)cpuList);
525                 }
526         }
527 #endif
528
529         KernelData * kd;
530
531         KernelFunctions * kf = NULL;
532
533         if (kernelToUse == NULL) {
534                 kf = &g_kernels[0];
535         }
536         else {
537                 for (int j = 0; j < N_ELEMS(g_kernels); ++j) {
538
539                         if (!strcasecmp(kernelToUse, g_kernels[j].Name)) {
540                                 kf = &g_kernels[j];
541                                 break;
542                         }
543                 }
544         }
545
546         if (kf == NULL) {
547                 printf("ERROR: requested kernel \"%s\" not found.\n", kernelToUse);
548                 exit(1);
549         }
550
551         printf("#\n");
552         printf("# - kernel:            %s\n", kf->Name);
553         printf("#\n");
554
555         // Initialize kernel by calling its own initialization function
556         kf->Init(&ld, &kd, &p);
557
558 #ifdef VERIFICATION
559         if (verify) {
560                 KernelSetInitialDensity( &ld, kd, &cd);
561                 KernelSetInitialVelocity(&ld, kd, &cd);
562         }
563 #endif
564
565         printf("# starting kernel...\n");
566
567         X_LIKWID_INIT();
568
569         // Call the LBM kernel
570         kd->Kernel(&ld, kd, &cd);
571
572         X_LIKWID_DEINIT();
573
574         // Print some statistics...
575         KernelStatisticsAdv(kd, &ld, &cd, cd.MaxIterations, 1 /* force output */);
576
577 #ifdef VERIFICATION
578         PdfT errorNorm = -1.0;
579         KernelVerifiy(&ld, kd, &cd, &errorNorm);
580 #endif
581
582         double duration                         = kd->Duration;
583         double loopBalance                      = kd->LoopBalance;
584         double dataVolGByte             = loopBalance * ld.nFluid * cd.MaxIterations / 1024. / 1024. / 1024.;
585         double bandwidthGBytePerS       = dataVolGByte / duration;
586
587         // Deinitialize kernel by calling its own deinitialization function
588         kf->Deinit(&ld, &kd);
589
590         double perf = (double)ld.nFluid * (double)cd.MaxIterations / duration / 1.e6;
591
592         printf("#\n");
593         printf("# Evaluation Stats\n");
594 #ifdef VERIFICATION
595         printf("#   runtype:     \t%s  \n", "verification");
596 #else
597         printf("#   runtype:     \t%s  \n", "benchmark");
598 #endif
599         printf("#   runtime:           \t%.3f s\n", duration);
600         printf("#   iterations:        \t%d  \n", cd.MaxIterations);
601         printf("#   fluid cells:       \t%d \n", ld.nFluid);
602         printf("# Derived metrics\n");
603         printf("#   MEM data vol.:     \t%.2f GByte\n", dataVolGByte);
604         printf("#   MEM bandwidth:     \t%.2f GByte/s\n", bandwidthGBytePerS);
605         printf("#   performance:       \t%.3f MFLUP/s\n", perf);
606
607
608         printf("P:   %f MFLUP/s  t: %d  d: %f s  iter: %d  fnodes: %f x1e6  geo: %s  kernel: %s  %s  %s\n",
609                 perf, nThreads, duration, cd.MaxIterations, ld.nFluid / 1e6,
610                 geometryType, kernelToUse,
611 #ifdef VERIFICATION
612                 "VERIFICATION",
613 #else
614                 "B",
615 #endif
616 #ifdef PRECISION_DP
617                 "dp"
618 #elif defined(PRECISION_SP)
619                 "sp"
620 #else
621                 "unknown-precision"
622 #endif
623         );
624
625         int exitCode = 0;
626
627 #ifdef VERIFICATION
628
629         if (verify) {
630                 printf("# VERIFICATION: deviation from analytical solution: %e\n", errorNorm);
631
632                 if (errorNorm > 0.1) {
633                         printf("# VERIFICATION FAILED.\n");
634                         exitCode = 1;
635                 }
636                 else {
637                         printf("# VERIFICATION SUCCEEDED.\n");
638                 }
639         }
640 #else
641 //      printf("# VERIFICATION: deviation from analytical solution: %e\n", errorNorm);
642 //      printf("# VERIFICATION: this is only valid for pipe geometry with enough iterations performed.\n");
643 #endif
644
645         MemFree((void **)&ld.Lattice);
646
647         return exitCode;
648 }
This page took 0.084293 seconds and 4 git commands to generate.