10dfbbe9c16367d2eb6897ac3ac0ff1110a81115
[LbmBenchmarkKernelsPublic.git] / src / Main.c
1 // --------------------------------------------------------------------------
2 //
3 // Copyright
4 //   Markus Wittmann, 2016-2017
5 //   RRZE, University of Erlangen-Nuremberg, Germany
6 //   markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
7 //
8 //   Viktor Haag, 2016
9 //   LSS, University of Erlangen-Nuremberg, Germany
10 //
11 //  This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
12 //
13 //  LbmBenchKernels is free software: you can redistribute it and/or modify
14 //  it under the terms of the GNU General Public License as published by
15 //  the Free Software Foundation, either version 3 of the License, or
16 //  (at your option) any later version.
17 //
18 //  LbmBenchKernels is distributed in the hope that it will be useful,
19 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
20 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
21 //  GNU General Public License for more details.
22 //
23 //  You should have received a copy of the GNU General Public License
24 //  along with LbmBenchKernels.  If not, see <http://www.gnu.org/licenses/>.
25 //
26 // --------------------------------------------------------------------------
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <strings.h>  // strcasecmp
31
32 #include <inttypes.h>
33
34 #ifdef _OPENMP
35 #include <omp.h>
36 #endif
37
38 #include "Base.h"
39 #include "Kernel.h"
40 #include "Memory.h"
41
42 #include "Lattice.h"
43 #include "Geometry.h"
44 #include "Pinning.h"
45 #include "LikwidIf.h"
46
47 #include "KernelFunctions.h"
48
49 #ifdef __x86_64__
50         #include <xmmintrin.h>
51
52
53         #define MXCSR_DAZ                                       6
54         #define MXCSR_FTZ                                       15
55
56
57         int FpIsMxCsrMaskSet(unsigned int mask)
58         {
59                 unsigned int mxcsr;
60                 unsigned int mxcsrNew;
61
62                 mxcsr = _mm_getcsr();
63
64                 mxcsrNew = mxcsr & mask;
65
66                 return (mxcsrNew == mask);
67         }
68
69         int FpGetFtz()
70         {
71                 return FpIsMxCsrMaskSet(1 << MXCSR_FTZ);
72         }
73
74         int FpGetDaz()
75         {
76                 return FpIsMxCsrMaskSet(1 << MXCSR_DAZ);
77         }
78 #endif
79
80
81 int ParseDimensions(const char * parameter, int * nX, int * nY, int * nZ)
82 {
83         char * tmp;
84
85         *nX = atoi(parameter);
86
87         if (*nX <= 0) {
88                 printf("ERROR: parameter for X dimension must be > 0.\n");
89                 return 0;
90         }
91
92         tmp = strchr(parameter, 'x');
93
94         if (tmp == NULL) {
95                 printf("ERROR: parameter for Y dimension is missing.\n");
96                 return 0;
97         }
98
99         *nY = atoi(tmp + 1);
100
101         if (*nY <= 0) {
102                 printf("ERROR: parameter for Y dimension must be > 0.\n");
103                 return 0;
104         }
105
106         tmp = strchr(tmp + 1, 'x');
107
108         if (tmp == NULL) {
109                 printf("ERROR: parameter for Z dimension is missing.\n");
110                 return 0;
111         }
112
113         *nZ = atoi(tmp + 1);
114
115         if (*nZ <= 0) {
116                 printf("ERROR: parameter for Z dimension must be > 0.\n");
117                 return 0;
118         }
119
120         return 1;
121 }
122
123 int main(int argc, char * argv[])
124 {
125         int dims[3] = { 20, 20, 20 };           // Dimensions in x, y, and z direction
126         const char * geometryType = "channel";
127         // int latticeDumpAscii = 0;
128         int verify = 0; UNUSED(verify);
129         char * kernelToUse = NULL;
130         int nThreads = 1;
131         const char * pinString = NULL;
132         int periodic[3] = { 0 };
133
134         CaseData cd;
135
136         cd.MaxIterations                = 10;
137         cd.RhoIn                                = F(1.0);
138         cd.RhoOut                               = F(1.0);
139         cd.Omega                                = F(1.0);
140         cd.VtkOutput                    = 0;
141         cd.VtkModulus                   = 100;
142         cd.StatisticsModulus    = 100;
143         cd.XForce                               = F(0.00001);
144         kernelToUse                             = "push-soa";
145
146         Parameters p;
147         p.nArgs        = argc;
148         p.Args         = argv;
149         p.nKernelArgs  = 0;
150         p.KernelArgs   = NULL;
151
152 #define LBM_BENCH_KERNELS_VERSION_MAJOR 0
153 #define LBM_BENCH_KERNELS_VERSION_MINOR 1
154
155     printf("Lattice Boltzmann Benchmark Kernels (LbmBenchKernels) Copyright (C) 2016, 2017 LSS, RRZE\n");
156     printf("This program comes with ABSOLUTELY NO WARRANTY; for details see LICENSE.\n");
157         printf("This is free software, and you are welcome to redistribute it under certain conditions.\n");
158         printf("\n");
159         printf("# LBM Benchmark Kernels %d.%d, compiled %s %s, type: %s\n",
160                 LBM_BENCH_KERNELS_VERSION_MAJOR, LBM_BENCH_KERNELS_VERSION_MINOR, __DATE__, __TIME__,
161 #ifdef VERIFICATION
162                 "verification"
163 #else
164                 "benchmark"
165 #endif
166         );
167
168         // ----------------------------------------------------------------------
169         // Parse command line arguments
170
171         #define ARG_IS(param)                   (!strcmp(argv[i], param))
172         #define NEXT_ARG_PRESENT() \
173                 do { \
174                         if (i + 1 >= argc) { \
175                                 printf("ERROR: argument %s requires a parameter.\n", argv[i]); \
176                                 return 1; \
177                         } \
178                 } while (0)
179
180     for (int i = 1; i < argc; ++i) {
181
182         if (ARG_IS("-dims") || ARG_IS("--dims")) {
183             NEXT_ARG_PRESENT();
184
185
186             if (!ParseDimensions(argv[++i], &dims[0], &dims[1], &dims[2])) {
187                 return 1;
188             }
189         }
190                 // else if (ARG_IS("-lattice-dump-ascii") || ARG_IS("--lattice-dump-ascii")) {
191                 //      latticeDumpAscii = 1;
192                 // }
193                 else if (ARG_IS("-geometry") || ARG_IS("--geometry")) {
194                         NEXT_ARG_PRESENT();
195
196                         geometryType = argv[++i];
197                 }
198                 else if (ARG_IS("-iterations") ||ARG_IS("--iterations")) {
199                         NEXT_ARG_PRESENT();
200
201                         cd.MaxIterations = strtol(argv[++i], NULL, 0);
202
203                         if (cd.MaxIterations <= 0) {
204                                 printf("ERROR: number of iterations must be > 0.\n");
205                                 return 1;
206                         }
207                 }
208                 else if (ARG_IS("-rho-in") ||ARG_IS("--rho-in")) {
209                         NEXT_ARG_PRESENT();
210
211                         cd.RhoIn = F(strtod(argv[++i], NULL));
212                 }
213                 else if (ARG_IS("-rho-out") ||ARG_IS("--rho-out")) {
214                         NEXT_ARG_PRESENT();
215
216                         cd.RhoOut = F(strtod(argv[++i], NULL));
217                 }
218                 else if (ARG_IS("-omega") ||ARG_IS("--omega")) {
219                         NEXT_ARG_PRESENT();
220
221                         cd.Omega = F(strtod(argv[++i], NULL));
222                 }
223                 else if (ARG_IS("-x-force") ||ARG_IS("--x-force")) {
224                         NEXT_ARG_PRESENT();
225
226                         cd.XForce = F(strtod(argv[++i], NULL));
227                 }
228                 else if (ARG_IS("-verify") || ARG_IS("--verify")) {
229 #ifdef VERIFICATION
230
231                         // Choose this preset for verification. As geometry type "box" is
232                         // used but x and y direction are made periodic.
233                         // Everything else can be altered, but enough iterations should be
234                         // performed in order to receive a fully developed flow field.
235                         verify = 1;
236
237                         cd.Omega  = F(1.0);
238                         cd.RhoIn  = F(1.0);
239                         cd.RhoOut = F(1.0);
240                         geometryType = "box";
241                         dims[0] = 16;
242                         dims[1] = 16;
243                         dims[2] = 16;
244                         cd.XForce = F(0.00001);
245                         cd.MaxIterations = 1000;
246                         periodic[0] = 1;
247                         periodic[1] = 1;
248                         periodic[2] = 0;
249
250                         printf("#\n");
251                         printf("# VERIFICATION: verifying flow profile of channel flow.\n");
252                         printf("#\n");
253
254                         // TODO: this is not a good idea as we ignore all other options...
255
256 #else
257                         printf("ERROR: in order to use -verify VERIFICATION must be defined during compilation.\n");
258                         printf("       Recompile with VERIFICATION=on.\n");
259                         return 1;
260 #endif
261                 }
262                 else if (ARG_IS("-vtk") || ARG_IS("--vtk")) {
263 #ifdef VTK_OUTPUT
264
265                         cd.VtkOutput = 1;
266
267                         // If the next parameter is a number it is used as the itartion count,
268                         // if not it is probably another parameter.
269                         if (i + 1 < argc) {
270
271                                 int vtkModulus = atoi(argv[i+1]);
272
273                                 if (vtkModulus > 0) {
274                                         cd.VtkModulus = vtkModulus;
275                                         ++i;
276                                 }
277                         }
278 #else
279                         printf("ERROR: in order to use -vtk VTK_OUTPUT must be defined during compilation.\n");
280                         printf("       Recompile with VTK_OUTPUT=on.\n");
281                         return 1;
282 #endif
283                 }
284                 else if (ARG_IS("-statistics") || ARG_IS("--statistics")) {
285 #ifdef STATISTICS
286                         NEXT_ARG_PRESENT();
287
288                         cd.StatisticsModulus = atoi(argv[++i]);
289
290                         if (cd.StatisticsModulus <= 0) {
291                                 printf("ERROR: the iteration count for -statistics must be > 0.\n");
292                                 return 1;
293                         }
294 #else
295                         printf("ERROR: in order to use -statistics STATISTICS must be defined during compilation.\n");
296                         printf("       Recompile with STATISTICS=on.\n");
297                         return 1;
298 #endif
299                 }
300                 else if (ARG_IS("-kernel") || ARG_IS("--kernel")) {
301                         NEXT_ARG_PRESENT();
302
303                         kernelToUse = argv[++i];
304                 }
305                 else if (ARG_IS("-list") || ARG_IS("--list")) {
306                         printf("Available kernels to benchmark:\n");
307
308                         for (int j = 0; j < N_ELEMS(g_kernels); ++j) {
309                                 printf("   %s\n", g_kernels[j].Name);
310                         }
311
312                         return 0;
313                 }
314                 else if (ARG_IS("-pin") || ARG_IS("--pin")) {
315                         NEXT_ARG_PRESENT();
316
317                         pinString = argv[++i];
318                 }
319                 else if (ARG_IS("-t") || ARG_IS("-threads") || ARG_IS("--threads")) {
320 #ifdef _OPENMP
321                         NEXT_ARG_PRESENT();
322
323                         nThreads = atoi(argv[++i]);
324
325                         if (nThreads <= 0) {
326                                 printf("ERROR: number of threads must be > 0.\n");
327                                 return 1;
328                         }
329 #else
330                         printf("ERROR: specifying number of threads is only available when compiled with OpenMP support.\n");
331                         return 1;
332 #endif
333                 }
334                 else if (ARG_IS("-periodic-x") || ARG_IS("--periodic-x")) {
335                         periodic[0] = 1;
336                 }
337                 else if (ARG_IS("-periodic-y") || ARG_IS("--periodic-y")) {
338                         periodic[1] = 1;
339                 }
340                 else if (ARG_IS("-periodic-z") || ARG_IS("--periodic-z")) {
341                         periodic[2] = 1;
342                 }
343                 else if (ARG_IS("-h") || ARG_IS("-help") || ARG_IS("--help")) {
344                         printf("ERROR: unknown argument: %s\n", argv[i]);
345                         printf("\n");
346                         printf("Usage:\n");
347                         printf("./lbmbenchk -list\n");
348                         printf("./lbmbenchk \n");
349                         printf("      [-dims XxYyZ] [-geometry box|channel|pipe|porosity[-value]] [-iterations <iterations>] [-lattice-dump-ascii]\n");
350                         printf("      [-rho-in <density>] [-rho-out <density] [-omega <omega>] [-kernel <kernel>]\n");
351                         printf("      [-periodic-x]\n");
352 #ifdef STATISTICS
353                         printf("      [-statistics <every-n-iteration>]\n");
354 #endif
355 #ifdef VTK_OUTPUT
356                         printf("      [-vtk [<every-n-iteration>]]\n");
357 #endif
358 #ifdef _OPENMP
359                         printf("      [-t <number of threads>]\n");
360 #endif
361                         printf("      [-pin core{,core}*]\n");
362 #ifdef VERIFICATION
363                         printf("      [-verify]\n");
364 #endif
365                         printf("      -- <kernel specific parameters>\n");
366                         printf("\n");
367                         printf("-list           List available kernels.\n");
368                         printf("\n");
369                         printf("-dims XxYxZ             Specify geometry dimensions.\n");
370                         printf("\n");
371                         printf("-geometry porosity-<value>\n");
372                         printf("                Geometetry with blocks of size <value> regularily layout out.\n");
373                         printf("\n");
374                         return 1;
375                 }
376                 else if (ARG_IS("--")) {
377                         // printf("# kernel args start with %s  these are %d args.\n", argv[i + 1], argc - i - 1);
378                         p.KernelArgs = &argv[++i];
379                         p.nKernelArgs = argc - i;
380                         break;
381                 }
382                 else {
383                         printf("ERROR: unknown parameter: %s.\n", argv[i]);
384                         exit(1);
385                 }
386         }
387
388         #undef ARG_IS
389         #undef NEXT_ARG_PRESENT
390
391
392         // ----------------------------------------------------------------------
393         // Check if we exceed our index addressing PDFs.
394
395         {
396                 uint64_t nPdfs = ((uint64_t)19) * dims[0] * dims[1] * dims[2];
397
398                 if (nPdfs > ((2LU << 31) - 1)) {
399                         printf("ERROR: number of PDFs exceed 2^31.\n");
400                         exit(1);
401                 }
402         }
403
404         // ----------------------------------------------------------------------
405
406 #ifdef _OPENMP
407         omp_set_num_threads(nThreads);
408 #endif
409
410         const char * defines[] = {
411 #ifdef DEBUG
412         "DEBUG",
413 #endif
414 #ifdef VTK_OUTPUT
415         "VTK_OUTPUT",
416 #endif
417 #ifdef STATISTICS
418         "STATISTICS",
419 #endif
420 #ifdef VERIFICATION
421         "VERIFICATION",
422 #endif
423 #ifdef _OPENMP
424         "_OPENMP",
425 #endif
426 #ifdef HAVE_LIKWID
427         "HAVE_LIKWID",
428 #endif
429 #ifdef INTEL_OPT_DIRECTIVES
430         "INTEL_OPT_DIRECTIVES",
431 #endif
432         };
433
434         printf("#\n");
435
436 #ifdef PRECISION_DP
437         printf("# - floating point:    double precision (%lu b, PRECISION_DP defined)\n", sizeof(PdfT));
438 #elif defined(PRECISION_SP)
439         printf("# - floating point:    single precision (%lu b, PRECISION_SP defined)\n", sizeof(PdfT));
440 #else
441         printf("# - floating point:    UNKNOWN (%lu b)\n", sizeof(PdfT));
442 #endif
443
444 #ifdef VECTOR_AVX
445         printf("# - intrinsics:        AVX (VECTOR_AVX defined)\n");
446 #elif defined(VECTOR_SSE)
447         printf("# - intrinsics:        SSE (VECTOR_SSE defined)\n");
448 #else
449         printf("# - intrinsics:        UNKNOWN\n");
450 #endif
451
452         printf("# - defines:           ");
453         for (int j = 0; j < N_ELEMS(defines); ++j) {
454                 printf("%s ", defines[j]);
455         }
456         printf("\n");
457
458 #ifdef __x86_64__
459         printf("# - fp status:         DAZ: %d  FTZ: %d\n", FpGetDaz(), FpGetFtz());
460 #endif
461
462         printf("# - iterations:        %d\n", cd.MaxIterations);
463
464         LatticeDesc ld;
465
466         GeoCreateByStr(geometryType, dims, periodic, &ld);
467
468         printf("# - geometry:\n");
469         printf("#   type:              %s\n", ld.Name);
470         printf("#   dimensions:        %d x %d x %d (x, y, z)\n", ld.Dims[0], ld.Dims[1], ld.Dims[2]);
471
472         printf("#   nodes total:       %d\n", ld.nObst + ld.nFluid);
473         printf("#   nodes fluid:       %d (including inlet & outlet)\n", ld.nFluid);
474         printf("#   nodes obstacles:   %d\n", ld.nObst);
475         printf("#   nodes inlet:       %d\n", ld.nInlet);
476         printf("#   nodes outlet:      %d\n", ld.nOutlet);
477         printf("#   periodicity:       x: %d y: %d z: %d\n", ld.PeriodicX, ld.PeriodicY, ld.PeriodicZ);
478
479 #ifdef VTK_OUTPUT
480         printf("# - VTK output:        %d (every %d iteration)\n", cd.VtkOutput, cd.VtkModulus);
481 #endif
482 #ifdef STATISTICS
483         printf("# - statistics:        every %d iteration\n", cd.StatisticsModulus);
484 #endif
485
486         printf("# - flow:\n");
487         printf("#   omega:             %f\n", cd.Omega);
488         printf("#   initial density at inlet/outlet:\n");
489         printf("#     rho in:          %e\n", cd.RhoIn);
490     printf("#     rho out:         %e\n", cd.RhoOut);
491
492 #ifdef _OPENMP
493         printf("# - OpenMP threads:    %d\n", omp_get_max_threads());
494
495         if (pinString != NULL) {
496                 #pragma omp parallel
497                 {
498                         int threadId = omp_get_thread_num();
499                         int err;
500
501                         err = PinCurrentThreadByCpuList(pinString, threadId);
502
503                         if (err) {
504                                 printf("ERROR [thread %d]: pinning failed.\n", threadId);
505                                 exit(1);
506                         }
507
508                         const char * cpuList = PinCpuListAsString();
509                         Assert(cpuList != NULL);
510
511                         // Not so nice hack to print the thread ids ordered.
512                         #pragma omp for ordered
513                         for (int i = 0; i < omp_get_num_threads(); ++i) {
514                                 #pragma omp ordered
515                                 printf("#   thread %2d  pinned to core(s):  %s\n", threadId, cpuList);
516                         }
517
518                         free((void *)cpuList);
519                 }
520         }
521 #endif
522
523         KernelData * kd;
524
525         KernelFunctions * kf = NULL;
526
527         if (kernelToUse == NULL) {
528                 kf = &g_kernels[0];
529         }
530         else {
531                 for (int j = 0; j < N_ELEMS(g_kernels); ++j) {
532
533                         if (!strcasecmp(kernelToUse, g_kernels[j].Name)) {
534                                 kf = &g_kernels[j];
535                                 break;
536                         }
537                 }
538         }
539
540         if (kf == NULL) {
541                 printf("ERROR: requested kernel \"%s\" not found.\n", kernelToUse);
542                 exit(1);
543         }
544
545         printf("#\n");
546         printf("# - kernel:            %s\n", kf->Name);
547         printf("#\n");
548
549         // Initialize kernel by calling its own initialization function
550         kf->Init(&ld, &kd, &p);
551
552 #ifdef VERIFICATION
553         if (verify) {
554                 KernelSetInitialDensity( &ld, kd, &cd);
555                 KernelSetInitialVelocity(&ld, kd, &cd);
556         }
557 #endif
558
559         printf("# starting kernel...\n");
560
561         X_LIKWID_INIT();
562
563         double timeStart = Time();
564
565         // Call the LBM kernel
566         kd->Kernel(&ld, kd, &cd);
567
568         double duration = Time() - timeStart;
569
570         X_LIKWID_DEINIT();
571
572         // Print some statistics...
573         KernelStatisticsAdv(kd, &ld, &cd, cd.MaxIterations, 1 /* force output */);
574
575 #ifdef VERIFICATION
576         PdfT errorNorm = -1.0;
577         KernelVerifiy(&ld, kd, &cd, &errorNorm);
578 #endif
579
580         // Deinitialize kernel by calling its own deinitialization function
581         kf->Deinit(&ld, &kd);
582
583
584         double perf = (double)ld.nFluid * (double)cd.MaxIterations / duration / 1.e6;
585
586         printf("P:   %f MFLUP/s  t: %d  d: %f s  iter: %d  fnodes: %f x1e6  geo: %s  kernel: %s  %s  %s\n",
587                 perf, nThreads, duration, cd.MaxIterations, ld.nFluid / 1e6,
588                 geometryType, kernelToUse,
589 #ifdef VERIFICATION
590                 "VERIFICATION",
591 #else
592                 "B",
593 #endif
594 #ifdef PRECISION_DP
595                 "dp"
596 #elif defined(PRECISION_SP)
597                 "sp"
598 #else
599                 "unknown-precision"
600 #endif
601         );
602
603         int exitCode = 0;
604
605 #ifdef VERIFICATION
606
607         if (verify) {
608                 printf("# VERIFICATION: deviation from analytical solution: %e\n", errorNorm);
609
610                 if (errorNorm > 0.1) {
611                         printf("# VERIFICATION FAILED.\n");
612                         exitCode = 1;
613                 }
614                 else {
615                         printf("# VERIFICATION SUCCEEDED.\n");
616                 }
617         }
618 #else
619 //      printf("# VERIFICATION: deviation from analytical solution: %e\n", errorNorm);
620 //      printf("# VERIFICATION: this is only valid for pipe geometry with enough iterations performed.\n");
621 #endif
622
623         MemFree((void **)&ld.Lattice);
624
625         return exitCode;
626 }
This page took 0.06645 seconds and 3 git commands to generate.