1 // --------------------------------------------------------------------------
4 // Markus Wittmann, 2016-2017
5 // RRZE, University of Erlangen-Nuremberg, Germany
6 // markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
9 // LSS, University of Erlangen-Nuremberg, Germany
11 // Michael Hussnaetter, 2017-2018
12 // University of Erlangen-Nuremberg, Germany
13 // michael.hussnaetter -at- fau.de
15 // This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
17 // LbmBenchKernels is free software: you can redistribute it and/or modify
18 // it under the terms of the GNU General Public License as published by
19 // the Free Software Foundation, either version 3 of the License, or
20 // (at your option) any later version.
22 // LbmBenchKernels is distributed in the hope that it will be useful,
23 // but WITHOUT ANY WARRANTY; without even the implied warranty of
24 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 // GNU General Public License for more details.
27 // You should have received a copy of the GNU General Public License
28 // along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>.
30 // --------------------------------------------------------------------------
34 #include <strings.h> // strcasecmp
51 #include "KernelFunctions.h"
54 #include <xmmintrin.h>
61 int FpIsMxCsrMaskSet(unsigned int mask)
64 unsigned int mxcsrNew;
68 mxcsrNew = mxcsr & mask;
70 return (mxcsrNew == mask);
75 return FpIsMxCsrMaskSet(1 << MXCSR_FTZ);
80 return FpIsMxCsrMaskSet(1 << MXCSR_DAZ);
85 int ParseDimensions(const char * parameter, int * nX, int * nY, int * nZ)
89 *nX = atoi(parameter);
92 printf("ERROR: parameter for X dimension must be > 0.\n");
96 tmp = strchr(parameter, 'x');
99 printf("ERROR: parameter for Y dimension is missing.\n");
106 printf("ERROR: parameter for Y dimension must be > 0.\n");
110 tmp = strchr(tmp + 1, 'x');
113 printf("ERROR: parameter for Z dimension is missing.\n");
120 printf("ERROR: parameter for Z dimension must be > 0.\n");
127 int main(int argc, char * argv[])
129 int dims[3] = { 20, 20, 20 }; // Dimensions in x, y, and z direction
130 const char * geometryType = "channel";
131 // int latticeDumpAscii = 0;
132 int verify = 0; UNUSED(verify);
133 char * kernelToUse = NULL;
135 const char * pinString = NULL;
136 int periodic[3] = { 0 };
140 cd.MaxIterations = 10;
146 cd.StatisticsModulus = 100;
147 cd.XForce = F(0.00001);
148 kernelToUse = "push-soa";
156 #define LBM_BENCH_KERNELS_VERSION_MAJOR 0
157 #define LBM_BENCH_KERNELS_VERSION_MINOR 1
159 printf("Lattice Boltzmann Benchmark Kernels (LbmBenchKernels) Copyright (C) 2016, 2017, 2018 LSS, RRZE\n");
160 printf("This program comes with ABSOLUTELY NO WARRANTY; for details see LICENSE.\n");
161 printf("This is free software, and you are welcome to redistribute it under certain conditions.\n");
163 printf("# LBM Benchmark Kernels %d.%d, compiled %s %s, type: %s\n",
164 LBM_BENCH_KERNELS_VERSION_MAJOR, LBM_BENCH_KERNELS_VERSION_MINOR, __DATE__, __TIME__,
172 // ----------------------------------------------------------------------
173 // Parse command line arguments
175 #define ARG_IS(param) (!strcmp(argv[i], param))
176 #define NEXT_ARG_PRESENT() \
178 if (i + 1 >= argc) { \
179 printf("ERROR: argument %s requires a parameter.\n", argv[i]); \
184 for (int i = 1; i < argc; ++i) {
186 if (ARG_IS("-dims") || ARG_IS("--dims")) {
190 if (!ParseDimensions(argv[++i], &dims[0], &dims[1], &dims[2])) {
194 // else if (ARG_IS("-lattice-dump-ascii") || ARG_IS("--lattice-dump-ascii")) {
195 // latticeDumpAscii = 1;
197 else if (ARG_IS("-geometry") || ARG_IS("--geometry")) {
200 geometryType = argv[++i];
202 else if (ARG_IS("-iterations") ||ARG_IS("--iterations")) {
205 cd.MaxIterations = strtol(argv[++i], NULL, 0);
207 if (cd.MaxIterations <= 0) {
208 printf("ERROR: number of iterations must be > 0.\n");
212 else if (ARG_IS("-rho-in") ||ARG_IS("--rho-in")) {
215 cd.RhoIn = F(strtod(argv[++i], NULL));
217 else if (ARG_IS("-rho-out") ||ARG_IS("--rho-out")) {
220 cd.RhoOut = F(strtod(argv[++i], NULL));
222 else if (ARG_IS("-omega") ||ARG_IS("--omega")) {
225 cd.Omega = F(strtod(argv[++i], NULL));
227 else if (ARG_IS("-x-force") ||ARG_IS("--x-force")) {
230 cd.XForce = F(strtod(argv[++i], NULL));
232 else if (ARG_IS("-verify") || ARG_IS("--verify")) {
235 // Choose this preset for verification. As geometry type "box" is
236 // used but x and y direction are made periodic.
237 // Everything else can be altered, but enough iterations should be
238 // performed in order to receive a fully developed flow field.
244 geometryType = "box";
248 cd.XForce = F(0.00001);
249 cd.MaxIterations = 1000;
255 printf("# VERIFICATION: verifying flow profile of channel flow.\n");
258 // TODO: this is not a good idea as we ignore all other options...
261 printf("ERROR: in order to use -verify VERIFICATION must be defined during compilation.\n");
262 printf(" Recompile with VERIFICATION=on.\n");
266 else if (ARG_IS("-vtk") || ARG_IS("--vtk")) {
271 // If the next parameter is a number it is used as the itartion count,
272 // if not it is probably another parameter.
275 int vtkModulus = atoi(argv[i+1]);
277 if (vtkModulus > 0) {
278 cd.VtkModulus = vtkModulus;
283 printf("ERROR: in order to use -vtk VTK_OUTPUT must be defined during compilation.\n");
284 printf(" Recompile with VTK_OUTPUT=on.\n");
288 else if (ARG_IS("-statistics") || ARG_IS("--statistics")) {
292 cd.StatisticsModulus = atoi(argv[++i]);
294 if (cd.StatisticsModulus <= 0) {
295 printf("ERROR: the iteration count for -statistics must be > 0.\n");
299 printf("ERROR: in order to use -statistics STATISTICS must be defined during compilation.\n");
300 printf(" Recompile with STATISTICS=on.\n");
304 else if (ARG_IS("-kernel") || ARG_IS("--kernel")) {
307 kernelToUse = argv[++i];
309 else if (ARG_IS("-list") || ARG_IS("--list")) {
310 printf("Available kernels to benchmark:\n");
312 for (int j = 0; j < N_ELEMS(g_kernels); ++j) {
313 printf(" %s\n", g_kernels[j].Name);
318 else if (ARG_IS("-pin") || ARG_IS("--pin")) {
321 pinString = argv[++i];
323 else if (ARG_IS("-t") || ARG_IS("-threads") || ARG_IS("--threads")) {
327 nThreads = atoi(argv[++i]);
330 printf("ERROR: number of threads must be > 0.\n");
334 printf("ERROR: specifying number of threads is only available when compiled with OpenMP support.\n");
338 else if (ARG_IS("-periodic-x") || ARG_IS("--periodic-x")) {
341 else if (ARG_IS("-periodic-y") || ARG_IS("--periodic-y")) {
344 else if (ARG_IS("-periodic-z") || ARG_IS("--periodic-z")) {
347 else if (ARG_IS("-h") || ARG_IS("-help") || ARG_IS("--help")) {
348 printf("ERROR: unknown argument: %s\n", argv[i]);
351 printf("./lbmbenchk -list\n");
352 printf("./lbmbenchk \n");
353 printf(" [-dims XxYyZ] [-geometry box|channel|pipe|blocks[-value]] [-iterations <iterations>] [-lattice-dump-ascii]\n");
354 printf(" [-rho-in <density>] [-rho-out <density] [-omega <omega>] [-kernel <kernel>]\n");
355 printf(" [-periodic-x]\n");
357 printf(" [-statistics <every-n-iteration>]\n");
360 printf(" [-vtk [<every-n-iteration>]]\n");
363 printf(" [-t <number of threads>]\n");
365 printf(" [-pin core{,core}*]\n");
367 printf(" [-verify]\n");
369 printf(" -- <kernel specific parameters>\n");
371 printf("-list List available kernels.\n");
373 printf("-dims XxYxZ Specify geometry dimensions.\n");
375 printf("-geometry blocks-<value>\n");
376 printf(" Geometetry with blocks of size <value> regularily layout out.\n");
380 else if (ARG_IS("--")) {
381 // printf("# kernel args start with %s these are %d args.\n", argv[i + 1], argc - i - 1);
382 p.KernelArgs = &argv[++i];
383 p.nKernelArgs = argc - i;
387 printf("ERROR: unknown parameter: %s.\n", argv[i]);
393 #undef NEXT_ARG_PRESENT
396 // ----------------------------------------------------------------------
397 // Check if we exceed our index addressing PDFs.
400 uint64_t nPdfs = ((uint64_t)19) * dims[0] * dims[1] * dims[2];
402 if (nPdfs > ((2LU << 31) - 1)) {
403 printf("ERROR: number of PDFs exceed 2^31.\n");
408 // ----------------------------------------------------------------------
411 omp_set_num_threads(nThreads);
414 const char * defines[] = {
433 #ifdef INTEL_OPT_DIRECTIVES
434 "INTEL_OPT_DIRECTIVES",
441 printf("# - floating point: double precision (%lu b, PRECISION_DP defined)\n", sizeof(PdfT));
442 #elif defined(PRECISION_SP)
443 printf("# - floating point: single precision (%lu b, PRECISION_SP defined)\n", sizeof(PdfT));
445 printf("# - floating point: UNKNOWN (%lu b)\n", sizeof(PdfT));
448 #if defined(VECTOR_AVX512)
449 printf("# - intrinsics: AVX512 (VECTOR_AVX512 defined)\n");
450 #elif defined(VECTOR_AVX)
451 printf("# - intrinsics: AVX (VECTOR_AVX defined)\n");
452 #elif defined(VECTOR_SSE)
453 printf("# - intrinsics: SSE (VECTOR_SSE defined)\n");
455 printf("# - intrinsics: UNKNOWN\n");
458 #if defined(VECTOR_AVX512_GATHER)
459 printf("# - intrinsics: AVX512 gather (VECTOR_AVX512_GATHER defined)\n");
462 printf("# - defines: ");
463 for (int j = 0; j < N_ELEMS(defines); ++j) {
464 printf("%s ", defines[j]);
469 printf("# - fp status: DAZ: %d FTZ: %d\n", FpGetDaz(), FpGetFtz());
472 printf("# - iterations: %d\n", cd.MaxIterations);
476 GeoCreateByStr(geometryType, dims, periodic, &ld);
478 printf("# - geometry:\n");
479 printf("# type: %s\n", ld.Name);
480 printf("# dimensions: %d x %d x %d (x, y, z)\n", ld.Dims[0], ld.Dims[1], ld.Dims[2]);
482 printf("# nodes total: % 10d\n", ld.nObst + ld.nFluid);
483 printf("# nodes fluid: % 10d (including inlet & outlet)\n", ld.nFluid);
484 printf("# nodes obstacles: % 10d\n", ld.nObst);
485 printf("# nodes inlet: % 10d\n", ld.nInlet);
486 printf("# nodes outlet: % 10d\n", ld.nOutlet);
487 printf("# periodicity: x: %d y: %d z: %d\n", ld.PeriodicX, ld.PeriodicY, ld.PeriodicZ);
490 printf("# - VTK output: %d (every %d iteration)\n", cd.VtkOutput, cd.VtkModulus);
493 printf("# - statistics: every %d iteration\n", cd.StatisticsModulus);
496 printf("# - flow:\n");
497 printf("# omega: %f\n", cd.Omega);
498 printf("# initial density at inlet/outlet:\n");
499 printf("# rho in: %e\n", cd.RhoIn);
500 printf("# rho out: %e\n", cd.RhoOut);
503 printf("# - OpenMP threads: %d\n", omp_get_max_threads());
505 if (pinString != NULL) {
508 int threadId = omp_get_thread_num();
511 err = PinCurrentThreadByCpuList(pinString, threadId);
514 printf("ERROR [thread %d]: pinning failed.\n", threadId);
518 const char * cpuList = PinCpuListAsString();
519 Assert(cpuList != NULL);
521 // Not so nice hack to print the thread ids ordered.
522 #pragma omp for ordered
523 for (int i = 0; i < omp_get_num_threads(); ++i) {
525 printf("# thread %2d pinned to core(s): %s\n", threadId, cpuList);
528 free((void *)cpuList);
535 KernelFunctions * kf = NULL;
537 if (kernelToUse == NULL) {
541 for (int j = 0; j < N_ELEMS(g_kernels); ++j) {
543 if (!strcasecmp(kernelToUse, g_kernels[j].Name)) {
551 printf("ERROR: requested kernel \"%s\" not found.\n", kernelToUse);
556 printf("# - kernel: %s\n", kf->Name);
559 // Initialize kernel by calling its own initialization function
560 kf->Init(&ld, &kd, &p);
564 KernelSetInitialDensity( &ld, kd, &cd);
565 KernelSetInitialVelocity(&ld, kd, &cd);
569 printf("# starting kernel...\n");
573 // Call the LBM kernel
574 kd->Kernel(&ld, kd, &cd);
578 // Print some statistics...
579 KernelStatisticsAdv(kd, &ld, &cd, cd.MaxIterations, 1 /* force output */);
582 PdfT errorNorm = -1.0;
583 KernelVerifiy(&ld, kd, &cd, &errorNorm);
586 double duration = kd->Duration;
587 double loopBalance = kd->LoopBalance;
588 double dataVolGByte = loopBalance * ld.nFluid * cd.MaxIterations / 1024. / 1024. / 1024.;
589 double bandwidthGBytePerS = dataVolGByte / duration;
591 // Deinitialize kernel by calling its own deinitialization function
592 kf->Deinit(&ld, &kd);
594 double perf = (double)ld.nFluid * (double)cd.MaxIterations / duration / 1.e6;
597 printf("# Evaluation Stats\n");
599 printf("# runtype: \t%s \n", "verification");
601 printf("# runtype: \t%s \n", "benchmark");
603 printf("# runtime: \t%.3f s\n", duration);
604 printf("# iterations: \t%d \n", cd.MaxIterations);
605 printf("# fluid cells: \t%d \n", ld.nFluid);
606 printf("# Derived metrics\n");
607 printf("# MEM data vol.: \t%.2f GByte\n", dataVolGByte);
608 printf("# MEM bandwidth: \t%.2f GByte/s\n", bandwidthGBytePerS);
609 printf("# performance: \t%.3f MFLUP/s\n", perf);
612 printf("P: %f MFLUP/s t: %d d: %f s iter: %d fnodes: %f x1e6 geo: %s kernel: %s %s %s\n",
613 perf, nThreads, duration, cd.MaxIterations, ld.nFluid / 1e6,
614 geometryType, kernelToUse,
622 #elif defined(PRECISION_SP)
634 printf("# VERIFICATION: deviation from analytical solution: %e\n", errorNorm);
636 if (errorNorm > 0.1) {
637 printf("# VERIFICATION FAILED.\n");
641 printf("# VERIFICATION SUCCEEDED.\n");
645 // printf("# VERIFICATION: deviation from analytical solution: %e\n", errorNorm);
646 // printf("# VERIFICATION: this is only valid for pipe geometry with enough iterations performed.\n");
649 MemFree((void **)&ld.Lattice);