1 // --------------------------------------------------------------------------
4 // Markus Wittmann, 2016-2017
5 // RRZE, University of Erlangen-Nuremberg, Germany
6 // markus.wittmann -at- fau.de or hpc -at- rrze.fau.de
9 // LSS, University of Erlangen-Nuremberg, Germany
11 // This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels).
13 // LbmBenchKernels is free software: you can redistribute it and/or modify
14 // it under the terms of the GNU General Public License as published by
15 // the Free Software Foundation, either version 3 of the License, or
16 // (at your option) any later version.
18 // LbmBenchKernels is distributed in the hope that it will be useful,
19 // but WITHOUT ANY WARRANTY; without even the implied warranty of
20 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 // GNU General Public License for more details.
23 // You should have received a copy of the GNU General Public License
24 // along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>.
26 // --------------------------------------------------------------------------
30 #include <strings.h> // strcasecmp
47 #include "KernelFunctions.h"
50 #include <xmmintrin.h>
57 int FpIsMxCsrMaskSet(unsigned int mask)
60 unsigned int mxcsrNew;
64 mxcsrNew = mxcsr & mask;
66 return (mxcsrNew == mask);
71 return FpIsMxCsrMaskSet(1 << MXCSR_FTZ);
76 return FpIsMxCsrMaskSet(1 << MXCSR_DAZ);
81 int ParseDimensions(const char * parameter, int * nX, int * nY, int * nZ)
85 *nX = atoi(parameter);
88 printf("ERROR: parameter for X dimension must be > 0.\n");
92 tmp = strchr(parameter, 'x');
95 printf("ERROR: parameter for Y dimension is missing.\n");
102 printf("ERROR: parameter for Y dimension must be > 0.\n");
106 tmp = strchr(tmp + 1, 'x');
109 printf("ERROR: parameter for Z dimension is missing.\n");
116 printf("ERROR: parameter for Z dimension must be > 0.\n");
123 int main(int argc, char * argv[])
125 int dims[3] = { 20, 20, 20 }; // Dimensions in x, y, and z direction
126 const char * geometryType = "channel";
127 // int latticeDumpAscii = 0;
128 int verify = 0; UNUSED(verify);
129 char * kernelToUse = NULL;
131 const char * pinString = NULL;
132 int periodic[3] = { 0 };
136 cd.MaxIterations = 1000;
142 cd.StatisticsModulus = 100;
144 kernelToUse = "push-soa";
152 #define LBM_BENCH_KERNELS_VERSION_MAJOR 0
153 #define LBM_BENCH_KERNELS_VERSION_MINOR 1
155 printf("Lattice Boltzmann Benchmark Kernels (LbmBenchKernels) Copyright (C) 2016, 2017 LSS, RRZE\n");
156 printf("This program comes with ABSOLUTELY NO WARRANTY; for details see LICENSE.\n");
157 printf("This is free software, and you are welcome to redistribute it under certain conditions.\n");
159 printf("LBM Benchmark Kernels %d.%d, compiled %s %s, type: %s\n",
160 LBM_BENCH_KERNELS_VERSION_MAJOR, LBM_BENCH_KERNELS_VERSION_MINOR, __DATE__, __TIME__,
168 // ----------------------------------------------------------------------
169 // Parse command line arguments
171 #define ARG_IS(param) (!strcmp(argv[i], param))
172 #define NEXT_ARG_PRESENT() \
174 if (i + 1 >= argc) { \
175 printf("ERROR: argument %s requires a parameter.\n", argv[i]); \
180 for (int i = 1; i < argc; ++i) {
182 if (ARG_IS("-dims") || ARG_IS("--dims")) {
186 if (!ParseDimensions(argv[++i], &dims[0], &dims[1], &dims[2])) {
190 // else if (ARG_IS("-lattice-dump-ascii") || ARG_IS("--lattice-dump-ascii")) {
191 // latticeDumpAscii = 1;
193 else if (ARG_IS("-geometry") || ARG_IS("--geometry")) {
196 geometryType = argv[++i];
198 else if (ARG_IS("-iterations") ||ARG_IS("--iterations")) {
201 cd.MaxIterations = strtol(argv[++i], NULL, 0);
203 if (cd.MaxIterations <= 0) {
204 printf("ERROR: number of iterations must be > 0.\n");
208 else if (ARG_IS("-rho-in") ||ARG_IS("--rho-in")) {
211 cd.RhoIn = strtod(argv[++i], NULL);
213 else if (ARG_IS("-rho-out") ||ARG_IS("--rho-out")) {
216 cd.RhoOut = strtod(argv[++i], NULL);
218 else if (ARG_IS("-omega") ||ARG_IS("--omega")) {
221 cd.Omega = strtod(argv[++i], NULL);
223 else if (ARG_IS("-x-force") ||ARG_IS("--x-force")) {
226 cd.XForce = strtod(argv[++i], NULL);
228 else if (ARG_IS("-verify") || ARG_IS("--verify")) {
231 // Choose this preset for verification. As geometry type "box" is
232 // used but x and y direction are made pridoc.
233 // Everything else can be altered, but enough iterations should be
234 // performed in order to receive a fully developed flow field.
240 geometryType = "box";
245 cd.MaxIterations = 1000;
251 printf("# VERIFICATION: verifying flow profile of channel flow.\n");
254 // TODO: this is not a good idea as we ignore all other options...
257 printf("ERROR: in order to use -verify VERIFICATION must be defined during compilation.\n");
258 printf(" Recompile with VERIFICATION=on.\n");
262 else if (ARG_IS("-vtk") || ARG_IS("--vtk")) {
267 // If the next parameter is a number it is used as the itartion count,
268 // if not it is probably another parameter.
271 int vtkModulus = atoi(argv[i+1]);
273 if (vtkModulus > 0) {
274 cd.VtkModulus = vtkModulus;
279 printf("ERROR: in order to use -vtk VTK_OUTPUT must be defined during compilation.\n");
280 printf(" Recompile with VTK_OUTPUT=on.\n");
284 else if (ARG_IS("-statistics") || ARG_IS("--statistics")) {
288 cd.StatisticsModulus = atoi(argv[++i]);
290 if (cd.StatisticsModulus <= 0) {
291 printf("ERROR: the iteration count for -statistics must be > 0.\n");
295 printf("ERROR: in order to use -statistics STATISTICS must be defined during compilation.\n");
296 printf(" Recompile with STATISTICS=on.\n");
300 else if (ARG_IS("-kernel") || ARG_IS("--kernel")) {
303 kernelToUse = argv[++i];
305 else if (ARG_IS("-list") || ARG_IS("--list")) {
306 printf("Available kernels to benchmark:\n");
308 for (int j = 0; j < N_ELEMS(g_kernels); ++j) {
309 printf(" %s\n", g_kernels[j].Name);
314 else if (ARG_IS("-pin") || ARG_IS("--pin")) {
317 pinString = argv[++i];
319 else if (ARG_IS("-t") || ARG_IS("-threads") || ARG_IS("--threads")) {
323 nThreads = atoi(argv[++i]);
326 printf("ERROR: number of threads must be > 0.\n");
330 printf("ERROR: specifying number of threads is only available when compiled with OpenMP support.\n");
334 else if (ARG_IS("-periodic-x") || ARG_IS("--periodic-x")) {
337 else if (ARG_IS("-periodic-y") || ARG_IS("--periodic-y")) {
340 else if (ARG_IS("-periodic-z") || ARG_IS("--periodic-z")) {
343 else if (ARG_IS("-h") || ARG_IS("-help") || ARG_IS("--help")) {
344 printf("ERROR: unknown argument: %s\n", argv[i]);
347 printf("./lbmbenchk -list\n");
348 printf("./lbmbenchk \n");
349 printf(" [-dims XxYyZ] [-geometry box|channel|pipe|porosity[-value]] [-iterations <iterations>] [-lattice-dump-ascii]\n");
350 printf(" [-rho-in <density>] [-rho-out <density] [-omega <omega>] [-kernel <kernel>]\n");
351 printf(" [-periodic-x]\n");
353 printf(" [-statistics <every-n-iteration>]\n");
356 printf(" [-vtk [<every-n-iteration>]]\n");
359 printf(" [-t <number of threads>]\n");
361 printf(" [-pin core{,core}*]\n");
363 printf(" [-verify]\n");
365 printf(" -- <kernel specific parameters>\n");
367 printf("-list List available kernels.\n");
369 printf("-dims XxYxZ Specify geometry dimensions.\n");
371 printf("-geometry porosity-<value>\n");
372 printf(" Geometetry with blocks of size <value> regularily layout out.\n");
376 else if (ARG_IS("--")) {
377 // printf("# kernel args start with %s these are %d args.\n", argv[i + 1], argc - i - 1);
378 p.KernelArgs = &argv[++i];
379 p.nKernelArgs = argc - i;
383 printf("ERROR: unknown parameter: %s.\n", argv[i]);
389 #undef NEXT_ARG_PRESENT
392 // ----------------------------------------------------------------------
393 // Check if we exceed our index addressing PDFs.
396 uint64_t nPdfs = ((uint64_t)19) * dims[0] * dims[1] * dims[2];
398 if (nPdfs > ((2LU << 31) - 1)) {
399 printf("ERROR: number of PDFs exceed 2^31.\n");
404 // ----------------------------------------------------------------------
407 omp_set_num_threads(nThreads);
412 GeoCreateByStr(geometryType, dims, periodic, &ld);
414 const char * defines[] = {
432 printf("# defines: ");
433 for (int j = 0; j < N_ELEMS(defines); ++j) {
434 printf("%s ", defines[j]);
438 printf("# nodes total: % 10d\n", ld.nObst + ld.nFluid);
439 printf("# nodes fluid: % 10d (including inlet & outlet)\n", ld.nFluid);
440 printf("# nodes obstacles: % 10d\n", ld.nObst);
441 printf("# nodes inlet: % 10d\n", ld.nInlet);
442 printf("# nodes outlet: % 10d\n", ld.nOutlet);
443 printf("# periodicity: x: %d y: %d z: %d\n", ld.PeriodicX, ld.PeriodicY, ld.PeriodicZ);
446 printf("# VTK output: %d (every %d iteration)\n", cd.VtkOutput, cd.VtkModulus);
449 printf("# statistics: every %d iteration\n", cd.StatisticsModulus);
452 printf("# omega: %f\n", cd.Omega);
453 printf("# initial density at inlet/outlet:\n");
454 printf("# rho in: %e\n", cd.RhoIn);
455 printf("# rho out: %e\n", cd.RhoOut);
456 printf("# iterations: %d\n", cd.MaxIterations);
459 printf("# fp status: DAZ: %d FTZ: %d\n", FpGetDaz(), FpGetFtz());
463 printf("# OpenMP threads: %d\n", omp_get_max_threads());
465 if (pinString != NULL) {
468 int threadId = omp_get_thread_num();
471 err = PinCurrentThreadByCpuList(pinString, threadId);
474 printf("ERROR [thread %d]: pinning failed.\n", threadId);
478 const char * cpuList = PinCpuListAsString();
479 Assert(cpuList != NULL);
481 // Not so nice hack to print the thread ids ordered.
482 #pragma omp for ordered
483 for (int i = 0; i < omp_get_num_threads(); ++i) {
485 printf("# thread %2d pinned to core(s): %s\n", threadId, cpuList);
488 free((void *)cpuList);
495 KernelFunctions * kf = NULL;
497 if (kernelToUse == NULL) {
501 for (int j = 0; j < N_ELEMS(g_kernels); ++j) {
503 if (!strcasecmp(kernelToUse, g_kernels[j].Name)) {
511 printf("ERROR: requested kernel \"%s\" not found.\n", kernelToUse);
516 printf("# kernel: %s\n", kf->Name);
519 // Initialize kernel by calling its own initialization function
520 kf->Init(&ld, &kd, &p);
524 KernelSetInitialDensity( &ld, kd, &cd);
525 KernelSetInitialVelocity(&ld, kd, &cd);
529 printf("# starting kernel...\n");
533 double timeStart = Time();
535 // Call the LBM kernel
536 kd->Kernel(&ld, kd, &cd);
538 double duration = Time() - timeStart;
542 // Print some statistics...
543 KernelStatisticsAdv(kd, &ld, &cd, cd.MaxIterations, 1 /* force output */);
546 PdfT errorNorm = -1.0;
547 KernelVerifiy(&ld, kd, &cd, &errorNorm);
550 // Deinitialize kernel by calling its own deinitialization function
551 kf->Deinit(&ld, &kd);
554 double perf = (double)ld.nFluid * (double)cd.MaxIterations / duration / 1.e6;
556 printf("P: %f MFLUP/s t: %d d: %f s iter: %d fnodes: %f x1e6 geo: %s kernel: %s %s\n",
557 perf, nThreads, duration, cd.MaxIterations, ld.nFluid / 1e6,
558 geometryType, kernelToUse,
571 printf("# VERIFICATION: deviation from analytical solution: %e\n", errorNorm);
573 if (errorNorm > 0.1) {
574 printf("# VERIFICATION FAILED.\n");
578 printf("# VERIFICATION SUCCEEDED.\n");
582 // printf("# VERIFICATION: deviation from analytical solution: %e\n", errorNorm);
583 // printf("# VERIFICATION: this is only valid for pipe geometry with enough iterations performed.\n");
586 MemFree((void **)&ld.Lattice);