| 1 | <?xml version="1.0" encoding="utf-8" ?> |
| 2 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> |
| 3 | <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> |
| 4 | <head> |
| 5 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" /> |
| 6 | <meta name="generator" content="Docutils 0.12: http://docutils.sourceforge.net/" /> |
| 7 | <title>LBM Benchmark Kernels Documentation</title> |
| 8 | <style type="text/css"> |
| 9 | |
| 10 | /* |
| 11 | :Author: David Goodger (goodger@python.org) |
| 12 | :Id: $Id: html4css1.css 7614 2013-02-21 15:55:51Z milde $ |
| 13 | :Copyright: This stylesheet has been placed in the public domain. |
| 14 | |
| 15 | Default cascading style sheet for the HTML output of Docutils. |
| 16 | |
| 17 | See http://docutils.sf.net/docs/howto/html-stylesheets.html for how to |
| 18 | customize this style sheet. |
| 19 | */ |
| 20 | |
| 21 | /* used to remove borders from tables and images */ |
| 22 | .borderless, table.borderless td, table.borderless th { |
| 23 | border: 0 } |
| 24 | |
| 25 | table.borderless td, table.borderless th { |
| 26 | /* Override padding for "table.docutils td" with "! important". |
| 27 | The right padding separates the table cells. */ |
| 28 | padding: 0 0.5em 0 0 ! important } |
| 29 | |
| 30 | .first { |
| 31 | /* Override more specific margin styles with "! important". */ |
| 32 | margin-top: 0 ! important } |
| 33 | |
| 34 | .last, .with-subtitle { |
| 35 | margin-bottom: 0 ! important } |
| 36 | |
| 37 | .hidden { |
| 38 | display: none } |
| 39 | |
| 40 | a.toc-backref { |
| 41 | text-decoration: none ; |
| 42 | color: black } |
| 43 | |
| 44 | blockquote.epigraph { |
| 45 | margin: 2em 5em ; } |
| 46 | |
| 47 | dl.docutils dd { |
| 48 | margin-bottom: 0.5em } |
| 49 | |
| 50 | object[type="image/svg+xml"], object[type="application/x-shockwave-flash"] { |
| 51 | overflow: hidden; |
| 52 | } |
| 53 | |
| 54 | /* Uncomment (and remove this text!) to get bold-faced definition list terms |
| 55 | dl.docutils dt { |
| 56 | font-weight: bold } |
| 57 | */ |
| 58 | |
| 59 | div.abstract { |
| 60 | margin: 2em 5em } |
| 61 | |
| 62 | div.abstract p.topic-title { |
| 63 | font-weight: bold ; |
| 64 | text-align: center } |
| 65 | |
| 66 | div.admonition, div.attention, div.caution, div.danger, div.error, |
| 67 | div.hint, div.important, div.note, div.tip, div.warning { |
| 68 | margin: 2em ; |
| 69 | border: medium outset ; |
| 70 | padding: 1em } |
| 71 | |
| 72 | div.admonition p.admonition-title, div.hint p.admonition-title, |
| 73 | div.important p.admonition-title, div.note p.admonition-title, |
| 74 | div.tip p.admonition-title { |
| 75 | font-weight: bold ; |
| 76 | font-family: sans-serif } |
| 77 | |
| 78 | div.attention p.admonition-title, div.caution p.admonition-title, |
| 79 | div.danger p.admonition-title, div.error p.admonition-title, |
| 80 | div.warning p.admonition-title, .code .error { |
| 81 | color: red ; |
| 82 | font-weight: bold ; |
| 83 | font-family: sans-serif } |
| 84 | |
| 85 | /* Uncomment (and remove this text!) to get reduced vertical space in |
| 86 | compound paragraphs. |
| 87 | div.compound .compound-first, div.compound .compound-middle { |
| 88 | margin-bottom: 0.5em } |
| 89 | |
| 90 | div.compound .compound-last, div.compound .compound-middle { |
| 91 | margin-top: 0.5em } |
| 92 | */ |
| 93 | |
| 94 | div.dedication { |
| 95 | margin: 2em 5em ; |
| 96 | text-align: center ; |
| 97 | font-style: italic } |
| 98 | |
| 99 | div.dedication p.topic-title { |
| 100 | font-weight: bold ; |
| 101 | font-style: normal } |
| 102 | |
| 103 | div.figure { |
| 104 | margin-left: 2em ; |
| 105 | margin-right: 2em } |
| 106 | |
| 107 | div.footer, div.header { |
| 108 | clear: both; |
| 109 | font-size: smaller } |
| 110 | |
| 111 | div.line-block { |
| 112 | display: block ; |
| 113 | margin-top: 1em ; |
| 114 | margin-bottom: 1em } |
| 115 | |
| 116 | div.line-block div.line-block { |
| 117 | margin-top: 0 ; |
| 118 | margin-bottom: 0 ; |
| 119 | margin-left: 1.5em } |
| 120 | |
| 121 | div.sidebar { |
| 122 | margin: 0 0 0.5em 1em ; |
| 123 | border: medium outset ; |
| 124 | padding: 1em ; |
| 125 | background-color: #ffffee ; |
| 126 | width: 40% ; |
| 127 | float: right ; |
| 128 | clear: right } |
| 129 | |
| 130 | div.sidebar p.rubric { |
| 131 | font-family: sans-serif ; |
| 132 | font-size: medium } |
| 133 | |
| 134 | div.system-messages { |
| 135 | margin: 5em } |
| 136 | |
| 137 | div.system-messages h1 { |
| 138 | color: red } |
| 139 | |
| 140 | div.system-message { |
| 141 | border: medium outset ; |
| 142 | padding: 1em } |
| 143 | |
| 144 | div.system-message p.system-message-title { |
| 145 | color: red ; |
| 146 | font-weight: bold } |
| 147 | |
| 148 | div.topic { |
| 149 | margin: 2em } |
| 150 | |
| 151 | h1.section-subtitle, h2.section-subtitle, h3.section-subtitle, |
| 152 | h4.section-subtitle, h5.section-subtitle, h6.section-subtitle { |
| 153 | margin-top: 0.4em } |
| 154 | |
| 155 | h1.title { |
| 156 | text-align: center } |
| 157 | |
| 158 | h2.subtitle { |
| 159 | text-align: center } |
| 160 | |
| 161 | hr.docutils { |
| 162 | width: 75% } |
| 163 | |
| 164 | img.align-left, .figure.align-left, object.align-left { |
| 165 | clear: left ; |
| 166 | float: left ; |
| 167 | margin-right: 1em } |
| 168 | |
| 169 | img.align-right, .figure.align-right, object.align-right { |
| 170 | clear: right ; |
| 171 | float: right ; |
| 172 | margin-left: 1em } |
| 173 | |
| 174 | img.align-center, .figure.align-center, object.align-center { |
| 175 | display: block; |
| 176 | margin-left: auto; |
| 177 | margin-right: auto; |
| 178 | } |
| 179 | |
| 180 | .align-left { |
| 181 | text-align: left } |
| 182 | |
| 183 | .align-center { |
| 184 | clear: both ; |
| 185 | text-align: center } |
| 186 | |
| 187 | .align-right { |
| 188 | text-align: right } |
| 189 | |
| 190 | /* reset inner alignment in figures */ |
| 191 | div.align-right { |
| 192 | text-align: inherit } |
| 193 | |
| 194 | /* div.align-center * { */ |
| 195 | /* text-align: left } */ |
| 196 | |
| 197 | ol.simple, ul.simple { |
| 198 | margin-bottom: 1em } |
| 199 | |
| 200 | ol.arabic { |
| 201 | list-style: decimal } |
| 202 | |
| 203 | ol.loweralpha { |
| 204 | list-style: lower-alpha } |
| 205 | |
| 206 | ol.upperalpha { |
| 207 | list-style: upper-alpha } |
| 208 | |
| 209 | ol.lowerroman { |
| 210 | list-style: lower-roman } |
| 211 | |
| 212 | ol.upperroman { |
| 213 | list-style: upper-roman } |
| 214 | |
| 215 | p.attribution { |
| 216 | text-align: right ; |
| 217 | margin-left: 50% } |
| 218 | |
| 219 | p.caption { |
| 220 | font-style: italic } |
| 221 | |
| 222 | p.credits { |
| 223 | font-style: italic ; |
| 224 | font-size: smaller } |
| 225 | |
| 226 | p.label { |
| 227 | white-space: nowrap } |
| 228 | |
| 229 | p.rubric { |
| 230 | font-weight: bold ; |
| 231 | font-size: larger ; |
| 232 | color: maroon ; |
| 233 | text-align: center } |
| 234 | |
| 235 | p.sidebar-title { |
| 236 | font-family: sans-serif ; |
| 237 | font-weight: bold ; |
| 238 | font-size: larger } |
| 239 | |
| 240 | p.sidebar-subtitle { |
| 241 | font-family: sans-serif ; |
| 242 | font-weight: bold } |
| 243 | |
| 244 | p.topic-title { |
| 245 | font-weight: bold } |
| 246 | |
| 247 | pre.address { |
| 248 | margin-bottom: 0 ; |
| 249 | margin-top: 0 ; |
| 250 | font: inherit } |
| 251 | |
| 252 | pre.literal-block, pre.doctest-block, pre.math, pre.code { |
| 253 | margin-left: 2em ; |
| 254 | margin-right: 2em } |
| 255 | |
| 256 | pre.code .ln { color: grey; } /* line numbers */ |
| 257 | pre.code, code { background-color: #eeeeee } |
| 258 | pre.code .comment, code .comment { color: #5C6576 } |
| 259 | pre.code .keyword, code .keyword { color: #3B0D06; font-weight: bold } |
| 260 | pre.code .literal.string, code .literal.string { color: #0C5404 } |
| 261 | pre.code .name.builtin, code .name.builtin { color: #352B84 } |
| 262 | pre.code .deleted, code .deleted { background-color: #DEB0A1} |
| 263 | pre.code .inserted, code .inserted { background-color: #A3D289} |
| 264 | |
| 265 | span.classifier { |
| 266 | font-family: sans-serif ; |
| 267 | font-style: oblique } |
| 268 | |
| 269 | span.classifier-delimiter { |
| 270 | font-family: sans-serif ; |
| 271 | font-weight: bold } |
| 272 | |
| 273 | span.interpreted { |
| 274 | font-family: sans-serif } |
| 275 | |
| 276 | span.option { |
| 277 | white-space: nowrap } |
| 278 | |
| 279 | span.pre { |
| 280 | white-space: pre } |
| 281 | |
| 282 | span.problematic { |
| 283 | color: red } |
| 284 | |
| 285 | span.section-subtitle { |
| 286 | /* font-size relative to parent (h1..h6 element) */ |
| 287 | font-size: 80% } |
| 288 | |
| 289 | table.citation { |
| 290 | border-left: solid 1px gray; |
| 291 | margin-left: 1px } |
| 292 | |
| 293 | table.docinfo { |
| 294 | margin: 2em 4em } |
| 295 | |
| 296 | table.docutils { |
| 297 | margin-top: 0.5em ; |
| 298 | margin-bottom: 0.5em } |
| 299 | |
| 300 | table.footnote { |
| 301 | border-left: solid 1px black; |
| 302 | margin-left: 1px } |
| 303 | |
| 304 | table.docutils td, table.docutils th, |
| 305 | table.docinfo td, table.docinfo th { |
| 306 | padding-left: 0.5em ; |
| 307 | padding-right: 0.5em ; |
| 308 | vertical-align: top } |
| 309 | |
| 310 | table.docutils th.field-name, table.docinfo th.docinfo-name { |
| 311 | font-weight: bold ; |
| 312 | text-align: left ; |
| 313 | white-space: nowrap ; |
| 314 | padding-left: 0 } |
| 315 | |
| 316 | /* "booktabs" style (no vertical lines) */ |
| 317 | table.docutils.booktabs { |
| 318 | border: 0px; |
| 319 | border-top: 2px solid; |
| 320 | border-bottom: 2px solid; |
| 321 | border-collapse: collapse; |
| 322 | } |
| 323 | table.docutils.booktabs * { |
| 324 | border: 0px; |
| 325 | } |
| 326 | table.docutils.booktabs th { |
| 327 | border-bottom: thin solid; |
| 328 | text-align: left; |
| 329 | } |
| 330 | |
| 331 | h1 tt.docutils, h2 tt.docutils, h3 tt.docutils, |
| 332 | h4 tt.docutils, h5 tt.docutils, h6 tt.docutils { |
| 333 | font-size: 100% } |
| 334 | |
| 335 | ul.auto-toc { |
| 336 | list-style-type: none } |
| 337 | |
| 338 | </style> |
| 339 | </head> |
| 340 | <body> |
| 341 | <div class="document" id="lbm-benchmark-kernels-documentation"> |
| 342 | <h1 class="title">LBM Benchmark Kernels Documentation</h1> |
| 343 | |
| 344 | <!-- # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
| 345 | # |
| 346 | # Copyright |
| 347 | # Markus Wittmann, 2016-2017 |
| 348 | # RRZE, University of Erlangen-Nuremberg, Germany |
| 349 | # markus.wittmann -at- fau.de or hpc -at- rrze.fau.de |
| 350 | # |
| 351 | # Viktor Haag, 2016 |
| 352 | # LSS, University of Erlangen-Nuremberg, Germany |
| 353 | # |
| 354 | # This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels). |
| 355 | # |
| 356 | # LbmBenchKernels is free software: you can redistribute it and/or modify |
| 357 | # it under the terms of the GNU General Public License as published by |
| 358 | # the Free Software Foundation, either version 3 of the License, or |
| 359 | # (at your option) any later version. |
| 360 | # |
| 361 | # LbmBenchKernels is distributed in the hope that it will be useful, |
| 362 | # but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 363 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 364 | # GNU General Public License for more details. |
| 365 | # |
| 366 | # You should have received a copy of the GNU General Public License |
| 367 | # along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>. |
| 368 | # |
| 369 | # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - --> |
| 370 | <div class="contents topic" id="contents"> |
| 371 | <p class="topic-title first">Contents</p> |
| 372 | <ul class="auto-toc simple"> |
| 373 | <li><a class="reference internal" href="#compilation" id="id2">1 Compilation</a><ul class="auto-toc"> |
| 374 | <li><a class="reference internal" href="#debug-and-verification" id="id3">1.1 Debug and Verification</a></li> |
| 375 | <li><a class="reference internal" href="#benchmarking" id="id4">1.2 Benchmarking</a></li> |
| 376 | <li><a class="reference internal" href="#release-and-verification" id="id5">1.3 Release and Verification</a></li> |
| 377 | <li><a class="reference internal" href="#compilers" id="id6">1.4 Compilers</a></li> |
| 378 | <li><a class="reference internal" href="#options-summary" id="id7">1.5 Options Summary</a></li> |
| 379 | </ul> |
| 380 | </li> |
| 381 | <li><a class="reference internal" href="#invocation" id="id8">2 Invocation</a><ul class="auto-toc"> |
| 382 | <li><a class="reference internal" href="#command-line-parameters" id="id9">2.1 Command Line Parameters</a></li> |
| 383 | </ul> |
| 384 | </li> |
| 385 | <li><a class="reference internal" href="#id1" id="id10">3 Benchmarking</a></li> |
| 386 | <li><a class="reference internal" href="#acknowledgements" id="id11">4 Acknowledgements</a></li> |
| 387 | </ul> |
| 388 | </div> |
| 389 | <div class="section" id="compilation"> |
| 390 | <h1><a class="toc-backref" href="#id2">1 Compilation</a></h1> |
| 391 | <p>The benchmark framework currently supports only Linux systems and the GCC and |
| 392 | Intel compilers. Every other configuration probably requires adjustment inside |
| 393 | the code and the makefiles. Furthermore, some code might be platform or at least |
| 394 | POSIX specific.</p> |
| 395 | <p>The benchmark can be built via <tt class="docutils literal">make</tt> from the <tt class="docutils literal">src</tt> subdirectory. This will |
| 396 | generate one binary which hosts all implemented benchmark kernels.</p> |
| 397 | <p>Binaries are located under the <tt class="docutils literal">bin</tt> subdirectory and will have different names |
| 398 | depending on compiler and build configuration.</p> |
| 399 | <div class="section" id="debug-and-verification"> |
| 400 | <h2><a class="toc-backref" href="#id3">1.1 Debug and Verification</a></h2> |
| 401 | <pre class="literal-block"> |
| 402 | make |
| 403 | </pre> |
| 404 | <p>Running <tt class="docutils literal">make</tt> without any arguments builds the debug version (BUILD=debug) of |
| 405 | the benchmark kernels, where no optimizations are performed, line numbers and |
| 406 | debug symbols are included, and <tt class="docutils literal">DEBUG</tt> is defined. The resulting |
| 407 | binary will be found in the <tt class="docutils literal">bin</tt> subdirectory and named |
| 408 | <tt class="docutils literal"><span class="pre">lbmbenchk-linux-&lt;compiler&gt;-debug</span></tt>.</p> |
| 409 | <p>Without any further specification the binary includes verification |
| 410 | (<tt class="docutils literal">VERIFICATION=on</tt>), statistics (<tt class="docutils literal">STATISTICS=on</tt>), and VTK output |
| 411 | (<tt class="docutils literal">VTK_OUTPUT=on</tt>) enabled.</p> |
| 412 | <p>Please note that the generated binary will therefore |
| 413 | exhibit poor performance.</p> |
| 414 | </div> |
| 415 | <div class="section" id="benchmarking"> |
| 416 | <h2><a class="toc-backref" href="#id4">1.2 Benchmarking</a></h2> |
| 417 | <p>To generate a binary for benchmarking run make with</p> |
| 418 | <pre class="literal-block"> |
| 419 | make BENCHMARK=on BUILD=release |
| 420 | </pre> |
| 421 | <p>Here BUILD=release turns optimizations on and BENCHMARK=on disables |
| 422 | verification, statistics, and VTK output.</p> |
| 423 | </div> |
| 424 | <div class="section" id="release-and-verification"> |
| 425 | <h2><a class="toc-backref" href="#id5">1.3 Release and Verification</a></h2> |
| 426 | <p>Verification with the debug builds can be extremely slow. Hence verification |
| 427 | capabilities can be built with release builds:</p> |
| 428 | <pre class="literal-block"> |
| 429 | make BUILD=release |
| 430 | </pre> |
| 431 | </div> |
| 432 | <div class="section" id="compilers"> |
| 433 | <h2><a class="toc-backref" href="#id6">1.4 Compilers</a></h2> |
| 434 | <p>Currently only the GCC and Intel compilers under Linux are supported. Either |
| 435 | configuration can be chosen via <tt class="docutils literal"><span class="pre">CONFIG=linux-gcc</span></tt> or |
| 436 | <tt class="docutils literal"><span class="pre">CONFIG=linux-intel</span></tt>.</p> |
| 437 | </div> |
| 438 | <div class="section" id="options-summary"> |
| 439 | <h2><a class="toc-backref" href="#id7">1.5 Options Summary</a></h2> |
| 440 | <p>Options that can be specified when building the framework with make:</p> |
| 441 | <table border="1" class="docutils"> |
| 442 | <colgroup> |
| 443 | <col width="8%" /> |
| 444 | <col width="13%" /> |
| 445 | <col width="7%" /> |
| 446 | <col width="72%" /> |
| 447 | </colgroup> |
| 448 | <tbody valign="top"> |
| 449 | <tr><td>name</td> |
| 450 | <td>values</td> |
| 451 | <td>default</td> |
| 452 | <td>description</td> |
| 453 | </tr> |
| 454 | <tr><td>TARCH</td> |
| 455 | <td>--</td> |
| 456 | <td>--</td> |
| 457 | <td>Via TARCH the architecture the compiler generates code for can be overridden. The value depends on the chosen compiler.</td> |
| 458 | </tr> |
| 459 | <tr><td>BENCHMARK</td> |
| 460 | <td>on, off</td> |
| 461 | <td>off</td> |
| 462 | <td>If enabled, disables VERIFICATION, STATISTICS, VTK_OUTPUT.</td> |
| 463 | </tr> |
| 464 | <tr><td>BUILD</td> |
| 465 | <td>debug, release</td> |
| 466 | <td>debug</td> |
| 467 | <td>No optimization, debug symbols, DEBUG defined.</td> |
| 468 | </tr> |
| 469 | <tr><td>CONFIG</td> |
| 470 | <td>linux-gcc, linux-intel</td> |
| 471 | <td>linux-intel</td> |
| 472 | <td>Select GCC or Intel compiler.</td> |
| 473 | </tr> |
| 474 | <tr><td>ISA</td> |
| 475 | <td>avx, sse</td> |
| 476 | <td>avx</td> |
| 477 | <td>Determines which ISA extension is used for macro definitions. This is <em>not</em> the architecture the compiler generates code for.</td> |
| 478 | </tr> |
| 479 | <tr><td>OPENMP</td> |
| 480 | <td>on, off</td> |
| 481 | <td>on</td> |
| 482 | <td>OpenMP, i.e. threading support.</td> |
| 483 | </tr> |
| 484 | <tr><td>STATISTICS</td> |
| 485 | <td>on, off</td> |
| 486 | <td>off</td> |
| 487 | <td>View statistics, like density etc., during simulation.</td> |
| 488 | </tr> |
| 489 | <tr><td>VERIFICATION</td> |
| 490 | <td>on, off</td> |
| 491 | <td>off</td> |
| 492 | <td>Turn verification on/off.</td> |
| 493 | </tr> |
| 494 | <tr><td>VTK_OUTPUT</td> |
| 495 | <td>on, off</td> |
| 496 | <td>off</td> |
| 497 | <td>Enable/Disable VTK file output.</td> |
| 498 | </tr> |
| 499 | </tbody> |
| 500 | </table> |
| 501 | </div> |
| 502 | </div> |
| 503 | <div class="section" id="invocation"> |
| 504 | <h1><a class="toc-backref" href="#id8">2 Invocation</a></h1> |
| 505 | <p>Running the binary will print, along with the GPL licence header, a line like the following:</p> |
| 506 | <blockquote> |
| 507 | LBM Benchmark Kernels 0.1, compiled Jul 5 2017 21:59:22, type: verification</blockquote> |
| 508 | <p>if verification was enabled during compilation or</p> |
| 509 | <blockquote> |
| 510 | LBM Benchmark Kernels 0.1, compiled Jul 5 2017 21:59:22, type: benchmark</blockquote> |
| 511 | <p>if verification was disabled during compilation.</p> |
| 512 | <div class="section" id="command-line-parameters"> |
| 513 | <h2><a class="toc-backref" href="#id9">2.1 Command Line Parameters</a></h2> |
| 514 | <p>Running the binary with <tt class="docutils literal"><span class="pre">-h</span></tt> lists all available parameters:</p> |
| 515 | <pre class="literal-block"> |
| 516 | Usage: |
| 517 | ./lbmbenchk -list |
| 518 | ./lbmbenchk |
| 519 | [-dims XxYyZ] [-geometry box|channel|pipe|blocks[-&lt;block size&gt;]] [-iterations &lt;iterations&gt;] [-lattice-dump-ascii] |
| 520 | [-rho-in &lt;density&gt;] [-rho-out &lt;density] [-omega &lt;omega&gt;] [-kernel &lt;kernel&gt;] |
| 521 | [-periodic-x] |
| 522 | [-t &lt;number of threads&gt;] |
| 523 | [-pin core{,core}*] |
| 524 | [-verify] |
| 525 | -- &lt;kernel specific parameters&gt; |
| 526 | |
| 527 | -list List available kernels. |
| 528 | |
| 529 | -dims XxYxZ Specify geometry dimensions. |
| 530 | |
| 531 | -geometry blocks-&lt;block size&gt; |
| 532 | Geometetry with blocks of size &lt;block size&gt; regularily layout out. |
| 533 | </pre> |
| 534 | <p>If an option is specified multiple times the last one overrides previous ones. |
| 535 | This holds also true for <tt class="docutils literal"><span class="pre">-verify</span></tt> which sets geometry dimensions, |
| 536 | iterations, etc., which can afterwards be overridden, e.g.:</p> |
| 537 | <pre class="literal-block"> |
| 538 | $ bin/lbmbenchk-linux-intel-release -verify -dims 32x32x32 |
| 539 | </pre> |
| 540 | <p>Kernel specific parameters can be obtained via selecting the specific kernel |
| 541 | and passing <tt class="docutils literal"><span class="pre">-h</span></tt> as parameter:</p> |
| 542 | <pre class="literal-block"> |
| 543 | $ bin/lbmbenchk-linux-intel-release -kernel -- -h |
| 544 | ... |
| 545 | Kernel parameters: |
| 546 | [-blk &lt;n&gt;] [-blk-[xyz] &lt;n&gt;] |
| 547 | </pre> |
| 548 | <p>A list of all available kernels can be obtained via <tt class="docutils literal"><span class="pre">-list</span></tt>:</p> |
| 549 | <pre class="literal-block"> |
| 550 | $ ../bin/lbmbenchk-linux-gcc-debug -list |
| 551 | Lattice Boltzmann Benchmark Kernels (LbmBenchKernels) Copyright (C) 2016, 2017 LSS, RRZE |
| 552 | This program comes with ABSOLUTELY NO WARRANTY; for details see LICENSE. |
| 553 | This is free software, and you are welcome to redistribute it under certain conditions. |
| 554 | |
| 555 | LBM Benchmark Kernels 0.1, compiled Jul 5 2017 21:59:22, type: verification |
| 556 | Available kernels to benchmark: |
| 557 | list-aa-pv-soa |
| 558 | list-aa-ria-soa |
| 559 | list-aa-soa |
| 560 | list-aa-aos |
| 561 | list-pull-split-nt-1s-soa |
| 562 | list-pull-split-nt-2s-soa |
| 563 | list-push-soa |
| 564 | list-push-aos |
| 565 | list-pull-soa |
| 566 | list-pull-aos |
| 567 | push-soa |
| 568 | push-aos |
| 569 | pull-soa |
| 570 | pull-aos |
| 571 | blk-push-soa |
| 572 | blk-push-aos |
| 573 | blk-pull-soa |
| 574 | blk-pull-aos |
| 575 | </pre> |
| 576 | </div> |
| 577 | </div> |
| 578 | <div class="section" id="id1"> |
| 579 | <h1><a class="toc-backref" href="#id10">3 Benchmarking</a></h1> |
| 580 | <p>Correct benchmarking is a nontrivial task. Whenever benchmark results should be |
| 581 | created make sure the binary was compiled with:</p> |
| 582 | <ul class="simple"> |
| 583 | <li><tt class="docutils literal">BENCHMARK=on</tt> and</li> |
| 584 | <li><tt class="docutils literal">BUILD=release</tt> and</li> |
| 585 | <li>the correct ISA for macros is used, selected via <tt class="docutils literal">ISA</tt> and</li> |
| 586 | <li>use <tt class="docutils literal">TARCH</tt> to specify the architecture the compiler generates code for.</li> |
| 587 | </ul> |
| 588 | <p>During benchmarking pinning should be used via the <tt class="docutils literal"><span class="pre">-pin</span></tt> parameter. Running |
| 589 | a benchmark with 10 threads and pinning them to the first 10 cores works like</p> |
| 590 | <pre class="literal-block"> |
| 591 | $ bin/lbmbenchk-linux-intel-release ... -t 10 -pin $(seq -s , 0 9) |
| 592 | </pre> |
| 593 | <p>Things the binary does not check or control:</p> |
| 594 | <ul class="simple"> |
| 595 | <li>transparent huge pages: when allocating memory small 4 KiB pages might be |
| 596 | replaced with larger ones. This is in general a good thing, but whether this is |
| 597 | really the case depends on the system settings.</li> |
| 598 | <li>CPU/core frequency: For reproducible results the frequency of all cores |
| 599 | should be fixed.</li> |
| 600 | <li>NUMA placement policy: The benchmark assumes a first touch policy, which |
| 601 | means the memory will be placed at the NUMA domain the touching core is |
| 602 | associated with. If a different policy is in place or the NUMA domain to be |
| 603 | used is already full, memory might be allocated in a remote domain. Accesses |
| 604 | to remote domains typically have a higher latency and lower bandwidth.</li> |
| 605 | <li>System load: interference with other applications, especially on desktop |
| 606 | systems, should be avoided.</li> |
| 607 | <li>Padding: most kernels do not care about padding against cache or TLB |
| 608 | thrashing. Even if the number of (fluid) nodes suggests everything is fine, |
| 609 | problems might still occur through parallelization.</li> |
| 610 | <li>CPU dispatcher function: the compiler might add different versions of a |
| 611 | function for different ISA extensions. Make sure the code you might think is |
| 612 | executed is actually the code which is executed.</li> |
| 613 | </ul> |
| 614 | </div> |
| 615 | <div class="section" id="acknowledgements"> |
| 616 | <h1><a class="toc-backref" href="#id11">4 Acknowledgements</a></h1> |
| 617 | <p>This work was funded by BMBF, grant no. 01IH15003A (project SKAMPY).</p> |
| 618 | <p>This work was funded by KONWHIR project OMI4PAPS.</p> |
| 619 | <p>Document was generated at 2017-10-26 09:43.</p> |
| 620 | </div> |
| 621 | </div> |
| 622 | </body> |
| 623 | </html> |