Commit | Line | Data |
---|---|---|
10988083 MW |
1 | // -------------------------------------------------------------------------- |
2 | // | |
3 | // Copyright | |
4 | // Markus Wittmann, 2016-2017 | |
5 | // RRZE, University of Erlangen-Nuremberg, Germany | |
6 | // markus.wittmann -at- fau.de or hpc -at- rrze.fau.de | |
7 | // | |
8 | // Viktor Haag, 2016 | |
9 | // LSS, University of Erlangen-Nuremberg, Germany | |
10 | // | |
11 | // This file is part of the Lattice Boltzmann Benchmark Kernels (LbmBenchKernels). | |
12 | // | |
13 | // LbmBenchKernels is free software: you can redistribute it and/or modify | |
14 | // it under the terms of the GNU General Public License as published by | |
15 | // the Free Software Foundation, either version 3 of the License, or | |
16 | // (at your option) any later version. | |
17 | // | |
18 | // LbmBenchKernels is distributed in the hope that it will be useful, | |
19 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
20 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
21 | // GNU General Public License for more details. | |
22 | // | |
23 | // You should have received a copy of the GNU General Public License | |
24 | // along with LbmBenchKernels. If not, see <http://www.gnu.org/licenses/>. | |
25 | // | |
26 | // -------------------------------------------------------------------------- | |
27 | #ifndef _GNU_SOURCE | |
28 | #define _GNU_SOURCE | |
29 | #endif | |
#include <errno.h>
#include <limits.h>
#include <sched.h>


#include "Base.h"
#include "Pinning.h"
36 | ||
37 | ||
38 | ||
39 | ||
40 | // ----------------------------------------------------------------------- | |
41 | // | |
42 | // Binds the calling thread to specified core. | |
43 | // | |
44 | // Return value: 0 = success, else error. | |
45 | // | |
46 | // ----------------------------------------------------------------------- | |
47 | ||
// Restricts the affinity mask of the calling thread to the single core
// coreNumber.
//
// Return value: 0 on success, otherwise the non-zero result of
// sched_setaffinity. The failure reason is reported through Error().
int PinCurrentThreadToCore(int coreNumber)
{
	cpu_set_t cpu_set;
	CPU_ZERO(&cpu_set);
	CPU_SET(coreNumber, &cpu_set);

	int error = sched_setaffinity((pid_t)0, sizeof(cpu_set_t), &cpu_set);

	if (error != 0) {
		// sched_setaffinity signals failure with -1 and stores the reason
		// in errno. Save errno right away, as later calls may overwrite it,
		// and report that value instead of the bare return code.
		const int savedErrno = errno;

		Error("pinning thread to core %d failed (%d): %s\n",
				coreNumber, savedErrno, strerror(savedErrno));
	}

	return error;
}
65 | ||
66 | ||
67 | // ----------------------------------------------------------------------- | |
68 | // | |
69 | // Binds the calling thread to specified core by a cpu list specified | |
70 | // in the given environment variable. | |
71 | // | |
72 | // Return value: 0 = success, else error. | |
73 | // | |
74 | // ----------------------------------------------------------------------- | |
75 | ||
// Looks up the cpu list stored in the environment variable envVarName and
// pins the calling thread to the core selected by nodeRank/threadNumber.
//
// If the variable is not set, pinning is skipped (only MPI rank 0 prints a
// note) and 0 is returned. A negative result of PinParseCpuList is passed
// through unchanged; otherwise the result of PinCurrentThreadToCore is
// returned.
int PinCurrentThreadByEnvVar(const char * envVarName,
		int mpiRank, int nodeRank, int threadNumber)
{
	const char * cpuList = getenv(envVarName);

	if (cpuList != NULL) {
		int parsedCore = PinParseCpuList(cpuList, mpiRank, nodeRank, threadNumber);

		// Negative values are parse errors and are handed to the caller as is.
		return (parsedCore < 0) ? parsedCore : PinCurrentThreadToCore(parsedCore);
	}

	// Nothing to do without a cpu list; avoid one message per rank.
	if (mpiRank == 0) {
		Print("skip pinning: env var %s not set\n", envVarName);
	}

	return 0;
}
100 | ||
101 | ||
102 | // ----------------------------------------------------------------------- | |
103 | // | |
104 | // Binds the calling thread to a core specified in the CPU list. | |
105 | // | |
106 | // Return value: 0 = success, else error. | |
107 | // | |
108 | // ----------------------------------------------------------------------- | |
109 | ||
// Pins the calling thread to the core that cpuList assigns to the given
// nodeRank/threadNumber combination.
//
// A NULL cpuList terminates the program with exit(1); only MPI rank 0
// prints the error message. A negative result of PinParseCpuList is passed
// through unchanged; otherwise the result of PinCurrentThreadToCore is
// returned.
int PinCurrentThreadByCpuList(const char * cpuList,
		int mpiRank, int nodeRank, int threadNumber)
{
	if (!cpuList) {
		if (mpiRank == 0) {
			printf("ERROR: cpu list is NULL.\n");
		}

		exit(1);
	}

	const int parsedCore = PinParseCpuList(cpuList, mpiRank, nodeRank, threadNumber);

	// Negative values are parse errors and are handed to the caller as is.
	return (parsedCore < 0) ? parsedCore : PinCurrentThreadToCore(parsedCore);
}
131 | ||
132 | ||
133 | // ----------------------------------------------------------------------- | |
134 | // | |
135 | // Parses the provided cpu list and returns the core number for the | |
136 | // specified MPI rank, local rank, and thread. | |
137 | // | |
138 | // The cpu list has for example a format of: 0,1,2 or 0,1,2_3,4,5 | |
139 | // | |
140 | // Blocks (0,1,2 or 3,4,5) separated by "_" specify pinning inside a | |
141 | // node rank. The first block maps to node rank 0, the second to node | |
142 | // rank 1, etc. | |
143 | // | |
144 | // Inside a block the core numbers specify where the threads should | |
145 | // be pinned to. They are separated by "," and the first number maps | |
146 | // to the first thread, the second number to the second thread, etc. | |
147 | // | |
148 | // For example: 0,2,4_6,8,10 | |
149 | // | |
150 | // Node rank 0 thread 0 pinned to core 0 | |
151 | // 0 1 2 | |
152 | // 0 2 4 | |
153 | // 1 0 6 | |
154 | // 1 1 8 | |
155 | // 1 2 10 | |
156 | // | |
157 | // ----------------------------------------------------------------------- | |
158 | ||
// Parses cpuList and returns the core number assigned to thread
// threadNumber of node rank nodeRank (both counted from 0).
//
// cpuList consists of blocks separated by "_" (one block per node rank);
// inside a block the core numbers of the threads are separated by ",",
// e.g. "0,2,4_6,8,10".
//
// mpiRank is accepted for interface compatibility but not evaluated.
//
// Return value: the core number (>= 0) on success, otherwise
//   -1  cpuList is NULL,
//   -2  cpuList contains a character other than digits, "," and "_",
//   -3  no block exists for nodeRank,
//   -4  the block has no entry for threadNumber,
//   -5  the core number does not fit into an int.
int PinParseCpuList(const char * cpuList,
		int mpiRank, int nodeRank, int threadNumber)
{
	(void)mpiRank;

	if (cpuList == NULL) {
		return -1;
	}

	const char * c = cpuList;

	// Ensure only valid characters are in the cpu list.
	// Cpu list is in the format of "0,1,2_3,4,5".
	while ((*c >= '0' && *c <= '9') || *c == ',' || *c == '_') {
		++c;
	}

	if (*c != 0x00) {
		// Invalid character detected.
		return -2;
	}

	c = cpuList;

	// Move c behind the "nodeRank"th "_" in the cpu list.
	int block = 0;

	while (block < nodeRank && *c != 0x00) {
		if (*c == '_') {
			++block;
		}

		++c;
	}

	if (block != nodeRank || *c < '0' || *c > '9') {
		// Cpu list for this node rank not found.
		return -3;
	}

	// Move c behind the "threadNumber"th "," inside this block.
	int entry = 0;

	while (entry < threadNumber && *c != 0x00) {
		if (*c == ',') {
			++entry;
		}
		else if (*c == '_') {
			// The block ended before the requested entry was reached.
			break;
		}

		++c;
	}

	if (entry != threadNumber || *c < '0' || *c > '9') {
		// Cpu for this threadNumber not found.
		return -4;
	}

	// Convert the digits by hand: atoi has undefined behavior when the
	// value is out of range, so check for overflow before every step.
	int cpu = 0;

	for (; *c >= '0' && *c <= '9'; ++c) {
		const int digit = *c - '0';

		if (cpu > (INT_MAX - digit) / 10) {
			return -5;
		}

		cpu = cpu * 10 + digit;
	}

	return cpu;
}
221 | ||
222 | ||
223 | ||
224 | // ----------------------------------------------------------------------- | |
225 | // | |
226 | // Returns the first core from the calling thread's affinity set. | |
227 | // | |
228 | // On error a value < 0 is returned. | |
229 | // | |
230 | // ----------------------------------------------------------------------- | |
231 | ||
// Returns the lowest numbered core contained in the affinity set of the
// calling thread.
//
// Return value: the core number, or -1 if the affinity set could not be
// queried (reported through Error()) or contains no core.
int PinCurrentCore()
{
	cpu_set_t cpu_set;
	CPU_ZERO(&cpu_set);

	if (sched_getaffinity((pid_t)0, sizeof(cpu_set_t), &cpu_set) != 0) {
		// sched_getaffinity signals failure with -1 and stores the reason
		// in errno. Save errno right away and report that value instead of
		// the bare return code.
		const int savedErrno = errno;

		Error("getting thread affinty failed (%d): %s\n", savedErrno, strerror(savedErrno));
		return -1;
	}

	// Constant CPU_SETSIZE is one larger than the maximum CPU
	// number that can be stored in a CPU set.
	for (int i = 0; i < CPU_SETSIZE; ++i) {
		if (CPU_ISSET(i, &cpu_set)) {
			return i;
		}
	}

	return -1;
}
258 | ||
259 | ||
260 | ||
261 | // ----------------------------------------------------------------------- | |
262 | // | |
263 | // Returns all cores from the calling thread's affinity set. | |
264 | // | |
265 | // On error the failure is reported and the set, which was zeroed before the call, is returned. | |
266 | // | |
267 | // ----------------------------------------------------------------------- | |
268 | ||
typedef cpu_set_t CpuSet;


// Returns the affinity set of the calling thread.
//
// If the set cannot be queried the failure is reported through Error() and
// an empty set is returned.
static CpuSet PinCurrentCores()
{
	CpuSet cpuSet;
	CPU_ZERO(&cpuSet);

	if (sched_getaffinity((pid_t)0, sizeof(CpuSet), &cpuSet) != 0) {
		// sched_getaffinity signals failure with -1 and stores the reason
		// in errno. Save errno right away and report that value instead of
		// the bare return code.
		const int savedErrno = errno;

		Error("getting thread affinty failed (%d): %s\n", savedErrno, strerror(savedErrno));

		// Make sure the caller really sees an empty set on failure.
		CPU_ZERO(&cpuSet);
	}

	return cpuSet;
}
291 | ||
// Appends the core range [firstCore, lastCore] to the NUL-terminated text
// of the given length stored in buffer (capacity bytes in total). Entries
// are separated by ","; a range of a single core is written as one number,
// larger ranges as "first-last".
//
// Returns the new length of the text. If the entry does not fit, the text
// is left unchanged and the old length is returned.
static size_t CpuSetAppendRange(char * buffer, size_t capacity, size_t length,
		int firstCore, int lastCore)
{
	const char * separator = (length > 0) ? "," : "";
	const size_t remaining = capacity - length;
	int written;

	if (firstCore < lastCore) {
		written = snprintf(buffer + length, remaining, "%s%d-%d",
				separator, firstCore, lastCore);
	}
	else {
		written = snprintf(buffer + length, remaining, "%s%d",
				separator, lastCore);
	}

	if (written < 0 || (size_t)written >= remaining) {
		// Drop the partially written entry so the text stays well formed.
		buffer[length] = 0x00;
		return length;
	}

	return length + (size_t)written;
}

// Converts a CPU set into a compact textual list, e.g. "0-3,5,8-9".
// Consecutive cores are collapsed into ranges; an empty set yields "".
//
// The returned string is allocated with malloc and must be released by the
// caller with free.
static char * CpuSetToString(cpu_set_t * cpu_set)
{
	// Worst case per set core: a separator plus the decimal digits of the
	// core number (a range costs at most that much per core it covers).
	// 3 * sizeof(int) is an upper bound for the decimal digits of an int.
	const size_t capacity = (size_t)CPU_SETSIZE * (3 * sizeof(int) + 1) + 1;

	char * buffer = (char *)malloc(capacity);
	Assert(buffer != NULL);

	buffer[0] = 0x00;

	size_t length = 0;

	// Currently open range; rangeBeginCore < 0 means no range is open.
	int rangeBeginCore = -1;
	int rangeEndCore   = -1;

	for (int i = 0; i < CPU_SETSIZE; ++i) {
		if (!CPU_ISSET(i, cpu_set)) {
			continue;
		}

		if (rangeBeginCore >= 0 && i == rangeEndCore + 1) {
			// Core directly follows the open range: extend it.
			rangeEndCore = i;
			continue;
		}

		// Core i is not adjacent to the open range, so that range is
		// complete and can be written out.
		if (rangeBeginCore >= 0) {
			length = CpuSetAppendRange(buffer, capacity, length,
					rangeBeginCore, rangeEndCore);
		}

		// With this core a new range begins.
		rangeBeginCore = i;
		rangeEndCore   = i;
	}

	// Write out the range that was still open when the scan ended.
	if (rangeBeginCore >= 0) {
		length = CpuSetAppendRange(buffer, capacity, length,
				rangeBeginCore, rangeEndCore);
	}

	return buffer;
}
371 | ||
372 | char * PinCpuListAsString() | |
373 | { | |
374 | CpuSet cpuSet = PinCurrentCores(); | |
375 | ||
376 | return CpuSetToString(&cpuSet); | |
377 | } | |
378 | ||
#ifdef TEST

// Stand-alone driver, only built when TEST is defined: prints the cores
// the process is currently allowed to run on.
int main(int argc, char * argv[])
{
	char * cores = PinCpuListAsString();

	printf("pinned to cores: %s\n", cores);

	free(cores);
	cores = NULL;

	return 0;
}

#endif // TEST
393 |