Loading...
Searching...
No Matches
profilingPstream.C
Go to the documentation of this file.
1/*---------------------------------------------------------------------------*\
2 ========= |
3 \\ / F ield | OpenFOAM: The Open Source CFD Toolbox
4 \\ / O peration |
5 \\ / A nd | www.openfoam.com
6 \\/ M anipulation |
7-------------------------------------------------------------------------------
8 Copyright (C) 2019-2023 OpenCFD Ltd.
9-------------------------------------------------------------------------------
10License
11 This file is part of OpenFOAM.
12
13 OpenFOAM is free software: you can redistribute it and/or modify it
14 under the terms of the GNU General Public License as published by
15 the Free Software Foundation, either version 3 of the License, or
16 (at your option) any later version.
17
18 OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
19 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
20 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
21 for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with OpenFOAM. If not, see <http://www.gnu.org/licenses/>.
25
26\*---------------------------------------------------------------------------*/
27
28#include "profilingPstream.H"
29#include "List.H"
30#include "Tuple2.H"
31#include "UPstream.H"
32
33// * * * * * * * * * * * * * * Static Data Members * * * * * * * * * * * * * //
34
35std::unique_ptr<Foam::cpuTime> Foam::profilingPstream::timer_(nullptr);
36
37bool Foam::profilingPstream::suspend_(false);
39Foam::profilingPstream::timingList Foam::profilingPstream::times_(double(0));
40Foam::profilingPstream::countList Foam::profilingPstream::counts_(uint64_t(0));
41
42
43// * * * * * * * * * * * * * Static Member Functions * * * * * * * * * * * * //
44
46{
47 if (!timer_)
48 {
49 timer_.reset(new cpuTime);
50 times_ = double(0);
51 counts_ = uint64_t(0);
52 }
53 suspend_ = false;
54}
55
56
58{
59 timer_.reset(nullptr);
60 suspend_ = false;
61}
62
63
65{
66 times_ = double(0);
67 counts_ = uint64_t(0);
68}
69
70
72{
73 double total = 0;
74 for (const double val : times_)
75 {
76 total += val;
77 }
78
79 return total;
80}
81
82
83// * * * * * * * * * * * * * * * Local Functions * * * * * * * * * * * * * * //
84
85namespace Foam
86{
87
88// Loop over all values (with striding) and extract the value at given index
89template<class Type>
90inline static void extractValues
91(
92 UList<Type>& result,
93 const int index,
94 const UList<Type>& allValues
95)
96{
97 if (result.empty())
98 {
99 return;
100 }
101
102 const label numProc = result.size();
103 const Type* values = allValues.cbegin();
104 const label stride = allValues.size() / numProc;
105
106 if (!values || !stride)
107 {
108 result = Type(0);
109 return;
110 }
111
112 for (label proci = 0; proci < numProc; ++proci, values += stride)
113 {
114 result[proci] = values[index];
116}
117
118
119// Loop over all values (with striding) and extract combined value
120// using the given unary function
121template<class Type, class Extract>
122inline static void extractValues
123(
124 UList<Type>& result,
125 const UList<Type>& allValues,
126 const Extract& extract
127)
128{
129 if (result.empty())
130 {
131 return;
132 }
133
134 const label numProc = result.size();
135 const Type* values = allValues.cbegin();
136 const label stride = allValues.size() / numProc;
137
138 if (!values || !stride)
139 {
140 result = Type(0);
141 return;
142 }
143
144 for (label proci = 0; proci < numProc; ++proci, values += stride)
145 {
146 result[proci] = extract(values);
147 }
148}
149
150
151inline static void printTimingDetail(const UList<double>& values)
152{
153 const label numProc = values.size();
154
155 if (numProc)
156 {
157 Info<< indent << " times " << numProc << '(';
158
159 for (label proci = 0; proci < numProc; ++proci)
160 {
161 if (proci) Info<< ' ';
162 Info<< values[proci];
164
165 Info<< ')' << nl;
166 }
167}
168
169
170inline static void printTimingDetail(const UList<uint64_t>& values)
171{
172 const label numProc = values.size();
173
174 if (numProc)
175 {
176 // Output via std::ostream to avoid conversion to Foam::label
177 // that Ostream performs
178
179 auto& os = Info.stdStream();
180
181 Info<< indent << " counts " << numProc << '(';
182
183 for (label proci = 0; proci < numProc; ++proci)
184 {
185 if (proci) os << ' ';
186 os << values[proci];
187 }
188
189 Info<< ')' << nl;
190 }
192
193} // End namespace Foam
194
195
196// * * * * * * * * * * * * * * * Member Functions * * * * * * * * * * * * * //
197
198void Foam::profilingPstream::report(const int reportLevel)
199{
200 const label numProc = (UPstream::parRun() ? UPstream::nProcs() : 1);
201
202 if (numProc < 2)
203 {
204 return;
205 }
206
207 // Use mpiGather on all values and perform the combinations
208 // and statistics locally. This reduces the overall number of MPI
209 // calls. For detailed output we need this information anyhow.
210
211 // NB: profilingPstream uses a FixedList for timings(), counts()
212 // so sizes are guaranteed to be consistent and identical everywhere.
213
214 List<double> allTimes;
215 List<uint64_t> allCounts;
216
217 // Avoid disturbing any information
218 const bool oldSuspend = suspend();
219
220 {
221 // The timings
222 const auto& procValues = times_;
223
224 if (UPstream::master())
225 {
226 allTimes.resize(numProc * procValues.size());
227 }
228
230 (
231 procValues.cdata(), // Send
232 allTimes.data(), // Recv
233 procValues.size(), // Num send/recv data per rank
235 );
236 }
237
238 if (reportLevel > 1)
239 {
240 // The counts
241 const auto& procValues = counts_;
242
243 if (UPstream::master())
244 {
245 allCounts.resize(numProc * procValues.size());
246 }
247
249 (
250 procValues.cdata(), // Send
251 allCounts.data(), // Recv
252 procValues.size(), // Num send/recv data per rank
254 );
255 }
256
257 // Resume if not previously suspended
258 if (!oldSuspend)
259 {
260 resume();
261 }
262
263
264 // (Time, Processor) for each of: min/max/sum(avg)
265 typedef FixedList<Tuple2<double, int>, 3> statData;
266
267 // Extract min/max/average
268 auto calcStats = [](const UList<double>& data) -> statData
269 {
270 statData stats;
271 stats = Tuple2<double, int>((data.empty() ? 0 : data[0]), 0);
272
273 const label np = data.size();
274 for (label proci = 1; proci < np; ++proci)
275 {
276 Tuple2<double, int> tup(data[proci], proci);
277
278 // 0: min, 1: max, 2: total(avg)
279 if (stats[0].first() > tup.first()) stats[0] = tup;
280 if (stats[1].first() < tup.first()) stats[1] = tup;
281 stats[2].first() += tup.first();
282 }
283
284 // From total -> average value
285 if (np) { stats[2].first() /= np; }
286
287 return stats;
288 };
289
290
291 const auto printTimingStats =
292 [&](Ostream& os, const char* tag, const statData& stats)
293 {
294 os << indent << tag << ": avg = " << stats[2].first()
295 << ", min = " << stats[0].first()
296 << " (proc " << stats[0].second() << ')'
297 << ", max = " << stats[1].first()
298 << " (proc " << stats[1].second() << ')'
299 << nl;
300 };
301
302
303 if (UPstream::master())
304 {
305 Info<< "profiling(parallel):" << nl
306 << incrIndent;
307
308 statData stats;
309 List<double> extractedTimes(numProc);
310 List<uint64_t> extractedCounts;
311
312 if (reportLevel > 1)
313 {
314 extractedCounts.resize(numProc);
315 }
316
317 // Total times
318 {
320 (
321 extractedTimes,
322 allTimes,
323 [=](const double values[])
324 {
325 double total = 0;
326 for (unsigned i = 0; i < timingType::nCategories; ++i)
327 {
328 total += values[i];
329 }
330 return total;
331 }
332 );
333 stats = calcStats(extractedTimes);
334
335 printTimingStats(Info(), "total ", stats);
336 if (reportLevel > 0) printTimingDetail(extractedTimes);
337 }
338
339 // all-all
340 {
341 const int index = int(timingType::ALL_TO_ALL);
342
343 extractValues(extractedTimes, index, allTimes);
344 extractValues(extractedCounts, index, allCounts);
345 stats = calcStats(extractedTimes);
346
347 printTimingStats(Info(), "all-all ", stats);
348 if (reportLevel > 0) printTimingDetail(extractedTimes);
349 if (reportLevel > 1) printTimingDetail(extractedCounts);
350 }
351
352 // broadcast
353 {
354 const int index = int(timingType::BROADCAST);
355
356 extractValues(extractedTimes, index, allTimes);
357 extractValues(extractedCounts, index, allCounts);
358 stats = calcStats(extractedTimes);
359
360 printTimingStats(Info(), "broadcast ", stats);
361 if (reportLevel > 0) printTimingDetail(extractedTimes);
362 if (reportLevel > 1) printTimingDetail(extractedCounts);
363 }
364
365 // probe
366 {
367 const int index = int(timingType::PROBE);
368
369 extractValues(extractedTimes, index, allTimes);
370 extractValues(extractedCounts, index, allCounts);
371 stats = calcStats(extractedTimes);
372
373 printTimingStats(Info(), "probe ", stats);
374 if (reportLevel > 0) printTimingDetail(extractedTimes);
375 if (reportLevel > 1) printTimingDetail(extractedCounts);
376 }
377
378 // Reduce/scatter times
379 {
380 // const int index = int(timingType::REDUCE);
381
383 (
384 extractedTimes,
385 allTimes,
386 [=](const double values[])
387 {
388 return
389 (
390 values[timingType::REDUCE]
391 + values[timingType::GATHER]
392 + values[timingType::SCATTER]
393 );
394 }
395 );
397 (
398 extractedCounts,
399 allCounts,
400 [=](const uint64_t values[])
401 {
402 return
403 (
404 values[timingType::REDUCE]
405 + values[timingType::GATHER]
406 + values[timingType::SCATTER]
407 );
408 }
409 );
410 stats = calcStats(extractedTimes);
411
412 printTimingStats(Info(), "reduce ", stats);
413 if (reportLevel > 0) printTimingDetail(extractedTimes);
414 if (reportLevel > 1) printTimingDetail(extractedCounts);
415 }
416
417 // Recv/send times
418 #if 0 // FUTURE?
419 {
420 // const int index = int(timingType::RECV);
421
423 (
424 extractedTimes,
425 allTimes,
426 [=](const double values[])
427 {
428 return
429 (
430 values[timingType::RECV]
431 + values[timingType::SEND]
432 );
433 }
434 );
436 (
437 extractedCounts,
438 allCounts,
439 [=](const uint64_t values[])
440 {
441 return
442 (
443 values[timingType::RECV]
444 + values[timingType::SEND]
445 );
446 }
447 );
448 stats = calcStats(extractedTimes);
449
450 printTimingStats(Info(), "send/recv ", stats);
451 if (reportLevel > 0) printTimingDetail(extractedTimes);
452 if (reportLevel > 1) printTimingDetail(extractedCounts);
453 }
454 #endif
455
456 // request
457 {
458 const int index = int(timingType::REQUEST);
459
460 extractValues(extractedTimes, index, allTimes);
461 extractValues(extractedCounts, index, allCounts);
462 stats = calcStats(extractedTimes);
463
464 printTimingStats(Info(), "request ", stats);
465
466 if (reportLevel > 0) printTimingDetail(extractedTimes);
467 if (reportLevel > 1) printTimingDetail(extractedCounts);
468 }
469
470 // wait
471 {
472 const int index = int(timingType::WAIT);
473
474 extractValues(extractedTimes, index, allTimes);
475 extractValues(extractedCounts, index, allCounts);
476 stats = calcStats(extractedTimes);
477
478 printTimingStats(Info(), "wait ", stats);
479
480 if (reportLevel > 0) printTimingDetail(extractedTimes);
481 if (reportLevel > 1) printTimingDetail(extractedCounts);
482 }
483
485 }
486}
487
488
489// ************************************************************************* //
A 1D vector of objects of type <T> with a fixed length <N>.
Definition FixedList.H:73
A 1D array of objects of type <T>, where the size of the vector is known and used for subscript bound...
Definition List.H:72
void resize(const label len)
Adjust allocated size of list.
Definition ListI.H:153
An Ostream is an abstract base class for all output systems (streams, files, token lists,...
Definition Ostream.H:59
A 2-tuple for storing two objects of dissimilar types. The container is similar in purpose to std::pa...
Definition Tuple2.H:51
const T1 & first() const noexcept
Access the first element.
Definition Tuple2.H:132
A 1D vector of objects of type <T>, where the size of the vector is known and can be used for subscri...
Definition UList.H:89
bool empty() const noexcept
True if List is empty (ie, size() is zero).
Definition UList.H:701
const_iterator cbegin() const noexcept
Return const_iterator to begin traversing the constant UList.
Definition UListI.H:424
T * data() noexcept
Return pointer to the underlying array serving as data storage.
Definition UListI.H:274
void size(const label n)
Older name for setAddressableSize.
Definition UList.H:118
static label commWorld() noexcept
Communicator for all ranks (respecting any local worlds).
Definition UPstream.H:1101
static void mpiGather(const Type *sendData, Type *recvData, int count, const int communicator=UPstream::worldComm)
Receive identically-sized (contiguous) data from all ranks.
static bool master(const label communicator=worldComm)
True if process corresponds to the master rank in the communicator.
Definition UPstream.H:1714
static label nProcs(const label communicator=worldComm)
Number of ranks in parallel run (for given communicator). It is 1 for serial run.
Definition UPstream.H:1697
static bool & parRun() noexcept
Test if this is a parallel run.
Definition UPstream.H:1681
static void disable() noexcept
Remove timer for measuring communication activity. Does not affect times/counts.
FixedList< double, timingType::nCategories > timingList
Fixed-size container for timing values.
static void enable()
Create timer for measuring communication or un-suspend existing.
static void resume() noexcept
Resume use of timer (if previously active).
FixedList< uint64_t, timingType::nCategories > countList
Fixed-size container for timing counts.
static void reset()
Reset times/counts. Does not affect the timer itself.
static void report(const int reportLevel=0)
Report current information. Uses parallel communication!
static bool suspend() noexcept
Suspend use of timer. Return old status.
static double elapsedTime()
The total of times.
OBJstream os(runTime.globalPath()/outputName)
List< T > values(const HashTable< T, Key, Hash > &tbl, const bool doSort=false)
List of values from HashTable, optionally sorted.
Definition HashOps.H:164
Namespace for OpenFOAM.
static void extractValues(UList< Type > &result, const int index, const UList< Type > &allValues)
static List< T > extract(const word &key, const UPtrList< entry > &entries, const T &initValue)
messageStream Info
Information stream (stdout output on master, null elsewhere).
static void printTimingDetail(const UList< double > &values)
Ostream & incrIndent(Ostream &os)
Increment the indent level.
Definition Ostream.H:490
Ostream & indent(Ostream &os)
Indent stream.
Definition Ostream.H:481
const direction noexcept
Definition scalarImpl.H:265
Ostream & decrIndent(Ostream &os)
Decrement the indent level.
Definition Ostream.H:499
cpuTimePosix cpuTime
Selection of preferred clock mechanism for the elapsed cpu time.
Definition cpuTimeFwd.H:38
constexpr char nl
The newline '\n' character (0x0a).
Definition Ostream.H:50