source: mainline/uspace/app/perf/perf.c@ b4a4ad94

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since b4a4ad94 was b4a4ad94, checked in by Vojtech Horky <vojtech.horky@…>, 7 years ago

perf: compute average throughput correctly

The correct way to compute average throughput is to use geometric mean.
See comment in the source code for an example why it gives more
meaningful results.

Also added a simplified square root approximation (using Babylonian
method) to compute standard deviation.

Note that the diff is awful as the context did not catch on the
compute_stats() function.

  • Property mode set to 100644
File size: 8.6 KB
Line 
1/*
2 * Copyright (c) 2018 Jiri Svoboda
3 * Copyright (c) 2018 Vojtech Horky
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * - Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * - Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * - The name of the author may not be used to endorse or promote products
16 * derived from this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 */
29
30/** @addtogroup perf
31 * @{
32 */
33/**
34 * @file
35 */
36
37#include <assert.h>
38#include <math.h>
39#include <stdio.h>
40#include <stddef.h>
41#include <stdlib.h>
42#include <str.h>
43#include <time.h>
44#include <errno.h>
45#include <perf.h>
46#include <types/casting.h>
47#include "perf.h"
48#include "benchlist.h"
49
50#define MIN_DURATION_SECS 10
51#define NUM_SAMPLES 10
52#define MAX_ERROR_STR_LENGTH 1024
53
54static void short_report(stopwatch_t *stopwatch, int run_index,
55 benchmark_t *bench, uint64_t workload_size)
56{
57 usec_t duration_usec = NSEC2USEC(stopwatch_get_nanos(stopwatch));
58
59 printf("Completed %" PRIu64 " operations in %llu us",
60 workload_size, duration_usec);
61 if (duration_usec > 0) {
62 double nanos = stopwatch_get_nanos(stopwatch);
63 double thruput = (double) workload_size / (nanos / 1000000000.0l);
64 printf(", %.0f ops/s.\n", thruput);
65 } else {
66 printf(".\n");
67 }
68}
69
70/*
71 * This is a temporary solution until we have proper sqrt() implementation
72 * in libmath.
73 *
74 * The algorithm uses Babylonian method [1].
75 *
76 * [1] https://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Babylonian_method
77 */
78static double estimate_square_root(double value, double precision)
79{
80 double estimate = 1.;
81 double prev_estimate = estimate + 10 * precision;
82
83 while (fabs(estimate - prev_estimate) > precision) {
84 prev_estimate = estimate;
85 estimate = (prev_estimate + value / prev_estimate) / 2.;
86 }
87
88 return estimate;
89}
90
91/*
92 * Compute available statistics from given stopwatches.
93 *
94 * We compute normal mean for average duration of the workload and geometric
95 * mean for average thruput. Note that geometric mean is necessary to compute
96 * average throughput correctly - consider the following example:
97 * - we run always 60 operations,
98 * - first run executes in 30 s (i.e. 2 ops/s)
99 * - and second one in 10 s (6 ops/s).
100 * Then, naively, average throughput would be (2+6)/2 = 4 [ops/s]. However, we
101 * actually executed 60 + 60 ops in 30 + 10 seconds. So the actual average
102 * throughput is 3 ops/s (which is exactly what geometric mean means).
103 *
104 */
105static void compute_stats(stopwatch_t *stopwatch, size_t stopwatch_count,
106 uint64_t workload_size, double precision, double *out_duration_avg,
107 double *out_duration_sigma, double *out_thruput_avg)
108{
109 double inv_thruput_sum = 0.0;
110 double nanos_sum = 0.0;
111 double nanos_sum2 = 0.0;
112
113 for (size_t i = 0; i < stopwatch_count; i++) {
114 double nanos = stopwatch_get_nanos(&stopwatch[i]);
115 double thruput = (double) workload_size / nanos;
116
117 inv_thruput_sum += 1.0 / thruput;
118 nanos_sum += nanos;
119 nanos_sum2 += nanos * nanos;
120 }
121 *out_duration_avg = nanos_sum / stopwatch_count;
122 double sigma2 = (nanos_sum2 - nanos_sum * (*out_duration_avg)) /
123 ((double) stopwatch_count - 1);
124 // FIXME: implement sqrt properly
125 *out_duration_sigma = estimate_square_root(sigma2, precision);
126 *out_thruput_avg = 1.0 / (inv_thruput_sum / stopwatch_count);
127}
128
129static void summary_stats(stopwatch_t *stopwatch, size_t stopwatch_count,
130 benchmark_t *bench, uint64_t workload_size)
131{
132 double duration_avg, duration_sigma, thruput_avg;
133 compute_stats(stopwatch, stopwatch_count, workload_size, 0.001,
134 &duration_avg, &duration_sigma, &thruput_avg);
135
136 printf("Average: %" PRIu64 " ops in %.0f us (sd %.0f us); "
137 "%.0f ops/s; Samples: %zu\n",
138 workload_size, duration_avg / 1000.0, duration_sigma / 1000.0,
139 thruput_avg * 1000000000.0, stopwatch_count);
140}
141
142static bool run_benchmark(benchmark_t *bench)
143{
144 printf("Warm up and determine workload size...\n");
145
146 char *error_msg = malloc(MAX_ERROR_STR_LENGTH + 1);
147 if (error_msg == NULL) {
148 printf("Out of memory!\n");
149 return false;
150 }
151 str_cpy(error_msg, MAX_ERROR_STR_LENGTH, "");
152
153 bool ret = true;
154
155 if (bench->setup != NULL) {
156 ret = bench->setup(error_msg, MAX_ERROR_STR_LENGTH);
157 if (!ret) {
158 goto leave_error;
159 }
160 }
161
162 /*
163 * Find workload size that is big enough to last few seconds.
164 * We also check that uint64_t is big enough.
165 */
166 uint64_t workload_size = 0;
167 for (size_t bits = 0; bits <= 64; bits++) {
168 if (bits == 64) {
169 str_cpy(error_msg, MAX_ERROR_STR_LENGTH, "Workload too small even for 1 << 63");
170 goto leave_error;
171 }
172 workload_size = ((uint64_t) 1) << bits;
173
174 stopwatch_t stopwatch = STOPWATCH_INITIALIZE_STATIC;
175
176 bool ok = bench->entry(&stopwatch, workload_size,
177 error_msg, MAX_ERROR_STR_LENGTH);
178 if (!ok) {
179 goto leave_error;
180 }
181 short_report(&stopwatch, -1, bench, workload_size);
182
183 nsec_t duration = stopwatch_get_nanos(&stopwatch);
184 if (duration > SEC2NSEC(MIN_DURATION_SECS)) {
185 break;
186 }
187 }
188
189 printf("Workload size set to %" PRIu64 ", measuring %d samples.\n", workload_size, NUM_SAMPLES);
190
191 stopwatch_t *stopwatch = calloc(NUM_SAMPLES, sizeof(stopwatch_t));
192 if (stopwatch == NULL) {
193 snprintf(error_msg, MAX_ERROR_STR_LENGTH, "failed allocating memory");
194 goto leave_error;
195 }
196 for (int i = 0; i < NUM_SAMPLES; i++) {
197 stopwatch_init(&stopwatch[i]);
198
199 bool ok = bench->entry(&stopwatch[i], workload_size,
200 error_msg, MAX_ERROR_STR_LENGTH);
201 if (!ok) {
202 free(stopwatch);
203 goto leave_error;
204 }
205 short_report(&stopwatch[i], i, bench, workload_size);
206 }
207
208 summary_stats(stopwatch, NUM_SAMPLES, bench, workload_size);
209 printf("\nBenchmark completed\n");
210
211 free(stopwatch);
212
213 goto leave;
214
215leave_error:
216 printf("Error: %s\n", error_msg);
217 ret = false;
218
219leave:
220 if (bench->teardown != NULL) {
221 bool ok = bench->teardown(error_msg, MAX_ERROR_STR_LENGTH);
222 if (!ok) {
223 printf("Error: %s\n", error_msg);
224 ret = false;
225 }
226 }
227
228 free(error_msg);
229
230 return ret;
231}
232
233static int run_benchmarks(void)
234{
235 unsigned int count_ok = 0;
236 unsigned int count_fail = 0;
237
238 char *failed_names = NULL;
239
240 printf("\n*** Running all benchmarks ***\n\n");
241
242 for (size_t it = 0; it < benchmark_count; it++) {
243 printf("%s (%s)\n", benchmarks[it]->name, benchmarks[it]->desc);
244 if (run_benchmark(benchmarks[it])) {
245 count_ok++;
246 continue;
247 }
248
249 if (!failed_names) {
250 failed_names = str_dup(benchmarks[it]->name);
251 } else {
252 char *f = NULL;
253 asprintf(&f, "%s, %s", failed_names, benchmarks[it]->name);
254 if (!f) {
255 printf("Out of memory.\n");
256 abort();
257 }
258 free(failed_names);
259 failed_names = f;
260 }
261 count_fail++;
262 }
263
264 printf("\nCompleted, %u benchmarks run, %u succeeded.\n",
265 count_ok + count_fail, count_ok);
266 if (failed_names)
267 printf("Failed benchmarks: %s\n", failed_names);
268
269 return count_fail;
270}
271
272static void list_benchmarks(void)
273{
274 size_t len = 0;
275 for (size_t i = 0; i < benchmark_count; i++) {
276 size_t len_now = str_length(benchmarks[i]->name);
277 if (len_now > len)
278 len = len_now;
279 }
280
281 assert(can_cast_size_t_to_int(len) && "benchmark name length overflow");
282
283 for (size_t i = 0; i < benchmark_count; i++)
284 printf("%-*s %s\n", (int) len, benchmarks[i]->name, benchmarks[i]->desc);
285
286 printf("%-*s Run all benchmarks\n", (int) len, "*");
287}
288
289int main(int argc, char *argv[])
290{
291 if (argc < 2) {
292 printf("Usage:\n\n");
293 printf("%s <benchmark>\n\n", argv[0]);
294 list_benchmarks();
295 return 0;
296 }
297
298 if (str_cmp(argv[1], "*") == 0) {
299 return run_benchmarks();
300 }
301
302 for (size_t i = 0; i < benchmark_count; i++) {
303 if (str_cmp(argv[1], benchmarks[i]->name) == 0) {
304 return (run_benchmark(benchmarks[i]) ? 0 : -1);
305 }
306 }
307
308 printf("Unknown benchmark \"%s\"\n", argv[1]);
309 return -2;
310}
311
312/** @}
313 */
Note: See TracBrowser for help on using the repository browser.