source: mainline/uspace/app/perf/perf.c@ f85546d

Last change on this file was f85546d, checked in by Vojtech Horky <vojtech.horky@…>, 6 years ago

perf: add benchmark parameters

Added an API for extra benchmark parameters that can modify their
behaviour. The API is not used by any of the existing benchmarks yet
(new benchmarks are coming in the next commits).
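For illustration, a parameter passed on the command line as "-p block_size=4096"
reaches the benchmark through params.h: handle_param_arg() splits the argument
on "=" and stores it with bench_param_set("block_size", "4096"). A benchmark's
setup hook might then consume it roughly like this (a sketch only: the
"block_size" key and the bench_param_get() accessor are hypothetical, and the
actual read-side interface of params.h may differ):

    /* Sketch: bench_param_get() and "block_size" are illustrative only. */
    static bool my_bench_setup(char *error, size_t error_size)
    {
        const char *value = bench_param_get("block_size");
        if (value == NULL) {
            snprintf(error, error_size, "missing parameter block_size");
            return false;
        }
        /* ... configure the benchmark according to value ... */
        return true;
    }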

/*
 * Copyright (c) 2018 Jiri Svoboda
 * Copyright (c) 2018 Vojtech Horky
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/** @addtogroup perf
 * @{
 */
/**
 * @file
 */

#include <assert.h>
#include <getopt.h>
#include <math.h>
#include <stdio.h>
#include <stddef.h>
#include <stdlib.h>
#include <str.h>
#include <time.h>
#include <errno.h>
#include <str_error.h>
#include <perf.h>
#include <types/casting.h>
#include "benchlist.h"
#include "csv.h"
#include "params.h"
#include "perf.h"

#define MIN_DURATION_SECS 10
#define NUM_SAMPLES 10
#define MAX_ERROR_STR_LENGTH 1024

static void short_report(stopwatch_t *stopwatch, int run_index,
    benchmark_t *bench, uint64_t workload_size)
{
    csv_report_add_entry(stopwatch, run_index, bench, workload_size);

    usec_t duration_usec = NSEC2USEC(stopwatch_get_nanos(stopwatch));

    printf("Completed %" PRIu64 " operations in %llu us",
        workload_size, duration_usec);
    if (duration_usec > 0) {
        double nanos = stopwatch_get_nanos(stopwatch);
        double thruput = (double) workload_size / (nanos / 1000000000.0l);
        printf(", %.0f ops/s.\n", thruput);
    } else {
        printf(".\n");
    }
}
/*
 * This is a temporary solution until we have a proper sqrt() implementation
 * in libmath.
 *
 * The algorithm uses the Babylonian method [1].
 *
 * [1] https://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Babylonian_method
 */
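/*
 * For illustration: with value = 2.0 and precision = 0.001 (the precision
 * that summary_stats() passes in below), the estimates go
 * 1.0 -> 1.5 -> 1.41667 -> 1.41422 -> 1.41421, at which point two successive
 * estimates differ by less than the requested precision and the last one is
 * returned.
 */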
static double estimate_square_root(double value, double precision)
{
    double estimate = 1.;
    double prev_estimate = estimate + 10 * precision;

    while (fabs(estimate - prev_estimate) > precision) {
        prev_estimate = estimate;
        estimate = (prev_estimate + value / prev_estimate) / 2.;
    }

    return estimate;
}

/*
 * Compute available statistics from the given stopwatches.
 *
 * We compute the arithmetic mean for the average duration of the workload and
 * the harmonic mean for the average throughput. Note that the harmonic mean
 * is necessary to compute the average throughput correctly - consider the
 * following example:
 * - we always run 60 operations,
 * - the first run executes in 30 s (i.e. 2 ops/s)
 * - and the second one in 10 s (6 ops/s).
 * Then, naively, the average throughput would be (2+6)/2 = 4 [ops/s]. However,
 * we actually executed 60 + 60 ops in 30 + 10 seconds, so the actual average
 * throughput is 3 ops/s (which is exactly the harmonic mean of 2 and 6).
 *
 */
static void compute_stats(stopwatch_t *stopwatch, size_t stopwatch_count,
    uint64_t workload_size, double precision, double *out_duration_avg,
    double *out_duration_sigma, double *out_thruput_avg)
{
    double inv_thruput_sum = 0.0;
    double nanos_sum = 0.0;
    double nanos_sum2 = 0.0;

    for (size_t i = 0; i < stopwatch_count; i++) {
        double nanos = stopwatch_get_nanos(&stopwatch[i]);
        double thruput = (double) workload_size / nanos;

        inv_thruput_sum += 1.0 / thruput;
        nanos_sum += nanos;
        nanos_sum2 += nanos * nanos;
    }
    *out_duration_avg = nanos_sum / stopwatch_count;
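    /*
     * Sample variance via the algebraic identity
     * s^2 = (sum(x^2) - n * mean^2) / (n - 1)
     *     = (sum(x^2) - sum(x) * mean) / (n - 1),
     * computed from the running sums accumulated above.
     */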
    double sigma2 = (nanos_sum2 - nanos_sum * (*out_duration_avg)) /
        ((double) stopwatch_count - 1);
    // FIXME: implement sqrt properly
    *out_duration_sigma = estimate_square_root(sigma2, precision);
    *out_thruput_avg = 1.0 / (inv_thruput_sum / stopwatch_count);
}

static void summary_stats(stopwatch_t *stopwatch, size_t stopwatch_count,
    benchmark_t *bench, uint64_t workload_size)
{
    double duration_avg, duration_sigma, thruput_avg;
    compute_stats(stopwatch, stopwatch_count, workload_size, 0.001,
        &duration_avg, &duration_sigma, &thruput_avg);

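    /*
     * compute_stats() reports durations in nanoseconds and throughput in
     * ops per nanosecond, hence the division by 1000 (us) and the
     * multiplication by 1e9 (ops/s) below.
     */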
142 printf("Average: %" PRIu64 " ops in %.0f us (sd %.0f us); "
143 "%.0f ops/s; Samples: %zu\n",
144 workload_size, duration_avg / 1000.0, duration_sigma / 1000.0,
145 thruput_avg * 1000000000.0, stopwatch_count);
146}
147
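/*
 * Measurement protocol of run_benchmark() below: run the optional setup
 * hook, grow the workload size by powers of two until a single run exceeds
 * MIN_DURATION_SECS, measure NUM_SAMPLES runs of that size, print per-run
 * and summary statistics, and finish with the optional teardown hook.
 */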
static bool run_benchmark(benchmark_t *bench)
{
    printf("Warm up and determine workload size...\n");

    char *error_msg = malloc(MAX_ERROR_STR_LENGTH + 1);
    if (error_msg == NULL) {
        printf("Out of memory!\n");
        return false;
    }
    str_cpy(error_msg, MAX_ERROR_STR_LENGTH, "");

    bool ret = true;

    if (bench->setup != NULL) {
        ret = bench->setup(error_msg, MAX_ERROR_STR_LENGTH);
        if (!ret) {
            goto leave_error;
        }
    }

    /*
     * Find a workload size that is big enough to last a few seconds.
     * We also check that uint64_t is big enough.
     */
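    /*
     * For illustration: with MIN_DURATION_SECS = 10, a benchmark whose
     * operation takes roughly 1 us stops growing at 1 << 24, i.e. at about
     * 16.8 million operations (about 16.8 s per run).
     */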
    uint64_t workload_size = 0;
    for (size_t bits = 0; bits <= 64; bits++) {
        if (bits == 64) {
            str_cpy(error_msg, MAX_ERROR_STR_LENGTH, "Workload too small even for 1 << 63");
            goto leave_error;
        }
        workload_size = ((uint64_t) 1) << bits;

        stopwatch_t stopwatch = STOPWATCH_INITIALIZE_STATIC;

        bool ok = bench->entry(&stopwatch, workload_size,
            error_msg, MAX_ERROR_STR_LENGTH);
        if (!ok) {
            goto leave_error;
        }
        short_report(&stopwatch, -1, bench, workload_size);

        nsec_t duration = stopwatch_get_nanos(&stopwatch);
        if (duration > SEC2NSEC(MIN_DURATION_SECS)) {
            break;
        }
    }

    printf("Workload size set to %" PRIu64 ", measuring %d samples.\n", workload_size, NUM_SAMPLES);

    stopwatch_t *stopwatch = calloc(NUM_SAMPLES, sizeof(stopwatch_t));
    if (stopwatch == NULL) {
        snprintf(error_msg, MAX_ERROR_STR_LENGTH, "failed allocating memory");
        goto leave_error;
    }
    for (int i = 0; i < NUM_SAMPLES; i++) {
        stopwatch_init(&stopwatch[i]);

        bool ok = bench->entry(&stopwatch[i], workload_size,
            error_msg, MAX_ERROR_STR_LENGTH);
        if (!ok) {
            free(stopwatch);
            goto leave_error;
        }
        short_report(&stopwatch[i], i, bench, workload_size);
    }

    summary_stats(stopwatch, NUM_SAMPLES, bench, workload_size);
    printf("\nBenchmark completed\n");

    free(stopwatch);

    goto leave;

leave_error:
    printf("Error: %s\n", error_msg);
    ret = false;

leave:
    if (bench->teardown != NULL) {
        bool ok = bench->teardown(error_msg, MAX_ERROR_STR_LENGTH);
        if (!ok) {
            printf("Error: %s\n", error_msg);
            ret = false;
        }
    }

    free(error_msg);

    return ret;
}

static int run_benchmarks(void)
{
    unsigned int count_ok = 0;
    unsigned int count_fail = 0;

    char *failed_names = NULL;

    printf("\n*** Running all benchmarks ***\n\n");

    for (size_t it = 0; it < benchmark_count; it++) {
        printf("%s (%s)\n", benchmarks[it]->name, benchmarks[it]->desc);
        if (run_benchmark(benchmarks[it])) {
            count_ok++;
            continue;
        }

        if (!failed_names) {
            failed_names = str_dup(benchmarks[it]->name);
        } else {
            char *f = NULL;
            asprintf(&f, "%s, %s", failed_names, benchmarks[it]->name);
            if (!f) {
                printf("Out of memory.\n");
                abort();
            }
            free(failed_names);
            failed_names = f;
        }
        count_fail++;
    }

    printf("\nCompleted, %u benchmarks run, %u succeeded.\n",
        count_ok + count_fail, count_ok);
    if (failed_names)
        printf("Failed benchmarks: %s\n", failed_names);

    return count_fail;
}

static void list_benchmarks(void)
{
    size_t len = 0;
    for (size_t i = 0; i < benchmark_count; i++) {
        size_t len_now = str_length(benchmarks[i]->name);
        if (len_now > len)
            len = len_now;
    }

    assert(can_cast_size_t_to_int(len) && "benchmark name length overflow");

    for (size_t i = 0; i < benchmark_count; i++)
        printf(" %-*s %s\n", (int) len, benchmarks[i]->name, benchmarks[i]->desc);

    printf(" %-*s Run all benchmarks\n", (int) len, "*");
}

static void print_usage(const char *progname)
{
    printf("Usage: %s [options] <benchmark>\n", progname);
    printf("-h, --help "
        "Print this help and exit\n");
    printf("-o, --output filename.csv "
        "Store machine-readable data in filename.csv\n");
    printf("-p, --param KEY=VALUE "
        "Additional parameters for the benchmark\n");
    printf("<benchmark> is one of the following:\n");
    list_benchmarks();
}
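
/*
 * Example invocations ("somebench" is a placeholder for one of the names
 * printed by list_benchmarks(); quote the * if your shell expands it):
 *
 *   perf somebench                 run a single benchmark
 *   perf -p key=value somebench    pass an extra parameter to the benchmark
 *   perf -o results.csv *          run all benchmarks, store CSV data
 */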

static void handle_param_arg(char *arg)
{
    char *value = NULL;
    char *key = str_tok(arg, "=", &value);
    bench_param_set(key, value);
}

int main(int argc, char *argv[])
{
    errno_t rc = bench_param_init();
    if (rc != EOK) {
        fprintf(stderr, "Failed to initialize internal params structure: %s\n",
            str_error(rc));
        return -5;
    }

    const char *short_options = "ho:p:";
    struct option long_options[] = {
        { "help", optional_argument, NULL, 'h' },
        { "param", required_argument, NULL, 'p' },
        { "output", required_argument, NULL, 'o' },
        { 0, 0, NULL, 0 }
    };

    char *csv_output_filename = NULL;

    int opt = 0;
    while ((opt = getopt_long(argc, argv, short_options, long_options, NULL)) > 0) {
        switch (opt) {
        case 'h':
            print_usage(*argv);
            return 0;
        case 'o':
            csv_output_filename = optarg;
            break;
        case 'p':
            handle_param_arg(optarg);
            break;
        case -1:
        default:
            break;
        }
    }

    if (optind + 1 != argc) {
        print_usage(*argv);
        fprintf(stderr, "Error: specify one benchmark to run or * for all.\n");
        return -3;
    }

    const char *benchmark = argv[optind];

    if (csv_output_filename != NULL) {
        errno_t rc = csv_report_open(csv_output_filename);
        if (rc != EOK) {
            fprintf(stderr, "Failed to open CSV report '%s': %s\n",
                csv_output_filename, str_error(rc));
            return -4;
        }
    }

    int exit_code = 0;

    if (str_cmp(benchmark, "*") == 0) {
        exit_code = run_benchmarks();
    } else {
        bool benchmark_exists = false;
        for (size_t i = 0; i < benchmark_count; i++) {
            if (str_cmp(benchmark, benchmarks[i]->name) == 0) {
                benchmark_exists = true;
                exit_code = run_benchmark(benchmarks[i]) ? 0 : -1;
                break;
            }
        }
        if (!benchmark_exists) {
            printf("Unknown benchmark \"%s\"\n", benchmark);
            exit_code = -2;
        }
    }

    csv_report_close();
    bench_param_cleanup();

    return exit_code;
}

/** @}
 */