Context Navigation

perf.c@ b4a4ad94

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since b4a4ad94 was b4a4ad94, checked in by Vojtech Horky <vojtech.horky@…>, 7 years ago

perf: compute average throughput correctly

The correct way to compute average throughput is to use geometric mean.
See comment in the source code for an example why it gives more
meaningful results.

Also added a simplified square root approximation (using Babylonian
method) to compute standard deviation.

Note that the diff is awful as the context did not catch on the
compute_stats() function.

Property mode set to 100644

File size: 8.6 KB

Line
1	/*
2	* Copyright (c) 2018 Jiri Svoboda
3	* Copyright (c) 2018 Vojtech Horky
4	* All rights reserved.
5	*
6	* Redistribution and use in source and binary forms, with or without
7	* modification, are permitted provided that the following conditions
8	* are met:
9	*
10	* - Redistributions of source code must retain the above copyright
11	* notice, this list of conditions and the following disclaimer.
12	* - Redistributions in binary form must reproduce the above copyright
13	* notice, this list of conditions and the following disclaimer in the
14	* documentation and/or other materials provided with the distribution.
15	* - The name of the author may not be used to endorse or promote products
16	* derived from this software without specific prior written permission.
17	*
18	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28	*/
29
30	/** @addtogroup perf
31	* @{
32	*/
33	/**
34	* @file
35	*/
36
37	#include <assert.h>
38	#include <math.h>
39	#include <stdio.h>
40	#include <stddef.h>
41	#include <stdlib.h>
42	#include <str.h>
43	#include <time.h>
44	#include <errno.h>
45	#include <perf.h>
46	#include <types/casting.h>
47	#include "perf.h"
48	#include "benchlist.h"
49
50	#define MIN_DURATION_SECS 10
51	#define NUM_SAMPLES 10
52	#define MAX_ERROR_STR_LENGTH 1024
53
54	static void short_report(stopwatch_t *stopwatch, int run_index,
55	benchmark_t *bench, uint64_t workload_size)
56	{
57	usec_t duration_usec = NSEC2USEC(stopwatch_get_nanos(stopwatch));
58
59	printf("Completed %" PRIu64 " operations in %llu us",
60	workload_size, duration_usec);
61	if (duration_usec > 0) {
62	double nanos = stopwatch_get_nanos(stopwatch);
63	double thruput = (double) workload_size / (nanos / 1000000000.0l);
64	printf(", %.0f ops/s.\n", thruput);
65	} else {
66	printf(".\n");
67	}
68	}
69
70	/*
71	* This is a temporary solution until we have proper sqrt() implementation
72	* in libmath.
73	*
74	* The algorithm uses Babylonian method [1].
75	*
76	* [1] https://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Babylonian_method
77	*/
78	static double estimate_square_root(double value, double precision)
79	{
80	double estimate = 1.;
81	double prev_estimate = estimate + 10 * precision;
82
83	while (fabs(estimate - prev_estimate) > precision) {
84	prev_estimate = estimate;
85	estimate = (prev_estimate + value / prev_estimate) / 2.;
86	}
87
88	return estimate;
89	}
90
91	/*
92	* Compute available statistics from given stopwatches.
93	*
94	* We compute normal mean for average duration of the workload and geometric
95	* mean for average thruput. Note that geometric mean is necessary to compute
96	* average throughput correctly - consider the following example:
97	* - we run always 60 operations,
98	* - first run executes in 30 s (i.e. 2 ops/s)
99	* - and second one in 10 s (6 ops/s).
100	* Then, naively, average throughput would be (2+6)/2 = 4 [ops/s]. However, we
101	* actually executed 60 + 60 ops in 30 + 10 seconds. So the actual average
102	* throughput is 3 ops/s (which is exactly what geometric mean means).
103	*
104	*/
105	static void compute_stats(stopwatch_t *stopwatch, size_t stopwatch_count,
106	uint64_t workload_size, double precision, double *out_duration_avg,
107	double out_duration_sigma, double out_thruput_avg)
108	{
109	double inv_thruput_sum = 0.0;
110	double nanos_sum = 0.0;
111	double nanos_sum2 = 0.0;
112
113	for (size_t i = 0; i < stopwatch_count; i++) {
114	double nanos = stopwatch_get_nanos(&stopwatch[i]);
115	double thruput = (double) workload_size / nanos;
116
117	inv_thruput_sum += 1.0 / thruput;
118	nanos_sum += nanos;
119	nanos_sum2 += nanos * nanos;
120	}
121	*out_duration_avg = nanos_sum / stopwatch_count;
122	double sigma2 = (nanos_sum2 - nanos_sum * (*out_duration_avg)) /
123	((double) stopwatch_count - 1);
124	// FIXME: implement sqrt properly
125	*out_duration_sigma = estimate_square_root(sigma2, precision);
126	*out_thruput_avg = 1.0 / (inv_thruput_sum / stopwatch_count);
127	}
128
129	static void summary_stats(stopwatch_t *stopwatch, size_t stopwatch_count,
130	benchmark_t *bench, uint64_t workload_size)
131	{
132	double duration_avg, duration_sigma, thruput_avg;
133	compute_stats(stopwatch, stopwatch_count, workload_size, 0.001,
134	&duration_avg, &duration_sigma, &thruput_avg);
135
136	printf("Average: %" PRIu64 " ops in %.0f us (sd %.0f us); "
137	"%.0f ops/s; Samples: %zu\n",
138	workload_size, duration_avg / 1000.0, duration_sigma / 1000.0,
139	thruput_avg * 1000000000.0, stopwatch_count);
140	}
141
142	static bool run_benchmark(benchmark_t *bench)
143	{
144	printf("Warm up and determine workload size...\n");
145
146	char *error_msg = malloc(MAX_ERROR_STR_LENGTH + 1);
147	if (error_msg == NULL) {
148	printf("Out of memory!\n");
149	return false;
150	}
151	str_cpy(error_msg, MAX_ERROR_STR_LENGTH, "");
152
153	bool ret = true;
154
155	if (bench->setup != NULL) {
156	ret = bench->setup(error_msg, MAX_ERROR_STR_LENGTH);
157	if (!ret) {
158	goto leave_error;
159	}
160	}
161
162	/*
163	* Find workload size that is big enough to last few seconds.
164	* We also check that uint64_t is big enough.
165	*/
166	uint64_t workload_size = 0;
167	for (size_t bits = 0; bits <= 64; bits++) {
168	if (bits == 64) {
169	str_cpy(error_msg, MAX_ERROR_STR_LENGTH, "Workload too small even for 1 << 63");
170	goto leave_error;
171	}
172	workload_size = ((uint64_t) 1) << bits;
173
174	stopwatch_t stopwatch = STOPWATCH_INITIALIZE_STATIC;
175
176	bool ok = bench->entry(&stopwatch, workload_size,
177	error_msg, MAX_ERROR_STR_LENGTH);
178	if (!ok) {
179	goto leave_error;
180	}
181	short_report(&stopwatch, -1, bench, workload_size);
182
183	nsec_t duration = stopwatch_get_nanos(&stopwatch);
184	if (duration > SEC2NSEC(MIN_DURATION_SECS)) {
185	break;
186	}
187	}
188
189	printf("Workload size set to %" PRIu64 ", measuring %d samples.\n", workload_size, NUM_SAMPLES);
190
191	stopwatch_t *stopwatch = calloc(NUM_SAMPLES, sizeof(stopwatch_t));
192	if (stopwatch == NULL) {
193	snprintf(error_msg, MAX_ERROR_STR_LENGTH, "failed allocating memory");
194	goto leave_error;
195	}
196	for (int i = 0; i < NUM_SAMPLES; i++) {
197	stopwatch_init(&stopwatch[i]);
198
199	bool ok = bench->entry(&stopwatch[i], workload_size,
200	error_msg, MAX_ERROR_STR_LENGTH);
201	if (!ok) {
202	free(stopwatch);
203	goto leave_error;
204	}
205	short_report(&stopwatch[i], i, bench, workload_size);
206	}
207
208	summary_stats(stopwatch, NUM_SAMPLES, bench, workload_size);
209	printf("\nBenchmark completed\n");
210
211	free(stopwatch);
212
213	goto leave;
214
215	leave_error:
216	printf("Error: %s\n", error_msg);
217	ret = false;
218
219	leave:
220	if (bench->teardown != NULL) {
221	bool ok = bench->teardown(error_msg, MAX_ERROR_STR_LENGTH);
222	if (!ok) {
223	printf("Error: %s\n", error_msg);
224	ret = false;
225	}
226	}
227
228	free(error_msg);
229
230	return ret;
231	}
232
233	static int run_benchmarks(void)
234	{
235	unsigned int count_ok = 0;
236	unsigned int count_fail = 0;
237
238	char *failed_names = NULL;
239
240	printf("\n* Running all benchmarks *\n\n");
241
242	for (size_t it = 0; it < benchmark_count; it++) {
243	printf("%s (%s)\n", benchmarks[it]->name, benchmarks[it]->desc);
244	if (run_benchmark(benchmarks[it])) {
245	count_ok++;
246	continue;
247	}
248
249	if (!failed_names) {
250	failed_names = str_dup(benchmarks[it]->name);
251	} else {
252	char *f = NULL;
253	asprintf(&f, "%s, %s", failed_names, benchmarks[it]->name);
254	if (!f) {
255	printf("Out of memory.\n");
256	abort();
257	}
258	free(failed_names);
259	failed_names = f;
260	}
261	count_fail++;
262	}
263
264	printf("\nCompleted, %u benchmarks run, %u succeeded.\n",
265	count_ok + count_fail, count_ok);
266	if (failed_names)
267	printf("Failed benchmarks: %s\n", failed_names);
268
269	return count_fail;
270	}
271
272	static void list_benchmarks(void)
273	{
274	size_t len = 0;
275	for (size_t i = 0; i < benchmark_count; i++) {
276	size_t len_now = str_length(benchmarks[i]->name);
277	if (len_now > len)
278	len = len_now;
279	}
280
281	assert(can_cast_size_t_to_int(len) && "benchmark name length overflow");
282
283	for (size_t i = 0; i < benchmark_count; i++)
284	printf("%-*s %s\n", (int) len, benchmarks[i]->name, benchmarks[i]->desc);
285
286	printf("%-s Run all benchmarks\n", (int) len, "");
287	}
288
289	int main(int argc, char *argv[])
290	{
291	if (argc < 2) {
292	printf("Usage:\n\n");
293	printf("%s <benchmark>\n\n", argv[0]);
294	list_benchmarks();
295	return 0;
296	}
297
298	if (str_cmp(argv[1], "*") == 0) {
299	return run_benchmarks();
300	}
301
302	for (size_t i = 0; i < benchmark_count; i++) {
303	if (str_cmp(argv[1], benchmarks[i]->name) == 0) {
304	return (run_benchmark(benchmarks[i]) ? 0 : -1);
305	}
306	}
307
308	printf("Unknown benchmark \"%s\"\n", argv[1]);
309	return -2;
310	}
311
312	/** @}
313	*/

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: mainline/uspace/app/perf/perf.c@ b4a4ad94

Download in other formats: