Fork us on GitHub Follow us on Facebook Follow us on Twitter

Changeset b4a4ad94 in mainline


Ignore:
Timestamp:
2019-01-02T21:17:05Z (2 years ago)
Author:
Vojtech Horky <vojtech.horky@…>
Branches:
lfn, master
Children:
b9f1585
Parents:
79bb48e
git-author:
Vojtech Horky <vojtech.horky@…> (2019-01-02 21:10:03)
git-committer:
Vojtech Horky <vojtech.horky@…> (2019-01-02 21:17:05)
Message:

perf: compute average throughput correctly

The correct way to compute average throughput is to use geometric mean.
See comment in the source code for an example why it gives more
meaningful results.

Also added a simplified square root approximation (using Babylonian
method) to compute standard deviation.

Note that the diff is awful as the context did not catch on the
compute_stats() function.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • uspace/app/perf/perf.c

    r79bb48e rb4a4ad94  
    3636
    3737#include <assert.h>
     38#include <math.h>
    3839#include <stdio.h>
    3940#include <stddef.h>
     
    6162                double nanos = stopwatch_get_nanos(stopwatch);
    6263                double thruput = (double) workload_size / (nanos / 1000000000.0l);
    63                 printf(", %.0f cycles/s.\n", thruput);
     64                printf(", %.0f ops/s.\n", thruput);
    6465        } else {
    6566                printf(".\n");
     
    6768}
    6869
     70/*
     71 * This is a temporary solution until we have proper sqrt() implementation
     72 * in libmath.
     73 *
     74 * The algorithm uses Babylonian method [1].
     75 *
     76 * [1] https://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Babylonian_method
     77 */
     78static double estimate_square_root(double value, double precision)
     79{
     80        double estimate = 1.;
     81        double prev_estimate = estimate + 10 * precision;
     82
     83        while (fabs(estimate - prev_estimate) > precision) {
     84                prev_estimate = estimate;
     85                estimate = (prev_estimate + value / prev_estimate) / 2.;
     86        }
     87
     88        return estimate;
     89}
     90
     91/*
     92 * Compute available statistics from given stopwatches.
     93 *
     94 * We compute normal mean for average duration of the workload and geometric
     95 * mean for average thruput. Note that geometric mean is necessary to compute
     96 * average throughput correctly - consider the following example:
     97 *  - we run always 60 operations,
     98 *  - first run executes in 30 s (i.e. 2 ops/s)
     99 *  - and second one in 10 s (6 ops/s).
     100 * Then, naively, average throughput would be (2+6)/2 = 4 [ops/s]. However, we
     101 * actually executed 60 + 60 ops in 30 + 10 seconds. So the actual average
     102 * throughput is 3 ops/s (which is exactly what geometric mean means).
     103 *
     104 */
     105static void compute_stats(stopwatch_t *stopwatch, size_t stopwatch_count,
     106    uint64_t workload_size, double precision, double *out_duration_avg,
     107    double *out_duration_sigma, double *out_thruput_avg)
     108{
     109        double inv_thruput_sum = 0.0;
     110        double nanos_sum = 0.0;
     111        double nanos_sum2 = 0.0;
     112
     113        for (size_t i = 0; i < stopwatch_count; i++) {
     114                double nanos = stopwatch_get_nanos(&stopwatch[i]);
     115                double thruput = (double) workload_size / nanos;
     116
     117                inv_thruput_sum += 1.0 / thruput;
     118                nanos_sum += nanos;
     119                nanos_sum2 += nanos * nanos;
     120        }
     121        *out_duration_avg = nanos_sum / stopwatch_count;
     122        double sigma2 = (nanos_sum2 - nanos_sum * (*out_duration_avg)) /
     123            ((double) stopwatch_count - 1);
     124        // FIXME: implement sqrt properly
     125        *out_duration_sigma = estimate_square_root(sigma2, precision);
     126        *out_thruput_avg = 1.0 / (inv_thruput_sum / stopwatch_count);
     127}
     128
    69129static void summary_stats(stopwatch_t *stopwatch, size_t stopwatch_count,
    70130    benchmark_t *bench, uint64_t workload_size)
    71131{
    72         double sum = 0.0;
    73         double sum_square = 0.0;
    74 
    75         for (size_t i = 0; i < stopwatch_count; i++) {
    76                 double nanos = stopwatch_get_nanos(&stopwatch[i]);
    77                 double thruput = (double) workload_size / (nanos / 1000000000.0l);
    78                 sum += thruput;
    79                 sum_square += thruput * thruput;
    80         }
    81 
    82         double avg = sum / stopwatch_count;
    83 
    84         double sd_numer = sum_square + stopwatch_count * avg * avg - 2 * sum * avg;
    85         double sd_square = sd_numer / ((double) stopwatch_count - 1);
    86 
    87         printf("Average: %.0f cycles/s Std.dev^2: %.0f cycles/s Samples: %zu\n",
    88             avg, sd_square, stopwatch_count);
     132        double duration_avg, duration_sigma, thruput_avg;
     133        compute_stats(stopwatch, stopwatch_count, workload_size, 0.001,
     134            &duration_avg, &duration_sigma, &thruput_avg);
     135
     136        printf("Average: %" PRIu64 " ops in %.0f us (sd %.0f us); "
     137            "%.0f ops/s; Samples: %zu\n",
     138            workload_size, duration_avg / 1000.0, duration_sigma / 1000.0,
     139            thruput_avg * 1000000000.0, stopwatch_count);
    89140}
    90141
Note: See TracChangeset for help on using the changeset viewer.