Changeset b2ec5cf in mainline for kernel/generic/include/atomic.h


Timestamp:
2023-04-15T16:47:54Z
Author:
Jiří Zárevúcky <zarevucky.jiri@…>
Branches:
master, ticket/834-toolchain-update, topic/msim-upgrade, topic/simplify-dev-export
Children:
169815e
Parents:
dd218ea
git-author:
Jiří Zárevúcky <zarevucky.jiri@…> (2023-04-15 11:54:58)
git-committer:
Jiří Zárevúcky <zarevucky.jiri@…> (2023-04-15 16:47:54)
Message:

Implement atomic_time_stat_t for lockless timekeeping

We keep monotonically increasing temporal statistics in several places.
They are frequently written by the thread that owns them and only rarely
read by other threads in certain syscalls. This new code avoids the need
for synchronization on the writer side. On 64b systems, we can simply
assume that 64b writes are indivisible, and the relaxed atomic
reads/writes merely serve to formally prevent the C undefined behavior
that data races would otherwise cause (they translate to regular memory
reads/writes in assembly).

On 32b systems, we use the same algorithm that has long been used for
userspace clock access: three fields plus memory barriers that keep reads
consistent while the upper half changes. Only readers always synchronize,
though; writers avoid the barriers in the common case, when the upper
half remains unchanged.
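
For context, a hypothetical usage sketch (the accounting structure and
function names are illustrative, not part of this changeset; it assumes
the new API is reached via the kernel's <atomic.h> include): the owning
thread increments its statistic from its own context, while a stats
syscall on any other CPU reads it safely.

#include <atomic.h>

typedef struct {
        /* Owned by a single thread/CPU; bumped on every accounting tick. */
        atomic_time_stat_t kcycles;
} accounting_t;

/* Writer side: runs only in the owner's context, so increments are
 * naturally serialized and no atomic read-modify-write is needed. */
static void accounting_tick(accounting_t *acc, int cycles)
{
        atomic_time_increment(&acc->kcycles, cycles);
}

/* Reader side: may be called from any thread, e.g. by a stats syscall. */
static uint64_t accounting_get(accounting_t *acc)
{
        return atomic_time_read(&acc->kcycles);
}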

File:
1 edited

  • kernel/generic/include/atomic.h

    rdd218ea rb2ec5cf  
            (new_val), memory_order_relaxed)

#if __64_BITS__

typedef struct {
        atomic_uint_fast64_t value;
} atomic_time_stat_t;

#define ATOMIC_TIME_INITIALIZER() (atomic_time_stat_t) {}

static inline void atomic_time_increment(atomic_time_stat_t *time, int a)
{
        /*
         * We require increments to be synchronized with each other, so we
         * can use ordinary reads and writes instead of more expensive atomic
         * read-modify-write operations.
         */
        uint64_t v = atomic_load_explicit(&time->value, memory_order_relaxed);
        atomic_store_explicit(&time->value, v + a, memory_order_relaxed);
}

static inline uint64_t atomic_time_read(atomic_time_stat_t *time)
{
        return atomic_load_explicit(&time->value, memory_order_relaxed);
}

#else

/**
 * A monotonically increasing 64b time statistic.
 * Increments must be synchronized with each other (or limited to a single
 * thread/CPU), but reads can be performed from any thread.
 */
typedef struct {
        uint64_t true_value;
        atomic_uint_fast32_t high1;
        atomic_uint_fast32_t high2;
        atomic_uint_fast32_t low;
} atomic_time_stat_t;

#define ATOMIC_TIME_INITIALIZER() (atomic_time_stat_t) {}

static inline void atomic_time_increment(atomic_time_stat_t *time, int a)
{
        /*
         * On 32b architectures, we can't rely on 64b memory reads/writes being
         * architecturally atomic, but we also don't want to pay the cost of
         * emulating atomic reads/writes, so instead we split the value in half
         * and perform some ordering magic to make sure readers always get a
         * consistent value.
         */

        /* true_value is only used by the writer, so this need not be atomic. */
        uint64_t val = time->true_value;
        uint32_t old_high = val >> 32;
        val += a;
        uint32_t new_high = val >> 32;
        time->true_value = val;

        /* Tell GCC that the first branch is far more likely than the second. */
        if (__builtin_expect(old_high == new_high, 1)) {
                /* If the high half didn't change, we need not bother with barriers. */
                atomic_store_explicit(&time->low, (uint32_t) val, memory_order_relaxed);
        } else {
                /*
                 * If both halves changed, extra ordering is necessary.
                 * The idea is that if a reader sees the same value in high1
                 * and high2, it is guaranteed to have read the correct low
                 * half for that value.
                 *
                 * This is the same sequence that userspace uses to read the
                 * clock.
                 */
                atomic_store_explicit(&time->high1, new_high, memory_order_relaxed);
                atomic_store_explicit(&time->low, (uint32_t) val, memory_order_release);
                atomic_store_explicit(&time->high2, new_high, memory_order_release);
        }
}

static inline uint64_t atomic_time_read(atomic_time_stat_t *time)
{
        uint32_t high2 = atomic_load_explicit(&time->high2, memory_order_acquire);
        uint32_t low = atomic_load_explicit(&time->low, memory_order_acquire);
        uint32_t high1 = atomic_load_explicit(&time->high1, memory_order_relaxed);

        if (high1 != high2)
                low = 0;

        /* If the values differ, high1 is always the newer value. */
        return (uint64_t) high1 << 32 | (uint64_t) low;
}

#endif /* __64_BITS__ */

#endif
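
A note on the reader's fallback: when high1 != high2, a rollover store
sequence is in flight. Since high1 is written first, it already holds the
new upper half, and the counter has only just crossed that 2^32 boundary,
so returning (high1 << 32 | 0) undershoots the just-written value by less
than one increment, which is harmless for a monotonic statistic. The
standalone C11 sketch below mirrors the 32b path outside the kernel to
show the rollover logic deterministically (the userspace includes and
printf are illustrative, __builtin_expect is omitted for brevity, and a
single thread exercises only the value logic, not the memory ordering):

#include <inttypes.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
        uint64_t true_value;
        atomic_uint_fast32_t high1;
        atomic_uint_fast32_t high2;
        atomic_uint_fast32_t low;
} atomic_time_stat_t;

static void atomic_time_increment(atomic_time_stat_t *time, int a)
{
        uint64_t val = time->true_value;
        uint32_t old_high = val >> 32;
        val += a;
        uint32_t new_high = val >> 32;
        time->true_value = val;

        if (old_high == new_high) {
                /* Common case: high half unchanged, no barriers needed. */
                atomic_store_explicit(&time->low, (uint32_t) val,
                    memory_order_relaxed);
        } else {
                /* Rollover: publish high1, then low, then high2. */
                atomic_store_explicit(&time->high1, new_high,
                    memory_order_relaxed);
                atomic_store_explicit(&time->low, (uint32_t) val,
                    memory_order_release);
                atomic_store_explicit(&time->high2, new_high,
                    memory_order_release);
        }
}

static uint64_t atomic_time_read(atomic_time_stat_t *time)
{
        uint32_t high2 = atomic_load_explicit(&time->high2, memory_order_acquire);
        uint32_t low = atomic_load_explicit(&time->low, memory_order_acquire);
        uint32_t high1 = atomic_load_explicit(&time->high1, memory_order_relaxed);

        if (high1 != high2)
                low = 0;

        return (uint64_t) high1 << 32 | (uint64_t) low;
}

int main(void)
{
        /* Start just below the 32b boundary; the loop crosses it. */
        atomic_time_stat_t t = { .true_value = UINT64_C(0xfffffffe) };

        for (int i = 0; i < 4; i++) {
                atomic_time_increment(&t, 1);
                /* Prints 4294967295, 4294967296, 4294967297, 4294967298. */
                printf("%" PRIu64 "\n", atomic_time_read(&t));
        }
        return 0;
}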