Fork us on GitHub Follow us on Facebook Follow us on Twitter

Changeset d4d36f9 in mainline


Ignore:
Timestamp:
2012-07-30T05:23:06Z (8 years ago)
Author:
Adam Hraska <adam.hraska+hos@…>
Branches:
master
Children:
452e91b
Parents:
f0fcb04
Message:

rcu: Added another preemptible kernel rcu - A-RCU.

Files:
6 edited

Legend:

Unmodified
Added
Removed
  • HelenOS.config

    rf0fcb04 rd4d36f9  
    385385! [COMPILER=gcc_cross|COMPILER=gcc_native] CONFIG_LTO (n/y)
    386386
     387% Kernel RCU algorithm
     388@ "PREEMPT_PODZIMEK" Preemptible Podzimek-RCU
     389@ "PREEMPT_A" Preemptible A-RCU
     390! RCU (choice)
     391
    387392
    388393## Hardware support
  • kernel/generic/include/arch.h

    rf0fcb04 rd4d36f9  
    3838#include <arch/arch.h>  /* arch_pre_main() */
    3939#include <arch/asm.h>   /* get_stack_base() */
     40#include <config.h>
    4041
    4142
     
    6970typedef struct {
    7071        size_t preemption;     /**< Preemption disabled counter and flag. */
     72#ifdef RCU_PREEMPT_A
     73        size_t rcu_nesting;    /**< RCU nesting count and flag. */
     74#endif
    7175        struct thread *thread; /**< Current thread. */
    7276        struct task *task;     /**< Current task. */
  • kernel/generic/include/synch/rcu.h

    rf0fcb04 rd4d36f9  
    123123extern void _rcu_synchronize(bool expedite);
    124124
     125
     126#ifdef RCU_PREEMPT_A
     127
     128#define RCU_CNT_INC       (1 << 1)
     129#define RCU_WAS_PREEMPTED (1 << 0)
     130
     131/* Fwd. decl. because of inlining. */
     132void _rcu_preempted_unlock(void);
     133
     134/** Delimits the start of an RCU reader critical section.
     135 *
     136 * Reader sections may be nested and are preemptable. You must not
     137 * however block/sleep within reader sections.
     138 */
     139static inline void rcu_read_lock(void)
     140{
     141        THE->rcu_nesting += RCU_CNT_INC;
     142}
     143
     144/** Delimits the end of an RCU reader critical section. */
     145static inline void rcu_read_unlock(void)
     146{
     147        THE->rcu_nesting -= RCU_CNT_INC;
     148       
     149        if (RCU_WAS_PREEMPTED == THE->rcu_nesting) {
     150                _rcu_preempted_unlock();
     151        }
     152}
     153
     154#elif defined(RCU_PREEMPT_PODZIMEK)
    125155
    126156/* Fwd decl. required by the inlined implementation. Not part of public API. */
     
    210240        preemption_enable();
    211241}
    212 
    213 
    214242#endif
    215243
     244#endif
     245
    216246/** @}
    217247 */
  • kernel/generic/include/synch/rcu_types.h

    rf0fcb04 rd4d36f9  
    3939#include <synch/semaphore.h>
    4040
     41#if !defined(RCU_PREEMPT_PODZIMEK) && !defined(RCU_PREEMPT_A)
     42#define RCU_PREEMPT_A
     43//#error You must select an RCU algorithm.
     44#endif
     45
    4146
    4247/* Fwd decl. */
     
    5863/** RCU related per-cpu data. */
    5964typedef struct rcu_cpu_data {
    60         /** The cpu recorded a quiescent state last time during this grace period */
     65        /** The cpu recorded a quiescent state last time during this grace period.*/
    6166        rcu_gp_t last_seen_gp;
     67
     68#ifdef RCU_PREEMPT_PODZIMEK
     69        /** This cpu has not yet passed a quiescent state and it is delaying the
     70         * detector. Once it reaches a QS it must sema_up(rcu.remaining_readers).
     71         */
     72        bool is_delaying_gp;
    6273       
     74        /** True if we should signal the detector that we exited a reader section.
     75         *
     76         * Equal to (THREAD->rcu.was_preempted || CPU->rcu.is_delaying_gp).
     77         */
     78        bool signal_unlock;
     79
    6380        /** The number of times an RCU reader section is nested on this cpu.
    6481         *
     
    6885         */
    6986        size_t nesting_cnt;
    70 
     87#endif
     88       
    7189        /** Callbacks to invoke once the current grace period ends, ie cur_cbs_gp.
    7290         * Accessed by the local reclaimer only.
     
    102120        rcu_gp_t next_cbs_gp;
    103121       
    104         /** This cpu has not yet passed a quiescent state and it is delaying the
    105          * detector. Once it reaches a QS it must sema_up(rcu.remaining_readers).
    106          */
    107         bool is_delaying_gp;
    108        
    109         /** True if we should signal the detector that we exited a reader section.
    110          *
    111          * Equal to (THREAD->rcu.was_preempted || CPU->rcu.is_delaying_gp).
    112          */
    113         bool signal_unlock;
    114        
    115122        /** Positive if there are callbacks pending in arriving_cbs. */
    116123        semaphore_t arrived_flag;
     
    142149         */
    143150        size_t nesting_cnt;
     151
     152#ifdef RCU_PREEMPT_PODZIMEK
    144153       
    145154        /** True if the thread was preempted in a reader section.
     
    151160         */
    152161        bool was_preempted;
     162#endif
     163       
    153164        /** Preempted threads link. Access with rcu.preempt_lock.*/
    154165        link_t preempt_link;
  • kernel/generic/src/proc/the.c

    rf0fcb04 rd4d36f9  
    6060        the->as = NULL;
    6161        the->magic = MAGIC;
     62#ifdef RCU_PREEMPT_A   
     63        the->rcu_nesting = 0;
     64#endif
    6265}
    6366
  • kernel/generic/src/synch/rcu.c

    rf0fcb04 rd4d36f9  
    8181        /** Detector uses to signal reclaimers that a grace period ended. */
    8282        condvar_t gp_ended;
    83         /** Reclaimers notify the detector when they request more grace periods.*/
    84         condvar_t req_gp_changed;
    8583        /** Reclaimers use to notify the detector to accelerate GP detection. */
    8684        condvar_t expedite_now;
    8785        /**
    88          * The detector waits on this semaphore for any readers delaying the GP.
    89          *
    90          * Each of the cpus with readers that are delaying the current GP
    91          * must up() this sema once they reach a quiescent state. If there
    92          * are any readers in cur_preempted (ie preempted preexisting) and
    93          * they are already delaying GP detection, the last to unlock its
    94          * reader section must up() this sema once.
    95          */
    96         semaphore_t remaining_readers;
    97        
    98         /** Protects the 4 fields below. */
     86         * Protects: req_gp_end_cnt, req_expedited_cnt, completed_gp, _rcu_cur_gp;
     87         * or: completed_gp, _rcu_cur_gp
     88         */
    9989        SPINLOCK_DECLARE(gp_lock);
    100         /** Number of grace period ends the detector was requested to announce. */
    101         size_t req_gp_end_cnt;
    102         /** Number of consecutive grace periods to detect quickly and aggressively.*/
    103         size_t req_expedited_cnt;
    10490        /**
    10591         * The number of the most recently completed grace period. At most
     
    122108        bool preempt_blocking_det;
    123109       
     110#ifdef RCU_PREEMPT_A
     111       
     112        /**
     113         * The detector waits on this semaphore for any preempted readers
     114         * delaying the grace period once all cpus pass a quiescent state.
     115         */
     116        semaphore_t remaining_readers;
     117
     118#elif defined(RCU_PREEMPT_PODZIMEK)
     119       
     120        /** Reclaimers notify the detector when they request more grace periods.*/
     121        condvar_t req_gp_changed;
     122        /** Number of grace period ends the detector was requested to announce. */
     123        size_t req_gp_end_cnt;
     124        /** Number of consecutive grace periods to detect quickly and aggressively.*/
     125        size_t req_expedited_cnt;
    124126        /**
    125127         * Number of cpus with readers that are delaying the current GP.
     
    127129         */
    128130        atomic_t delaying_cpu_cnt;
     131        /**
     132         * The detector waits on this semaphore for any readers delaying the GP.
     133         *
     134         * Each of the cpus with readers that are delaying the current GP
     135         * must up() this sema once they reach a quiescent state. If there
     136         * are any readers in cur_preempted (ie preempted preexisting) and
     137         * they are already delaying GP detection, the last to unlock its
     138         * reader section must up() this sema once.
     139         */
     140        semaphore_t remaining_readers;
     141#endif
    129142       
    130143        /** Excludes simultaneous rcu_barrier() calls. */
     
    149162static rcu_data_t rcu;
    150163
    151 static void start_detector(void);
    152164static void start_reclaimers(void);
    153 static void read_unlock_impl(size_t *pnesting_cnt);
    154165static void synch_complete(rcu_item_t *rcu_item);
    155166static void add_barrier_cb(void *arg);
     
    164175static void exec_completed_cbs(rcu_gp_t last_completed_gp);
    165176static void exec_cbs(rcu_item_t **phead);
    166 static void req_detection(size_t req_cnt);
    167177static bool wait_for_cur_cbs_gp_end(bool expedite, rcu_gp_t *last_completed_gp);
    168178static void upd_missed_gp_in_wait(rcu_gp_t completed_gp);
     179
     180#ifdef RCU_PREEMPT_PODZIMEK
     181static void start_detector(void);
     182static void read_unlock_impl(size_t *pnesting_cnt);
     183static void req_detection(size_t req_cnt);
    169184static bool cv_wait_for_gp(rcu_gp_t wait_on_gp);
    170185static void detector(void *);
    171186static bool wait_for_detect_req(void);
    172 static void start_new_gp(void);
    173187static void end_cur_gp(void);
    174188static bool wait_for_readers(void);
    175 static void rm_quiescent_cpus(cpu_mask_t *cpu_mask);
    176189static bool gp_sleep(void);
    177190static void interrupt_delaying_cpus(cpu_mask_t *cpu_mask);
     191static bool wait_for_delaying_cpus(void);
     192#elif defined(RCU_PREEMPT_A)
     193static bool wait_for_readers(bool expedite);
     194static bool gp_sleep(bool *expedite);
     195#endif
     196
     197static void start_new_gp(void);
     198static void rm_quiescent_cpus(cpu_mask_t *cpu_mask);
     199static void sample_cpus(cpu_mask_t *reader_cpus, void *arg);
    178200static void sample_local_cpu(void *);
    179 static bool wait_for_delaying_cpus(void);
    180201static bool wait_for_preempt_reader(void);
     202static void note_preempted_reader(void);
     203static void rm_preempted_reader(void);
    181204static void upd_max_cbs_in_slice(void);
    182205
     
    187210{
    188211        condvar_initialize(&rcu.gp_ended);
    189         condvar_initialize(&rcu.req_gp_changed);
    190212        condvar_initialize(&rcu.expedite_now);
    191         semaphore_initialize(&rcu.remaining_readers, 0);
    192        
     213
    193214        spinlock_initialize(&rcu.gp_lock, "rcu.gp_lock");
    194         rcu.req_gp_end_cnt = 0;
    195         rcu.req_expedited_cnt = 0;
    196215        _rcu_cur_gp = 0;
    197216        rcu.completed_gp = 0;
     
    205224        atomic_set(&rcu.barrier_wait_cnt, 0);
    206225        waitq_initialize(&rcu.barrier_wq);
    207        
     226
     227        semaphore_initialize(&rcu.remaining_readers, 0);
     228       
     229#ifdef RCU_PREEMPT_PODZIMEK
     230        condvar_initialize(&rcu.req_gp_changed);
     231       
     232        rcu.req_gp_end_cnt = 0;
     233        rcu.req_expedited_cnt = 0;
    208234        atomic_set(&rcu.delaying_cpu_cnt, 0);
     235#endif
    209236       
    210237        rcu.detector_thr = 0;
     
    222249                rcu_init();
    223250        }
    224        
     251
    225252        CPU->rcu.last_seen_gp = 0;
    226        
     253
     254#ifdef RCU_PREEMPT_PODZIMEK
    227255        CPU->rcu.nesting_cnt = 0;
     256        CPU->rcu.is_delaying_gp = false;
     257        CPU->rcu.signal_unlock = false;
     258#endif
    228259       
    229260        CPU->rcu.cur_cbs = 0;
     
    238269        CPU->rcu.next_cbs_gp = 0;
    239270       
    240         CPU->rcu.is_delaying_gp = false;
    241         CPU->rcu.signal_unlock = false;
    242        
    243271        semaphore_initialize(&CPU->rcu.arrived_flag, 0);
    244272
     
    258286void rcu_kinit_init(void)
    259287{
     288#ifdef RCU_PREEMPT_PODZIMEK
    260289        start_detector();
     290#endif
     291       
    261292        start_reclaimers();
    262293}
     
    266297{
    267298        thread->rcu.nesting_cnt = 0;
     299
     300#ifdef RCU_PREEMPT_PODZIMEK
    268301        thread->rcu.was_preempted = false;
     302#endif
     303       
    269304        link_initialize(&thread->rcu.preempt_link);
    270305}
    271306
    272 /** Called from scheduler() when exiting the current thread.
    273  *
    274  * Preemption or interrupts are disabled and the scheduler() already
    275  * switched away from the current thread, calling rcu_after_thread_ran().
    276  */
    277 void rcu_thread_exiting(void)
    278 {
    279         ASSERT(THREAD != 0);
    280         ASSERT(THREAD->state == Exiting);
    281         ASSERT(PREEMPTION_DISABLED || interrupts_disabled());
    282         /*
    283          * The scheduler() must have already switched to a temporary
    284          * nesting counter for interrupt handlers (we could be idle)
    285          * so that interrupt handlers do not modify the exiting thread's
    286          * reader section nesting count while we examine/process it.
    287          */
    288        
    289         /*
    290          * The thread forgot to exit its reader critical section.
    291          * It is a bug, but rather than letting the entire system lock up
    292          * forcefully leave the reader section. The thread is not holding
    293          * any references anyway since it is exiting so it is safe.
    294          */
    295         if (0 < THREAD->rcu.nesting_cnt) {
    296                 THREAD->rcu.nesting_cnt = 1;
    297                 read_unlock_impl(&THREAD->rcu.nesting_cnt);
    298         }
    299 }
    300307
    301308/** Cleans up global RCU resources and stops dispatching callbacks.
     
    318325        }
    319326
     327#ifdef RCU_PREEMPT_PODZIMEK
    320328        /* Stop the detector and wait. */
    321329        if (rcu.detector_thr) {
     
    325333                rcu.detector_thr = 0;
    326334        }
    327 }
    328 
    329 /** Starts the detector thread. */
    330 static void start_detector(void)
    331 {
    332         rcu.detector_thr =
    333                 thread_create(detector, 0, TASK, THREAD_FLAG_NONE, "rcu-det");
    334        
    335         if (!rcu.detector_thr)
    336                 panic("Failed to create RCU detector thread.");
    337        
    338         thread_ready(rcu.detector_thr);
    339 }
    340 
    341 /** Creates and runs cpu-bound reclaimer threads. */
    342 static void start_reclaimers(void)
    343 {
    344         for (unsigned int cpu_id = 0; cpu_id < config.cpu_count; ++cpu_id) {
    345                 char name[THREAD_NAME_BUFLEN] = {0};
    346                
    347                 snprintf(name, THREAD_NAME_BUFLEN - 1, "rcu-rec/%u", cpu_id);
    348                
    349                 cpus[cpu_id].rcu.reclaimer_thr =
    350                         thread_create(reclaimer, 0, TASK, THREAD_FLAG_NONE, name);
    351 
    352                 if (!cpus[cpu_id].rcu.reclaimer_thr)
    353                         panic("Failed to create RCU reclaimer thread on cpu%u.", cpu_id);
    354 
    355                 thread_wire(cpus[cpu_id].rcu.reclaimer_thr, &cpus[cpu_id]);
    356                 thread_ready(cpus[cpu_id].rcu.reclaimer_thr);
    357         }
     335#endif
    358336}
    359337
     
    366344       
    367345        return completed;
     346}
     347
     348/** Creates and runs cpu-bound reclaimer threads. */
     349static void start_reclaimers(void)
     350{
     351        for (unsigned int cpu_id = 0; cpu_id < config.cpu_count; ++cpu_id) {
     352                char name[THREAD_NAME_BUFLEN] = {0};
     353               
     354                snprintf(name, THREAD_NAME_BUFLEN - 1, "rcu-rec/%u", cpu_id);
     355               
     356                cpus[cpu_id].rcu.reclaimer_thr =
     357                        thread_create(reclaimer, 0, TASK, THREAD_FLAG_NONE, name);
     358
     359                if (!cpus[cpu_id].rcu.reclaimer_thr)
     360                        panic("Failed to create RCU reclaimer thread on cpu%u.", cpu_id);
     361
     362                thread_wire(cpus[cpu_id].rcu.reclaimer_thr, &cpus[cpu_id]);
     363                thread_ready(cpus[cpu_id].rcu.reclaimer_thr);
     364        }
     365}
     366
     367#ifdef RCU_PREEMPT_PODZIMEK
     368
     369/** Starts the detector thread. */
     370static void start_detector(void)
     371{
     372        rcu.detector_thr =
     373                thread_create(detector, 0, TASK, THREAD_FLAG_NONE, "rcu-det");
     374       
     375        if (!rcu.detector_thr)
     376                panic("Failed to create RCU detector thread.");
     377       
     378        thread_ready(rcu.detector_thr);
    368379}
    369380
     
    440451                THREAD->rcu.was_preempted = false;
    441452
    442                 irq_spinlock_lock(&rcu.preempt_lock, false);
    443                
    444                 bool prev_empty = list_empty(&rcu.cur_preempted);
    445                 list_remove(&THREAD->rcu.preempt_link);
    446                 bool now_empty = list_empty(&rcu.cur_preempted);
    447                
    448                 /* This was the last reader in cur_preempted. */
    449                 bool last_removed = now_empty && !prev_empty;
    450                
    451                 /*
    452                  * Preempted readers are blocking the detector and
    453                  * this was the last reader blocking the current GP.
    454                  */
    455                 if (last_removed && rcu.preempt_blocking_det) {
    456                         rcu.preempt_blocking_det = false;
    457                         semaphore_up(&rcu.remaining_readers);
    458                 }
    459                
    460                 irq_spinlock_unlock(&rcu.preempt_lock, false);
     453                rm_preempted_reader();
    461454        }
    462455       
     
    466459        interrupts_restore(ipl);
    467460}
     461
     462#endif /* RCU_PREEMPT_PODZIMEK */
    468463
    469464typedef struct synch_item {
     
    488483{
    489484        /* Calling from a reader section will deadlock. */
    490         ASSERT(0 == CPU->rcu.nesting_cnt);
     485        ASSERT(!rcu_read_locked());
    491486       
    492487        synch_item_t completion;
     
    630625        return cur_cbs_empty() && next_cbs_empty() && arriving_cbs_empty();
    631626}
     627
    632628
    633629/** Reclaimer thread dispatches locally queued callbacks once a GP ends. */
     
    747743}
    748744
    749 
    750745/** Prepares another batch of callbacks to dispatch at the next grace period.
    751746 *
     
    817812        return expedite;       
    818813}
     814
     815
     816#ifdef RCU_PREEMPT_A
     817
     818/** Waits for the grace period associated with callbacks cur_cbs to elapse.
     819 *
     820 * @param expedite Instructs the detector to aggressively speed up grace
     821 *            period detection without any delay.
     822 * @param completed_gp Returns the most recent completed grace period
     823 *            number.
     824 * @return false if the thread was interrupted and should stop.
     825 */
     826static bool wait_for_cur_cbs_gp_end(bool expedite, rcu_gp_t *completed_gp)
     827{
     828        spinlock_lock(&rcu.gp_lock);
     829
     830        ASSERT(CPU->rcu.cur_cbs_gp <= CPU->rcu.next_cbs_gp);
     831        ASSERT(CPU->rcu.cur_cbs_gp <= _rcu_cur_gp + 1);
     832       
     833        while (rcu.completed_gp < CPU->rcu.cur_cbs_gp) {
     834                /* GP has not yet started - start a new one. */
     835                if (rcu.completed_gp == _rcu_cur_gp) {
     836                        start_new_gp();
     837                        spinlock_unlock(&rcu.gp_lock);
     838
     839                        if (!wait_for_readers(expedite))
     840                                return false;
     841
     842                        spinlock_lock(&rcu.gp_lock);
     843                        /* Notify any reclaimers this GP has ended. */
     844                        rcu.completed_gp = _rcu_cur_gp;
     845                        condvar_broadcast(&rcu.gp_ended);
     846                } else {
     847                        /* GP detection is in progress.*/
     848                       
     849                        if (expedite)
     850                                condvar_signal(&rcu.expedite_now);
     851                       
     852                        /* Wait for the GP to complete. */
     853                        int ret = _condvar_wait_timeout_spinlock(&rcu.gp_ended,
     854                                &rcu.gp_lock, SYNCH_NO_TIMEOUT, SYNCH_FLAGS_INTERRUPTIBLE);
     855                       
     856                        if (ret == ESYNCH_INTERRUPTED) {
     857                                spinlock_unlock(&rcu.gp_lock);
     858                                return false;                   
     859                        }
     860                       
     861                        upd_missed_gp_in_wait(rcu.completed_gp);
     862                }
     863        }
     864       
     865        *completed_gp = rcu.completed_gp;
     866        spinlock_unlock(&rcu.gp_lock);
     867       
     868        return true;
     869}
     870
     871static bool wait_for_readers(bool expedite)
     872{
     873        DEFINE_CPU_MASK(reader_cpus);
     874       
     875        cpu_mask_active(reader_cpus);
     876        rm_quiescent_cpus(reader_cpus);
     877       
     878        while (!cpu_mask_is_none(reader_cpus)) {
     879                /* Give cpus a chance to context switch (a QS) and batch callbacks. */
     880                if(!gp_sleep(&expedite))
     881                        return false;
     882               
     883                rm_quiescent_cpus(reader_cpus);
     884                sample_cpus(reader_cpus, reader_cpus);
     885        }
     886       
     887        /* Update statistic. */
     888        if (expedite) {
     889                ++rcu.stat_expedited_cnt;
     890        }
     891       
     892        /*
     893         * All cpus have passed through a QS and see the most recent _rcu_cur_gp.
     894         * As a result newly preempted readers will associate with next_preempted
     895         * and the number of old readers in cur_preempted will monotonically
     896         * decrease. Wait for those old/preexisting readers.
     897         */
     898        return wait_for_preempt_reader();
     899}
     900
     901static bool gp_sleep(bool *expedite)
     902{
     903        if (*expedite) {
     904                scheduler();
     905                return true;
     906        } else {
     907                spinlock_lock(&rcu.gp_lock);
     908
     909                int ret = 0;
     910                ret = _condvar_wait_timeout_spinlock(&rcu.expedite_now, &rcu.gp_lock,
     911                        DETECT_SLEEP_MS * 1000, SYNCH_FLAGS_INTERRUPTIBLE);
     912
     913                /* rcu.expedite_now was signaled. */
     914                if (ret == ESYNCH_OK_BLOCKED) {
     915                        *expedite = true;
     916                }
     917
     918                spinlock_unlock(&rcu.gp_lock);
     919
     920                return (ret != ESYNCH_INTERRUPTED);
     921        }
     922}
     923
     924static void sample_local_cpu(void *arg)
     925{
     926        ASSERT(interrupts_disabled());
     927        cpu_mask_t *reader_cpus = (cpu_mask_t *)arg;
     928       
     929        bool locked = RCU_CNT_INC <= THE->rcu_nesting;
     930        bool passed_qs = (CPU->rcu.last_seen_gp == _rcu_cur_gp);
     931               
     932        if (locked && !passed_qs) {
     933                /*
     934                 * This cpu has not yet passed a quiescent state during this grace
     935                 * period and it is currently in a reader section. We'll have to
     936                 * try to sample this cpu again later.
     937                 */
     938        } else {
     939                /* Either not in a reader section or already passed a QS. */
     940                cpu_mask_reset(reader_cpus, CPU->id);
     941                /* Contain new reader sections and make prior changes visible to them.*/
     942                memory_barrier();
     943                CPU->rcu.last_seen_gp = _rcu_cur_gp;
     944        }
     945}
     946
     947/** Called by the scheduler() when switching away from the current thread. */
     948void rcu_after_thread_ran(void)
     949{
     950        ASSERT(interrupts_disabled());
     951
     952        /* Preempted a reader critical section for the first time. */
     953        if (rcu_read_locked() && !(THE->rcu_nesting & RCU_WAS_PREEMPTED)) {
     954                THE->rcu_nesting |= RCU_WAS_PREEMPTED;
     955                note_preempted_reader();
     956        }
     957       
     958        /* Save the thread's nesting count when it is not running. */
     959        THREAD->rcu.nesting_cnt = THE->rcu_nesting;
     960
     961        /* Clear rcu_nesting only after noting that a thread was preempted. */
     962        compiler_barrier();
     963        THE->rcu_nesting = 0;
     964
     965        if (CPU->rcu.last_seen_gp != _rcu_cur_gp) {
     966                /*
     967                 * Contain any memory accesses of old readers before announcing a QS.
     968                 * Also make changes from the previous GP visible to this cpu.
     969                 */
     970                memory_barrier();
     971                /*
     972                * The preempted reader has been noted globally. There are therefore
     973                * no readers running on this cpu so this is a quiescent state.
     974                */
     975                CPU->rcu.last_seen_gp = _rcu_cur_gp;
     976        }
     977
     978        /*
     979         * Forcefully associate the reclaimer with the highest priority
     980         * even if preempted due to its time slice running out.
     981         */
     982        if (THREAD == CPU->rcu.reclaimer_thr) {
     983                THREAD->priority = -1;
     984        }
     985       
     986        upd_max_cbs_in_slice();
     987}
     988
     989/** Called by the scheduler() when switching to a newly scheduled thread. */
     990void rcu_before_thread_runs(void)
     991{
     992        ASSERT(PREEMPTION_DISABLED || interrupts_disabled());
     993        ASSERT(!rcu_read_locked());
     994       
     995        /* Load the thread's saved nesting count from before it was preempted. */
     996        THE->rcu_nesting = THREAD->rcu.nesting_cnt;
     997}
     998
     999/** Called from scheduler() when exiting the current thread.
     1000 *
     1001 * Preemption or interrupts are disabled and the scheduler() already
     1002 * switched away from the current thread, calling rcu_after_thread_ran().
     1003 */
     1004void rcu_thread_exiting(void)
     1005{
     1006        ASSERT(PREEMPTION_DISABLED || interrupts_disabled());
     1007        /*
     1008         * The thread forgot to exit its reader critical section.
     1009         * It is a bug, but rather than letting the entire system lock up
     1010         * forcefully leave the reader section. The thread is not holding
     1011         * any references anyway since it is exiting so it is safe.
     1012         */
     1013        if (RCU_CNT_INC <= THREAD->rcu.nesting_cnt) {
     1014                /* Emulate _rcu_preempted_unlock() with the proper nesting count. */
     1015                if (THREAD->rcu.nesting_cnt & RCU_WAS_PREEMPTED) {
     1016                        ipl_t ipl = interrupts_disable();
     1017                        rm_preempted_reader();
     1018                        interrupts_restore(ipl);
     1019                }
     1020        }
     1021}
     1022
     1023/** Returns true if in an rcu reader section. */
     1024bool rcu_read_locked(void)
     1025{
     1026        return RCU_CNT_INC <= THE->rcu_nesting;
     1027}
     1028
     1029/** Invoked when a preempted reader finally exits its reader section. */
     1030void _rcu_preempted_unlock(void)
     1031{
     1032        ipl_t ipl = interrupts_disable();
     1033       
     1034        if (THE->rcu_nesting == RCU_WAS_PREEMPTED) {
     1035                THE->rcu_nesting = 0;
     1036                rm_preempted_reader();
     1037        }
     1038       
     1039        interrupts_restore(ipl);
     1040}
     1041
     1042#elif defined(RCU_PREEMPT_PODZIMEK)
    8191043
    8201044/** Waits for the grace period associated with callbacks cur_cbs to elapse.
     
    8971121}
    8981122
    899 static void upd_missed_gp_in_wait(rcu_gp_t completed_gp)
    900 {
    901         ASSERT(CPU->rcu.cur_cbs_gp <= completed_gp);
    902        
    903         size_t delta = (size_t)(completed_gp - CPU->rcu.cur_cbs_gp);
    904         CPU->rcu.stat_missed_gp_in_wait += delta;
    905 }
    906 
    907 
    908 /** Requests the detector to detect at least req_cnt consecutive grace periods.*/
    909 static void req_detection(size_t req_cnt)
    910 {
    911         if (rcu.req_gp_end_cnt < req_cnt) {
    912                 bool detector_idle = (0 == rcu.req_gp_end_cnt);
    913                 rcu.req_gp_end_cnt = req_cnt;
    914 
    915                 if (detector_idle) {
    916                         ASSERT(_rcu_cur_gp == rcu.completed_gp);
    917                         condvar_signal(&rcu.req_gp_changed);
    918                 }
    919         }
    920 }
    921 
    9221123/** Waits for an announcement of the end of the grace period wait_on_gp. */
    9231124static bool cv_wait_for_gp(rcu_gp_t wait_on_gp)
     
    9381139        return interrupted;
    9391140}
     1141
     1142/** Requests the detector to detect at least req_cnt consecutive grace periods.*/
     1143static void req_detection(size_t req_cnt)
     1144{
     1145        if (rcu.req_gp_end_cnt < req_cnt) {
     1146                bool detector_idle = (0 == rcu.req_gp_end_cnt);
     1147                rcu.req_gp_end_cnt = req_cnt;
     1148
     1149                if (detector_idle) {
     1150                        ASSERT(_rcu_cur_gp == rcu.completed_gp);
     1151                        condvar_signal(&rcu.req_gp_changed);
     1152                }
     1153        }
     1154}
     1155
    9401156
    9411157/** The detector thread detects and notifies reclaimers of grace period ends. */
     
    9851201}
    9861202
     1203
     1204static void end_cur_gp(void)
     1205{
     1206        ASSERT(spinlock_locked(&rcu.gp_lock));
     1207       
     1208        rcu.completed_gp = _rcu_cur_gp;
     1209        --rcu.req_gp_end_cnt;
     1210       
     1211        condvar_broadcast(&rcu.gp_ended);
     1212}
     1213
     1214/** Waits for readers that started before the current GP started to finish. */
     1215static bool wait_for_readers(void)
     1216{
     1217        DEFINE_CPU_MASK(reading_cpus);
     1218       
     1219        /* All running cpus have potential readers. */
     1220        cpu_mask_active(reading_cpus);
     1221
     1222        /*
     1223         * Give readers time to pass through a QS. Also, batch arriving
     1224         * callbacks in order to amortize detection overhead.
     1225         */
     1226        if (!gp_sleep())
     1227                return false;
     1228       
     1229        /* Non-intrusively determine which cpus have yet to pass a QS. */
     1230        rm_quiescent_cpus(reading_cpus);
     1231       
     1232        /* Actively interrupt cpus delaying the current GP and demand a QS. */
     1233        interrupt_delaying_cpus(reading_cpus);
     1234       
     1235        /* Wait for the interrupted cpus to notify us that they reached a QS. */
     1236        if (!wait_for_delaying_cpus())
     1237                return false;
     1238        /*
     1239         * All cpus recorded a QS or are still idle. Any new readers will be added
     1240         * to next_preempted if preempted, i.e. the number of readers in cur_preempted
     1241         * monotonically decreases.
     1242         */
     1243       
     1244        /* Wait for the last reader in cur_preempted to notify us it is done. */
     1245        if (!wait_for_preempt_reader())
     1246                return false;
     1247       
     1248        return true;
     1249}
     1250
     1251/** Sleeps a while if the current grace period is not to be expedited. */
     1252static bool gp_sleep(void)
     1253{
     1254        spinlock_lock(&rcu.gp_lock);
     1255
     1256        int ret = 0;
     1257        while (0 == rcu.req_expedited_cnt && 0 == ret) {
     1258                /* minor bug: sleeps for the same duration if woken up spuriously. */
     1259                ret = _condvar_wait_timeout_spinlock(&rcu.expedite_now, &rcu.gp_lock,
     1260                        DETECT_SLEEP_MS * 1000, SYNCH_FLAGS_INTERRUPTIBLE);
     1261        }
     1262       
     1263        if (0 < rcu.req_expedited_cnt) {
     1264                --rcu.req_expedited_cnt;
     1265                /* Update statistic. */
     1266                ++rcu.stat_expedited_cnt;
     1267        }
     1268       
     1269        spinlock_unlock(&rcu.gp_lock);
     1270       
     1271        return (ret != ESYNCH_INTERRUPTED);
     1272}
     1273
     1274/** Actively interrupts and checks the offending cpus for quiescent states. */
     1275static void interrupt_delaying_cpus(cpu_mask_t *cpu_mask)
     1276{
     1277        atomic_set(&rcu.delaying_cpu_cnt, 0);
     1278       
     1279        sample_cpus(cpu_mask, 0);
     1280}
     1281
     1282/** Invoked on a cpu delaying grace period detection.
     1283 *
     1284 * Induces a quiescent state for the cpu or it instructs remaining
     1285 * readers to notify the detector once they finish.
     1286 */
     1287static void sample_local_cpu(void *arg)
     1288{
     1289        ASSERT(interrupts_disabled());
     1290        ASSERT(!CPU->rcu.is_delaying_gp);
     1291       
     1292        /* Cpu did not pass a quiescent state yet. */
     1293        if (CPU->rcu.last_seen_gp != _rcu_cur_gp) {
     1294                /* Interrupted a reader in a reader critical section. */
     1295                if (0 < CPU->rcu.nesting_cnt) {
     1296                        ASSERT(!CPU->idle);
     1297                        /* Note to notify the detector from rcu_read_unlock(). */
     1298                        CPU->rcu.is_delaying_gp = true;
     1299                        /*
     1300                         * Set signal_unlock only after setting is_delaying_gp so
     1301                         * that NMI handlers do not accidentally clear it in unlock()
     1302                         * before seeing and acting upon is_delaying_gp.
     1303                         */
     1304                        compiler_barrier();
     1305                        CPU->rcu.signal_unlock = true;
     1306                       
     1307                        atomic_inc(&rcu.delaying_cpu_cnt);
     1308                } else {
     1309                        /*
     1310                         * The cpu did not enter any rcu reader sections since
     1311                         * the start of the current GP. Record a quiescent state.
     1312                         *
     1313                         * Or, we interrupted rcu_read_unlock_impl() right before
     1314                         * it recorded a QS. Record a QS for it. The memory barrier
     1315                         * contains the reader section's mem accesses before
     1316                         * updating last_seen_gp.
     1317                         *
     1318                         * Or, we interrupted rcu_read_lock() right after it recorded
     1319                         * a QS for the previous GP but before it got a chance to
     1320                         * increment its nesting count. The memory barrier again
     1321                         * stops the CS code from spilling out of the CS.
     1322                         */
     1323                        memory_barrier();
     1324                        CPU->rcu.last_seen_gp = _rcu_cur_gp;
     1325                }
     1326        } else {
     1327                /*
     1328                 * This cpu already acknowledged that it had passed through
     1329                 * a quiescent state since the start of cur_gp.
     1330                 */
     1331        }
     1332       
     1333        /*
     1334         * smp_call() makes sure any changes propagate back to the caller.
     1335         * In particular, it makes the most current last_seen_gp visible
     1336         * to the detector.
     1337         */
     1338}
     1339
     1340/** Waits for cpus delaying the current grace period if there are any. */
     1341static bool wait_for_delaying_cpus(void)
     1342{
     1343        int delaying_cpu_cnt = atomic_get(&rcu.delaying_cpu_cnt);
     1344
     1345        for (int i = 0; i < delaying_cpu_cnt; ++i){
     1346                if (!semaphore_down_interruptable(&rcu.remaining_readers))
     1347                        return false;
     1348        }
     1349       
     1350        /* Update statistic. */
     1351        rcu.stat_delayed_cnt += delaying_cpu_cnt;
     1352       
     1353        return true;
     1354}
     1355
     1356/** Called by the scheduler() when switching away from the current thread. */
     1357void rcu_after_thread_ran(void)
     1358{
     1359        ASSERT(interrupts_disabled());
     1360        /* todo: make is_delaying_gp and was_preempted NMI safe via local atomics.*/
     1361
     1362        /*
     1363         * Prevent NMI handlers from interfering. The detector will be notified
     1364         * here if CPU->rcu.is_delaying_gp and the current thread is no longer
     1365         * running so there is nothing to signal to the detector.
     1366         */
     1367        CPU->rcu.signal_unlock = false;
     1368        /* Separates clearing of .signal_unlock from CPU->rcu.nesting_cnt = 0. */
     1369        compiler_barrier();
     1370       
     1371        /* Save the thread's nesting count when it is not running. */
     1372        THREAD->rcu.nesting_cnt = CPU->rcu.nesting_cnt;
     1373        /* Interrupt handlers might use RCU while idle in scheduler(). */
     1374        CPU->rcu.nesting_cnt = 0;
     1375       
     1376        /* Preempted a reader critical section for the first time. */
     1377        if (0 < THREAD->rcu.nesting_cnt && !THREAD->rcu.was_preempted) {
     1378                THREAD->rcu.was_preempted = true;
     1379                note_preempted_reader();
     1380        }
     1381       
     1382        /*
     1383         * The preempted reader has been noted globally. There are therefore
     1384         * no readers running on this cpu so this is a quiescent state.
     1385         */
     1386        _rcu_record_qs();
     1387
     1388        /*
     1389         * This cpu is holding up the current GP. Let the detector know
     1390         * it has just passed a quiescent state.
     1391         *
     1392         * The detector waits separately for preempted readers, so we have
     1393         * to notify the detector even if we have just preempted a reader.
     1394         */
     1395        if (CPU->rcu.is_delaying_gp) {
     1396                CPU->rcu.is_delaying_gp = false;
     1397                semaphore_up(&rcu.remaining_readers);
     1398        }
     1399
     1400        /*
     1401         * Forcefully associate the detector with the highest priority
     1402         * even if preempted due to its time slice running out.
     1403         *
     1404         * todo: Replace with strict scheduler priority classes.
     1405         */
     1406        if (THREAD == rcu.detector_thr) {
     1407                THREAD->priority = -1;
     1408        }
     1409        else if (THREAD == CPU->rcu.reclaimer_thr) {
     1410                THREAD->priority = -1;
     1411        }
     1412       
     1413        upd_max_cbs_in_slice();
     1414}
     1415
     1416/** Called by the scheduler() when switching to a newly scheduled thread. */
     1417void rcu_before_thread_runs(void)
     1418{
     1419        ASSERT(PREEMPTION_DISABLED || interrupts_disabled());
     1420        ASSERT(0 == CPU->rcu.nesting_cnt);
     1421       
     1422        /* Load the thread's saved nesting count from before it was preempted. */
     1423        CPU->rcu.nesting_cnt = THREAD->rcu.nesting_cnt;
     1424        /*
     1425         * In the unlikely event that an NMI occurs between the loading of the
     1426         * variables and setting signal_unlock, the NMI handler may invoke
     1427         * rcu_read_unlock() and clear signal_unlock. In that case we will
     1428         * incorrectly overwrite signal_unlock from false to true. This
     1429         * situation is benign and the next rcu_read_unlock() will at worst
     1430         * needlessly invoke _rcu_signal_unlock().
     1431         */
     1432        CPU->rcu.signal_unlock = THREAD->rcu.was_preempted || CPU->rcu.is_delaying_gp;
     1433}
     1434
     1435/** Called from scheduler() when exiting the current thread.
     1436 *
     1437 * Preemption or interrupts are disabled and the scheduler() already
     1438 * switched away from the current thread, calling rcu_after_thread_ran().
     1439 */
     1440void rcu_thread_exiting(void)
     1441{
     1442        ASSERT(THREAD != 0);
     1443        ASSERT(THREAD->state == Exiting);
     1444        ASSERT(PREEMPTION_DISABLED || interrupts_disabled());
     1445        /*
     1446         * The thread forgot to exit its reader critical section.
     1447         * It is a bug, but rather than letting the entire system lock up
     1448         * forcefully leave the reader section. The thread is not holding
     1449         * any references anyway since it is exiting so it is safe.
     1450         */
     1451        if (0 < THREAD->rcu.nesting_cnt) {
     1452                THREAD->rcu.nesting_cnt = 1;
     1453                read_unlock_impl(&THREAD->rcu.nesting_cnt);
     1454        }
     1455}
     1456
     1457
     1458#endif /* RCU_PREEMPT_PODZIMEK */
     1459
    9871460/** Announces the start of a new grace period for preexisting readers to ack. */
    9881461static void start_new_gp(void)
     
    10081481}
    10091482
    1010 static void end_cur_gp(void)
    1011 {
    1012         ASSERT(spinlock_locked(&rcu.gp_lock));
    1013        
    1014         rcu.completed_gp = _rcu_cur_gp;
    1015         --rcu.req_gp_end_cnt;
    1016        
    1017         condvar_broadcast(&rcu.gp_ended);
    1018 }
    1019 
    1020 /** Waits for readers that started before the current GP started to finish. */
    1021 static bool wait_for_readers(void)
    1022 {
    1023         DEFINE_CPU_MASK(reading_cpus);
    1024        
    1025         /* All running cpus have potential readers. */
    1026         cpu_mask_active(reading_cpus);
    1027 
     1483/** Remove those cpus from the mask that have already passed a quiescent
     1484 * state since the start of the current grace period.
     1485 */
     1486static void rm_quiescent_cpus(cpu_mask_t *cpu_mask)
     1487{
    10281488        /*
    10291489         * Ensure the announcement of the start of a new GP (ie up-to-date
     
    10801540        memory_barrier(); /* MB C */
    10811541       
    1082         /*
    1083          * Give readers time to pass through a QS. Also, batch arriving
    1084          * callbacks in order to amortize detection overhead.
    1085          */
    1086         if (!gp_sleep())
    1087                 return false;
    1088        
    1089         /* Non-intrusively determine which cpus have yet to pass a QS. */
    1090         rm_quiescent_cpus(reading_cpus);
    1091        
    1092         /* Actively interrupt cpus delaying the current GP and demand a QS. */
    1093         interrupt_delaying_cpus(reading_cpus);
    1094        
    1095         /* Wait for the interrupted cpus to notify us that they reached a QS. */
    1096         if (!wait_for_delaying_cpus())
    1097                 return false;
    1098         /*
    1099          * All cpus recorded a QS or are still idle. Any new readers will be added
    1100          * to next_preempted if preempted, i.e. the number of readers in cur_preempted
    1101          * monotonically decreases.
    1102          */
    1103        
    1104         /* Wait for the last reader in cur_preempted to notify us it is done. */
    1105         if (!wait_for_preempt_reader())
    1106                 return false;
    1107        
    1108         return true;
    1109 }
    1110 
    1111 /** Remove those cpus from the mask that have already passed a quiescent
    1112  * state since the start of the current grace period.
    1113  */
    1114 static void rm_quiescent_cpus(cpu_mask_t *cpu_mask)
    1115 {
    11161542        cpu_mask_for_each(*cpu_mask, cpu_id) {
    11171543                /*
     
    11371563}
    11381564
    1139 /** Sleeps a while if the current grace period is not to be expedited. */
    1140 static bool gp_sleep(void)
    1141 {
    1142         spinlock_lock(&rcu.gp_lock);
    1143 
    1144         int ret = 0;
    1145         while (0 == rcu.req_expedited_cnt && 0 == ret) {
    1146                 /* minor bug: sleeps for the same duration if woken up spuriously. */
    1147                 ret = _condvar_wait_timeout_spinlock(&rcu.expedite_now, &rcu.gp_lock,
    1148                         DETECT_SLEEP_MS * 1000, SYNCH_FLAGS_INTERRUPTIBLE);
    1149         }
    1150        
    1151         if (0 < rcu.req_expedited_cnt) {
    1152                 --rcu.req_expedited_cnt;
    1153                 /* Update statistic. */
    1154                 ++rcu.stat_expedited_cnt;
    1155         }
    1156        
    1157         spinlock_unlock(&rcu.gp_lock);
    1158        
    1159         return (ret != ESYNCH_INTERRUPTED);
    1160 }
    1161 
    1162 /** Actively interrupts and checks the offending cpus for quiescent states. */
    1163 static void interrupt_delaying_cpus(cpu_mask_t *cpu_mask)
     1565/** Invokes sample_local_cpu(arg) on each cpu of reader_cpus. */
     1566static void sample_cpus(cpu_mask_t *reader_cpus, void *arg)
    11641567{
    11651568        const size_t max_conconcurrent_calls = 16;
     
    11671570        size_t outstanding_calls = 0;
    11681571       
    1169         atomic_set(&rcu.delaying_cpu_cnt, 0);
    1170        
    1171         cpu_mask_for_each(*cpu_mask, cpu_id) {
    1172                 smp_call_async(cpu_id, sample_local_cpu, 0, &call[outstanding_calls]);
     1572        cpu_mask_for_each(*reader_cpus, cpu_id) {
     1573                smp_call_async(cpu_id, sample_local_cpu, arg, &call[outstanding_calls]);
    11731574                ++outstanding_calls;
    11741575
     
    11911592}
    11921593
    1193 /** Invoked on a cpu delaying grace period detection.
    1194  *
    1195  * Induces a quiescent state for the cpu or it instructs remaining
    1196  * readers to notify the detector once they finish.
    1197  */
    1198 static void sample_local_cpu(void *arg)
    1199 {
    1200         ASSERT(interrupts_disabled());
    1201         ASSERT(!CPU->rcu.is_delaying_gp);
    1202        
    1203         /* Cpu did not pass a quiescent state yet. */
     1594static void upd_missed_gp_in_wait(rcu_gp_t completed_gp)
     1595{
     1596        ASSERT(CPU->rcu.cur_cbs_gp <= completed_gp);
     1597       
     1598        size_t delta = (size_t)(completed_gp - CPU->rcu.cur_cbs_gp);
     1599        CPU->rcu.stat_missed_gp_in_wait += delta;
     1600}
     1601
     1602/** Globally note that the current thread was preempted in a reader section. */
     1603static void note_preempted_reader(void)
     1604{
     1605        irq_spinlock_lock(&rcu.preempt_lock, false);
     1606
    12041607        if (CPU->rcu.last_seen_gp != _rcu_cur_gp) {
    1205                 /* Interrupted a reader in a reader critical section. */
    1206                 if (0 < CPU->rcu.nesting_cnt) {
    1207                         ASSERT(!CPU->idle);
    1208                         /* Note to notify the detector from rcu_read_unlock(). */
    1209                         CPU->rcu.is_delaying_gp = true;
    1210                         /*
    1211                          * Set signal_unlock only after setting is_delaying_gp so
    1212                          * that NMI handlers do not accidentally clear it in unlock()
    1213                          * before seeing and acting upon is_delaying_gp.
    1214                          */
    1215                         compiler_barrier();
    1216                         CPU->rcu.signal_unlock = true;
    1217                        
    1218                         atomic_inc(&rcu.delaying_cpu_cnt);
    1219                 } else {
    1220                         /*
    1221                          * The cpu did not enter any rcu reader sections since
    1222                          * the start of the current GP. Record a quiescent state.
    1223                          *
    1224                          * Or, we interrupted rcu_read_unlock_impl() right before
    1225                          * it recorded a QS. Record a QS for it. The memory barrier
    1226                          * contains the reader section's mem accesses before
    1227                          * updating last_seen_gp.
    1228                          *
    1229                          * Or, we interrupted rcu_read_lock() right after it recorded
    1230                          * a QS for the previous GP but before it got a chance to
    1231                          * increment its nesting count. The memory barrier again
    1232                          * stops the CS code from spilling out of the CS.
    1233                          */
    1234                         memory_barrier();
    1235                         CPU->rcu.last_seen_gp = _rcu_cur_gp;
    1236                 }
     1608                /* The reader started before the GP started - we must wait for it.*/
     1609                list_append(&THREAD->rcu.preempt_link, &rcu.cur_preempted);
    12371610        } else {
    12381611                /*
    1239                  * This cpu already acknowledged that it had passed through
    1240                  * a quiescent state since the start of cur_gp.
     1612                 * The reader started after the GP started and this cpu
     1613                 * already noted a quiescent state. We might block the next GP.
    12411614                 */
    1242         }
    1243        
    1244         /*
    1245          * smp_call() makes sure any changes propagate back to the caller.
    1246          * In particular, it makes the most current last_seen_gp visible
    1247          * to the detector.
    1248          */
    1249 }
    1250 
    1251 /** Waits for cpus delaying the current grace period if there are any. */
    1252 static bool wait_for_delaying_cpus(void)
    1253 {
    1254         int delaying_cpu_cnt = atomic_get(&rcu.delaying_cpu_cnt);
    1255 
    1256         for (int i = 0; i < delaying_cpu_cnt; ++i){
    1257                 if (!semaphore_down_interruptable(&rcu.remaining_readers))
    1258                         return false;
    1259         }
    1260        
    1261         /* Update statistic. */
    1262         rcu.stat_delayed_cnt += delaying_cpu_cnt;
    1263        
    1264         return true;
     1615                list_append(&THREAD->rcu.preempt_link, &rcu.next_preempted);
     1616        }
     1617
     1618        irq_spinlock_unlock(&rcu.preempt_lock, false);
     1619}
     1620
     1621/** Remove the current thread from the global list of preempted readers. */
     1622static void rm_preempted_reader(void)
     1623{
     1624        irq_spinlock_lock(&rcu.preempt_lock, false);
     1625       
     1626        ASSERT(link_used(&THREAD->rcu.preempt_link));
     1627
     1628        bool prev_empty = list_empty(&rcu.cur_preempted);
     1629        list_remove(&THREAD->rcu.preempt_link);
     1630        bool now_empty = list_empty(&rcu.cur_preempted);
     1631
     1632        /* This was the last reader in cur_preempted. */
     1633        bool last_removed = now_empty && !prev_empty;
     1634
     1635        /*
     1636         * Preempted readers are blocking the detector and
     1637         * this was the last reader blocking the current GP.
     1638         */
     1639        if (last_removed && rcu.preempt_blocking_det) {
     1640                rcu.preempt_blocking_det = false;
     1641                semaphore_up(&rcu.remaining_readers);
     1642        }
     1643
     1644        irq_spinlock_unlock(&rcu.preempt_lock, false);
    12651645}
    12661646
     
    12851665}
    12861666
    1287 /** Called by the scheduler() when switching away from the current thread. */
    1288 void rcu_after_thread_ran(void)
    1289 {
    1290         ASSERT(interrupts_disabled());
    1291         /* todo: make is_delaying_gp and was_preempted NMI safe via local atomics.*/
    1292 
    1293         /*
    1294          * Prevent NMI handlers from interfering. The detector will be notified
    1295          * here if CPU->rcu.is_delaying_gp and the current thread is no longer
    1296          * running so there is nothing to signal to the detector.
    1297          */
    1298         CPU->rcu.signal_unlock = false;
    1299         /* Separates clearing of .signal_unlock from CPU->rcu.nesting_cnt = 0. */
    1300         compiler_barrier();
    1301        
    1302         /* Save the thread's nesting count when it is not running. */
    1303         THREAD->rcu.nesting_cnt = CPU->rcu.nesting_cnt;
    1304         /* Interrupt handlers might use RCU while idle in scheduler(). */
    1305         CPU->rcu.nesting_cnt = 0;
    1306        
    1307         /* Preempted a reader critical section for the first time. */
    1308         if (0 < THREAD->rcu.nesting_cnt && !THREAD->rcu.was_preempted) {
    1309                 THREAD->rcu.was_preempted = true;
    1310                
    1311                 irq_spinlock_lock(&rcu.preempt_lock, false);
    1312                
    1313                 if (CPU->rcu.last_seen_gp != _rcu_cur_gp) {
    1314                         /* The reader started before the GP started - we must wait for it.*/
    1315                         list_append(&THREAD->rcu.preempt_link, &rcu.cur_preempted);
    1316                 } else {
    1317                         /*
    1318                          * The reader started after the GP started and this cpu
    1319                          * already noted a quiescent state. We might block the next GP.
    1320                          */
    1321                         list_append(&THREAD->rcu.preempt_link, &rcu.next_preempted);
    1322                 }
    1323 
    1324                 irq_spinlock_unlock(&rcu.preempt_lock, false);
    1325         }
    1326        
    1327        
    1328         /*
    1329          * The preempted reader has been noted globally. There are therefore
    1330          * no readers running on this cpu so this is a quiescent state.
    1331          */
    1332         _rcu_record_qs();
    1333 
    1334         /*
    1335          * This cpu is holding up the current GP. Let the detector know
    1336          * it has just passed a quiescent state.
    1337          *
    1338          * The detector waits separately for preempted readers, so we have
    1339          * to notify the detector even if we have just preempted a reader.
    1340          */
    1341         if (CPU->rcu.is_delaying_gp) {
    1342                 CPU->rcu.is_delaying_gp = false;
    1343                 semaphore_up(&rcu.remaining_readers);
    1344         }
    1345 
    1346         /*
    1347          * Forcefully associate the detector with the highest priority
    1348          * even if preempted due to its time slice running out.
    1349          *
    1350          * todo: Replace with strict scheduler priority classes.
    1351          */
    1352         if (THREAD == rcu.detector_thr) {
    1353                 THREAD->priority = -1;
    1354         }
    1355         else if (THREAD == CPU->rcu.reclaimer_thr) {
    1356                 THREAD->priority = -1;
    1357         }
    1358        
    1359         upd_max_cbs_in_slice();
    1360 }
    1361 
    13621667static void upd_max_cbs_in_slice(void)
    13631668{
     
    13721677}
    13731678
    1374 /** Called by the scheduler() when switching to a newly scheduled thread. */
    1375 void rcu_before_thread_runs(void)
    1376 {
    1377         ASSERT(PREEMPTION_DISABLED || interrupts_disabled());
    1378         ASSERT(0 == CPU->rcu.nesting_cnt);
    1379        
    1380         /* Load the thread's saved nesting count from before it was preempted. */
    1381         CPU->rcu.nesting_cnt = THREAD->rcu.nesting_cnt;
    1382         /*
    1383          * In the unlikely event that an NMI occurs between the loading of the
    1384          * variables and setting signal_unlock, the NMI handler may invoke
    1385          * rcu_read_unlock() and clear signal_unlock. In that case we will
    1386          * incorrectly overwrite signal_unlock from false to true. This
    1387          * situation is benign and the next rcu_read_unlock() will at worst
    1388          * needlessly invoke _rcu_signal_unlock().
    1389          */
    1390         CPU->rcu.signal_unlock = THREAD->rcu.was_preempted || CPU->rcu.is_delaying_gp;
    1391 }
    1392 
    1393 
    13941679/** Prints RCU run-time statistics. */
    13951680void rcu_print_stat(void)
     
    14011686         */
    14021687       
    1403         printf("Configuration: expedite_threshold=%d, critical_threshold=%d,"
    1404                 " detect_sleep=%dms\n",
    1405                 EXPEDITE_THRESHOLD, CRITICAL_THRESHOLD, DETECT_SLEEP_MS);
     1688#ifdef RCU_PREEMPT_PODZIMEK
     1689        const char *algo = "podzimek-preempt-rcu";
     1690#elif defined(RCU_PREEMPT_A)
     1691        const char *algo = "a-preempt-rcu";
     1692#endif
     1693       
     1694        printf("Config: expedite_threshold=%d, critical_threshold=%d,"
     1695                " detect_sleep=%dms, %s\n",     
     1696                EXPEDITE_THRESHOLD, CRITICAL_THRESHOLD, DETECT_SLEEP_MS, algo);
    14061697        printf("Completed GPs: %" PRIu64 "\n", rcu.completed_gp);
    14071698        printf("Expedited GPs: %zu\n", rcu.stat_expedited_cnt);
Note: See TracChangeset for help on using the changeset viewer.