source: mainline/uspace/lib/c/generic/thread/rcu.c @ 0abc2ae

Last change on this file since 0abc2ae was 05882233, checked in by Jiří Zárevúcky <jiri.zarevucky@…>, 7 years ago

Unify various barrier includes into <barrier.h>

/*
 * Copyright (c) 2012 Adam Hraska
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/** @addtogroup liburcu
 * @{
 */
/**
 * @file
 *
 * User space RCU is based on URCU utilizing signals [1]. This
 * implementation, however, does not signal each thread of the process
 * to issue a memory barrier. Instead, we introduced a syscall that
 * issues memory barriers (via IPIs) on cpus that are running threads
 * of the current process. This has two advantages: first, it does not
 * require us to schedule and run every thread of the process; second,
 * IPIs are less intrusive than switching contexts and entering user
 * space.
 *
 * This algorithm is further modified to require a single reader group
 * change per grace period instead of two. Signal-URCU flips
 * the reader group and waits for readers of the previous group
 * twice in succession in order to wait for new readers that were
 * delayed and mistakenly associated with the previous reader group.
 * The modified algorithm ensures that the new reader group is
 * always empty (by explicitly waiting for it to become empty).
 * Only then does it flip the reader group and wait for preexisting
 * readers of the old reader group (an invariant of SRCU [2, 3]).
 *
 * [1] User-level implementations of read-copy update,
 *     2012, appendix
 *     http://www.rdrop.com/users/paulmck/RCU/urcu-supp-accepted.2011.08.30a.pdf
 *
 * [2] linux/kernel/srcu.c in Linux 3.5-rc2,
 *     2012
 *     http://tomoyo.sourceforge.jp/cgi-bin/lxr/source/kernel/srcu.c?v=linux-3.5-rc2-ccs-1.8.3
 *
 * [3] [RFC PATCH 5/5 single-thread-version] implement
 *     per-domain single-thread state machine,
 *     2012, Lai
 *     https://lkml.org/lkml/2012/3/6/586
 */
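
/*
 * A minimal reader-side sketch of the API implemented in this file
 * (illustration only; item_t, shared_item and reader_fibril are
 * hypothetical, only the rcu_* calls come from this file):
 *
 *	typedef struct {
 *		int value;
 *	} item_t;
 *
 *	item_t *shared_item;        // pointer published by an updater
 *
 *	static errno_t reader_fibril(void *arg)
 *	{
 *		rcu_register_fibril();
 *
 *		rcu_read_lock();
 *		item_t *it = shared_item;
 *		if (it != NULL)
 *			printf("value: %d\n", it->value);
 *		rcu_read_unlock();
 *
 *		rcu_deregister_fibril();
 *		return EOK;
 *	}
 */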

#include "rcu.h"
#include <fibril_synch.h>
#include <fibril.h>
#include <stdio.h>
#include <stddef.h>
#include <barrier.h>
#include <macros.h>
#include <async.h>
#include <adt/list.h>
#include <assert.h>
#include <time.h>

#include "../private/fibril.h"


/** RCU sleeps for RCU_SLEEP_MS before polling an active RCU reader again. */
#define RCU_SLEEP_MS 10

#define RCU_NESTING_SHIFT 1
#define RCU_NESTING_INC (1 << RCU_NESTING_SHIFT)
#define RCU_GROUP_BIT_MASK (size_t)(RCU_NESTING_INC - 1)
#define RCU_GROUP_A (size_t)(0 | RCU_NESTING_INC)
#define RCU_GROUP_B (size_t)(1 | RCU_NESTING_INC)

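/*
 * How nesting_cnt encodes reader state (a worked example of the macros
 * above, for illustration): the low bit selected by RCU_GROUP_BIT_MASK
 * holds the reader group, the bits above RCU_NESTING_SHIFT hold the
 * nesting depth.
 *
 *	nesting_cnt == 0                            not in a reader section
 *	nesting_cnt == RCU_GROUP_A == 0b10          depth 1, group A
 *	nesting_cnt == RCU_GROUP_B == 0b11          depth 1, group B
 *	nesting_cnt == RCU_GROUP_B + RCU_NESTING_INC == 0b101
 *	                                            depth 2, group B
 *
 * rcu_read_lock() installs rcu.reader_group on the outermost lock and
 * adds RCU_NESTING_INC for nested locks; rcu_read_unlock() subtracts it.
 */
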
/** Fibril local RCU data. */
typedef struct fibril_rcu_data {
	size_t nesting_cnt;
	link_t link;
	bool registered;
} fibril_rcu_data_t;

/** Process global RCU data. */
typedef struct rcu_data {
	size_t cur_gp;
	size_t reader_group;
	fibril_rmutex_t list_mutex;
	list_t fibrils_list;
	struct {
		fibril_rmutex_t mutex;
		bool locked;
		list_t blocked_fibrils;
	} sync_lock;
} rcu_data_t;

typedef struct blocked_fibril {
	fibril_event_t unblock;
	link_t link;
	bool is_ready;
} blocked_fibril_t;


/** Fibril local RCU data. */
static fibril_local fibril_rcu_data_t fibril_rcu = {
	.nesting_cnt = 0,
	.link = {
		.next = NULL,
		.prev = NULL
	},
	.registered = false
};

/** Process global RCU data. */
static rcu_data_t rcu = {
	.cur_gp = 0,
	.reader_group = RCU_GROUP_A,
	.list_mutex = FIBRIL_RMUTEX_INITIALIZER(rcu.list_mutex),
	.fibrils_list = LIST_INITIALIZER(rcu.fibrils_list),
	.sync_lock = {
		.mutex = FIBRIL_RMUTEX_INITIALIZER(rcu.sync_lock.mutex),
		.locked = false,
		.blocked_fibrils = LIST_INITIALIZER(rcu.sync_lock.blocked_fibrils),
	},
};


static void wait_for_readers(size_t reader_group);
static void force_mb_in_all_threads(void);
static bool is_preexisting_reader(const fibril_rcu_data_t *fib, size_t group);

static void lock_sync(void);
static void unlock_sync(void);
static void sync_sleep(void);

static bool is_in_group(size_t nesting_cnt, size_t group);
static bool is_in_reader_section(size_t nesting_cnt);
static size_t get_other_group(size_t group);


/** Registers a fibril so it may start using RCU read sections.
 *
 * A fibril must be registered with rcu before it can enter RCU critical
 * sections delineated by rcu_read_lock() and rcu_read_unlock().
 */
void rcu_register_fibril(void)
{
	assert(!fibril_rcu.registered);

	fibril_rmutex_lock(&rcu.list_mutex);
	list_append(&fibril_rcu.link, &rcu.fibrils_list);
	fibril_rmutex_unlock(&rcu.list_mutex);

	fibril_rcu.registered = true;
}

/** Deregisters a fibril that had been using RCU read sections.
 *
 * A fibril must be deregistered before it exits if it had
 * been registered with rcu via rcu_register_fibril().
 */
void rcu_deregister_fibril(void)
{
	assert(fibril_rcu.registered);

	/*
	 * Forcefully unlock any reader sections. The fibril is exiting
	 * so it is not holding any references to data protected by the
	 * rcu section. Therefore, it is safe to unlock. Otherwise,
	 * rcu_synchronize() would wait indefinitely.
	 */
	memory_barrier();
	fibril_rcu.nesting_cnt = 0;

	fibril_rmutex_lock(&rcu.list_mutex);
	list_remove(&fibril_rcu.link);
	fibril_rmutex_unlock(&rcu.list_mutex);

	fibril_rcu.registered = false;
}

/** Delimits the start of an RCU reader critical section.
 *
 * RCU reader sections may be nested.
 */
void rcu_read_lock(void)
{
	assert(fibril_rcu.registered);

	size_t nesting_cnt = ACCESS_ONCE(fibril_rcu.nesting_cnt);

	if (0 == (nesting_cnt >> RCU_NESTING_SHIFT)) {
		ACCESS_ONCE(fibril_rcu.nesting_cnt) = ACCESS_ONCE(rcu.reader_group);
		/* Required by MB_FORCE_L */
		compiler_barrier(); /* CC_BAR_L */
	} else {
		ACCESS_ONCE(fibril_rcu.nesting_cnt) = nesting_cnt + RCU_NESTING_INC;
	}
}

/** Delimits the end of an RCU reader critical section. */
void rcu_read_unlock(void)
{
	assert(fibril_rcu.registered);
	assert(rcu_read_locked());

	/* Required by MB_FORCE_U */
	compiler_barrier(); /* CC_BAR_U */
	/* todo: ACCESS_ONCE(nesting_cnt) ? */
	fibril_rcu.nesting_cnt -= RCU_NESTING_INC;
}

/** Returns true if the current fibril is in an RCU reader section. */
bool rcu_read_locked(void)
{
	return 0 != (fibril_rcu.nesting_cnt >> RCU_NESTING_SHIFT);
}

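/*
 * A sketch of legal nested use of the reader API above (illustration
 * only; assumes the calling fibril is registered):
 *
 *	rcu_read_lock();            // outermost: adopts rcu.reader_group
 *	rcu_read_lock();            // nested: depth becomes 2
 *	assert(rcu_read_locked());
 *	rcu_read_unlock();          // depth back to 1
 *	rcu_read_unlock();          // depth 0, reader section ends
 */
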
/** Blocks until all preexisting readers exit their critical sections. */
void rcu_synchronize(void)
{
	assert(!rcu_read_locked());

	/* Contain load of rcu.cur_gp. */
	memory_barrier();

	/* Approximately the number of the GP in progress. */
	size_t gp_in_progress = ACCESS_ONCE(rcu.cur_gp);

	lock_sync();

	/*
	 * Exit early if we were stuck waiting for the mutex for a full grace
	 * period. We started waiting during gp_in_progress (or gp_in_progress + 1
	 * if the value propagated to this cpu too late), so we must wait for the
	 * next full GP, gp_in_progress + 1, to finish. I.e., don't wait if the GP
	 * after that, gp_in_progress + 2, has already started. For example, if
	 * gp_in_progress was 5, we may return as soon as rcu.cur_gp reaches 7.
	 */
	/* rcu.cur_gp >= gp_in_progress + 2, but tolerates overflows. */
	if (rcu.cur_gp != gp_in_progress && rcu.cur_gp + 1 != gp_in_progress) {
		unlock_sync();
		return;
	}

	++ACCESS_ONCE(rcu.cur_gp);

	/*
	 * Pairs up with MB_FORCE_L (i.e. CC_BAR_L). Makes changes prior
	 * to rcu_synchronize() visible to new readers.
	 */
	memory_barrier(); /* MB_A */

	/*
	 * Pairs up with MB_A.
	 *
	 * If the memory barrier is issued before CC_BAR_L in the target
	 * thread, it pairs up with MB_A and the thread sees all changes
	 * prior to rcu_synchronize(). I.e., any reader sections are new
	 * rcu readers.
	 *
	 * If the memory barrier is issued after CC_BAR_L, it pairs up
	 * with MB_B and it will make the most recent nesting_cnt visible
	 * in this thread. Since the reader may have already accessed
	 * memory protected by RCU (it ran instructions past CC_BAR_L),
	 * it is a preexisting reader. Seeing the most recent nesting_cnt
	 * ensures the thread will be identified as a preexisting reader
	 * and we will wait for it in wait_for_readers(old_reader_group).
	 */
	force_mb_in_all_threads(); /* MB_FORCE_L */

	/*
	 * Pairs with MB_FORCE_L (i.e. CC_BAR_L, CC_BAR_U) and makes the most
	 * current fibril.nesting_cnt visible to this cpu.
	 */
	read_barrier(); /* MB_B */

	size_t new_reader_group = get_other_group(rcu.reader_group);
	wait_for_readers(new_reader_group);

	/* Separates waiting for readers in new_reader_group from group flip. */
	memory_barrier();

	/* Flip the group new readers should associate with. */
	size_t old_reader_group = rcu.reader_group;
	rcu.reader_group = new_reader_group;

	/* Flip the group before waiting for preexisting readers in the old group. */
	memory_barrier();

	wait_for_readers(old_reader_group);

	force_mb_in_all_threads(); /* MB_FORCE_U */

	unlock_sync();
}

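/*
 * A minimal updater-side sketch of how rcu_synchronize() is meant to be
 * used (illustration only; item_t, shared_item and new_item are
 * hypothetical): publish the new version first, wait out preexisting
 * readers, and only then reclaim the old version.
 *
 *	item_t *old = shared_item;
 *	shared_item = new_item;     // new readers see only new_item
 *	rcu_synchronize();          // preexisting readers of old finish
 *	free(old);                  // now safe: no reader can hold old
 */
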
/** Issues a memory barrier in each thread of this process. */
static void force_mb_in_all_threads(void)
{
	/*
	 * Only issue barriers in running threads. The scheduler will
	 * execute additional memory barriers when switching to threads
	 * of the process that are currently not running.
	 */
	smp_memory_barrier();
}

/** Waits for readers of reader_group to exit their reader sections. */
static void wait_for_readers(size_t reader_group)
{
	fibril_rmutex_lock(&rcu.list_mutex);

	list_t quiescent_fibrils;
	list_initialize(&quiescent_fibrils);

	while (!list_empty(&rcu.fibrils_list)) {
		list_foreach_safe(rcu.fibrils_list, fibril_it, next_fibril) {
			fibril_rcu_data_t *fib = member_to_inst(fibril_it,
			    fibril_rcu_data_t, link);

			if (is_preexisting_reader(fib, reader_group)) {
				fibril_rmutex_unlock(&rcu.list_mutex);
				sync_sleep();
				fibril_rmutex_lock(&rcu.list_mutex);
				/* Break to the while loop and rescan the list. */
				break;
			} else {
				list_remove(fibril_it);
				list_append(fibril_it, &quiescent_fibrils);
			}
		}
	}

	list_concat(&rcu.fibrils_list, &quiescent_fibrils);
	fibril_rmutex_unlock(&rcu.list_mutex);
}


static void lock_sync(void)
{
	fibril_rmutex_lock(&rcu.sync_lock.mutex);
	if (rcu.sync_lock.locked) {
		blocked_fibril_t blocked_fib;
		blocked_fib.unblock = FIBRIL_EVENT_INIT;

		list_append(&blocked_fib.link, &rcu.sync_lock.blocked_fibrils);

		do {
			blocked_fib.is_ready = false;
			fibril_rmutex_unlock(&rcu.sync_lock.mutex);
			fibril_wait_for(&blocked_fib.unblock);
			fibril_rmutex_lock(&rcu.sync_lock.mutex);
		} while (rcu.sync_lock.locked);

		list_remove(&blocked_fib.link);
		rcu.sync_lock.locked = true;
	} else {
		rcu.sync_lock.locked = true;
	}
}

static void unlock_sync(void)
{
	assert(rcu.sync_lock.locked);

	/* Unlock, but first wake up the next fibril waiting for the lock. */

	if (!list_empty(&rcu.sync_lock.blocked_fibrils)) {
		blocked_fibril_t *blocked_fib = member_to_inst(
		    list_first(&rcu.sync_lock.blocked_fibrils), blocked_fibril_t, link);

		if (!blocked_fib->is_ready) {
			blocked_fib->is_ready = true;
			fibril_notify(&blocked_fib->unblock);
		}
	}

	rcu.sync_lock.locked = false;
	fibril_rmutex_unlock(&rcu.sync_lock.mutex);
}

static void sync_sleep(void)
{
	assert(rcu.sync_lock.locked);
	/*
	 * Release the mutex to avoid deadlocks in singlethreaded apps,
	 * but keep sync locked.
	 */
	fibril_rmutex_unlock(&rcu.sync_lock.mutex);
	fibril_usleep(RCU_SLEEP_MS * 1000);
	fibril_rmutex_lock(&rcu.sync_lock.mutex);
}


static bool is_preexisting_reader(const fibril_rcu_data_t *fib, size_t group)
{
	size_t nesting_cnt = ACCESS_ONCE(fib->nesting_cnt);

	return is_in_group(nesting_cnt, group) && is_in_reader_section(nesting_cnt);
}

static size_t get_other_group(size_t group)
{
	if (group == RCU_GROUP_A)
		return RCU_GROUP_B;
	else
		return RCU_GROUP_A;
}

static bool is_in_reader_section(size_t nesting_cnt)
{
	return RCU_NESTING_INC <= nesting_cnt;
}

static bool is_in_group(size_t nesting_cnt, size_t group)
{
	return (nesting_cnt & RCU_GROUP_BIT_MASK) == (group & RCU_GROUP_BIT_MASK);
}
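
/*
 * A worked evaluation of the predicates above (illustration only): with
 * nesting_cnt == 0b101 (depth 2, group B), is_in_reader_section() holds
 * because RCU_NESTING_INC == 2 <= 5, and is_in_group(0b101, RCU_GROUP_B)
 * holds because (0b101 & 1) == (RCU_GROUP_B & 1). A fibril outside any
 * reader section has nesting_cnt == 0, fails is_in_reader_section(), and
 * is therefore never counted as a preexisting reader.
 */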

/** @}
 */