source: mainline/uspace/lib/c/generic/rcu.c@ f1380b7

Last change on this file since f1380b7 was a35b458, checked in by Jiří Zárevúcky <zarevucky.jiri@…>, 7 years ago

style: Remove trailing whitespace on _all_ lines, including empty ones, for particular file types.

Command used: tools/srepl '\s\+$' '' -- *.c *.h *.py *.sh *.s *.S *.ag

Currently, whitespace on empty lines is very inconsistent.
There are two basic choices: Either remove the whitespace, or keep empty lines
indented to the level of surrounding code. The former is AFAICT more common,
and also much easier to do automatically.

Alternatively, we could write a script for automatic indentation, and use that
instead. However, if such a script exists, it's possible to use the indented
style locally, by having the editor apply relevant conversions on load/save,
without affecting the remote repository. IMO, it makes more sense to adopt
the simpler rule.

/*
 * Copyright (c) 2012 Adam Hraska
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/** @addtogroup liburcu
 * @{
 */
/**
 * @file
 *
 * User space RCU is based on URCU utilizing signals [1]. This
 * implementation does not, however, signal each thread of the process
 * to issue a memory barrier. Instead, we introduced a syscall that
 * issues memory barriers (via IPIs) on cpus that are running threads
 * of the current process. This has two advantages: first, it does not
 * require us to schedule and run every thread of the process; second,
 * IPIs are less intrusive than switching contexts and entering user
 * space.
 *
 * This algorithm is further modified to require a single reader group
 * change per grace period instead of two. Signal-URCU flips
 * the reader group and waits for readers of the previous group
 * twice in succession in order to wait for new readers that were
 * delayed and mistakenly associated with the previous reader group.
 * The modified algorithm ensures that the new reader group is
 * always empty (by explicitly waiting for it to become empty).
 * Only then does it flip the reader group and wait for preexisting
 * readers of the old reader group (invariant of SRCU [2, 3]).
 *
 *
 * [1] User-level implementations of read-copy update,
 *     2012, appendix
 *     http://www.rdrop.com/users/paulmck/RCU/urcu-supp-accepted.2011.08.30a.pdf
 *
 * [2] linux/kernel/srcu.c in Linux 3.5-rc2,
 *     2012
 *     http://tomoyo.sourceforge.jp/cgi-bin/lxr/source/kernel/srcu.c?v=linux-3.5-rc2-ccs-1.8.3
 *
 * [3] [RFC PATCH 5/5 single-thread-version] implement
 *     per-domain single-thread state machine,
 *     2012, Lai
 *     https://lkml.org/lkml/2012/3/6/586
 */
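
/*
 * Illustrative usage sketch (not part of this library's code): one fibril
 * reads a shared pointer inside an RCU reader section while another replaces
 * it and waits for a grace period before freeing the old copy. It assumes
 * that rcu.h exposes a convenience wrapper rcu_synchronize() calling
 * _rcu_synchronize(BM_BLOCK_FIBRIL); the data type and the consume() helper
 * are hypothetical.
 *
 *	static int *shared_data;
 *
 *	static void reader_fibril(void)
 *	{
 *		rcu_register_fibril();
 *
 *		rcu_read_lock();
 *		int *data = shared_data;
 *		if (data != NULL)
 *			consume(*data);       // hypothetical consumer
 *		rcu_read_unlock();
 *
 *		rcu_deregister_fibril();
 *	}
 *
 *	static void update(int *new_data)
 *	{
 *		int *old_data = shared_data;
 *		write_barrier();              // publish new_data's contents first
 *		shared_data = new_data;
 *		rcu_synchronize();            // wait for preexisting readers
 *		free(old_data);               // no reader can still see old_data
 *	}
 */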

#include "rcu.h"
#include <fibril_synch.h>
#include <fibril.h>
#include <stdio.h>
#include <stddef.h>
#include <compiler/barrier.h>
#include <libarch/barrier.h>
#include <futex.h>
#include <macros.h>
#include <async.h>
#include <adt/list.h>
#include <smp_memory_barrier.h>
#include <assert.h>
#include <time.h>
#include <thread.h>


/** RCU sleeps for RCU_SLEEP_MS before polling an active RCU reader again. */
#define RCU_SLEEP_MS 10

#define RCU_NESTING_SHIFT 1
#define RCU_NESTING_INC (1 << RCU_NESTING_SHIFT)
#define RCU_GROUP_BIT_MASK (size_t)(RCU_NESTING_INC - 1)
#define RCU_GROUP_A (size_t)(0 | RCU_NESTING_INC)
#define RCU_GROUP_B (size_t)(1 | RCU_NESTING_INC)
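
/*
 * Worked example of the nesting_cnt encoding used below (descriptive note;
 * the values follow directly from the macros above): bit 0 selects the
 * reader group and the remaining bits count reader section nesting. With
 * rcu.reader_group == RCU_GROUP_B (0x3), an outermost rcu_read_lock() stores
 * 0x3 (group B, depth 1), a nested rcu_read_lock() adds RCU_NESTING_INC
 * giving 0x5 (group B, depth 2), and each rcu_read_unlock() subtracts
 * RCU_NESTING_INC again. Once only the group bit remains, the fibril is
 * outside any reader section.
 */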


/** Fibril local RCU data. */
typedef struct fibril_rcu_data {
	size_t nesting_cnt;
	link_t link;
	bool registered;
} fibril_rcu_data_t;

/** Process global RCU data. */
typedef struct rcu_data {
	size_t cur_gp;
	size_t reader_group;
	futex_t list_futex;
	list_t fibrils_list;
	struct {
		futex_t futex;
		bool locked;
		list_t blocked_fibrils;
		size_t blocked_thread_cnt;
		futex_t futex_blocking_threads;
	} sync_lock;
} rcu_data_t;

typedef struct blocked_fibril {
	fid_t id;
	link_t link;
	bool is_ready;
} blocked_fibril_t;


/** Fibril local RCU data. */
static fibril_local fibril_rcu_data_t fibril_rcu = {
	.nesting_cnt = 0,
	.link = {
		.next = NULL,
		.prev = NULL
	},
	.registered = false
};

/** Process global RCU data. */
static rcu_data_t rcu = {
	.cur_gp = 0,
	.reader_group = RCU_GROUP_A,
	.list_futex = FUTEX_INITIALIZER,
	.fibrils_list = LIST_INITIALIZER(rcu.fibrils_list),
	.sync_lock = {
		.futex = FUTEX_INITIALIZER,
		.locked = false,
		.blocked_fibrils = LIST_INITIALIZER(rcu.sync_lock.blocked_fibrils),
		.blocked_thread_cnt = 0,
		.futex_blocking_threads = FUTEX_INITIALIZE(0),
	},
};


static void wait_for_readers(size_t reader_group, blocking_mode_t blocking_mode);
static void force_mb_in_all_threads(void);
static bool is_preexisting_reader(const fibril_rcu_data_t *fib, size_t group);

static void lock_sync(blocking_mode_t blocking_mode);
static void unlock_sync(void);
static void sync_sleep(blocking_mode_t blocking_mode);

static bool is_in_group(size_t nesting_cnt, size_t group);
static bool is_in_reader_section(size_t nesting_cnt);
static size_t get_other_group(size_t group);


/** Registers a fibril so it may start using RCU read sections.
 *
 * A fibril must be registered with rcu before it can enter RCU critical
 * sections delineated by rcu_read_lock() and rcu_read_unlock().
 */
void rcu_register_fibril(void)
{
	assert(!fibril_rcu.registered);

	futex_down(&rcu.list_futex);
	list_append(&fibril_rcu.link, &rcu.fibrils_list);
	futex_up(&rcu.list_futex);

	fibril_rcu.registered = true;
}

/** Deregisters a fibril that had been using RCU read sections.
 *
 * A fibril must be deregistered before it exits if it had
 * been registered with rcu via rcu_register_fibril().
 */
void rcu_deregister_fibril(void)
{
	assert(fibril_rcu.registered);

	/*
	 * Forcefully unlock any reader sections. The fibril is exiting
	 * so it is not holding any references to data protected by the
	 * rcu section. Therefore, it is safe to unlock. Otherwise,
	 * rcu_synchronize() would wait indefinitely.
	 */
	memory_barrier();
	fibril_rcu.nesting_cnt = 0;

	futex_down(&rcu.list_futex);
	list_remove(&fibril_rcu.link);
	futex_up(&rcu.list_futex);

	fibril_rcu.registered = false;
}

/** Delimits the start of an RCU reader critical section.
 *
 * RCU reader sections may be nested.
 */
void rcu_read_lock(void)
{
	assert(fibril_rcu.registered);

	size_t nesting_cnt = ACCESS_ONCE(fibril_rcu.nesting_cnt);

	if (0 == (nesting_cnt >> RCU_NESTING_SHIFT)) {
		ACCESS_ONCE(fibril_rcu.nesting_cnt) = ACCESS_ONCE(rcu.reader_group);
		/* Required by MB_FORCE_L */
		compiler_barrier(); /* CC_BAR_L */
	} else {
		ACCESS_ONCE(fibril_rcu.nesting_cnt) = nesting_cnt + RCU_NESTING_INC;
	}
}

/** Delimits the end of an RCU reader critical section. */
void rcu_read_unlock(void)
{
	assert(fibril_rcu.registered);
	assert(rcu_read_locked());

	/* Required by MB_FORCE_U */
	compiler_barrier(); /* CC_BAR_U */
	/* todo: ACCESS_ONCE(nesting_cnt) ? */
	fibril_rcu.nesting_cnt -= RCU_NESTING_INC;
}

/** Returns true if the current fibril is in an RCU reader section. */
bool rcu_read_locked(void)
{
	return 0 != (fibril_rcu.nesting_cnt >> RCU_NESTING_SHIFT);
}

/** Blocks until all preexisting readers exit their critical sections. */
void _rcu_synchronize(blocking_mode_t blocking_mode)
{
	assert(!rcu_read_locked());

	/* Contain load of rcu.cur_gp. */
	memory_barrier();

	/* Approximately the number of the GP in progress. */
	size_t gp_in_progress = ACCESS_ONCE(rcu.cur_gp);

	lock_sync(blocking_mode);

	/*
	 * Exit early if we were stuck waiting for the mutex for a full grace
	 * period. We started waiting during gp_in_progress (or gp_in_progress + 1
	 * if the value propagated to this cpu too late), so wait for the next
	 * full GP, gp_in_progress + 1, to finish. I.e. don't wait if the GP
	 * after that, gp_in_progress + 2, has already started.
	 */
	/* rcu.cur_gp >= gp_in_progress + 2, but tolerates overflows. */
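	/*
	 * Concrete illustration (descriptive note): with gp_in_progress == 7,
	 * we proceed while rcu.cur_gp is 7 or 8, because GP 8 may have started
	 * before we began waiting and must still be waited for. Once
	 * rcu.cur_gp reaches 9, GP 8 has completed in the meantime, so every
	 * reader preexisting this call has already been waited for.
	 */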
	if (rcu.cur_gp != gp_in_progress && rcu.cur_gp != gp_in_progress + 1) {
		unlock_sync();
		return;
	}

	++ACCESS_ONCE(rcu.cur_gp);

	/*
	 * Pairs up with MB_FORCE_L (i.e. CC_BAR_L). Makes changes prior
	 * to rcu_synchronize() visible to new readers.
	 */
	memory_barrier(); /* MB_A */

	/*
	 * Pairs up with MB_A.
	 *
	 * If the memory barrier is issued before CC_BAR_L in the target
	 * thread, it pairs up with MB_A and the thread sees all changes
	 * prior to rcu_synchronize(). I.e. any reader sections are new
	 * rcu readers.
	 *
	 * If the memory barrier is issued after CC_BAR_L, it pairs up
	 * with MB_B and it will make the most recent nesting_cnt visible
	 * in this thread. Since the reader may have already accessed
	 * memory protected by RCU (it ran instructions past CC_BAR_L),
	 * it is a preexisting reader. Seeing the most recent nesting_cnt
	 * ensures the thread will be identified as a preexisting reader
	 * and we will wait for it in wait_for_readers(old_reader_group).
	 */
	force_mb_in_all_threads(); /* MB_FORCE_L */

	/*
	 * Pairs with MB_FORCE_L (i.e. CC_BAR_L, CC_BAR_U) and makes the most
	 * current fibril.nesting_cnt visible to this cpu.
	 */
	read_barrier(); /* MB_B */

	size_t new_reader_group = get_other_group(rcu.reader_group);
	wait_for_readers(new_reader_group, blocking_mode);

	/* Separates waiting for readers in new_reader_group from group flip. */
	memory_barrier();

	/* Flip the group new readers should associate with. */
	size_t old_reader_group = rcu.reader_group;
	rcu.reader_group = new_reader_group;

	/* Flip the group before waiting for preexisting readers in the old group. */
	memory_barrier();

	wait_for_readers(old_reader_group, blocking_mode);

	/* MB_FORCE_U */
	force_mb_in_all_threads(); /* MB_FORCE_U */

	unlock_sync();
}

/** Issues a memory barrier in each thread of this process. */
static void force_mb_in_all_threads(void)
{
	/*
	 * Only issue barriers in running threads. The scheduler will
	 * execute additional memory barriers when switching to threads
	 * of the process that are currently not running.
	 */
	smp_memory_barrier();
}

/** Waits for readers of reader_group to exit their reader sections. */
static void wait_for_readers(size_t reader_group, blocking_mode_t blocking_mode)
{
	futex_down(&rcu.list_futex);

	list_t quiescent_fibrils;
	list_initialize(&quiescent_fibrils);

	while (!list_empty(&rcu.fibrils_list)) {
		list_foreach_safe(rcu.fibrils_list, fibril_it, next_fibril) {
			fibril_rcu_data_t *fib = member_to_inst(fibril_it,
			    fibril_rcu_data_t, link);

			if (is_preexisting_reader(fib, reader_group)) {
				futex_up(&rcu.list_futex);
				sync_sleep(blocking_mode);
				futex_down(&rcu.list_futex);
				/* Break out and rescan from the outer while loop. */
				break;
			} else {
				list_remove(fibril_it);
				list_append(fibril_it, &quiescent_fibrils);
			}
		}
	}

	list_concat(&rcu.fibrils_list, &quiescent_fibrils);
	futex_up(&rcu.list_futex);
}

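/*
 * Note on the convention used by lock_sync()/unlock_sync() below (descriptive
 * note added for clarity): the current owner keeps rcu.sync_lock.futex held
 * down for the whole time the sync lock is held and releases it only inside
 * sync_sleep(). When unlock_sync() finds a blocked thread, it does not clear
 * the locked flag or release the futex; instead both are handed over to the
 * woken thread, which continues as the new owner.
 */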
static void lock_sync(blocking_mode_t blocking_mode)
{
	futex_down(&rcu.sync_lock.futex);
	if (rcu.sync_lock.locked) {
		if (blocking_mode == BM_BLOCK_FIBRIL) {
			blocked_fibril_t blocked_fib;
			blocked_fib.id = fibril_get_id();

			list_append(&blocked_fib.link, &rcu.sync_lock.blocked_fibrils);

			do {
				blocked_fib.is_ready = false;
				futex_up(&rcu.sync_lock.futex);
				fibril_switch(FIBRIL_TO_MANAGER);
				futex_down(&rcu.sync_lock.futex);
			} while (rcu.sync_lock.locked);

			list_remove(&blocked_fib.link);
			rcu.sync_lock.locked = true;
		} else {
			assert(blocking_mode == BM_BLOCK_THREAD);
			rcu.sync_lock.blocked_thread_cnt++;
			futex_up(&rcu.sync_lock.futex);
			futex_down(&rcu.sync_lock.futex_blocking_threads);
		}
	} else {
		rcu.sync_lock.locked = true;
	}
}

static void unlock_sync(void)
{
	assert(rcu.sync_lock.locked);

	/*
	 * Blocked threads have priority over fibrils when acquiring the sync
	 * lock. Pass the lock onto a waiting thread.
	 */
	if (0 < rcu.sync_lock.blocked_thread_cnt) {
		--rcu.sync_lock.blocked_thread_cnt;
		futex_up(&rcu.sync_lock.futex_blocking_threads);
	} else {
		/* Unlock and wake up any fibrils waiting for the lock. */

		if (!list_empty(&rcu.sync_lock.blocked_fibrils)) {
			blocked_fibril_t *blocked_fib = member_to_inst(
			    list_first(&rcu.sync_lock.blocked_fibrils), blocked_fibril_t, link);

			if (!blocked_fib->is_ready) {
				blocked_fib->is_ready = true;
				fibril_add_ready(blocked_fib->id);
			}
		}

		rcu.sync_lock.locked = false;
		futex_up(&rcu.sync_lock.futex);
	}
}

static void sync_sleep(blocking_mode_t blocking_mode)
{
	assert(rcu.sync_lock.locked);
	/*
	 * Release the futex to avoid deadlocks in single-threaded apps
	 * but keep sync locked.
	 */
	futex_up(&rcu.sync_lock.futex);

	if (blocking_mode == BM_BLOCK_FIBRIL) {
		async_usleep(RCU_SLEEP_MS * 1000);
	} else {
		thread_usleep(RCU_SLEEP_MS * 1000);
	}

	futex_down(&rcu.sync_lock.futex);
}


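/*
 * Example of how the predicates below classify fibrils (descriptive note):
 * with the current reader group RCU_GROUP_A (0x2), a fibril whose
 * nesting_cnt is 0x2 or 0x4 is a preexisting reader of group A, a fibril
 * with nesting_cnt 0x3 is a reader of group B, and nesting_cnt 0x0 or 0x1
 * means the fibril is not inside any reader section at all.
 */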
static bool is_preexisting_reader(const fibril_rcu_data_t *fib, size_t group)
{
	size_t nesting_cnt = ACCESS_ONCE(fib->nesting_cnt);

	return is_in_group(nesting_cnt, group) && is_in_reader_section(nesting_cnt);
}

static size_t get_other_group(size_t group)
{
	if (group == RCU_GROUP_A)
		return RCU_GROUP_B;
	else
		return RCU_GROUP_A;
}

static bool is_in_reader_section(size_t nesting_cnt)
{
	return RCU_NESTING_INC <= nesting_cnt;
}

static bool is_in_group(size_t nesting_cnt, size_t group)
{
	return (nesting_cnt & RCU_GROUP_BIT_MASK) == (group & RCU_GROUP_BIT_MASK);
}



/** @}
 */