source: mainline/uspace/lib/c/generic/rcu.c@ c8dc9ac

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since c8dc9ac was af2254ec, checked in by Jiri Svoboda <jiri@…>, 9 years ago

Fix build with lightweight futex locks enabled.

  • Property mode set to 100644
File size: 13.4 KB
Line 
1/*
2 * Copyright (c) 2012 Adam Hraska
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup liburcu
30 * @{
31 */
32/**
33 * @file
34 *
35 * User space RCU is based on URCU utilizing signals [1]. This
36 * implementation does not however signal each thread of the process
37 * to issue a memory barrier. Instead, we introduced a syscall that
38 * issues memory barriers (via IPIs) on cpus that are running threads
39 * of the current process. First, it does not require us to schedule
40 * and run every thread of the process. Second, IPIs are less intrusive
41 * than switching contexts and entering user space.
42 *
43 * This algorithm is further modified to require a single instead of
44 * two reader group changes per grace period. Signal-URCU flips
45 * the reader group and waits for readers of the previous group
46 * twice in succession in order to wait for new readers that were
47 * delayed and mistakenly associated with the previous reader group.
48 * The modified algorithm ensures that the new reader group is
49 * always empty (by explicitly waiting for it to become empty).
50 * Only then does it flip the reader group and wait for preexisting
51 * readers of the old reader group (invariant of SRCU [2, 3]).
52 *
53 *
54 * [1] User-level implementations of read-copy update,
55 * 2012, appendix
56 * http://www.rdrop.com/users/paulmck/RCU/urcu-supp-accepted.2011.08.30a.pdf
57 *
58 * [2] linux/kernel/srcu.c in Linux 3.5-rc2,
59 * 2012
60 * http://tomoyo.sourceforge.jp/cgi-bin/lxr/source/kernel/srcu.c?v=linux-3.5-rc2-ccs-1.8.3
61 *
62 * [3] [RFC PATCH 5/5 single-thread-version] implement
63 * per-domain single-thread state machine,
64 * 2012, Lai
65 * https://lkml.org/lkml/2012/3/6/586
66 */
67
68#include "rcu.h"
69#include <fibril_synch.h>
70#include <fibril.h>
71#include <stdio.h>
72#include <compiler/barrier.h>
73#include <libarch/barrier.h>
74#include <futex.h>
75#include <macros.h>
76#include <async.h>
77#include <adt/list.h>
78#include <smp_memory_barrier.h>
79#include <assert.h>
80#include <time.h>
81
82
/** RCU sleeps for RCU_SLEEP_MS before polling an active RCU reader again. */
#define RCU_SLEEP_MS  10

/*
 * Layout of a fibril's nesting_cnt word:
 *  - the bits below RCU_NESTING_SHIFT identify the reader group the fibril
 *    associated itself with when it entered its outermost reader section,
 *  - the remaining upper bits count how deeply reader sections are nested.
 *
 * RCU_GROUP_A and RCU_GROUP_B are the two possible values of
 * rcu.reader_group; each already carries a nesting depth of one so that
 * it can be stored directly into nesting_cnt by rcu_read_lock().
 *
 * Every expansion is fully parenthesized so the macros behave as a single
 * primary expression regardless of the surrounding operators.
 */
#define RCU_NESTING_SHIFT   1
#define RCU_NESTING_INC     (1 << RCU_NESTING_SHIFT)
#define RCU_GROUP_BIT_MASK  ((size_t) (RCU_NESTING_INC - 1))
#define RCU_GROUP_A         ((size_t) (0 | RCU_NESTING_INC))
#define RCU_GROUP_B         ((size_t) (1 | RCU_NESTING_INC))
91
92
93/** Fibril local RCU data. */
94typedef struct fibril_rcu_data {
95 size_t nesting_cnt;
96 link_t link;
97 bool registered;
98} fibril_rcu_data_t;
99
100/** Process global RCU data. */
101typedef struct rcu_data {
102 size_t cur_gp;
103 size_t reader_group;
104 futex_t list_futex;
105 list_t fibrils_list;
106 struct {
107 futex_t futex;
108 bool locked;
109 list_t blocked_fibrils;
110 size_t blocked_thread_cnt;
111 futex_t futex_blocking_threads;
112 } sync_lock;
113} rcu_data_t;
114
115typedef struct blocked_fibril {
116 fid_t id;
117 link_t link;
118 bool is_ready;
119} blocked_fibril_t;
120
121
122/** Fibril local RCU data. */
123static fibril_local fibril_rcu_data_t fibril_rcu = {
124 .nesting_cnt = 0,
125 .link = {
126 .next = NULL,
127 .prev = NULL
128 },
129 .registered = false
130};
131
132/** Process global RCU data. */
133static rcu_data_t rcu = {
134 .cur_gp = 0,
135 .reader_group = RCU_GROUP_A,
136 .list_futex = FUTEX_INITIALIZER,
137 .fibrils_list = LIST_INITIALIZER(rcu.fibrils_list),
138 .sync_lock = {
139 .futex = FUTEX_INITIALIZER,
140 .locked = false,
141 .blocked_fibrils = LIST_INITIALIZER(rcu.sync_lock.blocked_fibrils),
142 .blocked_thread_cnt = 0,
143 .futex_blocking_threads = FUTEX_INITIALIZE(0),
144 },
145};
146
147
148static void wait_for_readers(size_t reader_group, blocking_mode_t blocking_mode);
149static void force_mb_in_all_threads(void);
150static bool is_preexisting_reader(const fibril_rcu_data_t *fib, size_t group);
151
152static void lock_sync(blocking_mode_t blocking_mode);
153static void unlock_sync(void);
154static void sync_sleep(blocking_mode_t blocking_mode);
155
156static bool is_in_group(size_t nesting_cnt, size_t group);
157static bool is_in_reader_section(size_t nesting_cnt);
158static size_t get_other_group(size_t group);
159
160
161/** Registers a fibril so it may start using RCU read sections.
162 *
163 * A fibril must be registered with rcu before it can enter RCU critical
164 * sections delineated by rcu_read_lock() and rcu_read_unlock().
165 */
166void rcu_register_fibril(void)
167{
168 assert(!fibril_rcu.registered);
169
170 futex_down(&rcu.list_futex);
171 list_append(&fibril_rcu.link, &rcu.fibrils_list);
172 futex_up(&rcu.list_futex);
173
174 fibril_rcu.registered = true;
175}
176
177/** Deregisters a fibril that had been using RCU read sections.
178 *
179 * A fibril must be deregistered before it exits if it had
180 * been registered with rcu via rcu_register_fibril().
181 */
182void rcu_deregister_fibril(void)
183{
184 assert(fibril_rcu.registered);
185
186 /*
187 * Forcefully unlock any reader sections. The fibril is exiting
188 * so it is not holding any references to data protected by the
189 * rcu section. Therefore, it is safe to unlock. Otherwise,
190 * rcu_synchronize() would wait indefinitely.
191 */
192 memory_barrier();
193 fibril_rcu.nesting_cnt = 0;
194
195 futex_down(&rcu.list_futex);
196 list_remove(&fibril_rcu.link);
197 futex_up(&rcu.list_futex);
198
199 fibril_rcu.registered = false;
200}
201
202/** Delimits the start of an RCU reader critical section.
203 *
204 * RCU reader sections may be nested.
205 */
206void rcu_read_lock(void)
207{
208 assert(fibril_rcu.registered);
209
210 size_t nesting_cnt = ACCESS_ONCE(fibril_rcu.nesting_cnt);
211
212 if (0 == (nesting_cnt >> RCU_NESTING_SHIFT)) {
213 ACCESS_ONCE(fibril_rcu.nesting_cnt) = ACCESS_ONCE(rcu.reader_group);
214 /* Required by MB_FORCE_L */
215 compiler_barrier(); /* CC_BAR_L */
216 } else {
217 ACCESS_ONCE(fibril_rcu.nesting_cnt) = nesting_cnt + RCU_NESTING_INC;
218 }
219}
220
221/** Delimits the start of an RCU reader critical section. */
222void rcu_read_unlock(void)
223{
224 assert(fibril_rcu.registered);
225 assert(rcu_read_locked());
226
227 /* Required by MB_FORCE_U */
228 compiler_barrier(); /* CC_BAR_U */
229 /* todo: ACCESS_ONCE(nesting_cnt) ? */
230 fibril_rcu.nesting_cnt -= RCU_NESTING_INC;
231}
232
233/** Returns true if the current fibril is in an RCU reader section. */
234bool rcu_read_locked(void)
235{
236 return 0 != (fibril_rcu.nesting_cnt >> RCU_NESTING_SHIFT);
237}
238
239/** Blocks until all preexisting readers exit their critical sections. */
240void _rcu_synchronize(blocking_mode_t blocking_mode)
241{
242 assert(!rcu_read_locked());
243
244 /* Contain load of rcu.cur_gp. */
245 memory_barrier();
246
247 /* Approximately the number of the GP in progress. */
248 size_t gp_in_progress = ACCESS_ONCE(rcu.cur_gp);
249
250 lock_sync(blocking_mode);
251
252 /*
253 * Exit early if we were stuck waiting for the mutex for a full grace
254 * period. Started waiting during gp_in_progress (or gp_in_progress + 1
255 * if the value propagated to this cpu too late) so wait for the next
256 * full GP, gp_in_progress + 1, to finish. Ie don't wait if the GP
257 * after that, gp_in_progress + 2, already started.
258 */
259 /* rcu.cur_gp >= gp_in_progress + 2, but tolerates overflows. */
260 if (rcu.cur_gp != gp_in_progress && rcu.cur_gp + 1 != gp_in_progress) {
261 unlock_sync();
262 return;
263 }
264
265 ++ACCESS_ONCE(rcu.cur_gp);
266
267 /*
268 * Pairs up with MB_FORCE_L (ie CC_BAR_L). Makes changes prior
269 * to rcu_synchronize() visible to new readers.
270 */
271 memory_barrier(); /* MB_A */
272
273 /*
274 * Pairs up with MB_A.
275 *
276 * If the memory barrier is issued before CC_BAR_L in the target
277 * thread, it pairs up with MB_A and the thread sees all changes
278 * prior to rcu_synchronize(). Ie any reader sections are new
279 * rcu readers.
280 *
281 * If the memory barrier is issued after CC_BAR_L, it pairs up
282 * with MB_B and it will make the most recent nesting_cnt visible
283 * in this thread. Since the reader may have already accessed
284 * memory protected by RCU (it ran instructions passed CC_BAR_L),
285 * it is a preexisting reader. Seeing the most recent nesting_cnt
286 * ensures the thread will be identified as a preexisting reader
287 * and we will wait for it in wait_for_readers(old_reader_group).
288 */
289 force_mb_in_all_threads(); /* MB_FORCE_L */
290
291 /*
292 * Pairs with MB_FORCE_L (ie CC_BAR_L, CC_BAR_U) and makes the most
293 * current fibril.nesting_cnt visible to this cpu.
294 */
295 read_barrier(); /* MB_B */
296
297 size_t new_reader_group = get_other_group(rcu.reader_group);
298 wait_for_readers(new_reader_group, blocking_mode);
299
300 /* Separates waiting for readers in new_reader_group from group flip. */
301 memory_barrier();
302
303 /* Flip the group new readers should associate with. */
304 size_t old_reader_group = rcu.reader_group;
305 rcu.reader_group = new_reader_group;
306
307 /* Flip the group before waiting for preexisting readers in the old group.*/
308 memory_barrier();
309
310 wait_for_readers(old_reader_group, blocking_mode);
311
312 /* MB_FORCE_U */
313 force_mb_in_all_threads(); /* MB_FORCE_U */
314
315 unlock_sync();
316}
317
/** Makes every thread of this process execute a memory barrier.
 *
 * Delegates to smp_memory_barrier().
 */
static void force_mb_in_all_threads(void)
{
	/*
	 * Barriers are issued only in threads that are running right now.
	 * Threads of the process that are not running will pass through
	 * additional memory barriers in the scheduler once they are
	 * switched to.
	 */
	smp_memory_barrier();
}
328
329/** Waits for readers of reader_group to exit their readers sections. */
330static void wait_for_readers(size_t reader_group, blocking_mode_t blocking_mode)
331{
332 futex_down(&rcu.list_futex);
333
334 list_t quiescent_fibrils;
335 list_initialize(&quiescent_fibrils);
336
337 while (!list_empty(&rcu.fibrils_list)) {
338 list_foreach_safe(rcu.fibrils_list, fibril_it, next_fibril) {
339 fibril_rcu_data_t *fib = member_to_inst(fibril_it,
340 fibril_rcu_data_t, link);
341
342 if (is_preexisting_reader(fib, reader_group)) {
343 futex_up(&rcu.list_futex);
344 sync_sleep(blocking_mode);
345 futex_down(&rcu.list_futex);
346 /* Break to while loop. */
347 break;
348 } else {
349 list_remove(fibril_it);
350 list_append(fibril_it, &quiescent_fibrils);
351 }
352 }
353 }
354
355 list_concat(&rcu.fibrils_list, &quiescent_fibrils);
356 futex_up(&rcu.list_futex);
357}
358
359static void lock_sync(blocking_mode_t blocking_mode)
360{
361 futex_down(&rcu.sync_lock.futex);
362 if (rcu.sync_lock.locked) {
363 if (blocking_mode == BM_BLOCK_FIBRIL) {
364 blocked_fibril_t blocked_fib;
365 blocked_fib.id = fibril_get_id();
366
367 list_append(&blocked_fib.link, &rcu.sync_lock.blocked_fibrils);
368
369 do {
370 blocked_fib.is_ready = false;
371 futex_up(&rcu.sync_lock.futex);
372 fibril_switch(FIBRIL_TO_MANAGER);
373 futex_down(&rcu.sync_lock.futex);
374 } while (rcu.sync_lock.locked);
375
376 list_remove(&blocked_fib.link);
377 rcu.sync_lock.locked = true;
378 } else {
379 assert(blocking_mode == BM_BLOCK_THREAD);
380 rcu.sync_lock.blocked_thread_cnt++;
381 futex_up(&rcu.sync_lock.futex);
382 futex_down(&rcu.sync_lock.futex_blocking_threads);
383 }
384 } else {
385 rcu.sync_lock.locked = true;
386 }
387}
388
389static void unlock_sync(void)
390{
391 assert(rcu.sync_lock.locked);
392
393 /*
394 * Blocked threads have a priority over fibrils when accessing sync().
395 * Pass the lock onto a waiting thread.
396 */
397 if (0 < rcu.sync_lock.blocked_thread_cnt) {
398 --rcu.sync_lock.blocked_thread_cnt;
399 futex_up(&rcu.sync_lock.futex_blocking_threads);
400 } else {
401 /* Unlock but wake up any fibrils waiting for the lock. */
402
403 if (!list_empty(&rcu.sync_lock.blocked_fibrils)) {
404 blocked_fibril_t *blocked_fib = member_to_inst(
405 list_first(&rcu.sync_lock.blocked_fibrils), blocked_fibril_t, link);
406
407 if (!blocked_fib->is_ready) {
408 blocked_fib->is_ready = true;
409 fibril_add_ready(blocked_fib->id);
410 }
411 }
412
413 rcu.sync_lock.locked = false;
414 futex_up(&rcu.sync_lock.futex);
415 }
416}
417
418static void sync_sleep(blocking_mode_t blocking_mode)
419{
420 assert(rcu.sync_lock.locked);
421 /*
422 * Release the futex to avoid deadlocks in singlethreaded apps
423 * but keep sync locked.
424 */
425 futex_up(&rcu.sync_lock.futex);
426
427 if (blocking_mode == BM_BLOCK_FIBRIL) {
428 async_usleep(RCU_SLEEP_MS * 1000);
429 } else {
430 usleep(RCU_SLEEP_MS * 1000);
431 }
432
433 futex_down(&rcu.sync_lock.futex);
434}
435
436
437static bool is_preexisting_reader(const fibril_rcu_data_t *fib, size_t group)
438{
439 size_t nesting_cnt = ACCESS_ONCE(fib->nesting_cnt);
440
441 return is_in_group(nesting_cnt, group) && is_in_reader_section(nesting_cnt);
442}
443
444static size_t get_other_group(size_t group)
445{
446 if (group == RCU_GROUP_A)
447 return RCU_GROUP_B;
448 else
449 return RCU_GROUP_A;
450}
451
452static bool is_in_reader_section(size_t nesting_cnt)
453{
454 return RCU_NESTING_INC <= nesting_cnt;
455}
456
457static bool is_in_group(size_t nesting_cnt, size_t group)
458{
459 return (nesting_cnt & RCU_GROUP_BIT_MASK) == (group & RCU_GROUP_BIT_MASK);
460}
461
462
463
464/** @}
465 */
Note: See TracBrowser for help on using the repository browser.