source: mainline/uspace/lib/urcu/rcu.c@ 99022de

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 99022de was 99022de, checked in by Adam Hraska <adam.hraska+hos@…>, 13 years ago

urcu: Added early exit for rcu_synchronize() if it was stuck waiting for rcu fibril mutex for too long.

  • Property mode set to 100644
File size: 10.1 KB
Line 
1/*
2 * Copyright (c) 2012 Adam Hraska
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup liburcu
30 * @{
31 */
32/**
33 * @file
34 *
35 * User space RCU is based on URCU utilizing signals [1]. This
36 * implementation does not however signal each thread of the process
37 * to issue a memory barrier. Instead, we introduced a syscall that
 * issues memory barriers (via IPIs) on cpus that are running threads
 * of the current process. This approach has two advantages: first, it
 * does not require us to schedule and run every thread of the process.
 * Second, IPIs are less intrusive than switching contexts and entering
 * user space.
42 *
43 * This algorithm is further modified to require a single instead of
44 * two reader group changes per grace period. Signal-URCU flips
45 * the reader group and waits for readers of the previous group
46 * twice in succession in order to wait for new readers that were
47 * delayed and mistakenly associated with the previous reader group.
48 * The modified algorithm ensures that the new reader group is
49 * always empty (by explicitly waiting for it to become empty).
50 * Only then does it flip the reader group and wait for preexisting
51 * readers of the old reader group (invariant of SRCU [2, 3]).
52 *
53 *
54 * [1] User-level implementations of read-copy update,
55 * 2012, appendix
56 * http://www.rdrop.com/users/paulmck/RCU/urcu-supp-accepted.2011.08.30a.pdf
57 *
58 * [2] linux/kernel/srcu.c in Linux 3.5-rc2,
59 * 2012
60 * http://tomoyo.sourceforge.jp/cgi-bin/lxr/source/kernel/srcu.c?v=linux-3.5-rc2-ccs-1.8.3
61 *
62 * [3] [RFC PATCH 5/5 single-thread-version] implement
63 * per-domain single-thread state machine,
64 * 2012, Lai
65 * https://lkml.org/lkml/2012/3/6/586
66 */
67
68#include "rcu.h"
69#include <fibril_synch.h>
70#include <fibril.h>
71#include <stdio.h>
72#include <compiler/barrier.h>
73#include <libarch/barrier.h>
74#include <adt/list.h>
75#include <futex.h>
76#include <macros.h>
77#include <async.h>
78#include <smp_memory_barrier.h>
79
/* Sleep between polls of reader progress, in milliseconds. */
#define RCU_SLEEP_MS 10

/*
 * Layout of rcu_fibril_data_t.nesting_cnt: bit 0 carries the reader
 * group (A/B) the fibril is associated with; the bits at and above
 * RCU_NESTING_SHIFT hold the reader section nesting count.
 */
#define RCU_NESTING_SHIFT 1
#define RCU_NESTING_INC (1 << RCU_NESTING_SHIFT)
/* Mask selecting the reader group bit of a nesting_cnt. */
#define RCU_GROUP_BIT_MASK (size_t)(RCU_NESTING_INC - 1)
/* Group constants include one nesting increment, ie denote an active reader. */
#define RCU_GROUP_A (size_t)(0 | RCU_NESTING_INC)
#define RCU_GROUP_B (size_t)(1 | RCU_NESTING_INC)
88
/** Per-fibril RCU state. */
typedef struct rcu_fibril_data {
	/* Reader section nesting count combined with the reader group bit. */
	size_t nesting_cnt;
	/* Link in rcu.fibrils_list; protected by rcu.list_futex. */
	link_t link;
} rcu_fibril_data_t;

/** Process-global RCU state shared by all registered fibrils. */
typedef struct rcu_data {
	/* Serializes concurrent rcu_synchronize() invocations. */
	fibril_mutex_t mtx;
	/* Approximate number of the grace period in progress. */
	size_t cur_gp;
	/* Group (RCU_GROUP_A/B) that new readers associate with. */
	size_t reader_group;
	/* Protects fibrils_list. */
	futex_t list_futex;
	/* All registered fibrils' rcu_fibril_data_t, linked via .link. */
	list_t fibrils_list;
} rcu_data_t;
101
102
/* This fibril's private RCU state (one instance per fibril). */
static fibril_local rcu_fibril_data_t rcu_fibril = {
	.nesting_cnt = 0
};

/* The single process-global RCU state. */
static rcu_data_t rcu = {
	.mtx = FIBRIL_MUTEX_INITIALIZER(rcu.mtx),
	.cur_gp = 0,
	.reader_group = RCU_GROUP_A,
	.list_futex = FUTEX_INITIALIZER,
	.fibrils_list = LIST_INITIALIZER(rcu.fibrils_list),
};
114
115
/* Internal helpers; definitions below. */
static void wait_for_readers(size_t reader_group);
static void force_mb_in_all_threads(void);
static bool is_preexisting_reader(const rcu_fibril_data_t *fib, size_t group);

static bool is_in_group(size_t nesting_cnt, size_t group);
static bool is_in_reader_section(size_t nesting_cnt);
static size_t get_other_group(size_t group);
123
124
/** Registers a fibril so it may start using RCU read sections.
 *
 * A fibril must be registered with rcu before it can enter RCU critical
 * sections delineated by rcu_read_lock() and rcu_read_unlock().
 */
void rcu_register_fibril(void)
{
	/* Add this fibril's state to the global list under the list futex. */
	futex_down(&rcu.list_futex);
	list_append(&rcu_fibril.link, &rcu.fibrils_list);
	futex_up(&rcu.list_futex);
}
136
/** Deregisters a fibril that had been using RCU read sections.
 *
 * A fibril must be deregistered before it exits if it had
 * been registered with rcu via rcu_register_fibril().
 */
void rcu_deregister_fibril(void)
{
	/*
	 * Forcefully unlock any reader sections. The fibril is exiting
	 * so it is not holding any references to data protected by the
	 * rcu section. Therefore, it is safe to unlock. Otherwise,
	 * rcu_synchronize() would wait indefinitely.
	 */
	memory_barrier();
	rcu_fibril.nesting_cnt = 0;

	/* Unlink this fibril so wait_for_readers() stops inspecting it. */
	futex_down(&rcu.list_futex);
	list_remove(&rcu_fibril.link);
	futex_up(&rcu.list_futex);
}
157
/** Delimits the start of an RCU reader critical section.
 *
 * RCU reader sections may be nested.
 */
void rcu_read_lock(void)
{
	size_t nesting_cnt = ACCESS_ONCE(rcu_fibril.nesting_cnt);

	if (0 == (nesting_cnt >> RCU_NESTING_SHIFT)) {
		/*
		 * Outermost lock: associate with the current reader group.
		 * The group constant already includes one RCU_NESTING_INC,
		 * so this also sets the nesting count to one.
		 */
		ACCESS_ONCE(rcu_fibril.nesting_cnt) = ACCESS_ONCE(rcu.reader_group);
		/* Required by MB_FORCE_L */
		compiler_barrier(); /* CC_BAR_L */
	} else {
		/* Nested lock: bump the count, keep the group bit. */
		ACCESS_ONCE(rcu_fibril.nesting_cnt) = nesting_cnt + RCU_NESTING_INC;
	}
}
174
/** Delimits the end of an RCU reader critical section. */
void rcu_read_unlock(void)
{
	/* Required by MB_FORCE_U */
	compiler_barrier(); /* CC_BAR_U */
	/* todo: ACCESS_ONCE(nesting_cnt) ? */
	rcu_fibril.nesting_cnt -= RCU_NESTING_INC;
}
183
184/** Blocks until all preexisting readers exit their critical sections. */
185void rcu_synchronize(void)
186{
187 /* Contain load of rcu.cur_gp. */
188 memory_barrier();
189
190 /* Approximately the number of the GP in progress. */
191 size_t gp_in_progress = ACCESS_ONCE(rcu.cur_gp);
192
193 /* todo: early exit for batched sync()s */
194 fibril_mutex_lock(&rcu.mtx);
195
196 /*
197 * Exit early if we were stuck waiting for the mutex for a full grace
198 * period. Started waiting during gp_in_progress (or gp_in_progress + 1
199 * if the value propagated to this cpu too late) so wait for the next
200 * full GP, gp_in_progress + 1, to finish. Ie don't wait if the GP
201 * after that, gp_in_progress + 2, already started.
202 */
203 if (rcu.cur_gp + 2 >= gp_in_progress) {
204 fibril_mutex_unlock(&rcu.mtx);
205 return;
206 }
207
208 ++ACCESS_ONCE(rcu.cur_gp);
209
210 /*
211 * Pairs up with MB_FORCE_L (ie CC_BAR_L). Makes changes prior
212 * to rcu_synchronize() visible to new readers.
213 */
214 memory_barrier(); /* MB_A */
215
216 /*
217 * Pairs up with MB_A.
218 *
219 * If the memory barrier is issued before CC_BAR_L in the target
220 * thread, it pairs up with MB_A and the thread sees all changes
221 * prior to rcu_synchronize(). Ie any reader sections are new
222 * rcu readers.
223 *
224 * If the memory barrier is issued after CC_BAR_L, it pairs up
225 * with MB_B and it will make the most recent nesting_cnt visible
226 * in this thread. Since the reader may have already accessed
227 * memory protected by RCU (it ran instructions passed CC_BAR_L),
228 * it is a preexisting reader. Seeing the most recent nesting_cnt
229 * ensures the thread will be identified as a preexisting reader
230 * and we will wait for it in wait_for_readers(old_reader_group).
231 */
232 force_mb_in_all_threads(); /* MB_FORCE_L */
233
234 /*
235 * Pairs with MB_FORCE_L (ie CC_BAR_L, CC_BAR_U) and makes the most
236 * current fibril.nesting_cnt visible to this cpu.
237 */
238 read_barrier(); /* MB_B */
239
240 size_t new_reader_group = get_other_group(rcu.reader_group);
241 wait_for_readers(new_reader_group);
242
243 /* Separates waiting for readers in new_reader_group from group flip. */
244 memory_barrier();
245
246 /* Flip the group new readers should associate with. */
247 size_t old_reader_group = rcu.reader_group;
248 rcu.reader_group = new_reader_group;
249
250 /* Flip the group before waiting for preexisting readers in the old group.*/
251 memory_barrier();
252
253 wait_for_readers(old_reader_group);
254
255 /* MB_FORCE_U */
256 force_mb_in_all_threads(); /* MB_FORCE_U */
257
258 fibril_mutex_unlock(&rcu.mtx);
259}
260
/** Issues a memory barrier in each thread of this process. */
static void force_mb_in_all_threads(void)
{
	/*
	 * Only issue barriers in running threads. The scheduler will
	 * execute additional memory barriers when switching to threads
	 * of the process that are currently not running.
	 */
	smp_memory_barrier();
}
271
/** Waits for readers of reader_group to exit their reader sections. */
static void wait_for_readers(size_t reader_group)
{
	futex_down(&rcu.list_futex);

	/* Fibrils already found quiescent are parked here to avoid rescanning. */
	list_t quiescent_fibrils;
	list_initialize(&quiescent_fibrils);

	while (!list_empty(&rcu.fibrils_list)) {
		list_foreach_safe(rcu.fibrils_list, fibril_it, next_fibril) {
			rcu_fibril_data_t *fib = member_to_inst(fibril_it,
				rcu_fibril_data_t, link);

			if (is_preexisting_reader(fib, reader_group)) {
				/*
				 * Still in a reader section of reader_group;
				 * drop the futex while sleeping so the reader
				 * can deregister, then rescan from the start.
				 */
				futex_up(&rcu.list_futex);
				async_usleep(RCU_SLEEP_MS * 1000);
				futex_down(&rcu.list_futex);
				break;
			} else {
				list_remove(fibril_it);
				list_append(fibril_it, &quiescent_fibrils);
			}
		}
	}

	/* Restore the global list before releasing the futex. */
	list_concat(&rcu.fibrils_list, &quiescent_fibrils);
	futex_up(&rcu.list_futex);
}
300
301static bool is_preexisting_reader(const rcu_fibril_data_t *fib, size_t group)
302{
303 size_t nesting_cnt = ACCESS_ONCE(fib->nesting_cnt);
304
305 return is_in_group(nesting_cnt, group) && is_in_reader_section(nesting_cnt);
306}
307
308static size_t get_other_group(size_t group)
309{
310 if (group == RCU_GROUP_A)
311 return RCU_GROUP_B;
312 else
313 return RCU_GROUP_A;
314}
315
316static bool is_in_reader_section(size_t nesting_cnt)
317{
318 return RCU_NESTING_INC <= nesting_cnt;
319}
320
321static bool is_in_group(size_t nesting_cnt, size_t group)
322{
323 return (nesting_cnt & RCU_GROUP_BIT_MASK) == (group & RCU_GROUP_BIT_MASK);
324}
325
326
327
328/** @}
329 */
Note: See TracBrowser for help on using the repository browser.