source: mainline/uspace/lib/c/generic/rcu.c@514d561

Last change on this file: 514d561, checked in by Jiří Zárevúcky <jiri.zarevucky@…>, 7 years ago

Fibril/async implementation overhaul.

This commit marks the move towards treating the fibril library as a mere
implementation of a generic threading interface. Understood as a layer that
wraps the kernel threads, the library has to wrap not only threading itself, but
also every syscall that blocks the kernel thread (by blocking, we mean the thread
not doing useful work until an external event happens — e.g. locking a kernel
mutex or sleeping counts as blocking, but an as_area_create() call does not,
despite potentially taking a long time to complete).

Consequently, we implement fibril_ipc_wait() as a fibril-native wrapper for the
kernel's ipc_wait(), and also implement timer functionality like timeouts
as part of the fibril library. This removes the interdependency between the
fibril implementation and the async framework — in theory, the fibril API could
be reimplemented as a simple 1:1 shim, and the async framework would continue
working normally (note that the current implementation of the loader complicates
this).

To better isolate the fibril internals from the implementation of high-level
synchronization, a fibril_event_t is added. This object conceptually acts
like a single slot wait queue. All other synchronization is implemented in
terms of this primitive.
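To make that last point concrete, the following is a minimal sketch of the pattern this primitive enables: a one-shot hand-off between two fibrils. It uses only names that appear in the rcu.c listing below (fibril_event_t, FIBRIL_EVENT_INIT, fibril_wait_for(), fibril_notify()); the producer/consumer framing, the way the two fibrils are started, and the memory-ordering details are assumptions made for the example, not part of the commit.

#include <fibril.h>

/* A one-shot hand-off built on the single-slot wait queue. */
static fibril_event_t done = FIBRIL_EVENT_INIT;
static int result;

/* Producer fibril: publish the result, then wake whoever waits on 'done'. */
static void produce(int value)
{
    result = value;
    fibril_notify(&done);
}

/* Consumer fibril: park on 'done' until the producer has notified it. */
static void consume(void)
{
    fibril_wait_for(&done);
    /* 'result' is published; memory-ordering details are glossed over here. */
}

lock_sync() and unlock_sync() in the listing below use the same wait/notify pairing to build a fibril-blocking lock.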

/*
 * Copyright (c) 2012 Adam Hraska
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/** @addtogroup liburcu
 * @{
 */
/**
 * @file
 *
 * User space RCU is based on URCU utilizing signals [1]. This
 * implementation does not, however, signal each thread of the process
 * to issue a memory barrier. Instead, we introduced a syscall that
 * issues memory barriers (via IPIs) on cpus that are running threads
 * of the current process. This has two advantages: first, it does not
 * require us to schedule and run every thread of the process; second,
 * IPIs are less intrusive than switching contexts and entering user
 * space.
 *
 * The algorithm of [1] is further modified to require a single reader
 * group change per grace period instead of two. Signal-URCU flips
 * the reader group and waits for readers of the previous group
 * twice in succession in order to wait for new readers that were
 * delayed and mistakenly associated with the previous reader group.
 * The modified algorithm ensures that the new reader group is
 * always empty (by explicitly waiting for it to become empty).
 * Only then does it flip the reader group and wait for preexisting
 * readers of the old reader group (an invariant of SRCU [2, 3]).
 *
 *
 * [1] User-level implementations of read-copy update,
 *     2012, appendix
 *     http://www.rdrop.com/users/paulmck/RCU/urcu-supp-accepted.2011.08.30a.pdf
 *
 * [2] linux/kernel/srcu.c in Linux 3.5-rc2,
 *     2012
 *     http://tomoyo.sourceforge.jp/cgi-bin/lxr/source/kernel/srcu.c?v=linux-3.5-rc2-ccs-1.8.3
 *
 * [3] [RFC PATCH 5/5 single-thread-version] implement
 *     per-domain single-thread state machine,
 *     2012, Lai
 *     https://lkml.org/lkml/2012/3/6/586
 */

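/*
 * In terms of the code below: rcu_read_lock() and rcu_read_unlock() form the
 * reader side, rcu_synchronize() detects a grace period by forcing a memory
 * barrier in every thread, waiting for the unused (new) reader group to
 * become empty, flipping rcu.reader_group and then waiting for preexisting
 * readers of the old group, and wait_for_readers() polls any still-active
 * reader every RCU_SLEEP_MS milliseconds.
 */
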
#include "rcu.h"
#include <fibril_synch.h>
#include <fibril.h>
#include <stdio.h>
#include <stddef.h>
#include <compiler/barrier.h>
#include <libarch/barrier.h>
#include <futex.h>
#include <macros.h>
#include <async.h>
#include <adt/list.h>
#include <smp_memory_barrier.h>
#include <assert.h>
#include <time.h>

#include "private/fibril.h"


/** RCU sleeps for RCU_SLEEP_MS before polling an active RCU reader again. */
#define RCU_SLEEP_MS 10

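/*
 * Note on the encoding used below: bit 0 of a fibril's nesting_cnt records
 * which reader group (A or B) the fibril associated with when entering its
 * outermost reader section, and the bits above RCU_NESTING_SHIFT count how
 * deeply its reader sections are nested. A value below RCU_NESTING_INC thus
 * means the fibril is currently outside all reader sections.
 */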
#define RCU_NESTING_SHIFT 1
#define RCU_NESTING_INC (1 << RCU_NESTING_SHIFT)
#define RCU_GROUP_BIT_MASK (size_t)(RCU_NESTING_INC - 1)
#define RCU_GROUP_A (size_t)(0 | RCU_NESTING_INC)
#define RCU_GROUP_B (size_t)(1 | RCU_NESTING_INC)


/** Fibril local RCU data. */
typedef struct fibril_rcu_data {
    size_t nesting_cnt;
    link_t link;
    bool registered;
} fibril_rcu_data_t;

/** Process global RCU data. */
typedef struct rcu_data {
    size_t cur_gp;
    size_t reader_group;
    fibril_rmutex_t list_mutex;
    list_t fibrils_list;
    struct {
        fibril_rmutex_t mutex;
        bool locked;
        list_t blocked_fibrils;
    } sync_lock;
} rcu_data_t;

typedef struct blocked_fibril {
    fibril_event_t unblock;
    link_t link;
    bool is_ready;
} blocked_fibril_t;


/** Fibril local RCU data. */
static fibril_local fibril_rcu_data_t fibril_rcu = {
    .nesting_cnt = 0,
    .link = {
        .next = NULL,
        .prev = NULL
    },
    .registered = false
};

/** Process global RCU data. */
static rcu_data_t rcu = {
    .cur_gp = 0,
    .reader_group = RCU_GROUP_A,
    .list_mutex = FIBRIL_RMUTEX_INITIALIZER(rcu.list_mutex),
    .fibrils_list = LIST_INITIALIZER(rcu.fibrils_list),
    .sync_lock = {
        .mutex = FIBRIL_RMUTEX_INITIALIZER(rcu.sync_lock.mutex),
        .locked = false,
        .blocked_fibrils = LIST_INITIALIZER(rcu.sync_lock.blocked_fibrils),
    },
};


static void wait_for_readers(size_t reader_group);
static void force_mb_in_all_threads(void);
static bool is_preexisting_reader(const fibril_rcu_data_t *fib, size_t group);

static void lock_sync(void);
static void unlock_sync(void);
static void sync_sleep(void);

static bool is_in_group(size_t nesting_cnt, size_t group);
static bool is_in_reader_section(size_t nesting_cnt);
static size_t get_other_group(size_t group);


/** Registers a fibril so it may start using RCU read sections.
 *
 * A fibril must be registered with rcu before it can enter RCU critical
 * sections delineated by rcu_read_lock() and rcu_read_unlock().
 */
void rcu_register_fibril(void)
{
    assert(!fibril_rcu.registered);

    fibril_rmutex_lock(&rcu.list_mutex);
    list_append(&fibril_rcu.link, &rcu.fibrils_list);
    fibril_rmutex_unlock(&rcu.list_mutex);

    fibril_rcu.registered = true;
}

/** Deregisters a fibril that had been using RCU read sections.
 *
 * A fibril must be deregistered before it exits if it had
 * been registered with rcu via rcu_register_fibril().
 */
void rcu_deregister_fibril(void)
{
    assert(fibril_rcu.registered);

    /*
     * Forcefully unlock any reader sections. The fibril is exiting
     * so it is not holding any references to data protected by the
     * rcu section. Therefore, it is safe to unlock. Otherwise,
     * rcu_synchronize() would wait indefinitely.
     */
    memory_barrier();
    fibril_rcu.nesting_cnt = 0;

    fibril_rmutex_lock(&rcu.list_mutex);
    list_remove(&fibril_rcu.link);
    fibril_rmutex_unlock(&rcu.list_mutex);

    fibril_rcu.registered = false;
}

/** Delimits the start of an RCU reader critical section.
 *
 * RCU reader sections may be nested.
 */
void rcu_read_lock(void)
{
    assert(fibril_rcu.registered);

    size_t nesting_cnt = ACCESS_ONCE(fibril_rcu.nesting_cnt);

    if (0 == (nesting_cnt >> RCU_NESTING_SHIFT)) {
        ACCESS_ONCE(fibril_rcu.nesting_cnt) = ACCESS_ONCE(rcu.reader_group);
        /* Required by MB_FORCE_L */
        compiler_barrier(); /* CC_BAR_L */
    } else {
        ACCESS_ONCE(fibril_rcu.nesting_cnt) = nesting_cnt + RCU_NESTING_INC;
    }
}

/** Delimits the end of an RCU reader critical section. */
void rcu_read_unlock(void)
{
    assert(fibril_rcu.registered);
    assert(rcu_read_locked());

    /* Required by MB_FORCE_U */
    compiler_barrier(); /* CC_BAR_U */
    /* todo: ACCESS_ONCE(nesting_cnt) ? */
    fibril_rcu.nesting_cnt -= RCU_NESTING_INC;
}

/** Returns true if the current fibril is in an RCU reader section. */
bool rcu_read_locked(void)
{
    return 0 != (fibril_rcu.nesting_cnt >> RCU_NESTING_SHIFT);
}

/** Blocks until all preexisting readers exit their critical sections. */
void rcu_synchronize(void)
{
    assert(!rcu_read_locked());

    /* Contain load of rcu.cur_gp. */
    memory_barrier();

    /* Approximately the number of the GP in progress. */
    size_t gp_in_progress = ACCESS_ONCE(rcu.cur_gp);

    lock_sync();

    /*
     * Exit early if we were stuck waiting for the mutex for a full grace
     * period. Started waiting during gp_in_progress (or gp_in_progress + 1
     * if the value propagated to this cpu too late) so wait for the next
     * full GP, gp_in_progress + 1, to finish. I.e. don't wait if the GP
     * after that, gp_in_progress + 2, already started.
     */
    /* rcu.cur_gp >= gp_in_progress + 2, but tolerates overflows. */
    if (rcu.cur_gp != gp_in_progress && rcu.cur_gp + 1 != gp_in_progress) {
        unlock_sync();
        return;
    }

    ++ACCESS_ONCE(rcu.cur_gp);

    /*
     * Pairs up with MB_FORCE_L (ie CC_BAR_L). Makes changes prior
     * to rcu_synchronize() visible to new readers.
     */
    memory_barrier(); /* MB_A */

    /*
     * Pairs up with MB_A.
     *
     * If the memory barrier is issued before CC_BAR_L in the target
     * thread, it pairs up with MB_A and the thread sees all changes
     * prior to rcu_synchronize(). I.e. any reader sections are new
     * rcu readers.
     *
     * If the memory barrier is issued after CC_BAR_L, it pairs up
     * with MB_B and it will make the most recent nesting_cnt visible
     * in this thread. Since the reader may have already accessed
     * memory protected by RCU (it ran instructions past CC_BAR_L),
     * it is a preexisting reader. Seeing the most recent nesting_cnt
     * ensures the thread will be identified as a preexisting reader
     * and we will wait for it in wait_for_readers(old_reader_group).
     */
    force_mb_in_all_threads(); /* MB_FORCE_L */

    /*
     * Pairs with MB_FORCE_L (ie CC_BAR_L, CC_BAR_U) and makes the most
     * current fibril.nesting_cnt visible to this cpu.
     */
    read_barrier(); /* MB_B */

    size_t new_reader_group = get_other_group(rcu.reader_group);
    wait_for_readers(new_reader_group);

    /* Separates waiting for readers in new_reader_group from group flip. */
    memory_barrier();

    /* Flip the group new readers should associate with. */
    size_t old_reader_group = rcu.reader_group;
    rcu.reader_group = new_reader_group;

    /* Flip the group before waiting for preexisting readers in the old group. */
    memory_barrier();

    wait_for_readers(old_reader_group);

    /* MB_FORCE_U */
    force_mb_in_all_threads(); /* MB_FORCE_U */

    unlock_sync();
}

/** Issues a memory barrier in each thread of this process. */
static void force_mb_in_all_threads(void)
{
    /*
     * Only issue barriers in running threads. The scheduler will
     * execute additional memory barriers when switching to threads
     * of the process that are currently not running.
     */
    smp_memory_barrier();
}

/** Waits for readers of reader_group to exit their reader sections. */
static void wait_for_readers(size_t reader_group)
{
    fibril_rmutex_lock(&rcu.list_mutex);

    list_t quiescent_fibrils;
    list_initialize(&quiescent_fibrils);

    while (!list_empty(&rcu.fibrils_list)) {
        list_foreach_safe(rcu.fibrils_list, fibril_it, next_fibril) {
            fibril_rcu_data_t *fib = member_to_inst(fibril_it,
                fibril_rcu_data_t, link);

            if (is_preexisting_reader(fib, reader_group)) {
                fibril_rmutex_unlock(&rcu.list_mutex);
                sync_sleep();
                fibril_rmutex_lock(&rcu.list_mutex);
                /* Break to the outer while loop. */
                break;
            } else {
                list_remove(fibril_it);
                list_append(fibril_it, &quiescent_fibrils);
            }
        }
    }

    list_concat(&rcu.fibrils_list, &quiescent_fibrils);
    fibril_rmutex_unlock(&rcu.list_mutex);
}

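/*
 * sync_lock serializes callers of rcu_synchronize(). A fibril that finds the
 * lock taken parks on its own fibril_event_t; unlock_sync() wakes the first
 * parked fibril, which then retries taking the lock. sync_sleep() temporarily
 * drops the backing mutex while an active reader is being polled, so that a
 * single-threaded program does not deadlock.
 */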
static void lock_sync(void)
{
    fibril_rmutex_lock(&rcu.sync_lock.mutex);
    if (rcu.sync_lock.locked) {
        blocked_fibril_t blocked_fib;
        blocked_fib.unblock = FIBRIL_EVENT_INIT;

        list_append(&blocked_fib.link, &rcu.sync_lock.blocked_fibrils);

        do {
            blocked_fib.is_ready = false;
            fibril_rmutex_unlock(&rcu.sync_lock.mutex);
            fibril_wait_for(&blocked_fib.unblock);
            fibril_rmutex_lock(&rcu.sync_lock.mutex);
        } while (rcu.sync_lock.locked);

        list_remove(&blocked_fib.link);
        rcu.sync_lock.locked = true;
    } else {
        rcu.sync_lock.locked = true;
    }
}

static void unlock_sync(void)
{
    assert(rcu.sync_lock.locked);

    /* Unlock but wake up any fibrils waiting for the lock. */

    if (!list_empty(&rcu.sync_lock.blocked_fibrils)) {
        blocked_fibril_t *blocked_fib = member_to_inst(
            list_first(&rcu.sync_lock.blocked_fibrils), blocked_fibril_t, link);

        if (!blocked_fib->is_ready) {
            blocked_fib->is_ready = true;
            fibril_notify(&blocked_fib->unblock);
        }
    }

    rcu.sync_lock.locked = false;
    fibril_rmutex_unlock(&rcu.sync_lock.mutex);
}

static void sync_sleep(void)
{
    assert(rcu.sync_lock.locked);
    /*
     * Release the futex to avoid deadlocks in singlethreaded apps
     * but keep sync locked.
     */
    fibril_rmutex_unlock(&rcu.sync_lock.mutex);
    fibril_usleep(RCU_SLEEP_MS * 1000);
    fibril_rmutex_lock(&rcu.sync_lock.mutex);
}


static bool is_preexisting_reader(const fibril_rcu_data_t *fib, size_t group)
{
    size_t nesting_cnt = ACCESS_ONCE(fib->nesting_cnt);

    return is_in_group(nesting_cnt, group) && is_in_reader_section(nesting_cnt);
}

static size_t get_other_group(size_t group)
{
    if (group == RCU_GROUP_A)
        return RCU_GROUP_B;
    else
        return RCU_GROUP_A;
}

static bool is_in_reader_section(size_t nesting_cnt)
{
    return RCU_NESTING_INC <= nesting_cnt;
}

static bool is_in_group(size_t nesting_cnt, size_t group)
{
    return (nesting_cnt & RCU_GROUP_BIT_MASK) == (group & RCU_GROUP_BIT_MASK);
}



/** @}
 */
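
To show how the pieces above fit together, here is a minimal usage sketch of the public API defined in this file (rcu_register_fibril(), rcu_read_lock()/rcu_read_unlock(), rcu_synchronize(), rcu_deregister_fibril()). The config_t type, the shared pointer, and the way it is published are illustrative assumptions, not part of this library; a real writer would publish the new pointer with an appropriate atomic store or barrier, which the sketch only marks with comments.

#include "rcu.h"
#include <stdlib.h>

/* Hypothetical shared state: a pointer readers follow and a writer replaces. */
typedef struct config {
    int value;
} config_t;

static config_t *shared_cfg;

/* Reader fibril: must register before entering reader sections.
 * (In real code a fibril would typically register once, when it starts.) */
static int read_value(void)
{
    rcu_register_fibril();

    rcu_read_lock();
    config_t *cfg = shared_cfg;    /* stays valid until rcu_read_unlock() */
    int v = cfg->value;
    rcu_read_unlock();

    rcu_deregister_fibril();
    return v;
}

/* Writer: install a new copy, wait out preexisting readers, free the old one. */
static void update_value(int new_value)
{
    config_t *new_cfg = malloc(sizeof(config_t));
    new_cfg->value = new_value;

    config_t *old_cfg = shared_cfg;
    shared_cfg = new_cfg;    /* publication; barriers omitted in this sketch */

    rcu_synchronize();       /* all readers that could see old_cfg are done */
    free(old_cfg);
}

The key property used here is the one documented above: once rcu_synchronize() returns, every reader section that could still observe old_cfg has exited, so freeing the old copy is safe.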