Index: kernel/generic/include/synch/rcu.h
===================================================================
--- kernel/generic/include/synch/rcu.h	(revision 2bcf6c654972c91913e02276dd74d51eb9229589)
+++ kernel/generic/include/synch/rcu.h	(revision 4ec9ea4158674bc15f751d826091779955c286a4)
@@ -111,4 +111,7 @@
 	bool expedite_arriving;
 	
+	/** Protected by global rcu.barrier_mtx. */
+	rcu_item_t barrier_item;
+	
 	/** Interruptable attached reclaimer thread. */
 	struct thread *reclaimer_thr;
@@ -202,5 +205,7 @@
 extern bool rcu_read_locked(void);
 extern void rcu_synchronize(void);
+extern void rcu_synchronize_expedite(void);
 extern void rcu_call(rcu_item_t *rcu_item, rcu_func_t func);
+extern void rcu_barrier(void);
 
 extern void rcu_print_stat(void);
@@ -215,7 +220,7 @@
 extern void rcu_before_thread_runs(void);
 
-/* Debugging/testing support. Not part of public API. Do not use! */
 extern uint64_t rcu_completed_gps(void);
 extern void _rcu_call(bool expedite, rcu_item_t *rcu_item, rcu_func_t func);
+extern void _rcu_synchronize(bool expedite);
 
 #endif
Index: kernel/generic/src/synch/rcu.c
===================================================================
--- kernel/generic/src/synch/rcu.c	(revision 2bcf6c654972c91913e02276dd74d51eb9229589)
+++ kernel/generic/src/synch/rcu.c	(revision 4ec9ea4158674bc15f751d826091779955c286a4)
@@ -41,4 +41,5 @@
 #include <synch/semaphore.h>
 #include <synch/spinlock.h>
+#include <synch/mutex.h>
 #include <proc/thread.h>
 #include <cpu/cpu_mask.h>
@@ -128,4 +129,11 @@
 	atomic_t delaying_cpu_cnt;
 	
+	/** Excludes simultaneous rcu_barrier() calls. */
+	mutex_t barrier_mtx;
+	/** Pending barrier callbacks, plus 1 while rcu_barrier() is queuing them. */
+	atomic_t barrier_wait_cnt;
+	/** rcu_barrier() waits for the completion of barrier callbacks on this wq. */
+	waitq_t barrier_wq;
+	
 	/** Interruptible attached detector thread pointer. */
 	thread_t *detector_thr;
@@ -146,4 +154,6 @@
 static void rcu_read_unlock_impl(size_t *pnesting_cnt);
 static void synch_complete(rcu_item_t *rcu_item);
+static void add_barrier_cb(void *arg);
+static void barrier_complete(rcu_item_t *barrier_item);
 static void check_qs(void);
 static void record_qs(void);
@@ -196,4 +206,8 @@
 	rcu.preempt_blocking_det = false;
 	
+	mutex_initialize(&rcu.barrier_mtx, MUTEX_PASSIVE);
+	atomic_set(&rcu.barrier_wait_cnt, 0);
+	waitq_initialize(&rcu.barrier_wq);
+	
 	atomic_set(&rcu.delaying_cpu_cnt, 0);
 	
@@ -297,6 +311,4 @@
 void rcu_stop(void)
 {
-	/* todo: stop accepting new callbacks instead of just letting them linger?*/
-	
 	/* Stop and wait for reclaimers. */
 	for (unsigned int cpu_id = 0; cpu_id < config.cpu_active; ++cpu_id) {
@@ -539,4 +551,16 @@
 void rcu_synchronize(void)
 {
+	_rcu_synchronize(false);
+}
+
+/** Same as rcu_synchronize(), but asks _rcu_synchronize() to expedite. */
+void rcu_synchronize_expedite(void)
+{
+	_rcu_synchronize(true);
+}
+
+/** Blocks until all preexisting readers exit; forwards expedite to _rcu_call(). */
+void _rcu_synchronize(bool expedite)
+{
 	/* Calling from a reader section will deadlock. */
 	ASSERT(THREAD == 0 || 0 == THREAD->rcu.nesting_cnt);
@@ -545,5 +569,5 @@
 
 	waitq_initialize(&completion.wq);
-	rcu_call(&completion.rcu_item, synch_complete);
+	_rcu_call(expedite, &completion.rcu_item, synch_complete);
 	waitq_sleep(&completion.wq);
 	waitq_complete_wakeup(&completion.wq);
@@ -556,4 +580,54 @@
 	ASSERT(completion);
 	waitq_wakeup(&completion->wq, WAKEUP_FIRST);
+}
+
+/** Waits for all outstanding rcu calls to complete. May sleep. */
+void rcu_barrier(void)
+{
+	/* 
+	 * Serialize rcu_barrier() calls so a concurrent caller cannot reuse a
+	 * cpu's rcu.barrier_item while this call still has it queued.
+	 */
+	mutex_lock(&rcu.barrier_mtx);
+	
+	/* 
+	 * Start the count at 1 (dropped below) so that barrier callbacks that
+	 * complete early cannot signal completion before all cpus are queued.
+	 */
+	atomic_set(&rcu.barrier_wait_cnt, 1);
+
+	DEFINE_CPU_MASK(cpu_mask);
+	cpu_mask_active(cpu_mask);
+	
+	cpu_mask_for_each(*cpu_mask, cpu_id) {
+		smp_call(cpu_id, add_barrier_cb, 0);
+	}
+	
+	if (0 < atomic_predec(&rcu.barrier_wait_cnt)) { /* Drop the initial 1. */
+		waitq_sleep(&rcu.barrier_wq); /* Woken by barrier_complete(). */
+	}
+	
+	mutex_unlock(&rcu.barrier_mtx);
+}
+
+/** Issues a rcu_barrier() callback on the local cpu.
+ *
+ * Must run with interrupts or preemption disabled; arg is unused.
+ */
+static void add_barrier_cb(void *arg)
+{
+	ASSERT(interrupts_disabled() || PREEMPTION_DISABLED);
+	atomic_inc(&rcu.barrier_wait_cnt); /* Balanced in barrier_complete(). */
+	rcu_call(&CPU->rcu.barrier_item, barrier_complete);
+}
+
+/** Local cpu's rcu_barrier() completion callback; barrier_item is unused. */
+static void barrier_complete(rcu_item_t *barrier_item)
+{
+	/* Did the count reach zero, ie was this the last outstanding reference? */
+	if (0 == atomic_predec(&rcu.barrier_wait_cnt)) {
+		/* Notify rcu_barrier() that we're done. */
+		waitq_wakeup(&rcu.barrier_wq, WAKEUP_FIRST);
+	}
 }
 
Index: kernel/test/synch/rcu1.c
===================================================================
--- kernel/test/synch/rcu1.c	(revision 2bcf6c654972c91913e02276dd74d51eb9229589)
+++ kernel/test/synch/rcu1.c	(revision 4ec9ea4158674bc15f751d826091779955c286a4)
@@ -806,4 +806,42 @@
 
 /*-------------------------------------------------------------------*/
+typedef struct {
+	rcu_item_t rcu_item; /* Handed to rcu_call() by do_barrier(). */
+	atomic_t done; /* Set to 1 once barrier_callback() has run. */
+} barrier_t;
+
+static void barrier_callback(rcu_item_t *item) /* rcu callback of do_barrier() */
+{
+	barrier_t *b = member_to_inst(item, barrier_t, rcu_item);
+	atomic_set(&b->done, 1); /* Lets do_barrier() see that we ran. */
+}
+
+static bool do_barrier(void) /* Checks rcu_barrier() waits for a queued callback. */
+{
+	TPRINTF("\nrcu_barrier: Wait for outstanding rcu callbacks to complete\n");
+	
+	barrier_t *barrier = malloc(sizeof(barrier_t), FRAME_ATOMIC);
+	
+	if (!barrier) {
+		TPRINTF("[out-of-mem]\n");
+		return false;
+	}
+	
+	atomic_set(&barrier->done, 0);
+	
+	rcu_call(&barrier->rcu_item, barrier_callback); /* Sets done to 1 when run. */
+	rcu_barrier();
+	
+	if (1 == atomic_get(&barrier->done)) {
+		free(barrier);
+		return true;
+	} else {
+		TPRINTF("rcu_barrier() exited prematurely.\n");
+		/* Leak barrier: the callback has not run yet and may still write to it. */
+		return false;
+	}
+}
+
+/*-------------------------------------------------------------------*/
 
 typedef struct {
@@ -969,4 +1007,5 @@
 		{ 1, do_reader_preempt, "do_reader_preempt" },
 		{ 1, do_synch, "do_synch" },
+		{ 1, do_barrier, "do_barrier" },
 		{ 1, do_reader_exit, "do_reader_exit" },
 		{ 1, do_nop_readers, "do_nop_readers" },
