Index: kernel/generic/src/synch/rcu.c
===================================================================
--- kernel/generic/src/synch/rcu.c	(revision 5b03a724f5c58712ae1e5a720a4c07c141603021)
+++ kernel/generic/src/synch/rcu.c	(revision f0fcb040f13a5e81a4e704790bcd3b6fc4fe52cb)
@@ -239,4 +239,5 @@
 	
 	CPU->rcu.is_delaying_gp = false;
+	CPU->rcu.signal_unlock = false;
 	
 	semaphore_initialize(&CPU->rcu.arrived_flag, 0);
@@ -410,4 +411,6 @@
 	ASSERT(PREEMPTION_DISABLED || interrupts_disabled());
 	
+	/* TODO: make NMI safe with CPU-local atomic ops. */
+	
 	/*
 	 * We have to disable interrupts in order to make checking
@@ -457,4 +460,8 @@
 		irq_spinlock_unlock(&rcu.preempt_lock, false);
 	}
+	
+	/* If there was something to signal to the detector we have done so. */
+	CPU->rcu.signal_unlock = false;
+	
 	interrupts_restore(ipl);
 }
@@ -1201,4 +1208,12 @@
 			/* Note to notify the detector from rcu_read_unlock(). */
 			CPU->rcu.is_delaying_gp = true;
+			/* 
+			 * Set signal_unlock only after setting is_delaying_gp so
+			 * that NMI handlers do not accidentally clear it in unlock()
+			 * before seeing and acting upon is_delaying_gp.
+			 */
+			compiler_barrier();
+			CPU->rcu.signal_unlock = true;
+			
 			atomic_inc(&rcu.delaying_cpu_cnt);
 		} else {
@@ -1274,6 +1289,16 @@
 {
 	ASSERT(interrupts_disabled());
-	
-	/* Save the thread's nesting count when its not running. */
+	/* TODO: make is_delaying_gp and was_preempted NMI safe via local atomics. */
+
+	/* 
+	 * Prevent NMI handlers from interfering. The detector will be notified
+	 * here if CPU->rcu.is_delaying_gp is set. The current thread is no
+	 * longer running, so there is nothing to signal to the detector.
+	 */
+	CPU->rcu.signal_unlock = false;
+	/* Separates clearing of .signal_unlock from CPU->rcu.nesting_cnt = 0. */
+	compiler_barrier();
+	
+	/* Save the thread's nesting count when it is not running. */
 	THREAD->rcu.nesting_cnt = CPU->rcu.nesting_cnt;
 	/* Interrupt handlers might use RCU while idle in scheduler(). */
@@ -1300,4 +1325,5 @@
 	}
 	
+	
 	/* 
 	 * The preempted reader has been noted globally. There are therefore
@@ -1317,5 +1343,5 @@
 		semaphore_up(&rcu.remaining_readers);
 	}
-	
+
 	/* 
 	 * Forcefully associate the detector with the highest priority
@@ -1354,4 +1380,13 @@
 	/* Load the thread's saved nesting count from before it was preempted. */
 	CPU->rcu.nesting_cnt = THREAD->rcu.nesting_cnt;
+	/* 
+	 * In the unlikely event that an NMI occurs between the loading of the
+	 * variables and setting signal_unlock, the NMI handler may invoke
+	 * rcu_read_unlock() and clear signal_unlock. In that case we will
+	 * incorrectly overwrite signal_unlock from false to true. This
+	 * situation is benign and the next rcu_read_unlock() will at worst
+	 * needlessly invoke _rcu_signal_unlock().
+	 */
+	CPU->rcu.signal_unlock = THREAD->rcu.was_preempted || CPU->rcu.is_delaying_gp;
 }
 
