=== modified file 'kernel/arch/ia32/include/atomic.h'
--- kernel/arch/ia32/include/atomic.h	2010-03-23 14:41:06 +0000
+++ kernel/arch/ia32/include/atomic.h	2010-04-24 19:48:59 +0000
@@ -100,7 +100,7 @@
 
 static inline atomic_count_t test_and_set(atomic_t *val)
 {
-	atomic_count_t v = 1;
+	atomic_count_t v = 0xf00dba11UL;
 	
 	asm volatile (
 		"xchgl %[v], %[count]\n"

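The hunk above changes the value that test_and_set() swaps into the lock word from 1 to the LOCK_LOCKED magic defined in spinlock.h below, so that a zeroed or scribbled-over lock word can no longer be mistaken for a properly held lock. For readers unfamiliar with the xchg idiom, the standalone sketch below expresses the same semantics with a GCC builtin instead of the kernel's inline assembly; the typedef and constants merely mirror the ones used by this patch and are not the kernel's definitions.

/* Standalone illustration only: the semantics of the ia32 test_and_set()
 * above, written with a GCC builtin rather than inline assembly. */
#define LOCK_LOCKED	0xf00dba11UL
#define LOCK_UNLOCKED	0xca11b00fUL

typedef unsigned long atomic_count_t;

static inline atomic_count_t test_and_set_sketch(volatile atomic_count_t *count)
{
	/* Atomically store LOCK_LOCKED and return the previous value; the
	 * caller acquired the lock iff the old value was LOCK_UNLOCKED. */
	return __atomic_exchange_n(count, (atomic_count_t) LOCK_LOCKED,
	    __ATOMIC_ACQUIRE);
}
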
=== modified file 'kernel/generic/include/cpu.h'
--- kernel/generic/include/cpu.h	2010-02-21 19:02:16 +0000
+++ kernel/generic/include/cpu.h	2010-04-24 19:45:28 +0000
@@ -37,6 +37,7 @@
 
 #include <mm/tlb.h>
 #include <synch/spinlock.h>
+#include <synch/condvar.h>
 #include <proc/scheduler.h>
 #include <arch/cpu.h>
 #include <arch/context.h>
@@ -59,6 +60,10 @@
 	runq_t rq[RQ_COUNT];
 	volatile size_t needs_relink;
 
+#ifdef CONFIG_SMP
+	condvar_t kcpulb_cv;
+#endif
+
 	SPINLOCK_DECLARE(timeoutlock);
 	link_t timeout_active_head;
 

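The new per-CPU kcpulb_cv condition variable lets the scheduler's idle path wake the load balancer directly instead of the balancer polling once per second. A simplified sketch of the intended pairing follows; the helper names are hypothetical, the real code is in scheduler.c further down, and the sketch assumes CONFIG_SMP and the kernel headers this patch already uses.

/* Simplified sketch of the pairing around kcpulb_cv (hypothetical helper
 * names); see kernel/generic/src/proc/scheduler.c below for the real code. */
#include <cpu.h>
#include <synch/condvar.h>
#include <synch/mutex.h>

/* Balancer side: block until signalled, or for at most 1 s. */
static void kcpulb_wait_sketch(cpu_t *cpu, mutex_t *mtx)
{
	condvar_wait_timeout(&cpu->kcpulb_cv, mtx, 1000000);
}

/* Idle path side: wake up the balancer of a CPU that ran out of work. */
static void kcpulb_kick_sketch(cpu_t *cpu)
{
	condvar_signal(&cpu->kcpulb_cv);
}
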
=== modified file 'kernel/generic/include/synch/spinlock.h'
--- kernel/generic/include/synch/spinlock.h	2010-03-23 14:41:06 +0000
+++ kernel/generic/include/synch/spinlock.h	2010-04-24 19:45:28 +0000
@@ -41,10 +41,19 @@
 #include <atomic.h>
 #include <debug.h>
 
+#define LOCK_LOCKED	0xf00dba11UL
+#define LOCK_UNLOCKED	0xca11b00fUL
+#define LOCK_MAGIC1	0xdeadca1fUL
+#define LOCK_MAGIC2	0x0becedadUL
+
 #ifdef CONFIG_SMP
 
 typedef struct {
+	unative_t magic1;
 	atomic_t val;
+	unative_t magic2;
+
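+	/* Number of holders inside the critical section; must never exceed one. */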
+	atomic_t cnt;
 	
 #ifdef CONFIG_DEBUG_SPINLOCK
 	const char *name;
@@ -67,13 +76,19 @@
 #define SPINLOCK_INITIALIZE_NAME(lock_name, desc_name) \
 	spinlock_t lock_name = { \
 		.name = desc_name, \
-		.val = { 0 } \
+		.magic1 = LOCK_MAGIC1, \
+		.magic2 = LOCK_MAGIC2, \
+		.val = { LOCK_UNLOCKED }, \
+		.cnt = { 0 } \
 	}
 
 #define SPINLOCK_STATIC_INITIALIZE_NAME(lock_name, desc_name) \
 	static spinlock_t lock_name = { \
 		.name = desc_name, \
-		.val = { 0 } \
+		.magic1 = LOCK_MAGIC1, \
+		.magic2 = LOCK_MAGIC2, \
+		.val = { LOCK_UNLOCKED }, \
+		.cnt = { 0 } \
 	}
 
 #define spinlock_lock(lock)  spinlock_lock_debug(lock)
@@ -103,25 +118,7 @@
 extern void spinlock_initialize(spinlock_t *lock, const char *name);
 extern int spinlock_trylock(spinlock_t *lock);
 extern void spinlock_lock_debug(spinlock_t *lock);
-
-/** Unlock spinlock
- *
- * Unlock spinlock.
- *
- * @param sl Pointer to spinlock_t structure.
- */
-static inline void spinlock_unlock(spinlock_t *lock)
-{
-	ASSERT(atomic_get(&lock->val) != 0);
-	
-	/*
-	 * Prevent critical section code from bleeding out this way down.
-	 */
-	CS_LEAVE_BARRIER();
-	
-	atomic_set(&lock->val, 0);
-	preemption_enable();
-}
+extern void spinlock_unlock(spinlock_t *lock);
 
 #ifdef CONFIG_DEBUG_SPINLOCK
 

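Taken together, the magic guard words around val, the LOCK_LOCKED/LOCK_UNLOCKED encoding and the cnt owner counter make a corrupted, uninitialized or doubly-held spinlock detectable. The checks this patch actually performs live in spinlock.c below; the helper sketched here is only a hypothetical summary of the invariants the new fields are expected to uphold, assuming CONFIG_SMP and the <atomic.h>/<debug.h> includes already present in this header.

/* Hypothetical helper, not part of the patch: the invariants behind the
 * new magic1/magic2, val and cnt fields. */
static inline void spinlock_check_sketch(spinlock_t *lock)
{
	/* The guard words catch locks that were never initialized or were
	 * overwritten by a stray memory write. */
	ASSERT(lock->magic1 == LOCK_MAGIC1);
	ASSERT(lock->magic2 == LOCK_MAGIC2);

	/* The lock word may only ever hold one of the two magic values. */
	atomic_count_t val = atomic_get(&lock->val);
	ASSERT((val == LOCK_LOCKED) || (val == LOCK_UNLOCKED));

	/* At most one holder may be inside the critical section. */
	atomic_count_t cnt = atomic_get(&lock->cnt);
	ASSERT((cnt == 0) || (cnt == 1));
}
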
=== modified file 'kernel/generic/src/cpu/cpu.c'
--- kernel/generic/src/cpu/cpu.c	2010-03-23 14:41:06 +0000
+++ kernel/generic/src/cpu/cpu.c	2010-04-24 19:45:28 +0000
@@ -58,33 +58,33 @@
 void cpu_init(void) {
 	unsigned int i, j;
 	
-#ifdef CONFIG_SMP
 	if (config.cpu_active == 1) {
-#endif /* CONFIG_SMP */
 		cpus = (cpu_t *) malloc(sizeof(cpu_t) * config.cpu_count,
-					FRAME_ATOMIC);
+		    FRAME_ATOMIC);
 		if (!cpus)
 			panic("Cannot allocate CPU structures.");
 
-		/* initialize everything */
+		/* Initialize everything */
 		memsetb(cpus, sizeof(cpu_t) * config.cpu_count, 0);
 
 		for (i = 0; i < config.cpu_count; i++) {
-			cpus[i].stack = (uint8_t *) frame_alloc(STACK_FRAMES, FRAME_KA | FRAME_ATOMIC);
+			cpus[i].stack = (uint8_t *) frame_alloc(STACK_FRAMES,
+			    FRAME_KA | FRAME_ATOMIC);
 			
 			cpus[i].id = i;
 			
 			spinlock_initialize(&cpus[i].lock, "cpu_t.lock");
+#ifdef CONFIG_SMP
+			condvar_initialize(&cpus[i].kcpulb_cv);
+#endif
 
 			for (j = 0; j < RQ_COUNT; j++) {
-				spinlock_initialize(&cpus[i].rq[j].lock, "rq_t.lock");
+				spinlock_initialize(&cpus[i].rq[j].lock,
+				    "rq_t.lock");
 				list_initialize(&cpus[i].rq[j].rq_head);
 			}
 		}
-		
-#ifdef CONFIG_SMP
 	}
-#endif /* CONFIG_SMP */
 
 	CPU = &cpus[config.cpu_active - 1];
 	

=== modified file 'kernel/generic/src/proc/scheduler.c'
--- kernel/generic/src/proc/scheduler.c	2010-02-22 21:24:19 +0000
+++ kernel/generic/src/proc/scheduler.c	2010-04-24 19:45:28 +0000
@@ -189,6 +189,10 @@
 	interrupts_enable();
 	
 	if (atomic_get(&CPU->nrdy) == 0) {
+#ifdef CONFIG_SMP
+		condvar_signal(&CPU->kcpulb_cv);
+#endif
+
 		/*
 		 * For there was nothing to run, the CPU goes to sleep
 		 * until a hardware interrupt or an IPI comes.
@@ -530,6 +534,23 @@
 }
 
 #ifdef CONFIG_SMP
+
+static int get_cpu_balance(atomic_count_t *);
+
+/** Calculate the number of threads to be migrated/stolen from other CPUs.
+ *
+ * Note that the situation may change between two balancing passes, so the
+ * most up-to-date counts are read on each call.
+ *
+ * @param[out] average	Pointer to memory where the average number of ready
+ * 			threads per CPU will be stored.
+ * @return The number of threads that should be migrated by the current CPU.
+ */
+static int get_cpu_balance(atomic_count_t *average)
+{
+	*average = atomic_get(&nrdy) / config.cpu_active + 1;
+	return *average - atomic_get(&CPU->nrdy);
+}
+
 /** Load balancing thread
  *
  * SMP load balancing thread, supervising thread supplies
@@ -547,29 +568,35 @@
 	int j;
 	int k = 0;
 	ipl_t ipl;
+	mutex_t dummy_lock;
+	uint32_t backoff_us = 0;
 
 	/*
 	 * Detach kcpulb as nobody will call thread_join_timeout() on it.
 	 */
 	thread_detach(THREAD);
-	
+
+	/*
+	 * The dummy lock exists only to satisfy the condvar_wait_timeout()
+	 * API; we initialize it once and keep it locked for the lifetime of
+	 * this thread.
+	 */
+	mutex_initialize(&dummy_lock, MUTEX_PASSIVE);
+	mutex_lock(&dummy_lock);
+
 loop:
-	/*
-	 * Work in 1s intervals.
-	 */
-	thread_sleep(1);
-
-not_satisfied:
-	/*
-	 * Calculate the number of threads that will be migrated/stolen from
-	 * other CPU's. Note that situation can have changed between two
-	 * passes. Each time get the most up to date counts.
-	 */
-	average = atomic_get(&nrdy) / config.cpu_active + 1;
-	count = average - atomic_get(&CPU->nrdy);
-
-	if (count <= 0)
-		goto satisfied;
+	/* Back off for a while before the next balancing pass. */
+	if (backoff_us)
+		thread_usleep(backoff_us);
+
+	/*
+	 * Wait until our CPU wants to steal some threads or until a 1 s
+	 * interval elapses.
+	 */
+	while ((count = get_cpu_balance(&average)) <= 0)
+		condvar_wait_timeout(&CPU->kcpulb_cv, &dummy_lock, 1000000);
+
+	/* Start or grow the exponential backoff, capped at 1 s. */
+	if (backoff_us == 0)
+		backoff_us = 1000;
+	else if (backoff_us < 1000000)
+		backoff_us <<= 5;
+	if (backoff_us > 1000000)
+		backoff_us = 1000000;
 
 	/*
 	 * Searching least priority queues on all CPU's first and most priority
@@ -656,8 +683,10 @@
 
 				interrupts_restore(ipl);
 	
-				if (--count == 0)
-					goto satisfied;
+				if (--count == 0) {
+					backoff_us >>= 1;
+					goto loop;
+				}
 					
 				/*
 				 * We are not satisfied yet, focus on another
@@ -676,17 +705,9 @@
 		 * Be a little bit light-weight and let migrated threads run.
 		 */
 		scheduler();
-	} else {
-		/*
-		 * We failed to migrate a single thread.
-		 * Give up this turn.
-		 */
-		goto loop;
+		backoff_us >>= 1;
 	}
 		
-	goto not_satisfied;
-
-satisfied:
 	goto loop;
 }
 

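The rewritten kcpulb() derives its work estimate from get_cpu_balance() instead of recomputing it inline, and replaces the fixed one-second sleep with a condvar wait plus exponential backoff. As a worked example of the balance formula: with 10 ready threads system-wide, 4 active CPUs and 1 thread already ready on the local CPU, the average rounds up to 3 and the balancer should try to steal 2 threads. The tiny user-space program below (illustration only, not kernel code; the numbers are made up) just replays that arithmetic.

#include <stdio.h>

/* Worked example of the get_cpu_balance() formula above. */
int main(void)
{
	long nrdy = 10;        /* ready threads system-wide */
	long cpu_active = 4;   /* active CPUs */
	long local_nrdy = 1;   /* threads already ready on this CPU */

	long average = nrdy / cpu_active + 1;  /* 10 / 4 + 1 = 3 */
	long count = average - local_nrdy;     /* 3 - 1 = 2 threads to steal */

	printf("average = %ld, count = %ld\n", average, count);
	return 0;
}
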
=== modified file 'kernel/generic/src/synch/spinlock.c'
--- kernel/generic/src/synch/spinlock.c	2010-02-25 19:11:25 +0000
+++ kernel/generic/src/synch/spinlock.c	2010-04-24 19:45:28 +0000
@@ -43,6 +43,10 @@
 #include <print.h>
 #include <debug.h>
 #include <symtab.h>
+#include <cpu.h>
+#include <proc/thread.h>
+#include <proc/task.h>
+#include <arch/cycle.h>
 
 #ifdef CONFIG_SMP
 
@@ -53,14 +57,105 @@
  */
 void spinlock_initialize(spinlock_t *lock, const char *name)
 {
-	atomic_set(&lock->val, 0);
+	atomic_set(&lock->val, LOCK_UNLOCKED);
+	atomic_set(&lock->cnt, 0);
+	lock->magic1 = LOCK_MAGIC1;
+	lock->magic2 = LOCK_MAGIC2;
 #ifdef CONFIG_DEBUG_SPINLOCK
 	lock->name = name;
 #endif
 }
 
 #ifdef CONFIG_DEBUG_SPINLOCK
 
+static const char *le_str[] = {
+	"invalid",
+	"spinlock_lock",
+	"spinlock_trylock",
+	"spinlock_unlock"
+};
+
+typedef enum {
+	LE_INVALID = 0,
+	LE_LOCK,
+	LE_TRYLOCK,
+	LE_UNLOCK
+} lock_event_type_t;
+
+typedef struct {
+	lock_event_type_t type;
+	spinlock_t *lock;
+	cpu_t *cpu;
+	thread_t *thread;
+	unative_t pc;
+	uint64_t cycle;
+	atomic_count_t cnt;
+} lock_event_t;
+
+#define LE_EVENTS	8000
+
+static lock_event_t lock_events[LE_EVENTS];
+static atomic_t ne;
+
+static int lock_tracing = 1;
+
+void lock_event_record(lock_event_type_t type, spinlock_t *l, unative_t pc);
+void lock_event_record(lock_event_type_t type, spinlock_t *l, unative_t pc)
+{
+	if (!lock_tracing)
+		return;
+	if (l->name[0] == '*')
+		return;
+
+	int i = atomic_postinc(&ne) % LE_EVENTS;
+
+	lock_events[i].type = type;
+	lock_events[i].lock = l;
+	lock_events[i].cpu = CPU;
+	lock_events[i].thread = THREAD;
+	lock_events[i].pc = pc;
+	lock_events[i].cycle = get_cycle();
+	lock_events[i].cnt = atomic_get(&l->cnt);
+}
+
+void lock_event_print(lock_event_t *le);
+void lock_event_print(lock_event_t *le)
+{
+	printf("cycle=%" PRIu64 " ", le->cycle);
+	if (le->type > LE_UNLOCK)
+		le->type = LE_INVALID;
+	printf("%s(%s/%p) ", le_str[le->type], le->lock->name, le->lock);
+	if (le->cpu)
+		printf("cpu%d, ", le->cpu->id);
+	else
+		printf("nocpu, ");
+	printf("pc=%p ", le->pc);
+	printf("cnt=%ld ", le->cnt);
+	printf("thread=%p\n", le->thread);
+}
+
+void lock_event_show(spinlock_t *l);
+void lock_event_show(spinlock_t *l)
+{
+	int i, j = 0;
+	uint64_t min = 0xffffffffffffffffULL;
+
+	for (i = 0; i < LE_EVENTS; i++) {
+		if (lock_events[i].cycle < min) {
+			min = lock_events[i].cycle;
+			j = i;
+		}
+	}
+
+	for (i = j; i < LE_EVENTS + j; i++) {
+		lock_event_t *e = &lock_events[i % LE_EVENTS];
+		
+		if (e->lock == l)
+			lock_event_print(e);
+	}
+}
+
 /** Lock spinlock
  *
  * Lock spinlock.
@@ -75,8 +170,11 @@
 	size_t i = 0;
 	bool deadlock_reported = false;
 	
+	ASSERT(lock->magic1 == LOCK_MAGIC1);
+	ASSERT(lock->magic2 == LOCK_MAGIC2);
+
 	preemption_disable();
-	while (test_and_set(&lock->val)) {
+	while (test_and_set(&lock->val) != LOCK_UNLOCKED) {
 		/*
 		 * We need to be careful about particular locks
 		 * which are directly used to report deadlocks
@@ -110,13 +208,28 @@
 		}
 	}
 	
+	atomic_inc(&lock->cnt);
+	if (atomic_get(&lock->cnt) != 1) {
+		lock_tracing = 0;
+		lock_event_show(lock);
+		panic("not alone in critical section");
+	}
+
 	if (deadlock_reported)
 		printf("cpu%u: not deadlocked\n", CPU->id);
 	
+	lock_event_record(LE_LOCK, lock, CALLER);
+	
 	/*
 	 * Prevent critical section code from bleeding out this way up.
 	 */
 	CS_ENTER_BARRIER();
+	if (atomic_get(&lock->cnt) != 1) {
+		lock_tracing = 0;
+		lock_event_show(lock);
+		panic("not alone in critical section");
+	}
 }
 
 #endif
@@ -135,19 +248,49 @@
 int spinlock_trylock(spinlock_t *lock)
 {
 	preemption_disable();
-	int rc = !test_and_set(&lock->val);
+	atomic_count_t ret = test_and_set(&lock->val);
+
+	ASSERT(lock->magic1 == LOCK_MAGIC1);
+	ASSERT(lock->magic2 == LOCK_MAGIC2);
+	ASSERT(ret == LOCK_LOCKED || ret == LOCK_UNLOCKED);
 	
 	/*
 	 * Prevent critical section code from bleeding out this way up.
 	 */
 	CS_ENTER_BARRIER();
 	
-	if (!rc)
+	if (ret == LOCK_LOCKED)
 		preemption_enable();
-	
-	return rc;
-}
-
+	else {
+		if (atomic_postinc(&lock->cnt) != 0)
+			panic("not alone in critical section");
+		lock_event_record(LE_TRYLOCK, lock, CALLER);
+	}
+	
+	return ret == LOCK_UNLOCKED;
+}
+
+void spinlock_unlock(spinlock_t *lock)
+{
+	ASSERT(lock->magic1 == LOCK_MAGIC1);
+	ASSERT(lock->magic2 == LOCK_MAGIC2);
+
+	lock_event_record(LE_UNLOCK, lock, CALLER);
+	if (atomic_get(&lock->val) != LOCK_LOCKED) {
+		lock_tracing = 0;
+		lock_event_show(lock);
+		panic("unlocking non-locked lock");
+	}
+	
+	/*
+	 * Prevent critical section code from bleeding out this way down.
+	 */
+	CS_LEAVE_BARRIER();
+	
+	atomic_dec(&lock->cnt);
+	atomic_set(&lock->val, LOCK_UNLOCKED);
+	preemption_enable();
+}
 #endif
 
 /** @}

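The lock_events array added above is a wrap-around trace buffer: lock_event_record() writes at an ever-increasing index modulo LE_EVENTS, and lock_event_show() replays the ring oldest-first by locating the entry with the smallest cycle stamp. The standalone user-space program below (hypothetical names, not kernel code) demonstrates the same record/replay pattern in miniature.

#include <stdatomic.h>
#include <stdio.h>

#define N_EVENTS 8

typedef struct {
	unsigned long long stamp;	/* stands in for the cycle counter */
	int data;
} event_t;

static event_t events[N_EVENTS];
static atomic_uint next;

static void record(int data, unsigned long long stamp)
{
	/* Ever-growing counter folded into the ring, as in lock_event_record(). */
	unsigned i = atomic_fetch_add(&next, 1) % N_EVENTS;

	events[i].stamp = stamp;
	events[i].data = data;
}

static void show(void)
{
	/* Find the oldest entry, then print the ring starting there,
	 * just like lock_event_show() does. */
	unsigned start = 0;
	for (unsigned i = 1; i < N_EVENTS; i++)
		if (events[i].stamp < events[start].stamp)
			start = i;

	for (unsigned i = start; i < start + N_EVENTS; i++)
		printf("%llu: %d\n", events[i % N_EVENTS].stamp,
		    events[i % N_EVENTS].data);
}

int main(void)
{
	for (int i = 0; i < 12; i++)
		record(i, 100 + i);
	show();
	return 0;
}
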
