Index: kernel/generic/src/proc/task.c
===================================================================
--- kernel/generic/src/proc/task.c	(revision 04d66804623c1e241e9426d0016534aa65a96110)
+++ kernel/generic/src/proc/task.c	(revision 669f3d321e8829b7e31263be9622012f870d9539)
@@ -41,4 +41,5 @@
 #include <mm/slab.h>
 #include <atomic.h>
+#include <synch/futex.h>
 #include <synch/spinlock.h>
 #include <synch/waitq.h>
@@ -153,5 +154,4 @@
 	
 	irq_spinlock_initialize(&task->lock, "task_t_lock");
-	mutex_initialize(&task->futexes_lock, MUTEX_PASSIVE);
 	
 	list_initialize(&task->threads);
@@ -165,4 +165,7 @@
 	spinlock_initialize(&task->active_calls_lock, "active_calls_lock");
 	list_initialize(&task->active_calls);
+	
+	mutex_initialize(&task->futex_list_lock, MUTEX_PASSIVE);
+	list_initialize(&task->futex_list);
 	
 #ifdef CONFIG_UDEBUG
@@ -221,5 +224,5 @@
 		(void) ipc_phone_connect(&task->phones[0], ipc_phone_0);
 	
-	btree_create(&task->futexes);
+	futex_task_init(task);
 	
 	/*
@@ -262,5 +265,5 @@
 	 * Free up dynamically allocated state.
 	 */
-	btree_destroy(&task->futexes);
+	futex_task_deinit(task);
 	
 	/*
Index: kernel/generic/src/proc/thread.c
===================================================================
--- kernel/generic/src/proc/thread.c	(revision 04d66804623c1e241e9426d0016534aa65a96110)
+++ kernel/generic/src/proc/thread.c	(revision 669f3d321e8829b7e31263be9622012f870d9539)
@@ -518,5 +518,5 @@
 			 */
 			ipc_cleanup();
-			futex_cleanup();
+			futex_task_cleanup();
 			LOG("Cleanup of task %" PRIu64" completed.", TASK->taskid);
 		}
Index: kernel/generic/src/synch/futex.c
===================================================================
--- kernel/generic/src/synch/futex.c	(revision 04d66804623c1e241e9426d0016534aa65a96110)
+++ kernel/generic/src/synch/futex.c	(revision 669f3d321e8829b7e31263be9622012f870d9539)
@@ -1,4 +1,5 @@
 /*
  * Copyright (c) 2006 Jakub Jermar
+ * Copyright (c) 2012 Adam Hraska
  * All rights reserved.
  *
@@ -39,4 +40,5 @@
 #include <synch/mutex.h>
 #include <synch/spinlock.h>
+#include <synch/rcu.h>
 #include <mm/frame.h>
 #include <mm/page.h>
@@ -46,4 +48,5 @@
 #include <genarch/mm/page_pt.h>
 #include <genarch/mm/page_ht.h>
+#include <adt/cht.h>
 #include <adt/hash_table.h>
 #include <adt/list.h>
@@ -52,26 +55,55 @@
 #include <panic.h>
 #include <errno.h>
-#include <print.h>
 
 #define FUTEX_HT_SIZE	1024	/* keep it a power of 2 */
 
-static void futex_initialize(futex_t *futex);
-
-static futex_t *futex_find(uintptr_t paddr);
+/** Task specific pointer to a global kernel futex object. */
+typedef struct futex_ptr {
+	/** CHT link. */
+	cht_link_t cht_link;
+	/** List of all futex pointers used by the task. */
+	link_t all_link;
+	/** Kernel futex object. */
+	futex_t *futex;
+	/** User space virtual address of the futex variable in the task. */
+	uintptr_t uaddr;
+} futex_ptr_t;
+
+
+static void destroy_task_cache(work_t *work);
+
+static void futex_initialize(futex_t *futex, uintptr_t paddr);
+static void futex_add_ref(futex_t *futex);
+static void futex_release_ref(futex_t *futex);
+static void futex_release_ref_locked(futex_t *futex);
+
+static futex_t *get_futex(uintptr_t uaddr);
+static futex_t *find_cached_futex(uintptr_t uaddr);
+static futex_t *get_and_cache_futex(uintptr_t phys_addr, uintptr_t uaddr);
+static bool find_futex_paddr(uintptr_t uaddr, uintptr_t *phys_addr);
+
 static size_t futex_ht_hash(sysarg_t *key);
 static bool futex_ht_compare(sysarg_t *key, size_t keys, link_t *item);
 static void futex_ht_remove_callback(link_t *item);
 
-/**
- * Mutex protecting global futex hash table.
- * It is also used to serialize access to all futex_t structures.
- * Must be acquired before the task futex B+tree lock.
+static size_t task_fut_ht_hash(const cht_link_t *link);
+static size_t task_fut_ht_key_hash(void *key);
+static bool task_fut_ht_equal(const cht_link_t *item1, const cht_link_t *item2);
+static bool task_fut_ht_key_equal(void *key, const cht_link_t *item);
+
+
+/** Mutex protecting the global futex hash table.
+ * 
+ * Acquire task specific TASK->futex_list_lock before this mutex.
  */
 static mutex_t futex_ht_lock;
 
-/** Futex hash table. */
+/** Global kernel futex hash table. Lock futex_ht_lock before accessing.
+ * 
+ * Physical address of the futex variable is the lookup key.
+ */
 static hash_table_t futex_ht;
 
-/** Futex hash table operations. */
+/** Global kernel futex hash table operations. */
 static hash_table_operations_t futex_ht_ops = {
 	.hash = futex_ht_hash,
@@ -80,4 +112,13 @@
 };
 
+/** Task futex cache CHT operations. */
+static cht_ops_t task_futex_ht_ops = {
+	.hash = task_fut_ht_hash,
+	.key_hash = task_fut_ht_key_hash,
+	.equal = task_fut_ht_equal,
+	.key_equal = task_fut_ht_key_equal,
+	.remove_callback = NULL
+};
+
 /** Initialize futex subsystem. */
 void futex_init(void)
@@ -87,14 +128,224 @@
 }
 
-/** Initialize kernel futex structure.
- *
- * @param futex		Kernel futex structure.
- */
-void futex_initialize(futex_t *futex)
+/** Initializes the futex structures for the new task. */
+void futex_task_init(struct task *task)
+{
+	task->futexes = malloc(sizeof(struct futex_cache), 0);
+	
+	/* todo: make it block until mem is available */
+	bool ret = cht_create(&task->futexes->ht, 0, 0, 0, &task_futex_ht_ops);
+	ASSERT(ret);
+}
+
+/** Destroys the futex structures for the dying task. */
+void futex_task_deinit(struct task *task)
+{
+	/* Interrupts are disabled so we must not block (cannot run cht_destroy). */
+	if (interrupts_disabled()) {
+		/* Invoke the blocking cht_destroy in the background. */
+		workq_global_enqueue_noblock(&task->futexes->destroy_work, 
+			destroy_task_cache);
+	} else {
+		/* We can block. Invoke cht_destroy in this thread. */
+		destroy_task_cache(&task->futexes->destroy_work);
+	}
+}
+
+/** Deallocates a task's CHT futex cache (must already be empty). */
+static void destroy_task_cache(work_t *work)
+{
+	struct futex_cache *cache = 
+		member_to_inst(work, struct futex_cache, destroy_work);
+	
+	cht_destroy(&cache->ht);
+	free(cache);
+}
+
+/** Remove references from futexes known to the current task. */
+void futex_task_cleanup(void)
+{
+	/* All threads of this task have terminated. This is the last thread. */
+	mutex_lock(&TASK->futex_list_lock);
+	
+	list_foreach_safe(TASK->futex_list, cur_link, next_link) {
+		futex_ptr_t *fut_ptr = member_to_inst(cur_link, futex_ptr_t, cht_link);
+		
+		/*
+		 * The function is free to free the futex. All other threads of this
+		 * task have already terminated, so they have also definitely
+		 * exited their CHT futex cache protecting rcu reader sections.
+		 * Moreover release_ref() only frees the futex if this is the 
+		 * last task referencing the futex. Therefore, only threads
+		 * of this task may have referenced the futex if it is to be freed.
+		 */
+		futex_release_ref_locked(fut_ptr->futex);
+		
+		/* 
+		 * No need to remove the futex_ptr from the CHT cache of this task
+		 * because futex_task_deinit() will destroy the CHT shortly.
+		 */
+		list_remove(cur_link);
+		free(fut_ptr);
+	}
+	
+	mutex_unlock(&TASK->futex_list_lock);
+}
+
+
+/** Initialize the kernel futex structure.
+ *
+ * @param futex	Kernel futex structure.
+ * @param paddr Physical address of the futex variable.
+ */
+static void futex_initialize(futex_t *futex, uintptr_t paddr)
 {
 	waitq_initialize(&futex->wq);
 	link_initialize(&futex->ht_link);
-	futex->paddr = 0;
+	futex->paddr = paddr;
 	futex->refcount = 1;
+}
+
+/** Increments the counter of tasks referencing the futex. */
+static void futex_add_ref(futex_t *futex)
+{
+	ASSERT(mutex_locked(&futex_ht_lock));
+	ASSERT(0 < futex->refcount);
+	++futex->refcount;
+}
+
+/** Decrements the counter of tasks referencing the futex. May free the futex.*/
+static void futex_release_ref(futex_t *futex)
+{
+	ASSERT(mutex_locked(&futex_ht_lock));
+	ASSERT(0 < futex->refcount);
+	
+	--futex->refcount;
+	
+	if (0 == futex->refcount) {
+		hash_table_remove(&futex_ht, &futex->paddr, 1);
+	}
+}
+
+/** Decrements the counter of tasks referencing the futex. May free the futex.*/
+static void futex_release_ref_locked(futex_t *futex)
+{
+	mutex_lock(&futex_ht_lock);
+	futex_release_ref(futex);
+	mutex_unlock(&futex_ht_lock);
+}
+
+/** Returns a futex for the virtual address @a uaddr (or creates one). */
+static futex_t *get_futex(uintptr_t uaddr)
+{
+	futex_t *futex = find_cached_futex(uaddr);
+	
+	if (futex) 
+		return futex;
+
+	uintptr_t paddr;
+
+	if (!find_futex_paddr(uaddr, &paddr)) 
+		return 0;
+
+	return get_and_cache_futex(paddr, uaddr);
+}
+
+
+/** Finds the physical address of the futex variable. */
+static bool find_futex_paddr(uintptr_t uaddr, uintptr_t *paddr)
+{
+	page_table_lock(AS, true);
+
+	bool found = false;
+	pte_t *t = page_mapping_find(AS, ALIGN_DOWN(uaddr, PAGE_SIZE), false);
+	
+	if (t && PTE_VALID(t) && PTE_PRESENT(t)) {
+		found = true;
+		*paddr = PTE_GET_FRAME(t) + (uaddr - ALIGN_DOWN(uaddr, PAGE_SIZE));
+	}
+	
+	page_table_unlock(AS, true);
+	
+	return found;
+}
+
+/** Returns the futex cached in this task with the virtual address uaddr. */
+static futex_t *find_cached_futex(uintptr_t uaddr)
+{
+	cht_read_lock();
+	
+	futex_t *futex;
+	cht_link_t *futex_ptr_link = cht_find(&TASK->futexes->ht, &uaddr);
+
+	if (futex_ptr_link) {
+		futex_ptr_t *futex_ptr 
+			= member_to_inst(futex_ptr_link, futex_ptr_t, cht_link);
+		
+		futex = futex_ptr->futex;
+	} else {
+		futex = NULL;
+	}
+	
+	cht_read_unlock();
+	
+	return futex;
+}
+
+
+/** 
+ * Returns a kernel futex for the physical address @a phys_addr and caches 
+ * it in this task under the virtual address @a uaddr (if not already cached).
+ */
+static futex_t *get_and_cache_futex(uintptr_t phys_addr, uintptr_t uaddr)
+{
+	futex_t *futex = malloc(sizeof(futex_t), 0);
+	
+	/* 
+	 * Find the futex object in the global futex table (or insert it 
+	 * if it is not present).
+	 */
+	mutex_lock(&futex_ht_lock);
+	
+	link_t *fut_link = hash_table_find(&futex_ht, &phys_addr);
+	
+	if (fut_link) {
+		free(futex);
+		futex = member_to_inst(fut_link, futex_t, ht_link);
+		futex_add_ref(futex);
+	} else {
+		futex_initialize(futex, phys_addr);
+		hash_table_insert(&futex_ht, &phys_addr, &futex->ht_link);
+	}
+	
+	mutex_unlock(&futex_ht_lock);
+	
+	/* 
+	 * Cache the link to the futex object for this task. 
+	 */
+	futex_ptr_t *fut_ptr = malloc(sizeof(futex_ptr_t), 0);
+	cht_link_t *dup_link;
+	
+	fut_ptr->futex = futex;
+	fut_ptr->uaddr = uaddr;
+	
+	cht_read_lock();
+	
+	/* Cache the mapping from the virtual address to the futex for this task. */
+	if (cht_insert_unique(&TASK->futexes->ht, &fut_ptr->cht_link, &dup_link)) {
+		mutex_lock(&TASK->futex_list_lock);
+		list_append(&fut_ptr->all_link, &TASK->futex_list);
+		mutex_unlock(&TASK->futex_list_lock);
+	} else {
+		/* Another thread of this task beat us to it. Use that mapping instead.*/
+		free(fut_ptr);
+		futex_release_ref_locked(futex);
+		
+		futex_ptr_t *dup = member_to_inst(dup_link, futex_ptr_t, cht_link);
+		futex = dup->futex;		
+	}
+
+	cht_read_unlock();
+	
+	return futex;
 }
 
@@ -109,27 +360,13 @@
 sysarg_t sys_futex_sleep(uintptr_t uaddr)
 {
-	futex_t *futex;
-	uintptr_t paddr;
-	pte_t *t;
-	int rc;
-	
-	/*
-	 * Find physical address of futex counter.
-	 */
-	page_table_lock(AS, true);
-	t = page_mapping_find(AS, ALIGN_DOWN(uaddr, PAGE_SIZE), false);
-	if (!t || !PTE_VALID(t) || !PTE_PRESENT(t)) {
-		page_table_unlock(AS, true);
+	futex_t *futex = get_futex(uaddr);
+	
+	if (!futex) 
 		return (sysarg_t) ENOENT;
-	}
-	paddr = PTE_GET_FRAME(t) + (uaddr - ALIGN_DOWN(uaddr, PAGE_SIZE));
-	page_table_unlock(AS, true);
-	
-	futex = futex_find(paddr);
 
 #ifdef CONFIG_UDEBUG
 	udebug_stoppable_begin();
 #endif
-	rc = waitq_sleep_timeout(&futex->wq, 0, SYNCH_FLAGS_INTERRUPTIBLE); 
+	int rc = waitq_sleep_timeout(&futex->wq, 0, SYNCH_FLAGS_INTERRUPTIBLE); 
 #ifdef CONFIG_UDEBUG
 	udebug_stoppable_end();
@@ -146,84 +383,14 @@
 sysarg_t sys_futex_wakeup(uintptr_t uaddr)
 {
-	futex_t *futex;
-	uintptr_t paddr;
-	pte_t *t;
-	
-	/*
-	 * Find physical address of futex counter.
-	 */
-	page_table_lock(AS, true);
-	t = page_mapping_find(AS, ALIGN_DOWN(uaddr, PAGE_SIZE), false);
-	if (!t || !PTE_VALID(t) || !PTE_PRESENT(t)) {
-		page_table_unlock(AS, true);
+	futex_t *futex = get_futex(uaddr);
+	
+	if (futex) {
+		waitq_wakeup(&futex->wq, WAKEUP_FIRST);
+		return 0;
+	} else {
 		return (sysarg_t) ENOENT;
 	}
-	paddr = PTE_GET_FRAME(t) + (uaddr - ALIGN_DOWN(uaddr, PAGE_SIZE));
-	page_table_unlock(AS, true);
-	
-	futex = futex_find(paddr);
-		
-	waitq_wakeup(&futex->wq, WAKEUP_FIRST);
-	
-	return 0;
-}
-
-/** Find kernel address of the futex structure corresponding to paddr.
- *
- * If the structure does not exist already, a new one is created.
- *
- * @param paddr		Physical address of the userspace futex counter.
- *
- * @return		Address of the kernel futex structure.
- */
-futex_t *futex_find(uintptr_t paddr)
-{
-	link_t *item;
-	futex_t *futex;
-	btree_node_t *leaf;
-	
-	/*
-	 * Find the respective futex structure
-	 * or allocate new one if it does not exist already.
-	 */
-	mutex_lock(&futex_ht_lock);
-	item = hash_table_find(&futex_ht, &paddr);
-	if (item) {
-		futex = hash_table_get_instance(item, futex_t, ht_link);
-
-		/*
-		 * See if the current task knows this futex.
-		 */
-		mutex_lock(&TASK->futexes_lock);
-		if (!btree_search(&TASK->futexes, paddr, &leaf)) {
-			/*
-			 * The futex is new to the current task.
-			 * Upgrade its reference count and put it to the
-			 * current task's B+tree of known futexes.
-			 */
-			futex->refcount++;
-			btree_insert(&TASK->futexes, paddr, futex, leaf);
-		}
-		mutex_unlock(&TASK->futexes_lock);
-	} else {
-		futex = (futex_t *) malloc(sizeof(futex_t), 0);
-		futex_initialize(futex);
-		futex->paddr = paddr;
-		hash_table_insert(&futex_ht, &paddr, &futex->ht_link);
-			
-		/*
-		 * This is the first task referencing the futex.
-		 * It can be directly inserted into its
-		 * B+tree of known futexes.
-		 */
-		mutex_lock(&TASK->futexes_lock);
-		btree_insert(&TASK->futexes, paddr, futex, NULL);
-		mutex_unlock(&TASK->futexes_lock);
-		
-	}
-	mutex_unlock(&futex_ht_lock);
-	
-	return futex;
-}
+}
+
 
 /** Compute hash index into futex hash table.
@@ -268,28 +435,36 @@
 }
 
-/** Remove references from futexes known to the current task. */
-void futex_cleanup(void)
-{
-	mutex_lock(&futex_ht_lock);
-	mutex_lock(&TASK->futexes_lock);
-
-	list_foreach(TASK->futexes.leaf_list, cur) {
-		btree_node_t *node;
-		unsigned int i;
-		
-		node = list_get_instance(cur, btree_node_t, leaf_link);
-		for (i = 0; i < node->keys; i++) {
-			futex_t *ftx;
-			uintptr_t paddr = node->key[i];
-			
-			ftx = (futex_t *) node->value[i];
-			if (--ftx->refcount == 0)
-				hash_table_remove(&futex_ht, &paddr, 1);
-		}
-	}
-	
-	mutex_unlock(&TASK->futexes_lock);
-	mutex_unlock(&futex_ht_lock);
-}
+/*
+ * Operations of a task's CHT that caches mappings of futex user space 
+ * virtual addresses to kernel futex objects.
+ */
+
+static size_t task_fut_ht_hash(const cht_link_t *link)
+{
+	const futex_ptr_t *fut_ptr = member_to_inst(link, futex_ptr_t, cht_link);
+	return fut_ptr->uaddr;
+}
+
+static size_t task_fut_ht_key_hash(void *key)
+{
+	return *(uintptr_t*)key;
+}
+
+static bool task_fut_ht_equal(const cht_link_t *item1, const cht_link_t *item2)
+{
+	const futex_ptr_t *fut_ptr1 = member_to_inst(item1, futex_ptr_t, cht_link);
+	const futex_ptr_t *fut_ptr2 = member_to_inst(item2, futex_ptr_t, cht_link);
+	
+	return fut_ptr1->uaddr == fut_ptr2->uaddr;
+}
+
+static bool task_fut_ht_key_equal(void *key, const cht_link_t *item)
+{
+	const futex_ptr_t *fut_ptr = member_to_inst(item, futex_ptr_t, cht_link);
+	uintptr_t uaddr = *(uintptr_t*)key;
+	
+	return fut_ptr->uaddr == uaddr;
+}
+
 
 /** @}
