Index: uspace/lib/c/include/futex.h
===================================================================
--- uspace/lib/c/include/futex.h	(revision e3787a0641d8821cd1638504c2382d5ac39885a8)
+++ uspace/lib/c/include/futex.h	(revision 710c1e98e0e2fa820b257d17768cee467f42b107)
@@ -86,17 +86,4 @@
 #endif
 
-/** Try to down the futex.
- *
- * @param futex Futex.
- *
- * @return true if the futex was acquired.
- * @return false if the futex was not acquired.
- *
- */
-static inline bool futex_trydown(futex_t *futex)
-{
-	return cas(&futex->val, 1, 0);
-}
-
 /** Down the futex with timeout, composably.
  *
@@ -115,19 +102,25 @@
  *
  */
-static inline errno_t futex_down_composable(futex_t *futex, struct timeval *expires)
+static inline errno_t futex_down_composable(futex_t *futex, const struct timeval *expires)
 {
 	// TODO: Add tests for this.
 
-	/* No timeout by default. */
-	suseconds_t timeout = 0;
-
-	if (expires) {
-		struct timeval tv;
-		getuptime(&tv);
-		if (tv_gteq(&tv, expires)) {
+	if ((atomic_signed_t) atomic_predec(&futex->val) >= 0)
+		return EOK;
+
+	suseconds_t timeout;
+
+	if (!expires) {
+		/* No timeout. */
+		timeout = 0;
+	} else {
+		if (expires->tv_sec == 0) {
 			/* We can't just return ETIMEOUT. That wouldn't be composable. */
 			timeout = 1;
 		} else {
-			timeout = tv_sub_diff(expires, &tv);
+			struct timeval tv;
+			getuptime(&tv);
+			timeout = tv_gteq(&tv, expires) ? 1 :
+			    tv_sub_diff(expires, &tv);
 		}
 
@@ -135,8 +128,5 @@
 	}
 
-	if ((atomic_signed_t) atomic_predec(&futex->val) < 0)
-		return (errno_t) __SYSCALL2(SYS_FUTEX_SLEEP, (sysarg_t) &futex->val.count, (sysarg_t) timeout);
-
-	return EOK;
+	return __SYSCALL2(SYS_FUTEX_SLEEP, (sysarg_t) &futex->val.count, (sysarg_t) timeout);
 }
 
@@ -153,11 +143,45 @@
 {
 	if ((atomic_signed_t) atomic_postinc(&futex->val) < 0)
-		return (errno_t) __SYSCALL1(SYS_FUTEX_WAKEUP, (sysarg_t) &futex->val.count);
+		return __SYSCALL1(SYS_FUTEX_WAKEUP, (sysarg_t) &futex->val.count);
 
 	return EOK;
 }
 
-static inline errno_t futex_down_timeout(futex_t *futex, struct timeval *expires)
-{
+static inline errno_t futex_down_timeout(futex_t *futex, const struct timeval *expires)
+{
+	if (expires && expires->tv_sec == 0 && expires->tv_usec == 0) {
+		/* Nonblocking down. */
+
+		/*
+		 * Try good old CAS a few times.
+		 * Not too much though, we don't want to bloat the caller.
+		 */
+		for (int i = 0; i < 2; i++) {
+			atomic_signed_t old = atomic_get(&futex->val);
+			if (old <= 0)
+				return ETIMEOUT;
+
+			if (cas(&futex->val, old, old - 1))
+				return EOK;
+		}
+
+		// TODO: builtin atomics with relaxed ordering can make this
+		//       faster.
+
+		/*
+		 * If we don't succeed with CAS, we can't just return failure
+		 * because that would lead to spurious failures where
+		 * futex_down_timeout returns ETIMEOUT despite there being
+		 * available tokens. That could break some algorithms.
+		 * We also don't want to loop on CAS indefinitely, because
+		 * that would make the semaphore not wait-free, even when the
+		 * atomic operations and the underlying base semaphore are all
+		 * wait-free.
+		 * Instead, we fall back to regular down_timeout(), with
+		 * an already expired deadline. That way we delegate all these
+		 * concerns to the base semaphore.
+		 */
+	}
+
 	/*
 	 * This combination of a "composable" sleep followed by futex_up() on
@@ -171,4 +195,22 @@
 }
 
+/** Try to down the futex (down with an already expired deadline).
+ *
+ * @param futex Futex to acquire.
+ *
+ * @return true if futex_down_timeout() returned EOK (futex acquired).
+ * @return false otherwise (futex not acquired).
+ *
+ */
+static inline bool futex_trydown(futex_t *futex)
+{
+	/*
+	 * An all-zero deadline selects the "Nonblocking down" path of
+	 * futex_down_timeout(), i.e. it counts as already expired.
+	 */
+	struct timeval tv = { .tv_sec = 0, .tv_usec = 0 };
+	return futex_down_timeout(futex, &tv) == EOK;
+}
+
 /** Down the futex.
  *
