Index: uspace/app/rcubench/rcubench.c
===================================================================
--- uspace/app/rcubench/rcubench.c	(revision af5dfa5b04a48b2c78ec79856c7bc5d1cfce6341)
+++ uspace/app/rcubench/rcubench.c	(revision 7554e41313cc1b20e404bcf3539a615006ffb1ea)
@@ -50,9 +50,11 @@
 #include <rcu.h>
 
+
+/* Results are printed to this file in addition to stdout. */
+static FILE *results_fd = NULL;
+
 typedef struct bench {
-	enum {
-		T_KERN_FUTEX,
-		T_LIBC_FUTEX
-	} type;
+	const char *name;
+	void (*func)(struct bench *);
 	size_t iters;
 	size_t nthreads;
@@ -61,6 +63,5 @@
 	futex_t done_threads;
 	
-	futex_t ke_bench_fut;
-	fibril_mutex_t libc_bench_mtx;
+	futex_t bench_fut;
 } bench_t;
 
@@ -82,5 +83,5 @@
 static void  kernel_futex_bench(bench_t *bench)
 {
-	futex_t * const fut = &bench->ke_bench_fut;
+	futex_t * const fut = &bench->bench_fut;
 	const size_t iters = bench->iters;
 	size_t sum = 0;
@@ -107,17 +108,29 @@
 }
 
-static void libc_futex_bench(bench_t *bench)
-{
-	fibril_mutex_t * const mtx = &bench->libc_bench_mtx;
+static void libc_futex_lock_bench(bench_t *bench)
+{
 	const size_t iters = bench->iters;
+	futex_t loc_fut = FUTEX_INITIALIZER;	/* Local to this call, so each benchmark thread uses its own futex. */
 	
 	for (size_t i = 0; i < iters; ++i) {
-		fibril_mutex_lock(mtx);
+		futex_lock(&loc_fut);	/* One lock/unlock pair per iteration. */
 		/* no-op */
 		compiler_barrier();
-		fibril_mutex_unlock(mtx);
-	}
-}
-
+		futex_unlock(&loc_fut);
+	}
+}
+
+static void libc_futex_sema_bench(bench_t *bench)
+{
+	const size_t iters = bench->iters;
+	futex_t loc_fut = FUTEX_INITIALIZER;	/* Local to this call, so each benchmark thread uses its own futex. */
+	
+	for (size_t i = 0; i < iters; ++i) {
+		futex_down(&loc_fut);	/* One down/up pair per iteration. */
+		/* no-op: the critical section is deliberately empty */
+		compiler_barrier();
+		futex_up(&loc_fut);
+	}
+}
 
 static void thread_func(void *arg)
@@ -125,14 +138,5 @@
 	bench_t *bench = (bench_t*)arg;
 	
-	switch (bench->type) {
-	case T_KERN_FUTEX:
-		kernel_futex_bench(bench);
-		break;
-	case T_LIBC_FUTEX:
-		libc_futex_bench(bench);
-		break;
-	default:
-		assert(false);
-	}
+	bench->func(bench);
 	
 	/* Signal another thread completed. */
@@ -175,12 +179,43 @@
 }
 
+static const char *results_txt = "/tmp/rcu-bench-results.txt";
+
+static bool open_results(void)
+{
+	results_fd = fopen(results_txt, "a");	/* "a": append, so output of earlier runs is kept. */
+	return NULL != results_fd;	/* false when the file could not be opened */
+}
+
+static void close_results(void)
+{
+	if (results_fd) {	/* Still NULL if open_results() failed or never ran. */
+		fclose(results_fd);
+	}
+}
+/* printf()-style output to stdout and, if it is open, to the results file. */
+static void print_res(const char *fmt, ... )
+{
+	va_list args;
+	va_start(args, fmt);	/* args is indeterminate after a v*printf() call: restart it per consumer. */
+	if (results_fd) vfprintf(results_fd, fmt, args);	/* results_fd is NULL when fopen() failed. */
+	va_end(args);
+	va_start(args, fmt);
+	vprintf(fmt, args);
+	va_end(args);
+}
+
 static void print_usage(void)
 {
 	printf("rcubench [test-name] [k-iterations] [n-threads] {work-size}\n");
+	printf("Available tests: \n");	/* The names below are the argv[1] values the command line parser accepts. */
+	printf("  ke-futex .. threads down/up a shared futex and do some work when\n");
+	printf("              in critical section; do a little less work outside CS.\n");
+	printf("  lock     .. threads lock/unlock separate futexes.\n");
+	printf("  sema     .. threads down/up separate futexes.\n");
 	printf("eg:\n");
-	printf("  rcubench ke   100000 3 4\n");
-	printf("  rcubench libc 100000 2\n");
-	printf("  rcubench def-ke  \n");
-	printf("  rcubench def-libc\n");
+	printf("  rcubench ke-futex  100000 3 4\n");
+	printf("  rcubench lock 100000 2 ..runs futex_lock/unlock in a loop\n");
+	printf("  rcubench sema 100000 2 ..runs futex_down/up in a loop\n");
+	printf("Results are stored in %s\n", results_txt);	/* The file open_results() appends to. */
 }
 
@@ -188,40 +223,25 @@
 	const char **err)
 {
-	if (argc < 2) {
-		*err = "Benchmark name not specified";
-		return false;
-	}
-
-	futex_initialize(&bench->ke_bench_fut, 1);
-	fibril_mutex_initialize(&bench->libc_bench_mtx);
-	
-	if (0 == str_cmp(argv[1], "def-ke")) {
-		bench->type = T_KERN_FUTEX;
-		bench->nthreads = 4;
-		bench->iters = 1000 * 1000;
-		bench->array_size = 10;
-		bench->array = malloc(bench->array_size * sizeof(size_t));
-		return NULL != bench->array;
-	} else if (0 == str_cmp(argv[1], "def-libc")) {
-		bench->type = T_LIBC_FUTEX;
-		bench->nthreads = 4;
-		bench->iters = 1000 * 1000;
-		bench->array_size = 0;
-		bench->array = NULL;
-		return true;
-	} else if (0 == str_cmp(argv[1], "ke")) {
-		bench->type = T_KERN_FUTEX;
-	} else if (0 == str_cmp(argv[1], "libc")) {
-		bench->type = T_LIBC_FUTEX;
-	} else {
-		*err = "Unknown test name";
-		return false;
-	}
-	
 	if (argc < 4) {
 		*err = "Not enough parameters";
 		return false;
 	}
-	
+
+	futex_initialize(&bench->bench_fut, 1);
+	
+	if (0 == str_cmp(argv[1], "ke-futex")) {
+		bench->func = kernel_futex_bench;
+	} else if (0 == str_cmp(argv[1], "lock")) {
+		bench->func = libc_futex_lock_bench;
+	} else if (0 == str_cmp(argv[1], "sema")) {
+		bench->func = libc_futex_sema_bench;
+	} else {
+		*err = "Unknown test name";
+		return false;
+	}
+	
+	bench->name = argv[1];
+	
+	/* Determine iteration count. */
 	uint32_t iter_cnt = 0;
 	int ret = str_uint32_t(argv[2], NULL, 0, true, &iter_cnt);
@@ -234,4 +254,5 @@
 	} 
 	
+	/* Determine thread count. */
 	uint32_t thread_cnt = 0;
 	ret = str_uint32_t(argv[3], NULL, 0, true, &thread_cnt);
@@ -244,4 +265,5 @@
 	} 
 	
+	/* Set work array size. */
 	if (argc > 4) {
 		uint32_t work_size = 0;
@@ -258,4 +280,5 @@
 	}
 	
+	/* Allocate work array. */
 	if (0 < bench->array_size) {
 		bench->array = malloc(bench->array_size * sizeof(size_t));
@@ -284,7 +307,8 @@
 	}
 	
-	printf("Running '%s' futex bench in '%zu' threads with '%zu' iterations.\n",
-		bench.type == T_KERN_FUTEX ? "kernel" : "libc", 
-		bench.nthreads, bench.iters);
+	open_results();
+	
+	print_res("Running '%s' futex bench in '%zu' threads with '%zu' iterations.\n",
+		bench.name, bench.nthreads, bench.iters);
 	
 	struct timeval start, end;
@@ -296,14 +320,17 @@
 	int64_t duration = tv_sub(&end, &start);
 	
-	if (0 == duration)
-		duration = 1;
-	
 	uint64_t secs = (uint64_t)duration / 1000 / 1000;
 	uint64_t total_iters = (uint64_t)bench.iters * bench.nthreads;
-	uint64_t iters_per_sec = total_iters * 1000 * 1000 / duration;
-	
-	printf("Completed %" PRIu64 " iterations in %" PRId64  " usecs (%" PRIu64 
+	uint64_t iters_per_sec = 0;
+	
+	if (0 < duration) {
+		iters_per_sec = total_iters * 1000 * 1000 / duration;
+	}
+	
+	print_res("Completed %" PRIu64 " iterations in %" PRId64  " usecs (%" PRIu64 
 		" secs); %" PRIu64 " iters/sec\n", 
 		total_iters, duration, secs, iters_per_sec);	
+
+	close_results();
 	
 	return 0;
