Index: uspace/srv/bd/hr/io.c
===================================================================
--- uspace/srv/bd/hr/io.c	(revision 9d1685b9d20320c0d757562d21216a101ea960c6)
+++ uspace/srv/bd/hr/io.c	(revision a3486f22ef90cb49886854387bb881de5188d3c7)
@@ -39,7 +39,10 @@
 #include <inttypes.h>
 #include <stdio.h>
+#include <stdlib.h>
+#include <str.h>
 #include <str_error.h>
 
 #include "io.h"
+#include "parity_stripe.h"
 #include "util.h"
 #include "var.h"
@@ -115,4 +118,208 @@
 }
 
+/** Read a block range from one extent without touching stripe parity.
+ *
+ * A failed read is reported through the volume's extent state callback.
+ *
+ * @param arg Pointer to the hr_io_raid5_t request.
+ *
+ * @return Result of hr_read_direct().
+ */
+errno_t hr_io_raid5_basic_reader(void *arg)
+{
+	hr_io_raid5_t *io = arg;
+	hr_volume_t *vol = io->vol;
+	hr_extent_t *extents = (hr_extent_t *)&vol->extents;
+
+	errno_t rc = hr_read_direct(extents[io->extent].svc_id, io->ba,
+	    io->cnt, io->data_read);
+	if (rc == EOK)
+		return EOK;
+
+	/* let the volume know this extent misbehaved */
+	vol->hr_ops.ext_state_cb(vol, io->extent, rc);
+	return rc;
+}
+
+/** Read a block range from one extent and commit the buffer to parity.
+ *
+ * On a failed read the stripe is flagged as aborted and the failure is
+ * reported through the volume's extent state callback. The parity commit
+ * is issued whether or not the read succeeded.
+ *
+ * NOTE(review): after a failed read the commit XORs whatever io->data_read
+ * happens to contain. Presumably harmless because the stripe has been
+ * aborted by then -- confirm.
+ *
+ * @param arg Pointer to the hr_io_raid5_t request.
+ *
+ * @return Result of hr_read_direct().
+ */
+errno_t hr_io_raid5_reader(void *arg)
+{
+	hr_io_raid5_t *io = arg;
+	hr_volume_t *vol = io->vol;
+	hr_stripe_t *stripe = io->stripe;
+	hr_extent_t *extents = (hr_extent_t *)&vol->extents;
+
+	errno_t rc = hr_read_direct(extents[io->extent].svc_id, io->ba,
+	    io->cnt, io->data_read);
+	if (rc != EOK) {
+		hr_stripe_parity_abort(stripe);
+		vol->hr_ops.ext_state_cb(vol, io->extent, rc);
+	}
+
+	hr_stripe_commit_parity(stripe, io->strip_off, io->data_read,
+	    io->cnt * vol->bsize);
+
+	return rc;
+}
+
+/** Write a block range to one extent without touching stripe parity.
+ *
+ * A failed write is reported through the volume's extent state callback.
+ *
+ * @param arg Pointer to the hr_io_raid5_t request.
+ *
+ * @return Result of hr_write_direct().
+ */
+errno_t hr_io_raid5_basic_writer(void *arg)
+{
+	hr_io_raid5_t *io = arg;
+	hr_volume_t *vol = io->vol;
+	hr_extent_t *extents = (hr_extent_t *)&vol->extents;
+
+	errno_t rc = hr_write_direct(extents[io->extent].svc_id, io->ba,
+	    io->cnt, io->data_write);
+	if (rc == EOK)
+		return EOK;
+
+	/* let the volume know this extent misbehaved */
+	vol->hr_ops.ext_state_cb(vol, io->extent, rc);
+	return rc;
+}
+
+/** Commit new data to stripe parity, then write it to its extent.
+ *
+ * The data is XORed into the stripe parity buffer first. The worker then
+ * waits until all parity commits of the stripe have arrived (or the stripe
+ * was aborted) before issuing the actual write.
+ *
+ * @param arg Pointer to the hr_io_raid5_t request.
+ *
+ * @return EAGAIN if the stripe was aborted, otherwise the result of
+ *         hr_write_direct().
+ */
+errno_t hr_io_raid5_writer(void *arg)
+{
+	hr_io_raid5_t *io = arg;
+	hr_volume_t *vol = io->vol;
+	hr_stripe_t *stripe = io->stripe;
+	hr_extent_t *extents = (hr_extent_t *)&vol->extents;
+
+	hr_stripe_commit_parity(stripe, io->strip_off, io->data_write,
+	    io->cnt * vol->bsize);
+
+	hr_stripe_wait_for_parity_commits(stripe);
+	if (stripe->abort)
+		return EAGAIN;
+
+	errno_t rc = hr_write_direct(extents[io->extent].svc_id, io->ba,
+	    io->cnt, io->data_write);
+	if (rc != EOK)
+		vol->hr_ops.ext_state_cb(vol, io->extent, rc);
+
+	return rc;
+}
+
+/** Commit write data to stripe parity without writing it to any extent.
+ *
+ * @param arg Pointer to the hr_io_raid5_t request.
+ *
+ * @return Always EOK.
+ */
+errno_t hr_io_raid5_noop_writer(void *arg)
+{
+	hr_io_raid5_t *io = arg;
+	uint64_t size = io->cnt * io->vol->bsize;
+
+	hr_stripe_commit_parity(io->stripe, io->strip_off, io->data_write,
+	    size);
+
+	return EOK;
+}
+
+/** Copy part of the accumulated stripe parity buffer to the read buffer.
+ *
+ * Waits until all parity commits of the stripe have arrived, then copies
+ * io->cnt blocks from stripe->parity at offset io->strip_off into
+ * io->data_read.
+ *
+ * @param arg Pointer to the hr_io_raid5_t request.
+ *
+ * @return EOK on success, EAGAIN if the stripe was aborted.
+ */
+errno_t hr_io_raid5_parity_getter(void *arg)
+{
+	hr_io_raid5_t *io = arg;
+	hr_stripe_t *stripe = io->stripe;
+	size_t bsize = stripe->vol->bsize;
+
+	hr_stripe_wait_for_parity_commits(stripe);
+
+	if (!stripe->abort) {
+		memcpy(io->data_read, stripe->parity + io->strip_off,
+		    io->cnt * bsize);
+		return EOK;
+	}
+
+	return EAGAIN;
+}
+
+/** Read-modify-write worker: update parity by subtraction, then write.
+ *
+ * Reads the current contents of the target range, XORs both the old and
+ * the new data into the stripe parity buffer (counted as a single parity
+ * commit), waits until all parity commits of the stripe have arrived and
+ * finally writes the new data to the extent.
+ *
+ * The temporary buffer holding the old data is released as soon as it has
+ * been folded into the parity, so no return path can leak it.
+ *
+ * @param arg Pointer to the hr_io_raid5_t request.
+ *
+ * @return EOK on success, EAGAIN if the stripe was aborted, otherwise the
+ *         error returned by hr_read_direct() or hr_write_direct().
+ */
+errno_t hr_io_raid5_subtract_writer(void *arg)
+{
+	errno_t rc;
+
+	hr_io_raid5_t *io = arg;
+	hr_stripe_t *stripe = io->stripe;
+
+	size_t ext_idx = io->extent;
+	hr_extent_t *extents = (hr_extent_t *)&io->vol->extents;
+
+	uint8_t *data = malloc_waitok(io->cnt * io->vol->bsize);
+
+	rc = hr_read_direct(extents[ext_idx].svc_id, io->ba, io->cnt, data);
+	if (rc != EOK) {
+		io->vol->hr_ops.ext_state_cb(io->vol, io->extent, rc);
+		hr_stripe_parity_abort(stripe);
+		free(data);
+		return rc;
+	}
+
+	fibril_mutex_lock(&stripe->parity_lock);
+
+	/* parity ^= old data */
+	hr_raid5_xor(stripe->parity + io->strip_off, data,
+	    io->cnt * io->vol->bsize);
+
+	/* parity ^= new data */
+	hr_raid5_xor(stripe->parity + io->strip_off, io->data_write,
+	    io->cnt * io->vol->bsize);
+
+	stripe->ps_added++;
+	fibril_condvar_broadcast(&stripe->ps_added_cv);
+	fibril_mutex_unlock(&stripe->parity_lock);
+
+	/* old data is not needed past this point (also covers abort path) */
+	free(data);
+
+	hr_stripe_wait_for_parity_commits(stripe);
+	if (stripe->abort)
+		return EAGAIN;
+
+	rc = hr_write_direct(extents[ext_idx].svc_id, io->ba, io->cnt,
+	    io->data_write);
+	if (rc != EOK)
+		io->vol->hr_ops.ext_state_cb(io->vol, io->extent, rc);
+
+	return rc;
+}
+
+/** Read a block range into a scratch buffer and commit it to parity.
+ *
+ * The data read is used only for the parity computation and is discarded
+ * afterwards. On a failed read the stripe is flagged as aborted and the
+ * failure is reported through the volume's extent state callback.
+ *
+ * This worker must only ever read from the extent: the scratch buffer is
+ * freshly allocated and carries no meaningful contents before the read.
+ *
+ * @param arg Pointer to the hr_io_raid5_t request.
+ *
+ * @return EOK on success, otherwise the error returned by
+ *         hr_read_direct().
+ */
+errno_t hr_io_raid5_reconstruct_reader(void *arg)
+{
+	errno_t rc;
+
+	hr_io_raid5_t *io = arg;
+	hr_stripe_t *stripe = io->stripe;
+
+	size_t ext_idx = io->extent;
+	hr_extent_t *extents = (hr_extent_t *)&io->vol->extents;
+
+	uint8_t *data = malloc_waitok(io->cnt * io->vol->bsize);
+
+	rc = hr_read_direct(extents[ext_idx].svc_id, io->ba, io->cnt, data);
+	if (rc != EOK) {
+		hr_stripe_parity_abort(stripe);
+		io->vol->hr_ops.ext_state_cb(io->vol, io->extent, rc);
+		free(data);
+		return rc;
+	}
+
+	hr_stripe_commit_parity(stripe, io->strip_off, data,
+	    io->cnt * io->vol->bsize);
+
+	free(data);
+
+	return EOK;
+}
+
+/** Write part of the accumulated stripe parity buffer to an extent.
+ *
+ * Waits until all parity commits of the stripe have arrived, then writes
+ * io->cnt blocks taken from stripe->parity at offset io->strip_off to the
+ * extent selected by io->extent. A failed write is reported through the
+ * volume's extent state callback against stripe->p_extent.
+ *
+ * @param arg Pointer to the hr_io_raid5_t request.
+ *
+ * @return EAGAIN if the stripe was aborted, otherwise the result of
+ *         hr_write_direct().
+ */
+errno_t hr_io_raid5_parity_writer(void *arg)
+{
+	hr_io_raid5_t *io = arg;
+	hr_volume_t *vol = io->vol;
+	hr_stripe_t *stripe = io->stripe;
+	hr_extent_t *extents = (hr_extent_t *)&vol->extents;
+
+	hr_stripe_wait_for_parity_commits(stripe);
+	if (stripe->abort)
+		return EAGAIN;
+
+	errno_t rc = hr_write_direct(extents[io->extent].svc_id, io->ba,
+	    io->cnt, stripe->parity + io->strip_off);
+	if (rc == EOK)
+		return EOK;
+
+	vol->hr_ops.ext_state_cb(vol, stripe->p_extent, rc);
+	return rc;
+}
+
 static errno_t exec_io_op(hr_io_t *io)
 {
@@ -154,5 +361,5 @@
 		break;
 	default:
-		return EINVAL;
+		assert(0);
 	}
 
Index: uspace/srv/bd/hr/io.h
===================================================================
--- uspace/srv/bd/hr/io.h	(revision 9d1685b9d20320c0d757562d21216a101ea960c6)
+++ uspace/srv/bd/hr/io.h	(revision a3486f22ef90cb49886854387bb881de5188d3c7)
@@ -37,18 +37,28 @@
 #define _HR_IO_H
 
+#include "parity_stripe.h"
 #include "var.h"
+#include "util.h"
 
 typedef struct hr_io {
-	hr_bd_op_type_t type;
+	hr_bd_op_type_t type; /* read/write/sync */
 	uint64_t ba;
 	uint64_t cnt;
-	size_t extent;
 	void *data_read;
 	const void *data_write;
-	hr_volume_t *vol;
+	size_t extent; /* extent index */
+	hr_volume_t *vol; /* volume back-pointer */
 } hr_io_t;
 
-errno_t hr_io_worker(void *);
-errno_t hr_io_worker_basic(void *);
+/** Work item handed to the hr_io_raid5_*() worker functions. */
+typedef struct hr_io_raid5 {
+	uint64_t ba; /* first block address on the extent */
+	uint64_t cnt; /* number of blocks */
+	void *data_read; /* destination buffer (read-type workers) */
+	const void *data_write; /* source buffer (write-type workers) */
+	size_t extent; /* extent index */
+	uint64_t strip_off; /* needed for offsetting parity commits */
+	hr_stripe_t *stripe; /* stripe this request belongs to */
+	hr_volume_t *vol; /* volume back-pointer */
+} hr_io_raid5_t;
 
 extern errno_t hr_write_direct(service_id_t, uint64_t, size_t, const void *);
@@ -56,4 +66,17 @@
 extern errno_t hr_sync_cache(service_id_t, uint64_t, size_t);
 
+extern errno_t hr_io_worker(void *);
+extern errno_t hr_io_worker_basic(void *);
+
+extern errno_t hr_io_raid5_basic_reader(void *);
+extern errno_t hr_io_raid5_reader(void *);
+extern errno_t hr_io_raid5_basic_writer(void *);
+extern errno_t hr_io_raid5_writer(void *);
+extern errno_t hr_io_raid5_noop_writer(void *);
+extern errno_t hr_io_raid5_parity_getter(void *);
+extern errno_t hr_io_raid5_subtract_writer(void *);
+extern errno_t hr_io_raid5_reconstruct_reader(void *);
+extern errno_t hr_io_raid5_parity_writer(void *);
+
 #endif
 
Index: uspace/srv/bd/hr/meson.build
===================================================================
--- uspace/srv/bd/hr/meson.build	(revision 9d1685b9d20320c0d757562d21216a101ea960c6)
+++ uspace/srv/bd/hr/meson.build	(revision a3486f22ef90cb49886854387bb881de5188d3c7)
@@ -37,4 +37,5 @@
             'metadata/foreign/softraid/softraid.c',
             'metadata/native.c',
+            'parity_stripe.c',
             'raid0.c',
             'raid1.c',
Index: uspace/srv/bd/hr/parity_stripe.c
===================================================================
--- uspace/srv/bd/hr/parity_stripe.c	(revision a3486f22ef90cb49886854387bb881de5188d3c7)
+++ uspace/srv/bd/hr/parity_stripe.c	(revision a3486f22ef90cb49886854387bb881de5188d3c7)
@@ -0,0 +1,926 @@
+/*
+ * Copyright (c) 2025 Miroslav Cimerman
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in the
+ *   documentation and/or other materials provided with the distribution.
+ * - The name of the author may not be used to endorse or promote products
+ *   derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** @addtogroup hr
+ * @{
+ */
+/**
+ * @file
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <str.h>
+
+#include "io.h"
+#include "parity_stripe.h"
+#include "util.h"
+#include "var.h"
+
+static void execute_stripe_degraded_mixed(hr_stripe_t *, size_t);
+static void execute_stripe_degraded(hr_stripe_t *, size_t);
+static void execute_stripe_optimal_reconstruct(hr_stripe_t *);
+static void execute_stripe_optimal_subtract(hr_stripe_t *);
+static void execute_write_stripe(hr_stripe_t *, size_t);
+static void execute_read_stripe(hr_stripe_t *, size_t);
+static void execute_stripe_degraded_good(hr_stripe_t *, size_t);
+static bool hr_stripe_range_non_extension(const range_t *, const range_t *,
+    range_t *);
+static size_t hr_stripe_merge_extent_spans(hr_stripe_t *, size_t, range_t [2]);
+static void hr_reset_stripe(hr_stripe_t *);
+static void hr_stripe_extend_range(range_t *, const range_t *);
+static bool hr_ranges_overlap(const range_t *, const range_t *, range_t *);
+
+/** Allocate and initialize an array of stripes.
+ *
+ * Every stripe gets its lock and condition variable initialized, a zeroed
+ * parity buffer of vol->strip_size bytes and a zeroed extent span array
+ * with vol->extent_no entries.
+ *
+ * @param vol   Volume the stripes belong to.
+ * @param cnt   Number of stripes to create.
+ * @param write Stored in each stripe's write flag.
+ *
+ * @return Array of stripes, or NULL if any allocation failed (everything
+ *         allocated so far is released).
+ */
+hr_stripe_t *hr_create_stripes(hr_volume_t *vol, size_t cnt, bool write)
+{
+	hr_stripe_t *stripes = calloc(cnt, sizeof(*stripes));
+	if (stripes == NULL)
+		return NULL;
+
+	for (size_t i = 0; i < cnt; i++) {
+		hr_stripe_t *s = &stripes[i];
+
+		fibril_mutex_initialize(&s->parity_lock);
+		fibril_condvar_initialize(&s->ps_added_cv);
+		s->vol = vol;
+		s->write = write;
+
+		s->parity = calloc(1, vol->strip_size);
+		if (s->parity == NULL) {
+			hr_destroy_stripes(stripes, cnt);
+			return NULL;
+		}
+
+		s->extent_span = calloc(vol->extent_no,
+		    sizeof(*s->extent_span));
+		if (s->extent_span == NULL) {
+			hr_destroy_stripes(stripes, cnt);
+			return NULL;
+		}
+	}
+
+	return stripes;
+}
+
+/** Release an array of stripes together with their buffers.
+ *
+ * @param stripes Array returned by hr_create_stripes(); NULL is accepted
+ *                and ignored.
+ * @param cnt     Number of stripes in the array.
+ */
+void hr_destroy_stripes(hr_stripe_t *stripes, size_t cnt)
+{
+	if (stripes == NULL)
+		return;
+
+	/* free(NULL) is a no-op, so partially built stripes are fine */
+	for (hr_stripe_t *s = stripes; s < stripes + cnt; s++) {
+		free(s->parity);
+		free(s->extent_span);
+	}
+
+	free(stripes);
+}
+
+/** XOR a buffer into the stripe parity buffer and count the commit.
+ *
+ * Under the stripe's parity lock: XORs @a data into stripe->parity at
+ * offset @a strip_off, increments ps_added and wakes up everyone waiting
+ * on ps_added_cv.
+ *
+ * @param stripe    Stripe whose parity buffer is updated.
+ * @param strip_off Offset into the parity buffer (callers in this file
+ *                  pass block offset * bsize).
+ * @param data      Data to XOR in.
+ * @param size      Length handed to hr_raid5_xor() (callers in this file
+ *                  pass cnt * bsize).
+ */
+void hr_stripe_commit_parity(hr_stripe_t *stripe, uint64_t strip_off,
+    const void *data, uint64_t size)
+{
+	fibril_mutex_lock(&stripe->parity_lock);
+	hr_raid5_xor(stripe->parity + strip_off, data, size);
+	stripe->ps_added++;
+	fibril_condvar_broadcast(&stripe->ps_added_cv);
+	fibril_mutex_unlock(&stripe->parity_lock);
+}
+
+/** Block until the stripe's parity is complete or the stripe is aborted.
+ *
+ * Parity is considered complete once the expected number of commits is
+ * final (p_count_final) and ps_added has reached ps_to_be_added.
+ *
+ * @param stripe Stripe to wait on.
+ */
+void hr_stripe_wait_for_parity_commits(hr_stripe_t *stripe)
+{
+	fibril_mutex_lock(&stripe->parity_lock);
+
+	for (;;) {
+		if (stripe->abort)
+			break;
+
+		if (stripe->p_count_final &&
+		    stripe->ps_added >= stripe->ps_to_be_added)
+			break;
+
+		fibril_condvar_wait(&stripe->ps_added_cv,
+		    &stripe->parity_lock);
+	}
+
+	fibril_mutex_unlock(&stripe->parity_lock);
+}
+
+/** Flag the stripe as aborted and wake up all parity waiters.
+ *
+ * Sets stripe->abort under the parity lock and broadcasts on ps_added_cv,
+ * which makes hr_stripe_wait_for_parity_commits() return.
+ *
+ * @param stripe Stripe to abort.
+ */
+void hr_stripe_parity_abort(hr_stripe_t *stripe)
+{
+	fibril_mutex_lock(&stripe->parity_lock);
+	stripe->abort = true;
+	fibril_condvar_broadcast(&stripe->ps_added_cv);
+	fibril_mutex_unlock(&stripe->parity_lock);
+}
+
+/** Submit the workers of a stripe, as a write or a read stripe.
+ *
+ * @param stripe     Stripe to execute; stripe->write selects the path.
+ * @param bad_extent Passed through to the read/write stripe executor.
+ */
+void execute_stripe(hr_stripe_t *stripe, size_t bad_extent)
+{
+	if (!stripe->write) {
+		execute_read_stripe(stripe, bad_extent);
+		return;
+	}
+
+	execute_write_stripe(stripe, bad_extent);
+}
+
+/** Wait for all workers of a stripe and record the outcome.
+ *
+ * The result of hr_fgroup_wait() is stored in stripe->rc. EAGAIN resets
+ * the stripe via hr_reset_stripe(); any other result marks it as done.
+ *
+ * @param stripe Stripe whose worker group is waited for.
+ */
+void wait_for_stripe(hr_stripe_t *stripe)
+{
+	errno_t rc = hr_fgroup_wait(stripe->worker_group, NULL, NULL);
+
+	stripe->rc = rc;
+	if (rc != EAGAIN) {
+		stripe->done = true;
+		return;
+	}
+
+	hr_reset_stripe(stripe);
+}
+
+/** Submit workers for a degraded write that does not touch the bad extent.
+ *
+ * For every written data extent one hr_io_raid5_subtract_writer() is
+ * submitted. For every merged range in stripe->total_height one
+ * hr_io_raid5_reconstruct_reader() and one hr_io_raid5_parity_writer() are
+ * submitted against the parity extent.
+ *
+ * @param stripe     Stripe to execute.
+ * @param bad_extent Index of the bad extent; it is skipped.
+ */
+static void execute_stripe_degraded_good(hr_stripe_t *stripe, size_t bad_extent)
+{
+	hr_volume_t *vol = stripe->vol;
+
+	/* the number of expected parity commits is known up front */
+	stripe->ps_to_be_added = stripe->strips_touched; /* writers */
+	stripe->ps_to_be_added += stripe->range_count; /* parity readers */
+	stripe->p_count_final = true;
+
+	/* writers + (parity reader + parity writer) per range */
+	size_t worker_cnt = stripe->strips_touched + stripe->range_count * 2;
+	stripe->worker_group = hr_fgroup_create(vol->fge, worker_cnt);
+
+	for (size_t e = 0; e < vol->extent_no; e++) {
+		if (e == bad_extent || e == stripe->p_extent)
+			continue;
+		if (stripe->extent_span[e].cnt == 0)
+			continue;
+
+		hr_io_raid5_t *io = hr_fgroup_alloc(stripe->worker_group);
+		io->extent = e;
+		io->data_write = stripe->extent_span[e].data_write;
+		io->ba = stripe->extent_span[e].range.start;
+		io->cnt = stripe->extent_span[e].cnt;
+		/* block offset within the strip -> byte offset into parity */
+		io->strip_off = stripe->extent_span[e].strip_off * vol->bsize;
+		io->vol = vol;
+		io->stripe = stripe;
+
+		hr_fgroup_submit(stripe->worker_group,
+		    hr_io_raid5_subtract_writer, io);
+	}
+
+	for (size_t r = 0; r < stripe->range_count; r++) {
+		/* read the parity extent over this range and commit it */
+		hr_io_raid5_t *p_reader = hr_fgroup_alloc(stripe->worker_group);
+		p_reader->extent = stripe->p_extent;
+		p_reader->ba = stripe->total_height[r].start;
+		p_reader->cnt = stripe->total_height[r].end -
+		    stripe->total_height[r].start + 1;
+		p_reader->vol = vol;
+		p_reader->stripe = stripe;
+
+		/* derive the byte offset into the parity buffer from ba */
+		p_reader->strip_off = p_reader->ba;
+		hr_sub_data_offset(vol, &p_reader->strip_off);
+		p_reader->strip_off %= vol->strip_size / vol->bsize;
+		p_reader->strip_off *= vol->bsize;
+
+		hr_fgroup_submit(stripe->worker_group,
+		    hr_io_raid5_reconstruct_reader, p_reader);
+
+		/* write the updated parity back over the same range */
+		hr_io_raid5_t *p_writer = hr_fgroup_alloc(stripe->worker_group);
+		p_writer->extent = stripe->p_extent;
+		p_writer->ba = stripe->total_height[r].start;
+		p_writer->cnt = stripe->total_height[r].end -
+		    stripe->total_height[r].start + 1;
+		p_writer->vol = vol;
+		p_writer->stripe = stripe;
+
+		p_writer->strip_off = p_writer->ba;
+		hr_sub_data_offset(vol, &p_writer->strip_off);
+		p_writer->strip_off %= vol->strip_size / vol->bsize;
+		p_writer->strip_off *= vol->bsize;
+
+		hr_fgroup_submit(stripe->worker_group,
+		    hr_io_raid5_parity_writer, p_writer);
+	}
+}
+
+/** Submit workers for a degraded write that also targets the bad extent.
+ *
+ * The data destined for the bad extent cannot be written, so it is only
+ * committed to the parity buffer (hr_io_raid5_noop_writer()). For every
+ * other data extent:
+ *  - rows of the bad extent's range that are not being written on that
+ *    extent are read and committed to parity
+ *    (hr_io_raid5_reconstruct_reader()),
+ *  - rows written on both extents are committed and written
+ *    (hr_io_raid5_writer()),
+ *  - rows written only on that extent are handled by
+ *    hr_io_raid5_subtract_writer().
+ * Finally, per merged range, the parity extent is read over the part not
+ * covered by the bad extent's range and the parity is written back.
+ *
+ * NOTE(review): ps_to_be_added is incremented and p_count_final is set
+ * without holding stripe->parity_lock while workers may already be
+ * running -- confirm this is safe under the fibril group's scheduling.
+ *
+ * @param stripe     Stripe to execute.
+ * @param bad_extent Index of the bad (data) extent.
+ */
+static void execute_stripe_degraded_mixed(hr_stripe_t *stripe, size_t bad_extent)
+{
+	hr_volume_t *vol = stripe->vol;
+
+	size_t worker_cnt = (vol->extent_no - 2) * 3 + 3; /* upper bound */
+	stripe->worker_group = hr_fgroup_create(vol->fge, worker_cnt);
+
+	stripe->ps_to_be_added = 1;
+
+	/* data for the bad extent goes into parity only */
+	hr_io_raid5_t *nop_write = hr_fgroup_alloc(stripe->worker_group);
+	nop_write->ba = stripe->extent_span[bad_extent].range.start;
+	nop_write->cnt = stripe->extent_span[bad_extent].cnt;
+	nop_write->strip_off =
+	    stripe->extent_span[bad_extent].strip_off * vol->bsize;
+	nop_write->data_write = stripe->extent_span[bad_extent].data_write;
+	nop_write->vol = vol;
+	nop_write->stripe = stripe;
+
+	hr_fgroup_submit(stripe->worker_group, hr_io_raid5_noop_writer,
+	    nop_write);
+
+	for (size_t e = 0; e < vol->extent_no; e++) {
+		if (e == bad_extent || e == stripe->p_extent)
+			continue;
+
+		/* part of the bad extent's range not written on extent e */
+		range_t uncommon = { 0, 0 };
+		bool has_uncommon;
+		has_uncommon = hr_stripe_range_non_extension(
+		    &stripe->extent_span[bad_extent].range,
+		    &stripe->extent_span[e].range,
+		    &uncommon);
+
+		if (stripe->extent_span[e].cnt == 0 || has_uncommon) {
+			stripe->ps_to_be_added++;
+
+			hr_io_raid5_t *io =
+			    hr_fgroup_alloc(stripe->worker_group);
+			io->extent = e;
+			if (stripe->extent_span[bad_extent].cnt == 0) {
+				io->ba =
+				    stripe->extent_span[bad_extent].range.start;
+				io->cnt = stripe->extent_span[bad_extent].cnt;
+			} else {
+				io->ba = uncommon.start;
+				io->cnt = uncommon.end - uncommon.start + 1;
+			}
+			/*
+			 * The range read may start above the start of the
+			 * bad extent's range; shift the parity offset by
+			 * the same number of blocks, as is done for the
+			 * overlap writer below.
+			 */
+			io->strip_off =
+			    (stripe->extent_span[bad_extent].strip_off +
+			    (io->ba -
+			    stripe->extent_span[bad_extent].range.start)) *
+			    vol->bsize;
+			io->vol = vol;
+			io->stripe = stripe;
+
+			hr_fgroup_submit(stripe->worker_group,
+			    hr_io_raid5_reconstruct_reader, io);
+
+			if (stripe->extent_span[e].cnt == 0)
+				continue;
+		}
+
+		/* rows written on both extent e and the bad extent */
+		range_t overlap_range;
+		bool overlap_up = true;
+		if (hr_ranges_overlap(&stripe->extent_span[e].range,
+		    &stripe->extent_span[bad_extent].range,
+		    &overlap_range)) {
+			stripe->ps_to_be_added++;
+
+			hr_io_raid5_t *io =
+			    hr_fgroup_alloc(stripe->worker_group);
+			io->extent = e;
+			io->ba = overlap_range.start;
+			io->cnt = overlap_range.end - overlap_range.start + 1;
+
+			size_t diff = overlap_range.start -
+			    stripe->extent_span[e].range.start;
+
+			io->strip_off =
+			    (stripe->extent_span[e].strip_off + diff) *
+			    vol->bsize;
+
+			io->data_write = stripe->extent_span[e].data_write;
+			io->data_write += diff * vol->bsize;
+			/* overlap sits at the start of extent e's range */
+			if (diff == 0)
+				overlap_up = false;
+
+			io->vol = vol;
+			io->stripe = stripe;
+
+			hr_fgroup_submit(stripe->worker_group,
+			    hr_io_raid5_writer, io);
+		}
+
+		/* rows written on extent e only */
+		bool has_independent;
+		range_t independent = { 0, 0 };
+		has_independent = hr_stripe_range_non_extension(
+		    &stripe->extent_span[e].range,
+		    &stripe->extent_span[bad_extent].range,
+		    &independent);
+		if (has_independent) {
+			stripe->ps_to_be_added++;
+
+			hr_io_raid5_t *io =
+			    hr_fgroup_alloc(stripe->worker_group);
+			io->extent = e;
+			io->ba = independent.start;
+			io->cnt = independent.end - independent.start + 1;
+			size_t diff = 0;
+			/* independent part follows the overlap: skip it */
+			if (!overlap_up) {
+				diff = overlap_range.end -
+				    overlap_range.start + 1;
+			}
+			io->strip_off =
+			    (stripe->extent_span[e].strip_off + diff) *
+			    vol->bsize;
+			io->data_write = stripe->extent_span[e].data_write;
+			io->data_write += diff * vol->bsize;
+			io->vol = vol;
+			io->stripe = stripe;
+
+			hr_fgroup_submit(stripe->worker_group,
+			    hr_io_raid5_subtract_writer, io);
+		}
+	}
+
+	bool has_independent = false;
+	range_t independent = { 0, 0 };
+	for (size_t r = 0; r < stripe->range_count; r++) {
+		/* old parity is needed outside the bad extent's range */
+		has_independent = hr_stripe_range_non_extension(
+		    &stripe->total_height[r],
+		    &stripe->extent_span[bad_extent].range,
+		    &independent);
+		if (has_independent) {
+			stripe->ps_to_be_added++;
+
+			hr_io_raid5_t *io =
+			    hr_fgroup_alloc(stripe->worker_group);
+			io->extent = stripe->p_extent;
+			io->ba = independent.start;
+			io->cnt = independent.end - independent.start + 1;
+
+			io->strip_off = io->ba;
+			hr_sub_data_offset(vol, &io->strip_off);
+			io->strip_off %= vol->strip_size / vol->bsize;
+			io->strip_off *= vol->bsize;
+
+			io->vol = vol;
+			io->stripe = stripe;
+
+			hr_fgroup_submit(stripe->worker_group,
+			    hr_io_raid5_reconstruct_reader, io);
+		}
+
+		/* parity writer for the whole merged range */
+		hr_io_raid5_t *pio = hr_fgroup_alloc(stripe->worker_group);
+		pio->extent = stripe->p_extent;
+		pio->ba = stripe->total_height[r].start;
+		pio->cnt = stripe->total_height[r].end -
+		    stripe->total_height[r].start + 1;
+		pio->strip_off = pio->ba;
+		hr_sub_data_offset(vol, &pio->strip_off);
+		pio->strip_off %= vol->strip_size / vol->bsize;
+		pio->strip_off *= vol->bsize;
+		pio->vol = vol;
+		pio->stripe = stripe;
+
+		hr_fgroup_submit(stripe->worker_group,
+		    hr_io_raid5_parity_writer, pio);
+	}
+
+	/* all commits are accounted for; let the waiters re-check */
+	stripe->p_count_final = true;
+	fibril_condvar_broadcast(&stripe->ps_added_cv);
+}
+
+/** Submit workers for a write stripe on a degraded volume.
+ *
+ * With a bad data extent the extent spans are merged into
+ * stripe->total_height and the work is handed to the "mixed" executor
+ * (the bad extent is written in this stripe) or the "good" executor (it is
+ * not). With a bad parity extent the data is written without any parity
+ * handling.
+ *
+ * @param stripe     Stripe to execute.
+ * @param bad_extent Index of the bad extent.
+ */
+static void execute_stripe_degraded(hr_stripe_t *stripe, size_t bad_extent)
+{
+	hr_volume_t *vol = stripe->vol;
+
+	if (bad_extent != stripe->p_extent) {
+		stripe->range_count = hr_stripe_merge_extent_spans(stripe,
+		    vol->extent_no, stripe->total_height);
+
+		if (stripe->extent_span[bad_extent].cnt > 0)
+			execute_stripe_degraded_mixed(stripe, bad_extent);
+		else
+			execute_stripe_degraded_good(stripe, bad_extent);
+
+		return;
+	}
+
+	/* parity is bad, issue non-redundant writes */
+	stripe->worker_group =
+	    hr_fgroup_create(vol->fge, stripe->strips_touched);
+
+	for (size_t e = 0; e < vol->extent_no; e++) {
+		if (e == bad_extent || stripe->extent_span[e].cnt == 0)
+			continue;
+
+		hr_io_raid5_t *io = hr_fgroup_alloc(stripe->worker_group);
+		io->vol = vol;
+		io->stripe = stripe;
+		io->extent = e;
+		io->ba = stripe->extent_span[e].range.start;
+		io->cnt = stripe->extent_span[e].cnt;
+		io->strip_off = stripe->extent_span[e].strip_off * vol->bsize;
+		io->data_write = stripe->extent_span[e].data_write;
+
+		hr_fgroup_submit(stripe->worker_group,
+		    hr_io_raid5_basic_writer, io);
+	}
+}
+
+/** Submit workers for a write stripe using reconstruct-write.
+ *
+ * Every written data extent gets a hr_io_raid5_writer(). Unless the whole
+ * stripe is written, additional hr_io_raid5_reconstruct_reader() workers
+ * read the parts of each merged range that are not being written, so that
+ * the parity can be computed from scratch. One hr_io_raid5_parity_writer()
+ * is submitted per merged range.
+ *
+ * NOTE(review): p_count_final is set at the end of the first range
+ * iteration, yet a later iteration can still increment ps_to_be_added;
+ * both happen without holding stripe->parity_lock -- confirm.
+ *
+ * NOTE(review): in the full-stripe case worker_cnt accounts for a single
+ * parity writer while the loop submits one per range; presumably
+ * range_count is 1 there -- confirm.
+ *
+ * @param stripe Stripe to execute.
+ */
+static void execute_stripe_optimal_reconstruct(hr_stripe_t *stripe)
+{
+	hr_volume_t *vol = stripe->vol;
+
+	stripe->range_count = hr_stripe_merge_extent_spans(stripe,
+	    vol->extent_no, stripe->total_height);
+
+	bool full_stripe = false;
+	size_t worker_cnt;
+	if (stripe->strips_touched == vol->extent_no - 1 &&
+	    stripe->partial_strips_touched == 0) {
+		/* full-stripe */
+		worker_cnt = stripe->strips_touched; /* writers */
+		worker_cnt += 1; /* parity writer */
+
+		/* no readers needed, so the commit count is already final */
+		stripe->ps_to_be_added = stripe->strips_touched;
+		stripe->p_count_final = true;
+
+		full_stripe = true;
+	} else {
+		worker_cnt = stripe->strips_touched; /* writers */
+
+		/* readers (upper bound) */
+		worker_cnt += (vol->extent_no - 1) - stripe->strips_touched;
+		worker_cnt += stripe->partial_strips_touched;
+
+		worker_cnt += stripe->range_count; /* parity writer(s) */
+
+		/* readers are added to the count as they are submitted */
+		stripe->ps_to_be_added = stripe->strips_touched; /* writers */
+	}
+
+	stripe->worker_group = hr_fgroup_create(vol->fge, worker_cnt);
+
+	for (size_t e = 0; e < vol->extent_no; e++) {
+		if (e == stripe->p_extent)
+			continue;
+
+		if (stripe->extent_span[e].cnt == 0)
+			continue;
+
+		hr_io_raid5_t *io = hr_fgroup_alloc(stripe->worker_group);
+		io->extent = e;
+		io->data_write = stripe->extent_span[e].data_write;
+		io->ba = stripe->extent_span[e].range.start;
+		io->cnt = stripe->extent_span[e].cnt;
+		io->strip_off = stripe->extent_span[e].strip_off * vol->bsize;
+		io->vol = vol;
+		io->stripe = stripe;
+
+		hr_fgroup_submit(stripe->worker_group, hr_io_raid5_writer, io);
+	}
+
+	for (size_t r = 0; r < stripe->range_count; r++) {
+		if (full_stripe)
+			goto skip_readers;
+		for (size_t e = 0; e < vol->extent_no; e++) {
+			if (e == stripe->p_extent)
+				continue;
+
+			range_t range_extension = { 0, 0 };
+
+			/*
+			 * An untouched extent is read over the whole range;
+			 * a touched one only over the part of the range it
+			 * is not writing.
+			 */
+			bool need_reader = false;
+			if (stripe->extent_span[e].cnt == 0) {
+				range_extension = stripe->total_height[r];
+				need_reader = true;
+			} else {
+				need_reader = hr_stripe_range_non_extension(
+				    &stripe->total_height[r],
+				    &stripe->extent_span[e].range,
+				    &range_extension);
+			}
+
+			if (need_reader) {
+				stripe->ps_to_be_added++;
+
+				hr_io_raid5_t *io =
+				    hr_fgroup_alloc(stripe->worker_group);
+				io->extent = e;
+				io->ba = range_extension.start;
+				io->cnt = range_extension.end -
+				    range_extension.start + 1;
+				io->vol = vol;
+				io->stripe = stripe;
+
+				/* byte offset into the parity buffer */
+				io->strip_off = io->ba;
+				hr_sub_data_offset(vol, &io->strip_off);
+				io->strip_off %= vol->strip_size / vol->bsize;
+				io->strip_off *= vol->bsize;
+
+				hr_fgroup_submit(stripe->worker_group,
+				    hr_io_raid5_reconstruct_reader, io);
+			}
+		}
+
+		stripe->p_count_final = true;
+		fibril_condvar_broadcast(&stripe->ps_added_cv);
+
+	skip_readers:
+
+		/* parity writer */
+		hr_io_raid5_t *io = hr_fgroup_alloc(stripe->worker_group);
+		io->extent = stripe->p_extent;
+		io->ba = stripe->total_height[r].start;
+		io->cnt = stripe->total_height[r].end -
+		    stripe->total_height[r].start + 1;
+		io->vol = vol;
+		io->stripe = stripe;
+
+		io->strip_off = io->ba;
+		hr_sub_data_offset(vol, &io->strip_off);
+		io->strip_off %= vol->strip_size / vol->bsize;
+		io->strip_off *= vol->bsize;
+
+		hr_fgroup_submit(stripe->worker_group,
+		    hr_io_raid5_parity_writer, io);
+	}
+}
+
+/** Submit workers for a write stripe using subtract (read-modify-write).
+ *
+ * Every written data extent gets a hr_io_raid5_subtract_writer(). For each
+ * merged range in stripe->total_height the parity extent is read and
+ * committed (hr_io_raid5_reconstruct_reader()) and afterwards written back
+ * (hr_io_raid5_parity_writer()).
+ *
+ * @param stripe Stripe to execute.
+ */
+static void execute_stripe_optimal_subtract(hr_stripe_t *stripe)
+{
+	hr_volume_t *vol = stripe->vol;
+
+	stripe->range_count = hr_stripe_merge_extent_spans(stripe,
+	    vol->extent_no, stripe->total_height);
+
+	size_t worker_cnt;
+	worker_cnt = stripe->strips_touched; /* writers */
+	worker_cnt += stripe->range_count * 2; /* parity readers & writers */
+
+	/* the number of expected parity commits is known up front */
+	stripe->ps_to_be_added = stripe->strips_touched; /* writers */
+	stripe->ps_to_be_added += stripe->range_count; /* parity readers */
+	stripe->p_count_final = true;
+
+	stripe->worker_group = hr_fgroup_create(vol->fge, worker_cnt);
+
+	for (size_t e = 0; e < vol->extent_no; e++) {
+		if (e == stripe->p_extent)
+			continue;
+
+		if (stripe->extent_span[e].cnt == 0)
+			continue;
+
+		hr_io_raid5_t *io = hr_fgroup_alloc(stripe->worker_group);
+		io->extent = e;
+		io->data_write = stripe->extent_span[e].data_write;
+		io->ba = stripe->extent_span[e].range.start;
+		io->cnt = stripe->extent_span[e].cnt;
+		/* block offset within the strip -> byte offset into parity */
+		io->strip_off = stripe->extent_span[e].strip_off * vol->bsize;
+		io->vol = vol;
+		io->stripe = stripe;
+
+		hr_fgroup_submit(stripe->worker_group,
+		    hr_io_raid5_subtract_writer, io);
+	}
+
+	for (size_t r = 0; r < stripe->range_count; r++) {
+		/* read the parity extent over this range and commit it */
+		hr_io_raid5_t *p_reader = hr_fgroup_alloc(stripe->worker_group);
+		p_reader->extent = stripe->p_extent;
+		p_reader->ba = stripe->total_height[r].start;
+		p_reader->cnt = stripe->total_height[r].end -
+		    stripe->total_height[r].start + 1;
+		p_reader->vol = vol;
+		p_reader->stripe = stripe;
+
+		/* derive the byte offset into the parity buffer from ba */
+		p_reader->strip_off = p_reader->ba;
+		hr_sub_data_offset(vol, &p_reader->strip_off);
+		p_reader->strip_off %= vol->strip_size / vol->bsize;
+		p_reader->strip_off *= vol->bsize;
+
+		hr_fgroup_submit(stripe->worker_group,
+		    hr_io_raid5_reconstruct_reader, p_reader);
+
+		/* write the updated parity back over the same range */
+		hr_io_raid5_t *p_writer = hr_fgroup_alloc(stripe->worker_group);
+		p_writer->extent = stripe->p_extent;
+		p_writer->ba = stripe->total_height[r].start;
+		p_writer->cnt = stripe->total_height[r].end -
+		    stripe->total_height[r].start + 1;
+		p_writer->vol = vol;
+		p_writer->stripe = stripe;
+
+		p_writer->strip_off = p_writer->ba;
+		hr_sub_data_offset(vol, &p_writer->strip_off);
+		p_writer->strip_off %= vol->strip_size / vol->bsize;
+		p_writer->strip_off *= vol->bsize;
+
+		hr_fgroup_submit(stripe->worker_group,
+		    hr_io_raid5_parity_writer, p_writer);
+	}
+
+}
+
+/** Pick and run the write strategy for a stripe.
+ *
+ * A bad_extent below vol->extent_no selects the degraded path. Otherwise
+ * stripe->subtract chooses between the subtract and the reconstruct
+ * strategy.
+ *
+ * @param stripe     Stripe to execute.
+ * @param bad_extent Index of the bad extent, or a value of at least
+ *                   vol->extent_no if there is none.
+ */
+static void execute_write_stripe(hr_stripe_t *stripe, size_t bad_extent)
+{
+	if (bad_extent < stripe->vol->extent_no)
+		execute_stripe_degraded(stripe, bad_extent);
+	else if (stripe->subtract)
+		execute_stripe_optimal_subtract(stripe);
+	else
+		execute_stripe_optimal_reconstruct(stripe);
+}
+
+/** Submit workers for a read stripe.
+ *
+ * If no extent is bad (bad_extent == vol->extent_no), the bad extent is
+ * the stripe's parity extent, or nothing is read from the bad extent in
+ * this stripe, every touched data extent is read directly.
+ *
+ * Otherwise the bad extent's data is rebuilt in the stripe parity buffer.
+ * For every other data extent:
+ *  - rows of the bad extent's range that are not requested from that
+ *    extent are read and committed (hr_io_raid5_reconstruct_reader()),
+ *  - rows requested from both are read into the caller's buffer and
+ *    committed (hr_io_raid5_reader()),
+ *  - rows requested only from that extent are read directly
+ *    (hr_io_raid5_basic_reader()).
+ * The parity extent is read and committed over the bad extent's range and
+ * hr_io_raid5_parity_getter() finally copies the rebuilt data out.
+ *
+ * NOTE(review): worker_cnt is labelled an upper bound, but the parity
+ * path can submit up to three workers per data extent plus two more --
+ * confirm hr_fgroup_alloc() copes with more allocations than announced.
+ *
+ * NOTE(review): ps_to_be_added and p_count_final are updated without
+ * holding stripe->parity_lock while workers may already be running --
+ * confirm this is safe under the fibril group's scheduling.
+ *
+ * @param stripe     Stripe to execute.
+ * @param bad_extent Index of the bad extent, or vol->extent_no if none.
+ */
+static void execute_read_stripe(hr_stripe_t *stripe, size_t bad_extent)
+{
+	hr_volume_t *vol = stripe->vol;
+
+	/* no parity involved */
+	if (bad_extent == vol->extent_no ||
+	    bad_extent == stripe->p_extent ||
+	    stripe->extent_span[bad_extent].cnt == 0) {
+		stripe->worker_group =
+		    hr_fgroup_create(vol->fge, stripe->strips_touched);
+		for (size_t e = 0; e < vol->extent_no; e++) {
+			if (e == bad_extent || e == stripe->p_extent)
+				continue;
+			if (stripe->extent_span[e].cnt == 0)
+				continue;
+
+			hr_io_raid5_t *io =
+			    hr_fgroup_alloc(stripe->worker_group);
+			io->extent = e;
+			io->data_read = stripe->extent_span[e].data_read;
+			io->ba = stripe->extent_span[e].range.start;
+			io->cnt = stripe->extent_span[e].cnt;
+			io->strip_off =
+			    stripe->extent_span[e].strip_off * vol->bsize;
+			io->vol = vol;
+			io->stripe = stripe;
+
+			hr_fgroup_submit(stripe->worker_group,
+			    hr_io_raid5_basic_reader, io);
+		}
+
+		return;
+	}
+
+	/* parity involved */
+
+	size_t worker_cnt = (vol->extent_no - 2) * 2 + 1; /* upper bound */
+	stripe->worker_group = hr_fgroup_create(vol->fge, worker_cnt);
+
+	stripe->ps_to_be_added = 0;
+
+	for (size_t e = 0; e < vol->extent_no; e++) {
+		if (e == bad_extent || e == stripe->p_extent)
+			continue;
+
+		/* part of the bad extent's range not requested from e */
+		range_t uncommon = { 0, 0 };
+		bool has_uncommon;
+		has_uncommon = hr_stripe_range_non_extension(
+		    &stripe->extent_span[bad_extent].range,
+		    &stripe->extent_span[e].range,
+		    &uncommon);
+
+		if (stripe->extent_span[e].cnt == 0 || has_uncommon) {
+
+			stripe->ps_to_be_added++;
+
+			hr_io_raid5_t *io =
+			    hr_fgroup_alloc(stripe->worker_group);
+			io->extent = e;
+			if (stripe->extent_span[bad_extent].cnt == 0) {
+				io->ba =
+				    stripe->extent_span[bad_extent].range.start;
+				io->cnt = stripe->extent_span[bad_extent].cnt;
+			} else {
+				io->ba = uncommon.start;
+				io->cnt = uncommon.end - uncommon.start + 1;
+			}
+			/*
+			 * The range read may start above the start of the
+			 * bad extent's range; shift the parity offset by
+			 * the same number of blocks, as is done for the
+			 * overlap reader below.
+			 */
+			io->strip_off =
+			    (stripe->extent_span[bad_extent].strip_off +
+			    (io->ba -
+			    stripe->extent_span[bad_extent].range.start)) *
+			    vol->bsize;
+			io->vol = vol;
+			io->stripe = stripe;
+
+			hr_fgroup_submit(stripe->worker_group,
+			    hr_io_raid5_reconstruct_reader, io);
+
+			if (stripe->extent_span[e].cnt == 0)
+				continue;
+		}
+
+		/* rows requested from both extent e and the bad extent */
+		range_t overlap_range;
+		bool overlap_up = true;
+		if (hr_ranges_overlap(&stripe->extent_span[e].range,
+		    &stripe->extent_span[bad_extent].range,
+		    &overlap_range)) {
+
+			stripe->ps_to_be_added++;
+
+			hr_io_raid5_t *io =
+			    hr_fgroup_alloc(stripe->worker_group);
+			io->extent = e;
+			io->ba = overlap_range.start;
+			io->cnt = overlap_range.end - overlap_range.start + 1;
+
+			size_t diff = overlap_range.start -
+			    stripe->extent_span[e].range.start;
+			io->strip_off =
+			    (stripe->extent_span[e].strip_off + diff) *
+			    vol->bsize;
+
+			io->data_read = stripe->extent_span[e].data_read;
+			io->data_read += diff * vol->bsize;
+			/* overlap sits at the start of extent e's range */
+			if (diff == 0)
+				overlap_up = false;
+
+			io->vol = vol;
+			io->stripe = stripe;
+
+			hr_fgroup_submit(stripe->worker_group,
+			    hr_io_raid5_reader, io);
+		}
+
+		/*
+		 * Rows requested from extent e only. They have to be
+		 * computed against the bad extent's range (as the degraded
+		 * write path does); anything else would include the overlap
+		 * that was already submitted above.
+		 */
+		bool has_independent;
+		range_t independent = { 0, 0 };
+		has_independent = hr_stripe_range_non_extension(
+		    &stripe->extent_span[e].range,
+		    &stripe->extent_span[bad_extent].range,
+		    &independent);
+		if (has_independent) {
+			hr_io_raid5_t *io =
+			    hr_fgroup_alloc(stripe->worker_group);
+			io->extent = e;
+			io->ba = independent.start;
+			io->cnt = independent.end - independent.start + 1;
+			size_t diff = 0;
+			/* independent part follows the overlap: skip it */
+			if (!overlap_up) {
+				diff =
+				    overlap_range.end - overlap_range.start + 1;
+			}
+			io->strip_off =
+			    (stripe->extent_span[e].strip_off + diff) *
+			    vol->bsize;
+			io->data_read = stripe->extent_span[e].data_read;
+			io->data_read += diff * vol->bsize;
+			io->vol = vol;
+			io->stripe = stripe;
+
+			hr_fgroup_submit(stripe->worker_group,
+			    hr_io_raid5_basic_reader, io);
+		}
+	}
+
+	stripe->ps_to_be_added++;
+
+	/* parity strip over the bad extent's range */
+	hr_io_raid5_t *io = hr_fgroup_alloc(stripe->worker_group);
+	io->extent = stripe->p_extent;
+	io->ba = stripe->extent_span[bad_extent].range.start;
+	io->cnt = stripe->extent_span[bad_extent].cnt;
+	io->strip_off = stripe->extent_span[bad_extent].strip_off * vol->bsize;
+	io->vol = vol;
+	io->stripe = stripe;
+
+	hr_fgroup_submit(stripe->worker_group, hr_io_raid5_reconstruct_reader,
+	    io);
+
+	/* all commits are accounted for; let the waiters re-check */
+	stripe->p_count_final = true;
+	fibril_condvar_broadcast(&stripe->ps_added_cv);
+
+	/* copies the rebuilt data into the caller's buffer */
+	hr_io_raid5_t *pcopier_io = hr_fgroup_alloc(stripe->worker_group);
+	pcopier_io->cnt = stripe->extent_span[bad_extent].cnt;
+	pcopier_io->strip_off =
+	    stripe->extent_span[bad_extent].strip_off * vol->bsize;
+	pcopier_io->data_read = stripe->extent_span[bad_extent].data_read;
+	pcopier_io->vol = vol;
+	pcopier_io->stripe = stripe;
+
+	hr_fgroup_submit(stripe->worker_group, hr_io_raid5_parity_getter,
+	    pcopier_io);
+}
+
+/** Get the part of one range that is not covered by another range.
+ *
+ *  Disjoint ranges yield the whole of r1. If r1 sticks out of r2 on both
+ *  sides, only the part below r2 is returned.
+ *
+ *  @param r1 Main range.
+ *  @param r2 Queried range.
+ *  @param out Place to store resulting range, untouched if false is returned.
+ *  @return true if output range is non-empty, else false.
+ */
+static bool hr_stripe_range_non_extension(const range_t *r1, const range_t *r2,
+    range_t *out)
+{
+	/* no common block at all, so the whole of r1 is the answer */
+	if (r1->end < r2->start || r1->start > r2->end) {
+		*out = *r1;
+		return true;
+	}
+
+	/* the ranges intersect from here on */
+	if (r1->start < r2->start) {
+		/* r1 sticks out below r2 (and possibly above as well) */
+		out->start = r1->start;
+		out->end = r2->start - 1;
+		return true;
+	}
+
+	if (r1->end > r2->end) {
+		/* r1 sticks out above r2 only */
+		out->start = r2->end + 1;
+		out->end = r1->end;
+		return true;
+	}
+
+	/* r1 lies completely within r2 */
+	return false;
+}
+
+/** Merge overlapping extent spans into at most 2 ranges.
+ *
+ *  NOTE(review): spans that only touch (no shared block) are not merged.
+ *  @param s Stripe whose extent spans with non-zero cnt are merged.
+ *  @param extent_no Number of extents.
+ *  @param out Place to store resulting ranges.
+ *  @return Number of resulting ranges.
+ */
+static size_t hr_stripe_merge_extent_spans(hr_stripe_t *s, size_t extent_no,
+    range_t out[2])
+{
+	size_t out_count = 0;
+
+	for (size_t i = 0; i < extent_no; i++) {
+		if (s->extent_span[i].cnt == 0)
+			continue; /* empty span, nothing to merge */
+		const range_t *r = &s->extent_span[i].range;
+		bool merged = false;
+
+		for (size_t j = 0; j < out_count; j++) {
+			if (hr_ranges_overlap(&out[j], r, NULL)) {
+				hr_stripe_extend_range(&out[j], r);
+				merged = true;
+				/* the 2 outputs may overlap now */
+				if (out_count == 2 &&
+				    hr_ranges_overlap(&out[0], &out[1], NULL)) {
+					hr_stripe_extend_range(&out[0], &out[1]);
+					out_count = 1;
+				}
+
+				break;
+			}
+		}
+
+		if (!merged) {
+			assert(out_count < 2); /* out[] holds 2 ranges */
+			out[out_count++] = *r;
+		}
+	}
+
+	return out_count;
+}
+
+static void hr_reset_stripe(hr_stripe_t *stripe)
+{
+	printf("%s\n", __func__);
+
+	/* clear result flags, parity bookkeeping and the parity strip */
+	stripe->done = false;
+	stripe->abort = false;
+	stripe->rc = EOK;
+	stripe->p_count_final = false;
+	stripe->ps_to_be_added = 0;
+	stripe->ps_added = 0;
+	memset(stripe->parity, 0, stripe->vol->strip_size);
+}
+
+/** Grow a range in place so that it also covers another range.
+ *
+ *  @param r1 Range to grow; receives the lower start and the higher end.
+ *  @param r2 Range that has to be covered by the result.
+ *
+ */
+static void hr_stripe_extend_range(range_t *r1, const range_t *r2)
+{
+	uint64_t lo = r1->start < r2->start ? r1->start : r2->start;
+	uint64_t hi = r1->end > r2->end ? r1->end : r2->end;
+	r1->start = lo;
+	r1->end = hi;
+}
+
+/* Do two inclusive ranges intersect? The overlap is stored in out if set. */
+static bool hr_ranges_overlap(const range_t *a, const range_t *b, range_t *out)
+{
+	uint64_t lo = (b->start > a->start) ? b->start : a->start;
+	uint64_t hi = (b->end < a->end) ? b->end : a->end;
+
+	if (lo > hi)
+		return false;
+
+	if (out != NULL) {
+		out->start = lo;
+		out->end = hi;
+	}
+
+	return true;
+}
+
+/** @}
+ */
Index: uspace/srv/bd/hr/parity_stripe.h
===================================================================
--- uspace/srv/bd/hr/parity_stripe.h	(revision a3486f22ef90cb49886854387bb881de5188d3c7)
+++ uspace/srv/bd/hr/parity_stripe.h	(revision a3486f22ef90cb49886854387bb881de5188d3c7)
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2025 Miroslav Cimerman
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in the
+ *   documentation and/or other materials provided with the distribution.
+ * - The name of the author may not be used to endorse or promote products
+ *   derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** @addtogroup hr
+ * @{
+ */
+/**
+ * @file
+ */
+
+#ifndef _HR_STRIPE_H
+#define _HR_STRIPE_H
+
+#include <fibril_synch.h>
+#include <errno.h>
+#include <hr.h>
+#include <io/log.h>
+
+#include "io.h"
+#include "var.h"
+
+typedef struct {
+	uint64_t start; /* first block of the range */
+	uint64_t end; /* last block of the range (inclusive) */
+} range_t;
+
+typedef struct hr_stripe {
+	hr_volume_t *vol;
+	bool write; /* NOTE(review): likely true for write IO, confirm */
+	bool subtract; /* set by the writer when few strips are touched */
+	size_t strips_touched; /* strips of this stripe the IO touches */
+	size_t partial_strips_touched; /* touched strips not fully covered */
+	struct {
+		range_t range; /* block range on the extent */
+		uint64_t cnt; /* number of blocks, 0 if extent is unused */
+		uint64_t strip_off; /* offset within the strip, in blocks */
+		const void *data_write; /* position in the write buffer */
+		void *data_read; /* position in the read buffer */
+	} *extent_span;
+	uint64_t p_extent; /* parity extent index for this stripe */
+
+	hr_fgroup_t *worker_group;
+
+	errno_t rc; /* EAGAIN makes the bd read/write paths retry */
+	bool abort;
+	bool done; /* done stripes are skipped when retrying */
+
+	fibril_mutex_t parity_lock;
+	uint8_t *parity; /* the actual parity strip */
+
+	/* parity writers waiting until this many parity commits */
+	size_t ps_to_be_added;
+	size_t ps_added; /* number of parities committed to stripe */
+	fibril_condvar_t ps_added_cv;
+	bool p_count_final;
+
+	/*
+	 * Possibly need 2 ranges because single IO that partially spans
+	 * 2 strips and overflows to second one without creating an adjacent
+	 * range results in parity not being continuous.
+	 *
+	 * Example: 2+1 extents, 4 block strip, last extent parity
+	 *
+	 *  E0      E1     P
+	 * +----+ +----+ +-----+
+	 * |    | | IO | | IOP |
+	 * |----| |----| |-----|
+	 * |    | |    | |     |
+	 * |----| |----| |-----|
+	 * |    | |    | |     |
+	 * |----| |----| |-----|
+	 * | IO | |    | | IOP |
+	 * +----+ +----+ +-----+
+	 *
+	 * - need 2 parity writers
+	 */
+	range_t total_height[2]; /* for knowing writing parity range(s) */
+	size_t range_count;
+} hr_stripe_t;
+
+extern hr_stripe_t *hr_create_stripes(hr_volume_t *, size_t, bool);
+extern void hr_destroy_stripes(hr_stripe_t *, size_t);
+extern void hr_stripe_commit_parity(hr_stripe_t *, uint64_t, const void *,
+    uint64_t);
+extern void hr_stripe_wait_for_parity_commits(hr_stripe_t *);
+extern void hr_stripe_parity_abort(hr_stripe_t *);
+extern void execute_stripe(hr_stripe_t *, size_t);
+extern void wait_for_stripe(hr_stripe_t *);
+
+#endif
+
+/** @}
+ */
Index: uspace/srv/bd/hr/raid5.c
===================================================================
--- uspace/srv/bd/hr/raid5.c	(revision 9d1685b9d20320c0d757562d21216a101ea960c6)
+++ uspace/srv/bd/hr/raid5.c	(revision a3486f22ef90cb49886854387bb881de5188d3c7)
@@ -50,21 +50,17 @@
 #include <str_error.h>
 
+#include "io.h"
+#include "parity_stripe.h"
 #include "superblock.h"
 #include "util.h"
 #include "var.h"
 
-static errno_t hr_raid5_vol_usable(hr_volume_t *);
-static ssize_t hr_raid5_get_bad_ext(hr_volume_t *);
-static errno_t hr_raid5_update_vol_state(hr_volume_t *);
-static void xor(void *, const void *, size_t);
-
-static errno_t hr_raid5_read_degraded(hr_volume_t *, uint64_t, uint64_t,
-    void *, size_t);
-static errno_t hr_raid5_write(hr_volume_t *, uint64_t, uint64_t, aoff64_t,
-    const void *, size_t);
-static errno_t hr_raid5_write_parity(hr_volume_t *, uint64_t, uint64_t,
-    uint64_t, const void *, size_t);
-static errno_t hr_raid5_bd_op(hr_bd_op_type_t, bd_srv_t *, aoff64_t, size_t,
-    void *, const void *, size_t);
+static void hr_raid5_vol_state_eval_forced(hr_volume_t *);
+
+static size_t hr_raid5_parity_extent(hr_level_t, hr_layout_t, size_t,
+    uint64_t);
+static size_t hr_raid5_data_extent(hr_level_t, hr_layout_t, size_t, uint64_t,
+    uint64_t);
+
 static errno_t hr_raid5_rebuild(void *);
 
@@ -104,19 +100,18 @@
 	}
 
-	fibril_rwlock_write_lock(&new_volume->states_lock);
-
-	errno_t rc = hr_raid5_update_vol_state(new_volume);
-	if (rc != EOK) {
-		HR_NOTE("\"%s\": unusable state, not creating\n",
-		    new_volume->devname);
-		fibril_rwlock_write_unlock(&new_volume->states_lock);
-		return rc;
-	}
-
 	bd_srvs_init(&new_volume->hr_bds);
 	new_volume->hr_bds.ops = &hr_raid5_bd_ops;
 	new_volume->hr_bds.sarg = new_volume;
 
-	fibril_rwlock_write_unlock(&new_volume->states_lock);
+	hr_raid5_vol_state_eval_forced(new_volume);
+
+	fibril_rwlock_read_lock(&new_volume->states_lock);
+	hr_vol_state_t state = new_volume->state;
+	fibril_rwlock_read_unlock(&new_volume->states_lock);
+	if (state == HR_VOL_FAULTY || state == HR_VOL_NONE) {
+		HR_NOTE("\"%s\": unusable state, not creating\n",
+		    new_volume->devname);
+		return EINVAL;
+	}
 
 	return EOK;
@@ -133,12 +128,8 @@
 		return EINVAL;
 
-	uint64_t total_blkno = vol->truncated_blkno * vol->extent_no;
-
 	vol->data_offset = vol->meta_ops->get_data_offset();
 
-	vol->data_blkno = total_blkno;
-	/* count md blocks */
-	vol->data_blkno -= vol->meta_ops->get_size() * vol->extent_no;
-	vol->data_blkno -= vol->truncated_blkno; /* count parity */
+	uint64_t single_sz = vol->truncated_blkno - vol->meta_ops->get_size();
+	vol->data_blkno = single_sz * (vol->extent_no - 1);
 
 	vol->strip_size = HR_STRIP_SIZE;
@@ -154,89 +145,454 @@
 void hr_raid5_vol_state_eval(hr_volume_t *vol)
 {
-	fibril_mutex_lock(&vol->lock);
+	HR_DEBUG("%s()", __func__);
+	/* atomically consume the dirty flag, do nothing if it was not set */
+	bool exp = true;
+	if (!atomic_compare_exchange_strong(&vol->state_dirty, &exp, false))
+		return;
+	/* bump and store metadata; the save result is ignored (void cast) */
+	vol->meta_ops->inc_counter(vol);
+	(void)vol->meta_ops->save(vol, WITH_STATE_CALLBACK);
+
+	hr_raid5_vol_state_eval_forced(vol);
+}
+
+errno_t hr_raid5_add_hotspare(hr_volume_t *vol, service_id_t hotspare)
+{
+	HR_DEBUG("%s()", __func__);
+
+	errno_t rc = hr_util_add_hotspare(vol, hotspare);
+	/* called regardless of rc; returns early unless state is dirty */
+	hr_raid5_vol_state_eval(vol);
+
+	return rc;
+}
+
+void hr_raid5_ext_state_cb(hr_volume_t *vol, size_t extent, errno_t rc)
+{
+	HR_DEBUG("%s()", __func__);
+	/* callers must already hold extents_lock */
+	assert(fibril_rwlock_is_locked(&vol->extents_lock));
+
+	if (rc == EOK)
+		return;
+
 	fibril_rwlock_write_lock(&vol->states_lock);
-	(void)hr_raid5_update_vol_state(vol);
+	/* translate the error code into an extent state */
+	switch (rc) {
+	case ENOMEM:
+		hr_update_ext_state(vol, extent, HR_EXT_INVALID);
+		break;
+	case ENOENT:
+		hr_update_ext_state(vol, extent, HR_EXT_MISSING);
+		break;
+	default:
+		hr_update_ext_state(vol, extent, HR_EXT_FAILED);
+	}
+
+	hr_mark_vol_state_dirty(vol);
+
 	fibril_rwlock_write_unlock(&vol->states_lock);
-	fibril_mutex_unlock(&vol->lock);
-}
-
-errno_t hr_raid5_add_hotspare(hr_volume_t *vol, service_id_t hotspare)
-{
-	HR_DEBUG("%s()", __func__);
-
-	fibril_mutex_lock(&vol->lock);
-
-	errno_t rc = hr_util_add_hotspare(vol, hotspare);
+}
+
+static errno_t hr_raid5_bd_open(bd_srvs_t *bds, bd_srv_t *bd)
+{
+	HR_DEBUG("%s()\n", __func__);
+
+	hr_volume_t *vol = bd->srvs->sarg;
+	/* count one more opener of the volume; bds itself is not used */
+	atomic_fetch_add_explicit(&vol->open_cnt, 1, memory_order_relaxed);
+
+	return EOK;
+}
+
+static errno_t hr_raid5_bd_close(bd_srv_t *bd)
+{
+	HR_DEBUG("%s()\n", __func__);
+
+	hr_volume_t *vol = bd->srvs->sarg;
+	/* drop the open count taken in hr_raid5_bd_open() */
+	atomic_fetch_sub_explicit(&vol->open_cnt, 1, memory_order_relaxed);
+
+	return EOK;
+}
+
+static errno_t hr_raid5_bd_sync_cache(bd_srv_t *bd, aoff64_t ba, size_t cnt)
+{
+	/* XXX: nothing is synced here, success is reported unconditionally */
+	return EOK;
+}
+
+static errno_t hr_raid5_bd_read_blocks(bd_srv_t *bd, uint64_t ba, size_t cnt,
+    void *data_read, size_t size)
+{
+	hr_volume_t *vol = bd->srvs->sarg;
+	errno_t rc;
+
+	if (size < cnt * vol->bsize)
+		return EINVAL;
+
+	fibril_rwlock_read_lock(&vol->states_lock);
+	hr_vol_state_t vol_state = vol->state;
+	fibril_rwlock_read_unlock(&vol->states_lock);
+
+	if (vol_state == HR_VOL_FAULTY || vol_state == HR_VOL_NONE)
+		return EIO;
+
+	rc = hr_check_ba_range(vol, cnt, ba);
 	if (rc != EOK)
+		return rc;
+
+	uint64_t strip_size = vol->strip_size / vol->bsize; /* in blocks */
+	uint64_t strip_no = ba / strip_size;
+
+	/* calculate number of stripes touched */
+	uint64_t last_ba = ba + cnt - 1;
+	uint64_t end_strip_no = last_ba / strip_size;
+	uint64_t start_stripe = strip_no / (vol->extent_no - 1);
+	uint64_t end_stripe = end_strip_no / (vol->extent_no - 1);
+	size_t stripes_cnt = end_stripe - start_stripe + 1;
+
+	hr_stripe_t *stripes = hr_create_stripes(vol, stripes_cnt, false);
+	if (stripes == NULL)
+		return ENOMEM;
+
+	/*
+	 * Pre-allocate range locks, because after group creation and
+	 * firing off IO requests there is no easy consistent ENOMEM error
+	 * path.
+	 */
+	hr_range_lock_t **rlps = malloc_waitok(stripes_cnt * sizeof(*rlps));
+	for (size_t i = 0; i < stripes_cnt; i++)
+		rlps[i] = malloc_waitok(sizeof(**rlps));
+
+	/*
+	 * extent order has to be locked for the whole IO duration,
+	 * so that workers have consistent targets
+	 */
+	fibril_rwlock_read_lock(&vol->extents_lock);
+
+	for (uint64_t s = start_stripe; s <= end_stripe; s++) {
+		uint64_t relative = s - start_stripe;
+		hr_range_lock_acquire_noalloc(rlps[relative], vol, s, 1);
+	}
+
+	uint64_t phys_block, len;
+	size_t left;
+
+	hr_layout_t layout = vol->layout;
+	hr_level_t level = vol->level;
+
+	/* parity extent */
+	size_t p_extent = hr_raid5_parity_extent(level, layout,
+	    vol->extent_no, strip_no);
+
+	uint64_t strip_off = ba % strip_size;
+
+	left = cnt;
+
+	while (left != 0) {
+		if (level == HR_LVL_5) {
+			p_extent = hr_raid5_parity_extent(level, layout,
+			    vol->extent_no, strip_no);
+		}
+
+		size_t extent = hr_raid5_data_extent(level, layout,
+		    vol->extent_no, strip_no, p_extent);
+
+		uint64_t stripe_no = strip_no / (vol->extent_no - 1);
+		size_t relative_si = stripe_no - start_stripe; /* relative stripe index */
+		hr_stripe_t *stripe = &stripes[relative_si];
+		stripe->p_extent = p_extent;
+
+		stripe->strips_touched++;
+
+		phys_block = stripe_no * strip_size + strip_off;
+		cnt = min(left, strip_size - strip_off);
+		len = vol->bsize * cnt;
+		hr_add_data_offset(vol, &phys_block);
+
+		stripe->extent_span[extent].range.start = phys_block;
+		stripe->extent_span[extent].range.end = phys_block + cnt - 1;
+		stripe->extent_span[extent].cnt = cnt;
+		stripe->extent_span[extent].data_read = data_read;
+		stripe->extent_span[extent].strip_off = strip_off;
+
+		data_read += len;
+		left -= cnt;
+		strip_off = 0;
+		strip_no++;
+	}
+
+retry:
+	size_t bad_extent = vol->extent_no;
+
+	uint64_t rebuild_pos = atomic_load_explicit(&vol->rebuild_blk,
+	    memory_order_relaxed);
+
+	fibril_rwlock_read_lock(&vol->states_lock);
+
+	for (size_t e = 0; e < vol->extent_no; e++) {
+		hr_ext_state_t s = vol->extents[e].state;
+		if ((vol->state == HR_VOL_DEGRADED && s != HR_EXT_ONLINE) ||
+		    (s == HR_EXT_REBUILD && rebuild_pos < start_stripe)) {
+			bad_extent = e;
+			break;
+		}
+	}
+
+	fibril_rwlock_read_unlock(&vol->states_lock);
+
+	for (size_t s = 0; s < stripes_cnt; s++) {
+		if (stripes[s].done)
+			continue;
+		execute_stripe(&stripes[s], bad_extent);
+	}
+
+	for (size_t s = 0; s < stripes_cnt; s++) {
+		if (stripes[s].done)
+			continue;
+		wait_for_stripe(&stripes[s]);
+	}
+
+	hr_raid5_vol_state_eval(vol);
+
+	rc = EOK;
+
+	fibril_rwlock_read_lock(&vol->states_lock);
+
+	if (vol->state == HR_VOL_FAULTY) {
+		fibril_rwlock_read_unlock(&vol->states_lock);
+		rc = EIO;
 		goto end;
+	}
+
+	fibril_rwlock_read_unlock(&vol->states_lock);
+
+	for (size_t s = 0; s < stripes_cnt; s++)
+		if (stripes[s].rc == EAGAIN)
+			goto retry;
+
+	/* all stripes are done */
+end:
+	fibril_rwlock_read_unlock(&vol->extents_lock);
+
+	for (size_t i = 0; i < stripes_cnt; i++)
+		hr_range_lock_release(rlps[i]);
+	free(rlps); /* only the array, the locks were released above */
+	hr_destroy_stripes(stripes, stripes_cnt);
+
+	return rc;
+}
+
+static errno_t hr_raid5_bd_write_blocks(bd_srv_t *bd, aoff64_t ba, size_t cnt,
+    const void *data_write, size_t size)
+{
+	hr_volume_t *vol = bd->srvs->sarg;
+	errno_t rc;
+
+	if (size < cnt * vol->bsize)
+		return EINVAL;
+
+	fibril_rwlock_read_lock(&vol->states_lock);
+	hr_vol_state_t vol_state = vol->state;
+	fibril_rwlock_read_unlock(&vol->states_lock);
+
+	if (vol_state == HR_VOL_FAULTY || vol_state == HR_VOL_NONE)
+		return EIO;
+
+	/* increment metadata counter only on first write */
+	bool exp = false;
+	if (atomic_compare_exchange_strong(&vol->first_write, &exp, true)) {
+		vol->meta_ops->inc_counter(vol);
+		vol->meta_ops->save(vol, WITH_STATE_CALLBACK);
+	}
+
+	rc = hr_check_ba_range(vol, cnt, ba);
+	if (rc != EOK)
+		return rc;
+
+	uint64_t strip_size = vol->strip_size / vol->bsize; /* in blocks */
+	uint64_t strip_no = ba / strip_size;
+
+	/* calculate number of stripes touched */
+	uint64_t last_ba = ba + cnt - 1;
+	uint64_t end_strip_no = last_ba / strip_size;
+	uint64_t start_stripe = strip_no / (vol->extent_no - 1);
+	uint64_t end_stripe = end_strip_no / (vol->extent_no - 1);
+	size_t stripes_cnt = end_stripe - start_stripe + 1;
+
+	hr_stripe_t *stripes = hr_create_stripes(vol, stripes_cnt, true);
+	if (stripes == NULL)
+		return ENOMEM;
+
+	uint64_t stripe_size = strip_size * (vol->extent_no - 1);
+
+	for (uint64_t stripe = start_stripe; stripe <= end_stripe; stripe++) {
+		uint64_t relative_stripe = stripe - start_stripe;
+
+		uint64_t s_start = stripe * stripe_size;
+		uint64_t s_end = s_start + stripe_size - 1;
+
+		uint64_t overlap_start;
+		if (ba > s_start)
+			overlap_start = ba;
+		else
+			overlap_start = s_start;
+
+		uint64_t overlap_end;
+		if (last_ba < s_end)
+			overlap_end = last_ba;
+		else
+			overlap_end = s_end;
+
+		uint64_t start_strip_index =
+		    (overlap_start - s_start) / strip_size;
+		uint64_t end_strip_index = (overlap_end - s_start) / strip_size;
+		size_t strips_touched = end_strip_index - start_strip_index + 1;
+
+		stripes[relative_stripe].strips_touched = strips_touched;
+
+		uint64_t first_offset = (overlap_start - s_start) % strip_size;
+		uint64_t last_offset = (overlap_end - s_start) % strip_size;
+
+		size_t partials = 0;
+		if (first_offset != 0)
+			partials++;
+		if (last_offset != strip_size - 1)
+			partials++;
+		if (start_strip_index == end_strip_index && partials == 2)
+			partials = 1;
+
+		stripes[relative_stripe].strips_touched = strips_touched;
+		stripes[relative_stripe].partial_strips_touched = partials;
+
+		if (strips_touched < (vol->extent_no - 1) / 2)
+			stripes[relative_stripe].subtract = true;
+	}
 
 	/*
-	 * If the volume is degraded, start rebuild right away.
+	 * Pre-allocate range locks, because after group creation and
+	 * firing off IO requests there is no easy consistent ENOMEM error
+	 * path.
 	 */
-	if (vol->state == HR_VOL_DEGRADED) {
-		HR_DEBUG("hr_raid5_add_hotspare(): volume in DEGRADED state, "
-		    "spawning new rebuild fibril\n");
-		fid_t fib = fibril_create(hr_raid5_rebuild, vol);
-		if (fib == 0) {
-			fibril_mutex_unlock(&vol->hotspare_lock);
-			fibril_mutex_unlock(&vol->lock);
-			return ENOMEM;
-		}
-		fibril_start(fib);
-		fibril_detach(fib);
-	}
-
+	hr_range_lock_t **rlps = malloc_waitok(stripes_cnt * sizeof(*rlps));
+	for (size_t i = 0; i < stripes_cnt; i++)
+		rlps[i] = malloc_waitok(sizeof(**rlps));
+
+	/*
+	 * extent order has to be locked for the whole IO duration,
+	 * so that workers have consistent targets
+	 */
+	fibril_rwlock_read_lock(&vol->extents_lock);
+
+	for (uint64_t s = start_stripe; s <= end_stripe; s++) {
+		uint64_t relative = s - start_stripe;
+		hr_range_lock_acquire_noalloc(rlps[relative], vol, s, 1);
+	}
+
+	uint64_t phys_block, len;
+	size_t left;
+
+	hr_layout_t layout = vol->layout;
+	hr_level_t level = vol->level;
+
+	/* parity extent */
+	size_t p_extent = hr_raid5_parity_extent(level, layout,
+	    vol->extent_no, strip_no);
+
+	uint64_t strip_off = ba % strip_size;
+
+	left = cnt;
+
+	while (left != 0) {
+		if (level == HR_LVL_5) {
+			p_extent = hr_raid5_parity_extent(level, layout,
+			    vol->extent_no, strip_no);
+		}
+
+		size_t extent = hr_raid5_data_extent(level, layout,
+		    vol->extent_no, strip_no, p_extent);
+
+		uint64_t stripe_no = strip_no / (vol->extent_no - 1);
+		size_t relative_si = stripe_no - start_stripe; /* relative stripe index */
+		hr_stripe_t *stripe = &stripes[relative_si];
+		stripe->p_extent = p_extent;
+
+		phys_block = stripe_no * strip_size + strip_off;
+		cnt = min(left, strip_size - strip_off);
+		len = vol->bsize * cnt;
+		hr_add_data_offset(vol, &phys_block);
+
+		stripe->extent_span[extent].range.start = phys_block;
+		stripe->extent_span[extent].range.end = phys_block + cnt - 1;
+		stripe->extent_span[extent].cnt = cnt;
+		stripe->extent_span[extent].data_write = data_write;
+		stripe->extent_span[extent].strip_off = strip_off;
+
+		data_write += len;
+		left -= cnt;
+		strip_off = 0;
+		strip_no++;
+	}
+
+retry:
+	size_t bad_extent = vol->extent_no;
+
+	uint64_t rebuild_pos = atomic_load_explicit(&vol->rebuild_blk,
+	    memory_order_relaxed);
+
+	fibril_rwlock_read_lock(&vol->states_lock);
+
+	for (size_t e = 0; e < vol->extent_no; e++) {
+		hr_ext_state_t s = vol->extents[e].state;
+		if ((vol->state == HR_VOL_DEGRADED && s != HR_EXT_ONLINE) ||
+		    (s == HR_EXT_REBUILD && rebuild_pos < start_stripe)) {
+			bad_extent = e;
+			break;
+		}
+	}
+
+	fibril_rwlock_read_unlock(&vol->states_lock);
+
+	for (size_t s = 0; s < stripes_cnt; s++) {
+		if (stripes[s].done)
+			continue;
+		execute_stripe(&stripes[s], bad_extent);
+	}
+
+	for (size_t s = 0; s < stripes_cnt; s++) {
+		if (stripes[s].done)
+			continue;
+		wait_for_stripe(&stripes[s]);
+	}
+
+	hr_raid5_vol_state_eval(vol);
+
+	rc = EOK;
+
+	fibril_rwlock_read_lock(&vol->states_lock);
+
+	if (vol->state == HR_VOL_FAULTY) {
+		fibril_rwlock_read_unlock(&vol->states_lock);
+		rc = EIO;
+		goto end;
+	}
+
+	fibril_rwlock_read_unlock(&vol->states_lock);
+
+	for (size_t s = 0; s < stripes_cnt; s++)
+		if (stripes[s].rc == EAGAIN)
+			goto retry;
+
+	/* all stripes are done */
 end:
-	fibril_mutex_unlock(&vol->lock);
+	fibril_rwlock_read_unlock(&vol->extents_lock);
+
+	for (size_t i = 0; i < stripes_cnt; i++)
+		hr_range_lock_release(rlps[i]);
+	free(rlps); /* only the array, the locks were released above */
+	hr_destroy_stripes(stripes, stripes_cnt);
 
 	return rc;
-}
-
-void hr_raid5_ext_state_cb(hr_volume_t *vol, size_t extent,
-    errno_t rc)
-{
-	if (rc == ENOENT)
-		hr_update_ext_state(vol, extent, HR_EXT_MISSING);
-	else if (rc != EOK)
-		hr_update_ext_state(vol, extent, HR_EXT_FAILED);
-}
-
-static errno_t hr_raid5_bd_open(bd_srvs_t *bds, bd_srv_t *bd)
-{
-	HR_DEBUG("%s()\n", __func__);
-
-	hr_volume_t *vol = bd->srvs->sarg;
-
-	atomic_fetch_add_explicit(&vol->open_cnt, 1, memory_order_relaxed);
-
-	return EOK;
-}
-
-static errno_t hr_raid5_bd_close(bd_srv_t *bd)
-{
-	HR_DEBUG("%s()\n", __func__);
-
-	hr_volume_t *vol = bd->srvs->sarg;
-
-	atomic_fetch_sub_explicit(&vol->open_cnt, 1, memory_order_relaxed);
-
-	return EOK;
-}
-
-static errno_t hr_raid5_bd_sync_cache(bd_srv_t *bd, aoff64_t ba, size_t cnt)
-{
-	return hr_raid5_bd_op(HR_BD_SYNC, bd, ba, cnt, NULL, NULL, 0);
-}
-
-static errno_t hr_raid5_bd_read_blocks(bd_srv_t *bd, aoff64_t ba, size_t cnt,
-    void *buf, size_t size)
-{
-	return hr_raid5_bd_op(HR_BD_READ, bd, ba, cnt, buf, NULL, size);
-}
-
-static errno_t hr_raid5_bd_write_blocks(bd_srv_t *bd, aoff64_t ba, size_t cnt,
-    const void *data, size_t size)
-{
-	return hr_raid5_bd_op(HR_BD_WRITE, bd, ba, cnt, NULL, data, size);
 }
 
@@ -257,28 +613,11 @@
 }
 
-static errno_t hr_raid5_vol_usable(hr_volume_t *vol)
-{
-	if (vol->state == HR_VOL_ONLINE ||
-	    vol->state == HR_VOL_DEGRADED ||
-	    vol->state == HR_VOL_REBUILD)
-		return EOK;
-	return EIO;
-}
-
-/*
- * Returns (-1) if all extents are online,
- * else returns index of first bad one.
- */
-static ssize_t hr_raid5_get_bad_ext(hr_volume_t *vol)
-{
-	for (size_t i = 0; i < vol->extent_no; i++)
-		if (vol->extents[i].state != HR_EXT_ONLINE)
-			return i;
-	return -1;
-}
-
-static errno_t hr_raid5_update_vol_state(hr_volume_t *vol)
-{
-	hr_vol_state_t old_state = vol->state;
+static void hr_raid5_vol_state_eval_forced(hr_volume_t *vol)
+{
+	fibril_rwlock_read_lock(&vol->extents_lock);
+	fibril_rwlock_write_lock(&vol->states_lock);
+
+	hr_vol_state_t state = vol->state;
+
 	size_t bad = 0;
 	for (size_t i = 0; i < vol->extent_no; i++)
@@ -288,28 +627,34 @@
 	switch (bad) {
 	case 0:
-		if (old_state != HR_VOL_ONLINE)
+		if (state != HR_VOL_ONLINE)
 			hr_update_vol_state(vol, HR_VOL_ONLINE);
-		return EOK;
+		break;
 	case 1:
-		if (old_state != HR_VOL_DEGRADED &&
-		    old_state != HR_VOL_REBUILD) {
-
+		if (state != HR_VOL_DEGRADED && state != HR_VOL_REBUILD)
 			hr_update_vol_state(vol, HR_VOL_DEGRADED);
 
-			if (vol->hotspare_no > 0) {
+		if (state != HR_VOL_REBUILD) {
+			/* XXX: allow REBUILD on INVALID extents */
+			fibril_mutex_lock(&vol->hotspare_lock);
+			size_t hs_no = vol->hotspare_no;
+			fibril_mutex_unlock(&vol->hotspare_lock);
+			if (hs_no > 0) {
 				fid_t fib = fibril_create(hr_raid5_rebuild,
 				    vol);
 				if (fib == 0)
-					return ENOMEM;
+					break;
 				fibril_start(fib);
 				fibril_detach(fib);
 			}
 		}
-		return EOK;
+		break;
 	default:
-		if (old_state != HR_VOL_FAULTY)
+		if (state != HR_VOL_FAULTY)
 			hr_update_vol_state(vol, HR_VOL_FAULTY);
-		return EIO;
-	}
+		break;
+	}
+
+	fibril_rwlock_write_unlock(&vol->states_lock);
+	fibril_rwlock_read_unlock(&vol->extents_lock);
 }
 
@@ -324,391 +669,63 @@
 }
 
-static errno_t hr_raid5_read_degraded(hr_volume_t *vol, uint64_t bad,
-    uint64_t block, void *data, size_t cnt)
-{
-	errno_t rc;
-	size_t i;
-	void *xorbuf;
-	void *buf;
-	uint64_t len = vol->bsize * cnt;
-
-	xorbuf = malloc(len);
-	if (xorbuf == NULL)
-		return ENOMEM;
-
-	buf = malloc(len);
-	if (buf == NULL) {
-		free(xorbuf);
-		return ENOMEM;
-	}
-
-	/* read all other extents in the stripe */
-	bool first = true;
-	for (i = 0; i < vol->extent_no; i++) {
-		if (i == bad)
-			continue;
-
-		if (first) {
-			rc = block_read_direct(vol->extents[i].svc_id, block,
-			    cnt, xorbuf);
-			if (rc != EOK)
-				goto end;
-
-			first = false;
-		} else {
-			rc = block_read_direct(vol->extents[i].svc_id, block,
-			    cnt, buf);
-			if (rc != EOK)
-				goto end;
-			xor(xorbuf, buf, len);
-		}
-	}
-
-	memcpy(data, xorbuf, len);
-end:
-	free(xorbuf);
-	free(buf);
-	return rc;
-}
-
-static errno_t hr_raid5_write(hr_volume_t *vol, uint64_t p_extent,
-    uint64_t extent, aoff64_t ba, const void *data, size_t cnt)
-{
-	errno_t rc;
-	size_t i;
-	void *xorbuf;
-	void *buf;
-	uint64_t len = vol->bsize * cnt;
-
-	ssize_t bad = hr_raid5_get_bad_ext(vol);
-	if (bad == -1 || (size_t)bad == p_extent) {
-		rc = block_write_direct(vol->extents[extent].svc_id, ba, cnt,
-		    data);
-		if (rc != EOK)
-			return rc;
-		/*
-		 * DEGRADED parity - skip parity write
-		 */
-		if ((size_t)bad == p_extent)
-			return EOK;
-
-		rc = hr_raid5_write_parity(vol, p_extent, extent, ba, data,
-		    cnt);
-		return rc;
-	}
-
-	xorbuf = malloc(len);
-	if (xorbuf == NULL)
-		return ENOMEM;
-
-	buf = malloc(len);
-	if (buf == NULL) {
-		free(xorbuf);
-		return ENOMEM;
-	}
-
-	if (extent == (size_t)bad) {
-		/*
-		 * new parity = read other and xor in new data
-		 *
-		 * write new parity
-		 */
-		bool first = true;
-		for (i = 0; i < vol->extent_no; i++) {
-			if (i == (size_t)bad)
-				continue;
-			if (i == p_extent)
-				continue;
-			if (first) {
-				rc = block_read_direct(vol->extents[i].svc_id,
-				    ba, cnt, xorbuf);
-				if (rc != EOK)
-					goto end;
-
-				first = false;
-			} else {
-				rc = block_read_direct(vol->extents[i].svc_id,
-				    ba, cnt, buf);
-				if (rc != EOK)
-					goto end;
-				xor(xorbuf, buf, len);
-			}
-		}
-		xor(xorbuf, data, len);
-		rc = block_write_direct(vol->extents[p_extent].svc_id, ba, cnt,
-		    xorbuf);
-		if (rc != EOK)
-			goto end;
-	} else {
-		/*
-		 * new parity = xor original data and old parity and new data
-		 *
-		 * write parity, new data
-		 */
-		rc = block_read_direct(vol->extents[extent].svc_id, ba, cnt,
-		    xorbuf);
-		if (rc != EOK)
-			goto end;
-		rc = block_read_direct(vol->extents[p_extent].svc_id, ba, cnt,
-		    buf);
-		if (rc != EOK)
-			goto end;
-
-		xor(xorbuf, buf, len);
-
-		xor(xorbuf, data, len);
-
-		rc = block_write_direct(vol->extents[p_extent].svc_id, ba, cnt,
-		    xorbuf);
-		if (rc != EOK)
-			goto end;
-		rc = block_write_direct(vol->extents[extent].svc_id, ba, cnt,
-		    data);
-		if (rc != EOK)
-			goto end;
-	}
-end:
-	free(xorbuf);
-	free(buf);
-	return rc;
-}
-
-static errno_t hr_raid5_write_parity(hr_volume_t *vol, uint64_t p_extent,
-    uint64_t extent, uint64_t block, const void *data, size_t cnt)
-{
-	errno_t rc;
-	size_t i;
-	void *xorbuf;
-	void *buf;
-	uint64_t len = vol->bsize * cnt;
-
-	xorbuf = malloc(len);
-	if (xorbuf == NULL)
-		return ENOMEM;
-
-	buf = malloc(len);
-	if (buf == NULL) {
-		free(xorbuf);
-		return ENOMEM;
-	}
-
-	bool first = true;
-	for (i = 0; i < vol->extent_no; i++) {
-		if (i == p_extent)
-			continue;
-
-		if (first) {
-			if (i == extent) {
-				memcpy(xorbuf, data, len);
-			} else {
-				rc = block_read_direct(vol->extents[i].svc_id,
-				    block, cnt, xorbuf);
-				if (rc != EOK)
-					goto end;
-			}
-
-			first = false;
-		} else {
-			if (i == extent) {
-				xor(xorbuf, data, len);
-			} else {
-				rc = block_read_direct(vol->extents[i].svc_id,
-				    block, cnt, buf);
-				if (rc != EOK)
-					goto end;
-
-				xor(xorbuf, buf, len);
-			}
-		}
-	}
-
-	rc = block_write_direct(vol->extents[p_extent].svc_id, block, cnt,
-	    xorbuf);
-end:
-	free(xorbuf);
-	free(buf);
-	return rc;
-}
-
-static errno_t hr_raid5_bd_op(hr_bd_op_type_t type, bd_srv_t *bd, aoff64_t ba,
-    size_t cnt, void *dst, const void *src, size_t size)
-{
-	hr_volume_t *vol = bd->srvs->sarg;
-	errno_t rc;
-	uint64_t phys_block, len;
-	size_t left;
-	const uint8_t *data_write = src;
-	uint8_t *data_read = dst;
-
-	/* propagate sync */
-	if (type == HR_BD_SYNC && ba == 0 && cnt == 0) {
-		hr_sync_all_extents(vol);
-		rc = hr_raid5_update_vol_state(vol);
-		return rc;
-	}
-
-	if (type == HR_BD_READ || type == HR_BD_WRITE)
-		if (size < cnt * vol->bsize)
-			return EINVAL;
-
-	rc = hr_check_ba_range(vol, cnt, ba);
-	if (rc != EOK)
-		return rc;
-
-	hr_layout_t layout = vol->layout;
-	hr_level_t level = vol->level;
-
-	uint64_t strip_size = vol->strip_size / vol->bsize; /* in blocks */
-	uint64_t stripe = (ba / strip_size); /* stripe number */
-
-	/* parity extent */
-	uint64_t p_extent;
-	if (level == HR_LVL_4 && layout == HR_LAYOUT_RAID4_0) {
-		p_extent = 0;
-	} else if (level == HR_LVL_4 && layout == HR_LAYOUT_RAID4_N) {
-		p_extent = vol->extent_no - 1;
-	} else if (level == HR_LVL_5 && layout == HR_LAYOUT_RAID5_0R) {
-		p_extent = (stripe / (vol->extent_no - 1)) % vol->extent_no;
-	} else if (level == HR_LVL_5 &&
-	    (layout == HR_LAYOUT_RAID5_NR || layout == HR_LAYOUT_RAID5_NC)) {
-		p_extent = (vol->extent_no - 1) -
-		    (stripe / (vol->extent_no - 1)) % vol->extent_no;
-	} else {
-		return EINVAL;
-	}
-
-	uint64_t extent;
-	if (level == HR_LVL_4 && layout == HR_LAYOUT_RAID4_0) {
-		extent = (stripe % (vol->extent_no - 1)) + 1;
-	} else if (level == HR_LVL_4 && layout == HR_LAYOUT_RAID4_N) {
-		extent = stripe % (vol->extent_no - 1);
-	} else if (level == HR_LVL_5 &&
-	    (layout == HR_LAYOUT_RAID5_0R || layout == HR_LAYOUT_RAID5_NR)) {
-		if ((stripe % (vol->extent_no - 1)) < p_extent)
-			extent = stripe % (vol->extent_no - 1);
-		else
-			extent = (stripe % (vol->extent_no - 1)) + 1;
-	} else if (level == HR_LVL_5 && layout == HR_LAYOUT_RAID5_NC) {
-		extent =
-		    ((stripe % (vol->extent_no - 1)) + p_extent + 1) %
-		    vol->extent_no;
-	} else {
-		return EINVAL;
-	}
-
-	uint64_t ext_stripe = stripe / (vol->extent_no - 1); /* stripe level */
-	uint64_t strip_off = ba % strip_size; /* strip offset */
-
-	fibril_mutex_lock(&vol->lock);
-
-	rc = hr_raid5_vol_usable(vol);
-	if (rc != EOK) {
-		fibril_mutex_unlock(&vol->lock);
-		return EIO;
-	}
-
-	left = cnt;
-
-	fibril_rwlock_write_lock(&vol->states_lock);
-	while (left != 0) {
-		phys_block = ext_stripe * strip_size + strip_off;
-		cnt = min(left, strip_size - strip_off);
-		len = vol->bsize * cnt;
-		hr_add_ba_offset(vol, &phys_block);
-		switch (type) {
-		case HR_BD_SYNC:
-			if (vol->extents[extent].state != HR_EXT_ONLINE)
-				break;
-			rc = block_sync_cache(vol->extents[extent].svc_id,
-			    phys_block, cnt);
-			/* allow unsupported sync */
-			if (rc == ENOTSUP)
-				rc = EOK;
-			break;
-		case HR_BD_READ:
-		retry_read:
-			ssize_t bad = hr_raid5_get_bad_ext(vol);
-			if (bad > -1 && extent == (size_t)bad) {
-				rc = hr_raid5_read_degraded(vol, bad,
-				    phys_block, data_read, cnt);
-			} else {
-				rc = block_read_direct(vol->extents[extent].svc_id,
-				    phys_block, cnt, data_read);
-			}
-			data_read += len;
-			break;
-		case HR_BD_WRITE:
-		retry_write:
-			rc = hr_raid5_write(vol, p_extent, extent, phys_block,
-			    data_write, cnt);
-			data_write += len;
-			break;
+static size_t hr_raid5_parity_extent(hr_level_t level, /* returns the parity extent index for strip_no under the given level/layout */
+    hr_layout_t layout, size_t extent_no, uint64_t strip_no)
+{
+	switch (level) {
+	case HR_LVL_4:
+		switch (layout) {
+		case HR_LAYOUT_RAID4_0:
+			return (0); /* fixed: always the first extent */
+		case HR_LAYOUT_RAID4_N:
+			return (extent_no - 1); /* fixed: always the last extent */
 		default:
-			rc = EINVAL;
-			goto error;
-		}
-
-		if (rc == ENOMEM)
-			goto error;
-
-		hr_raid5_ext_state_cb(vol, extent, rc);
-
-		if (rc != EOK) {
-			rc = hr_raid5_update_vol_state(vol);
-			if (rc == EOK) {
-				/*
-				 * State changed from ONLINE -> DEGRADED,
-				 * rewind and retry
-				 */
-				if (type == HR_BD_WRITE) {
-					data_write -= len;
-					goto retry_write;
-				} else if (type == HR_BD_WRITE) {
-					data_read -= len;
-					goto retry_read;
-				}
-			} else {
-				rc = EIO;
-				goto error;
-			}
-		}
-
-		left -= cnt;
-		strip_off = 0;
-		stripe++;
-
-		ext_stripe = stripe / (vol->extent_no - 1); /* stripe level */
-
-		if (level == HR_LVL_5 && layout == HR_LAYOUT_RAID5_0R) {
-			p_extent =
-			    (stripe / (vol->extent_no - 1)) % vol->extent_no;
-		} else if (level == HR_LVL_5 &&
-		    (layout == HR_LAYOUT_RAID5_NR || layout == HR_LAYOUT_RAID5_NC)) {
-			p_extent = (vol->extent_no - 1) -
-			    (stripe / (vol->extent_no - 1)) % vol->extent_no;
-		}
-
-		if (level == HR_LVL_4 && layout == HR_LAYOUT_RAID4_0) {
-			extent = (stripe % (vol->extent_no - 1)) + 1;
-		} else if (level == HR_LVL_4 && layout == HR_LAYOUT_RAID4_N) {
-			extent = stripe % (vol->extent_no - 1);
-		} else if (level == HR_LVL_5 &&
-		    (layout == HR_LAYOUT_RAID5_0R || layout == HR_LAYOUT_RAID5_NR)) {
-			if ((stripe % (vol->extent_no - 1)) < p_extent)
-				extent = stripe % (vol->extent_no - 1);
+			assert(0 && "invalid layout configuration"); /* NOTE(review): no return follows; if the assert does not abort, control continues into case HR_LVL_5 */
+		}
+	case HR_LVL_5:
+		switch (layout) {
+		case HR_LAYOUT_RAID5_0R:
+			return ((strip_no / (extent_no - 1)) % extent_no); /* starts at extent 0, moves up by one every (extent_no - 1) strips, wraps at extent_no */
+		case HR_LAYOUT_RAID5_NR:
+		case HR_LAYOUT_RAID5_NC:
+			return ((extent_no - 1) - /* same rotation, counted down from the last extent */
+			    (strip_no / (extent_no - 1)) % extent_no);
+		default:
+			assert(0 && "invalid layout configuration"); /* NOTE(review): falls into the outer default below if the assert does not abort */
+		}
+	default:
+		assert(0 && "invalid layout configuration"); /* NOTE(review): the function ends without a return value if the assert does not abort */
+	}
+}
+
+static size_t hr_raid5_data_extent(hr_level_t level, /* returns the data extent index for strip_no, given the parity extent p_extent */
+    hr_layout_t layout, size_t extent_no, uint64_t strip_no, size_t p_extent)
+{
+	switch (level) {
+	case HR_LVL_4:
+		switch (layout) {
+		case HR_LAYOUT_RAID4_0:
+			return ((strip_no % (extent_no - 1)) + 1); /* result is in 1..extent_no-1, never extent 0 */
+		case HR_LAYOUT_RAID4_N:
+			return (strip_no % (extent_no - 1)); /* result is in 0..extent_no-2, never the last extent */
+		default:
+			assert(0 && "invalid layout configuration"); /* NOTE(review): no return follows; if the assert does not abort, control continues into case HR_LVL_5 */
+		}
+	case HR_LVL_5:
+		switch (layout) {
+		case HR_LAYOUT_RAID5_0R:
+		case HR_LAYOUT_RAID5_NR:
+			if ((strip_no % (extent_no - 1)) < p_extent) /* position below the parity extent is used unchanged */
+				return (strip_no % (extent_no - 1));
 			else
-				extent = (stripe % (vol->extent_no - 1)) + 1;
-		} else if (level == HR_LVL_5 && layout == HR_LAYOUT_RAID5_NC) {
-			extent =
-			    ((stripe % (vol->extent_no - 1)) + p_extent + 1) %
-			    vol->extent_no;
-		}
-	}
-
-error:
-	(void)hr_raid5_update_vol_state(vol);
-	fibril_rwlock_write_unlock(&vol->states_lock);
-	fibril_mutex_unlock(&vol->lock);
-	return rc;
+				return ((strip_no % (extent_no - 1)) + 1); /* position at or above p_extent is shifted by one so p_extent is never returned */
+		case HR_LAYOUT_RAID5_NC:
+			return (((strip_no % (extent_no - 1)) + p_extent + 1) % /* counts from the extent after p_extent, wrapping at extent_no */
+			    extent_no);
+		default:
+			assert(0 && "invalid layout configuration"); /* NOTE(review): falls into the outer default below if the assert does not abort */
+		}
+	default:
+		assert(0 && "invalid layout configuration"); /* NOTE(review): the function ends without a return value if the assert does not abort */
+	}
 }
 
@@ -721,5 +738,4 @@
 	void *buf = NULL, *xorbuf = NULL;
 
-	fibril_mutex_lock(&vol->lock);
 	fibril_rwlock_read_lock(&vol->extents_lock);
 	fibril_rwlock_write_lock(&vol->states_lock);
@@ -785,5 +801,5 @@
 
 	uint64_t ba = 0, cnt;
-	hr_add_ba_offset(vol, &ba);
+	hr_add_data_offset(vol, &ba);
 
 	while (left != 0) {
@@ -852,18 +868,15 @@
 	fibril_rwlock_write_unlock(&vol->states_lock);
 	fibril_rwlock_read_unlock(&vol->extents_lock);
-	fibril_mutex_unlock(&vol->lock);
 
 	rc = vol->meta_ops->save(vol, WITH_STATE_CALLBACK);
 
-	fibril_mutex_lock(&vol->lock);
 	fibril_rwlock_read_lock(&vol->extents_lock);
 	fibril_rwlock_write_lock(&vol->states_lock);
 
 end:
-	(void)hr_raid5_update_vol_state(vol);
+	hr_raid5_vol_state_eval_forced(vol);
 
 	fibril_rwlock_write_unlock(&vol->states_lock);
 	fibril_rwlock_read_unlock(&vol->extents_lock);
-	fibril_mutex_unlock(&vol->lock);
 
 	if (buf != NULL)
Index: uspace/srv/bd/hr/util.c
===================================================================
--- uspace/srv/bd/hr/util.c	(revision 9d1685b9d20320c0d757562d21216a101ea960c6)
+++ uspace/srv/bd/hr/util.c	(revision a3486f22ef90cb49886854387bb881de5188d3c7)
@@ -150,5 +150,9 @@
 	}
 
-	vol->fge = hr_fpool_create(16, 32, sizeof(hr_io_t));
+	if (level == HR_LVL_4 || level == HR_LVL_5)
+		vol->fge = hr_fpool_create(16, 32, sizeof(hr_io_raid5_t));
+	else
+		vol->fge = hr_fpool_create(16, 32, sizeof(hr_io_t));
+
 	if (vol->fge == NULL) {
 		rc = ENOMEM;
@@ -164,6 +168,4 @@
 
 	vol->state = HR_VOL_NONE;
-
-	fibril_mutex_initialize(&vol->lock); /* XXX: will remove this */
 
 	fibril_mutex_initialize(&vol->md_lock);
@@ -496,32 +498,4 @@
 	    "(%" PRIun ")\n", vol->devname, hs_idx, old, new);
 	vol->hotspares[hs_idx].svc_id = new;
-}
-
-/*
- * Do a whole sync (ba = 0, cnt = 0) across all extents,
- * and update extent state. *For now*, the caller has to
- * update volume state after the syncs.
- *
- * TODO: add update_vol_state fcn ptr for each raid
- */
-void hr_sync_all_extents(hr_volume_t *vol)
-{
-	errno_t rc;
-
-	fibril_mutex_lock(&vol->lock);
-	for (size_t i = 0; i < vol->extent_no; i++) {
-		if (vol->extents[i].state != HR_EXT_ONLINE)
-			continue;
-		rc = block_sync_cache(vol->extents[i].svc_id, 0, 0);
-		if (rc == ENOMEM || rc == ENOTSUP)
-			continue;
-		if (rc != EOK) {
-			if (rc == ENOENT)
-				hr_update_ext_state(vol, i, HR_EXT_MISSING);
-			else if (rc != EOK)
-				hr_update_ext_state(vol, i, HR_EXT_FAILED);
-		}
-	}
-	fibril_mutex_unlock(&vol->lock);
 }
 
@@ -1116,4 +1090,14 @@
 }
 
+void hr_raid5_xor(void *dst, const void *src, size_t size) /* XORs src into dst in place; size is a byte count */
+{
+	size_t i;
+	uint64_t *d = dst; /* both buffers are accessed as 64-bit words */
+	const uint64_t *s = src;
+
+	for (i = 0; i < size / sizeof(uint64_t); ++i) /* whole words only: a tail of size % sizeof(uint64_t) bytes is left untouched */
+		*d++ ^= *s++;
+}
+
 /** @}
  */
Index: uspace/srv/bd/hr/util.h
===================================================================
--- uspace/srv/bd/hr/util.h	(revision 9d1685b9d20320c0d757562d21216a101ea960c6)
+++ uspace/srv/bd/hr/util.h	(revision a3486f22ef90cb49886854387bb881de5188d3c7)
@@ -110,4 +110,5 @@
 extern errno_t hr_util_try_assemble(hr_config_t *, size_t *);
 extern errno_t hr_util_add_hotspare(hr_volume_t *, service_id_t);
+extern void hr_raid5_xor(void *, const void *, size_t);
 
 #endif
Index: uspace/srv/bd/hr/var.h
===================================================================
--- uspace/srv/bd/hr/var.h	(revision 9d1685b9d20320c0d757562d21216a101ea960c6)
+++ uspace/srv/bd/hr/var.h	(revision a3486f22ef90cb49886854387bb881de5188d3c7)
@@ -52,4 +52,5 @@
 struct hr_volume;
 typedef struct hr_volume hr_volume_t;
+typedef struct hr_stripe hr_stripe_t;
 typedef struct hr_metadata hr_metadata_t;
 typedef struct hr_superblock_ops hr_superblock_ops_t;
@@ -69,7 +70,7 @@
 	service_id_t svc_id; /* service id */
 
-	fibril_mutex_t lock; /* XXX: gone after para */
 	list_t range_lock_list; /* list of range locks */
 	fibril_mutex_t range_lock_list_lock; /* range locks list lock */
+
 	hr_fpool_t *fge; /* fibril pool */
 
