source: mainline/uspace/srv/bd/hr/parity_stripe.c@ cdfcaea

Last change on this file since cdfcaea was cdfcaea, checked in by Miroslav Cimerman <mc@…>, 6 weeks ago

hr: RAID 5 rebuild

  • Property mode set to 100644
File size: 25.2 KB
Line 
1/*
2 * Copyright (c) 2025 Miroslav Cimerman
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup hr
30 * @{
31 */
32/**
33 * @file
34 */
35
36#include <stdlib.h>
37#include <stdio.h>
38#include <str.h>
39
40#include "io.h"
41#include "parity_stripe.h"
42#include "util.h"
43#include "var.h"
44
45static void execute_stripe_degraded_mixed(hr_stripe_t *, size_t);
46static void execute_stripe_degraded(hr_stripe_t *, size_t);
47static void execute_stripe_optimal_reconstruct(hr_stripe_t *);
48static void execute_stripe_optimal_subtract(hr_stripe_t *);
49static void execute_write_stripe(hr_stripe_t *, size_t);
50static void execute_read_stripe(hr_stripe_t *, size_t);
51static void execute_stripe_degraded_good(hr_stripe_t *, size_t);
52static bool hr_stripe_range_non_extension(const range_t *, const range_t *,
53 range_t *);
54static size_t hr_stripe_merge_extent_spans(hr_stripe_t *, size_t, range_t [2]);
55static void hr_stripe_extend_range(range_t *, const range_t *);
56static bool hr_ranges_overlap(const range_t *, const range_t *, range_t *);
57
58hr_stripe_t *hr_create_stripes(hr_volume_t *vol, uint64_t strip_size,
59 size_t cnt, bool write)
60{
61 hr_stripe_t *stripes = hr_calloc_waitok(cnt, sizeof(*stripes));
62
63 for (size_t i = 0; i < cnt; i++) {
64 fibril_mutex_initialize(&stripes[i].parity_lock);
65 fibril_condvar_initialize(&stripes[i].ps_added_cv);
66 stripes[i].vol = vol;
67 stripes[i].write = write;
68 stripes[i].parity = hr_calloc_waitok(1, strip_size);
69 stripes[i].parity_size = strip_size;
70 stripes[i].extent_span = hr_calloc_waitok(vol->extent_no,
71 sizeof(*stripes[i].extent_span));
72 }
73
74 return stripes;
75}
76
77void hr_destroy_stripes(hr_stripe_t *stripes, size_t cnt)
78{
79 if (stripes == NULL)
80 return;
81
82 for (size_t i = 0; i < cnt; i++) {
83 if (stripes[i].parity != NULL)
84 free(stripes[i].parity);
85 if (stripes[i].extent_span != NULL)
86 free(stripes[i].extent_span);
87 }
88
89 free(stripes);
90}
91
92void hr_reset_stripe(hr_stripe_t *stripe)
93{
94 memset(stripe->parity, 0, stripe->parity_size);
95 stripe->ps_added = 0;
96 stripe->ps_to_be_added = 0;
97 stripe->p_count_final = false;
98
99 stripe->rc = EOK;
100 stripe->abort = false;
101 stripe->done = false;
102}
103
104void hr_stripe_commit_parity(hr_stripe_t *stripe, uint64_t strip_off,
105 const void *data, uint64_t size)
106{
107 fibril_mutex_lock(&stripe->parity_lock);
108 hr_raid5_xor(stripe->parity + strip_off, data, size);
109 stripe->ps_added++;
110 fibril_condvar_broadcast(&stripe->ps_added_cv);
111 fibril_mutex_unlock(&stripe->parity_lock);
112}
113
114void hr_stripe_wait_for_parity_commits(hr_stripe_t *stripe)
115{
116 fibril_mutex_lock(&stripe->parity_lock);
117 while ((!stripe->p_count_final ||
118 stripe->ps_added < stripe->ps_to_be_added) && !stripe->abort) {
119 fibril_condvar_wait(&stripe->ps_added_cv, &stripe->parity_lock);
120 }
121 fibril_mutex_unlock(&stripe->parity_lock);
122}
123
124void hr_stripe_parity_abort(hr_stripe_t *stripe)
125{
126 fibril_mutex_lock(&stripe->parity_lock);
127 stripe->abort = true;
128 fibril_condvar_broadcast(&stripe->ps_added_cv);
129 fibril_mutex_unlock(&stripe->parity_lock);
130}
131
132void execute_stripe(hr_stripe_t *stripe, size_t bad_extent)
133{
134 if (stripe->write)
135 execute_write_stripe(stripe, bad_extent);
136 else
137 execute_read_stripe(stripe, bad_extent);
138}
139
140void wait_for_stripe(hr_stripe_t *stripe)
141{
142 stripe->rc = hr_fgroup_wait(stripe->worker_group, NULL, NULL);
143 if (stripe->rc == EAGAIN)
144 hr_reset_stripe(stripe);
145 else
146 stripe->done = true;
147}
148
149static void execute_stripe_degraded_good(hr_stripe_t *stripe, size_t bad_extent)
150{
151 hr_volume_t *vol = stripe->vol;
152
153 stripe->ps_to_be_added = stripe->strips_touched; /* writers */
154 stripe->ps_to_be_added += stripe->range_count; /* parity readers */
155 stripe->p_count_final = true;
156
157 size_t worker_cnt = stripe->strips_touched + stripe->range_count * 2;
158 stripe->worker_group = hr_fgroup_create(vol->fge, worker_cnt);
159
160 for (size_t e = 0; e < vol->extent_no; e++) {
161 if (e == bad_extent || e == stripe->p_extent)
162 continue;
163 if (stripe->extent_span[e].cnt == 0)
164 continue;
165
166 hr_io_raid5_t *io = hr_fgroup_alloc(stripe->worker_group);
167 io->extent = e;
168 io->data_write = stripe->extent_span[e].data_write;
169 io->ba = stripe->extent_span[e].range.start;
170 io->cnt = stripe->extent_span[e].cnt;
171 io->strip_off = stripe->extent_span[e].strip_off * vol->bsize;
172 io->vol = vol;
173 io->stripe = stripe;
174
175 hr_fgroup_submit(stripe->worker_group,
176 hr_io_raid5_subtract_writer, io);
177 }
178
179 for (size_t r = 0; r < stripe->range_count; r++) {
180 hr_io_raid5_t *p_reader = hr_fgroup_alloc(stripe->worker_group);
181 p_reader->extent = stripe->p_extent;
182 p_reader->ba = stripe->total_height[r].start;
183 p_reader->cnt = stripe->total_height[r].end -
184 stripe->total_height[r].start + 1;
185 p_reader->vol = vol;
186 p_reader->stripe = stripe;
187
188 p_reader->strip_off = p_reader->ba;
189 hr_sub_data_offset(vol, &p_reader->strip_off);
190 p_reader->strip_off %= vol->strip_size / vol->bsize;
191 p_reader->strip_off *= vol->bsize;
192
193 hr_fgroup_submit(stripe->worker_group,
194 hr_io_raid5_reconstruct_reader, p_reader);
195
196 hr_io_raid5_t *p_writer = hr_fgroup_alloc(stripe->worker_group);
197 p_writer->extent = stripe->p_extent;
198 p_writer->ba = stripe->total_height[r].start;
199 p_writer->cnt = stripe->total_height[r].end -
200 stripe->total_height[r].start + 1;
201 p_writer->vol = vol;
202 p_writer->stripe = stripe;
203
204 p_writer->strip_off = p_writer->ba;
205 hr_sub_data_offset(vol, &p_writer->strip_off);
206 p_writer->strip_off %= vol->strip_size / vol->bsize;
207 p_writer->strip_off *= vol->bsize;
208
209 hr_fgroup_submit(stripe->worker_group,
210 hr_io_raid5_parity_writer, p_writer);
211 }
212}
213
214static void execute_stripe_degraded_mixed(hr_stripe_t *stripe, size_t bad_extent)
215{
216 hr_volume_t *vol = stripe->vol;
217
218 size_t worker_cnt = (vol->extent_no - 2) * 3 + 3; /* upper bound */
219 stripe->worker_group = hr_fgroup_create(vol->fge, worker_cnt);
220
221 stripe->ps_to_be_added = 1;
222
223 hr_io_raid5_t *nop_write = hr_fgroup_alloc(stripe->worker_group);
224 nop_write->ba = stripe->extent_span[bad_extent].range.start;
225 nop_write->cnt = stripe->extent_span[bad_extent].cnt;
226 nop_write->strip_off =
227 stripe->extent_span[bad_extent].strip_off * vol->bsize;
228 nop_write->data_write = stripe->extent_span[bad_extent].data_write;
229 nop_write->vol = vol;
230 nop_write->stripe = stripe;
231
232 hr_fgroup_submit(stripe->worker_group, hr_io_raid5_noop_writer,
233 nop_write);
234
235 for (size_t e = 0; e < vol->extent_no; e++) {
236 if (e == bad_extent || e == stripe->p_extent)
237 continue;
238
239 range_t uncommon = { 0, 0 };
240 bool has_uncommon;
241 has_uncommon = hr_stripe_range_non_extension(
242 &stripe->extent_span[bad_extent].range,
243 &stripe->extent_span[e].range,
244 &uncommon);
245
246 if (stripe->extent_span[e].cnt == 0 || has_uncommon) {
247 stripe->ps_to_be_added++;
248
249 hr_io_raid5_t *io =
250 hr_fgroup_alloc(stripe->worker_group);
251 io->extent = e;
252 if (stripe->extent_span[bad_extent].cnt == 0) {
253 io->ba =
254 stripe->extent_span[bad_extent].range.start;
255 io->cnt = stripe->extent_span[bad_extent].cnt;
256 } else {
257 io->ba = uncommon.start;
258 io->cnt = uncommon.end - uncommon.start + 1;
259 }
260 io->strip_off =
261 stripe->extent_span[bad_extent].strip_off *
262 vol->bsize;
263 io->vol = vol;
264 io->stripe = stripe;
265
266 hr_fgroup_submit(stripe->worker_group,
267 hr_io_raid5_reconstruct_reader, io);
268
269 if (stripe->extent_span[e].cnt == 0)
270 continue;
271 }
272
273 range_t overlap_range;
274 bool overlap_up = true;
275 if (hr_ranges_overlap(&stripe->extent_span[e].range,
276 &stripe->extent_span[bad_extent].range,
277 &overlap_range)) {
278 stripe->ps_to_be_added++;
279
280 hr_io_raid5_t *io =
281 hr_fgroup_alloc(stripe->worker_group);
282 io->extent = e;
283 io->ba = overlap_range.start;
284 io->cnt = overlap_range.end - overlap_range.start + 1;
285
286 size_t diff = overlap_range.start -
287 stripe->extent_span[e].range.start;
288
289 io->strip_off =
290 (stripe->extent_span[e].strip_off + diff) *
291 vol->bsize;
292
293 io->data_write = stripe->extent_span[e].data_write;
294 io->data_write += diff * vol->bsize;
295 if (diff == 0)
296 overlap_up = false;
297
298 io->vol = vol;
299 io->stripe = stripe;
300
301 hr_fgroup_submit(stripe->worker_group,
302 hr_io_raid5_writer, io);
303 }
304
305 bool has_independent;
306 range_t independent = { 0, 0 };
307 has_independent = hr_stripe_range_non_extension(
308 &stripe->extent_span[e].range,
309 &stripe->extent_span[bad_extent].range,
310 &independent);
311 if (has_independent) {
312 stripe->ps_to_be_added++;
313
314 hr_io_raid5_t *io =
315 hr_fgroup_alloc(stripe->worker_group);
316 io->extent = e;
317 io->ba = independent.start;
318 io->cnt = independent.end - independent.start + 1;
319 size_t diff = 0;
320 if (!overlap_up) {
321 diff = overlap_range.end -
322 overlap_range.start + 1;
323 }
324 io->strip_off =
325 (stripe->extent_span[e].strip_off + diff) *
326 vol->bsize;
327 io->data_write = stripe->extent_span[e].data_write;
328 io->data_write += diff * vol->bsize;
329 io->vol = vol;
330 io->stripe = stripe;
331
332 hr_fgroup_submit(stripe->worker_group,
333 hr_io_raid5_subtract_writer, io);
334 }
335 }
336
337 bool has_independent = false;
338 range_t independent = { 0, 0 };
339 for (size_t r = 0; r < stripe->range_count; r++) {
340 has_independent = hr_stripe_range_non_extension(
341 &stripe->total_height[r],
342 &stripe->extent_span[bad_extent].range,
343 &independent);
344 if (has_independent) {
345 stripe->ps_to_be_added++;
346
347 hr_io_raid5_t *io =
348 hr_fgroup_alloc(stripe->worker_group);
349 io->extent = stripe->p_extent;
350 io->ba = independent.start;
351 io->cnt = independent.end - independent.start + 1;
352
353 io->strip_off = io->ba;
354 hr_sub_data_offset(vol, &io->strip_off);
355 io->strip_off %= vol->strip_size / vol->bsize;
356 io->strip_off *= vol->bsize;
357
358 io->vol = vol;
359 io->stripe = stripe;
360
361 hr_fgroup_submit(stripe->worker_group,
362 hr_io_raid5_reconstruct_reader, io);
363 }
364
365 hr_io_raid5_t *pio = hr_fgroup_alloc(stripe->worker_group);
366 pio->extent = stripe->p_extent;
367 pio->ba = stripe->total_height[r].start;
368 pio->cnt = stripe->total_height[r].end -
369 stripe->total_height[r].start + 1;
370 pio->strip_off = pio->ba;
371 hr_sub_data_offset(vol, &pio->strip_off);
372 pio->strip_off %= vol->strip_size / vol->bsize;
373 pio->strip_off *= vol->bsize;
374 pio->vol = vol;
375 pio->stripe = stripe;
376
377 hr_fgroup_submit(stripe->worker_group,
378 hr_io_raid5_parity_writer, pio);
379 }
380
381 stripe->p_count_final = true;
382 fibril_condvar_broadcast(&stripe->ps_added_cv);
383}
384
385static void execute_stripe_degraded(hr_stripe_t *stripe, size_t bad_extent)
386{
387 hr_volume_t *vol = stripe->vol;
388
389 /* parity is bad, issue non-redundant writes */
390 if (bad_extent == stripe->p_extent) {
391 stripe->worker_group =
392 hr_fgroup_create(vol->fge, stripe->strips_touched);
393
394 for (size_t e = 0; e < vol->extent_no; e++) {
395 if (e == bad_extent)
396 continue;
397 if (stripe->extent_span[e].cnt == 0)
398 continue;
399
400 hr_io_raid5_t *io =
401 hr_fgroup_alloc(stripe->worker_group);
402 io->extent = e;
403 io->data_write = stripe->extent_span[e].data_write;
404 io->ba = stripe->extent_span[e].range.start;
405 io->cnt = stripe->extent_span[e].cnt;
406 io->strip_off =
407 stripe->extent_span[e].strip_off * vol->bsize;
408 io->vol = vol;
409 io->stripe = stripe;
410
411 hr_fgroup_submit(stripe->worker_group,
412 hr_io_raid5_basic_writer, io);
413 }
414
415 return;
416 }
417
418 stripe->range_count = hr_stripe_merge_extent_spans(stripe,
419 vol->extent_no, stripe->total_height);
420
421 if (stripe->extent_span[bad_extent].cnt > 0)
422 execute_stripe_degraded_mixed(stripe, bad_extent);
423 else
424 execute_stripe_degraded_good(stripe, bad_extent);
425}
426
427static void execute_stripe_optimal_reconstruct(hr_stripe_t *stripe)
428{
429 hr_volume_t *vol = stripe->vol;
430
431 stripe->range_count = hr_stripe_merge_extent_spans(stripe,
432 vol->extent_no, stripe->total_height);
433
434 bool full_stripe = false;
435 size_t worker_cnt;
436 if (stripe->strips_touched == vol->extent_no - 1 &&
437 stripe->partial_strips_touched == 0) {
438 /* full-stripe */
439 worker_cnt = stripe->strips_touched; /* writers */
440 worker_cnt += 1; /* parity writer */
441
442 stripe->ps_to_be_added = stripe->strips_touched;
443 stripe->p_count_final = true;
444
445 full_stripe = true;
446 } else {
447 worker_cnt = stripe->strips_touched; /* writers */
448
449 /* readers (upper bound) */
450 worker_cnt += (vol->extent_no - 1) - stripe->strips_touched;
451 worker_cnt += stripe->partial_strips_touched;
452
453 worker_cnt += stripe->range_count; /* parity writer(s) */
454
455 stripe->ps_to_be_added = stripe->strips_touched; /* writers */
456 }
457
458 stripe->worker_group = hr_fgroup_create(vol->fge, worker_cnt);
459
460 for (size_t e = 0; e < vol->extent_no; e++) {
461 if (e == stripe->p_extent)
462 continue;
463
464 if (stripe->extent_span[e].cnt == 0)
465 continue;
466
467 hr_io_raid5_t *io = hr_fgroup_alloc(stripe->worker_group);
468 io->extent = e;
469 io->data_write = stripe->extent_span[e].data_write;
470 io->ba = stripe->extent_span[e].range.start;
471 io->cnt = stripe->extent_span[e].cnt;
472 io->strip_off = stripe->extent_span[e].strip_off * vol->bsize;
473 io->vol = vol;
474 io->stripe = stripe;
475
476 hr_fgroup_submit(stripe->worker_group, hr_io_raid5_writer, io);
477 }
478
479 for (size_t r = 0; r < stripe->range_count; r++) {
480 if (full_stripe)
481 goto skip_readers;
482 for (size_t e = 0; e < vol->extent_no; e++) {
483 if (e == stripe->p_extent)
484 continue;
485
486 range_t range_extension = { 0, 0 };
487
488 bool need_reader = false;
489 if (stripe->extent_span[e].cnt == 0) {
490 range_extension = stripe->total_height[r];
491 need_reader = true;
492 } else {
493 need_reader = hr_stripe_range_non_extension(
494 &stripe->total_height[r],
495 &stripe->extent_span[e].range,
496 &range_extension);
497 }
498
499 if (need_reader) {
500 stripe->ps_to_be_added++;
501
502 hr_io_raid5_t *io =
503 hr_fgroup_alloc(stripe->worker_group);
504 io->extent = e;
505 io->ba = range_extension.start;
506 io->cnt = range_extension.end -
507 range_extension.start + 1;
508 io->vol = vol;
509 io->stripe = stripe;
510
511 io->strip_off = io->ba;
512 hr_sub_data_offset(vol, &io->strip_off);
513 io->strip_off %= vol->strip_size / vol->bsize;
514 io->strip_off *= vol->bsize;
515
516 hr_fgroup_submit(stripe->worker_group,
517 hr_io_raid5_reconstruct_reader, io);
518 }
519 }
520
521 stripe->p_count_final = true;
522 fibril_condvar_broadcast(&stripe->ps_added_cv);
523
524 skip_readers:
525
526 /* parity writer */
527 hr_io_raid5_t *io = hr_fgroup_alloc(stripe->worker_group);
528 io->extent = stripe->p_extent;
529 io->ba = stripe->total_height[r].start;
530 io->cnt = stripe->total_height[r].end -
531 stripe->total_height[r].start + 1;
532 io->vol = vol;
533 io->stripe = stripe;
534
535 io->strip_off = io->ba;
536 hr_sub_data_offset(vol, &io->strip_off);
537 io->strip_off %= vol->strip_size / vol->bsize;
538 io->strip_off *= vol->bsize;
539
540 hr_fgroup_submit(stripe->worker_group,
541 hr_io_raid5_parity_writer, io);
542 }
543}
544
545static void execute_stripe_optimal_subtract(hr_stripe_t *stripe)
546{
547 hr_volume_t *vol = stripe->vol;
548
549 stripe->range_count = hr_stripe_merge_extent_spans(stripe,
550 vol->extent_no, stripe->total_height);
551
552 size_t worker_cnt;
553 worker_cnt = stripe->strips_touched; /* writers */
554 worker_cnt += stripe->range_count * 2; /* parity readers & writers */
555
556 stripe->ps_to_be_added = stripe->strips_touched; /* writers */
557 stripe->ps_to_be_added += stripe->range_count; /* parity readers */
558 stripe->p_count_final = true;
559
560 stripe->worker_group = hr_fgroup_create(vol->fge, worker_cnt);
561
562 for (size_t e = 0; e < vol->extent_no; e++) {
563 if (e == stripe->p_extent)
564 continue;
565
566 if (stripe->extent_span[e].cnt == 0)
567 continue;
568
569 hr_io_raid5_t *io = hr_fgroup_alloc(stripe->worker_group);
570 io->extent = e;
571 io->data_write = stripe->extent_span[e].data_write;
572 io->ba = stripe->extent_span[e].range.start;
573 io->cnt = stripe->extent_span[e].cnt;
574 io->strip_off = stripe->extent_span[e].strip_off * vol->bsize;
575 io->vol = vol;
576 io->stripe = stripe;
577
578 hr_fgroup_submit(stripe->worker_group,
579 hr_io_raid5_subtract_writer, io);
580 }
581
582 for (size_t r = 0; r < stripe->range_count; r++) {
583 hr_io_raid5_t *p_reader = hr_fgroup_alloc(stripe->worker_group);
584 p_reader->extent = stripe->p_extent;
585 p_reader->ba = stripe->total_height[r].start;
586 p_reader->cnt = stripe->total_height[r].end -
587 stripe->total_height[r].start + 1;
588 p_reader->vol = vol;
589 p_reader->stripe = stripe;
590
591 p_reader->strip_off = p_reader->ba;
592 hr_sub_data_offset(vol, &p_reader->strip_off);
593 p_reader->strip_off %= vol->strip_size / vol->bsize;
594 p_reader->strip_off *= vol->bsize;
595
596 hr_fgroup_submit(stripe->worker_group,
597 hr_io_raid5_reconstruct_reader, p_reader);
598
599 hr_io_raid5_t *p_writer = hr_fgroup_alloc(stripe->worker_group);
600 p_writer->extent = stripe->p_extent;
601 p_writer->ba = stripe->total_height[r].start;
602 p_writer->cnt = stripe->total_height[r].end -
603 stripe->total_height[r].start + 1;
604 p_writer->vol = vol;
605 p_writer->stripe = stripe;
606
607 p_writer->strip_off = p_writer->ba;
608 hr_sub_data_offset(vol, &p_writer->strip_off);
609 p_writer->strip_off %= vol->strip_size / vol->bsize;
610 p_writer->strip_off *= vol->bsize;
611
612 hr_fgroup_submit(stripe->worker_group,
613 hr_io_raid5_parity_writer, p_writer);
614 }
615
616}
617
618static void execute_write_stripe(hr_stripe_t *stripe, size_t bad_extent)
619{
620 hr_volume_t *vol = stripe->vol;
621
622 if (bad_extent < vol->extent_no) {
623 execute_stripe_degraded(stripe, bad_extent);
624 return;
625 }
626
627 if (stripe->subtract)
628 execute_stripe_optimal_subtract(stripe);
629 else
630 execute_stripe_optimal_reconstruct(stripe);
631}
632
633static void execute_read_stripe(hr_stripe_t *stripe, size_t bad_extent)
634{
635 hr_volume_t *vol = stripe->vol;
636
637 /* no parity involved */
638 if (bad_extent == vol->extent_no ||
639 bad_extent == stripe->p_extent ||
640 stripe->extent_span[bad_extent].cnt == 0) {
641 stripe->worker_group =
642 hr_fgroup_create(vol->fge, stripe->strips_touched);
643 for (size_t e = 0; e < vol->extent_no; e++) {
644 if (e == bad_extent || e == stripe->p_extent)
645 continue;
646 if (stripe->extent_span[e].cnt == 0)
647 continue;
648
649 hr_io_raid5_t *io =
650 hr_fgroup_alloc(stripe->worker_group);
651 io->extent = e;
652 io->data_read = stripe->extent_span[e].data_read;
653 io->ba = stripe->extent_span[e].range.start;
654 io->cnt = stripe->extent_span[e].cnt;
655 io->strip_off =
656 stripe->extent_span[e].strip_off * vol->bsize;
657 io->vol = vol;
658 io->stripe = stripe;
659
660 hr_fgroup_submit(stripe->worker_group,
661 hr_io_raid5_basic_reader, io);
662 }
663
664 return;
665 }
666
667 /* parity involved */
668
669 size_t worker_cnt = (vol->extent_no - 2) * 2 + 1; /* upper bound */
670 stripe->worker_group = hr_fgroup_create(vol->fge, worker_cnt);
671
672 stripe->ps_to_be_added = 0;
673
674 for (size_t e = 0; e < vol->extent_no; e++) {
675 if (e == bad_extent || e == stripe->p_extent)
676 continue;
677
678 range_t uncommon = { 0, 0 };
679 bool has_uncommon;
680 has_uncommon = hr_stripe_range_non_extension(
681 &stripe->extent_span[bad_extent].range,
682 &stripe->extent_span[e].range,
683 &uncommon);
684
685 if (stripe->extent_span[e].cnt == 0 || has_uncommon) {
686
687 stripe->ps_to_be_added++;
688
689 hr_io_raid5_t *io =
690 hr_fgroup_alloc(stripe->worker_group);
691 io->extent = e;
692 if (stripe->extent_span[bad_extent].cnt == 0) {
693 io->ba =
694 stripe->extent_span[bad_extent].range.start;
695 io->cnt = stripe->extent_span[bad_extent].cnt;
696 } else {
697 io->ba = uncommon.start;
698 io->cnt = uncommon.end - uncommon.start + 1;
699 }
700 io->strip_off =
701 stripe->extent_span[bad_extent].strip_off *
702 vol->bsize;
703 io->vol = vol;
704 io->stripe = stripe;
705
706 hr_fgroup_submit(stripe->worker_group,
707 hr_io_raid5_reconstruct_reader, io);
708
709 if (stripe->extent_span[e].cnt == 0)
710 continue;
711 }
712
713 range_t overlap_range;
714 bool overlap_up = true;
715 if (hr_ranges_overlap(&stripe->extent_span[e].range,
716 &stripe->extent_span[bad_extent].range,
717 &overlap_range)) {
718
719 stripe->ps_to_be_added++;
720
721 hr_io_raid5_t *io =
722 hr_fgroup_alloc(stripe->worker_group);
723 io->extent = e;
724 io->ba = overlap_range.start;
725 io->cnt = overlap_range.end - overlap_range.start + 1;
726
727 size_t diff = overlap_range.start -
728 stripe->extent_span[e].range.start;
729 io->strip_off =
730 (stripe->extent_span[e].strip_off + diff) *
731 vol->bsize;
732
733 io->data_read = stripe->extent_span[e].data_read;
734 io->data_read += diff * vol->bsize;
735 if (diff == 0)
736 overlap_up = false;
737
738 io->vol = vol;
739 io->stripe = stripe;
740
741 hr_fgroup_submit(stripe->worker_group,
742 hr_io_raid5_reader, io);
743 }
744
745 bool has_independent;
746 range_t independent = { 0, 0 };
747 has_independent = hr_stripe_range_non_extension(
748 &stripe->extent_span[e].range,
749 &uncommon,
750 &independent);
751 if (has_independent) {
752 hr_io_raid5_t *io =
753 hr_fgroup_alloc(stripe->worker_group);
754 io->extent = e;
755 io->ba = independent.start;
756 io->cnt = independent.end - independent.start + 1;
757 size_t diff = 0;
758 if (!overlap_up) {
759 diff =
760 overlap_range.end - overlap_range.start + 1;
761 }
762 io->strip_off =
763 (stripe->extent_span[e].strip_off + diff) *
764 vol->bsize;
765 io->data_read = stripe->extent_span[e].data_read;
766 io->data_read += diff * vol->bsize;
767 io->vol = vol;
768 io->stripe = stripe;
769
770 hr_fgroup_submit(stripe->worker_group,
771 hr_io_raid5_basic_reader, io);
772 }
773 }
774
775 stripe->ps_to_be_added++;
776
777 hr_io_raid5_t *io = hr_fgroup_alloc(stripe->worker_group);
778 io->extent = stripe->p_extent;
779 io->ba = stripe->extent_span[bad_extent].range.start;
780 io->cnt = stripe->extent_span[bad_extent].cnt;
781 io->strip_off = stripe->extent_span[bad_extent].strip_off * vol->bsize;
782 io->vol = vol;
783 io->stripe = stripe;
784
785 hr_fgroup_submit(stripe->worker_group, hr_io_raid5_reconstruct_reader,
786 io);
787
788 stripe->p_count_final = true;
789 fibril_condvar_broadcast(&stripe->ps_added_cv);
790
791 hr_io_raid5_t *pcopier_io = hr_fgroup_alloc(stripe->worker_group);
792 pcopier_io->cnt = stripe->extent_span[bad_extent].cnt;
793 pcopier_io->strip_off =
794 stripe->extent_span[bad_extent].strip_off * vol->bsize;
795 pcopier_io->data_read = stripe->extent_span[bad_extent].data_read;
796 pcopier_io->vol = vol;
797 pcopier_io->stripe = stripe;
798
799 hr_fgroup_submit(stripe->worker_group, hr_io_raid5_parity_getter,
800 pcopier_io);
801}
802
803/** Get non-overlapping part of 2 ranges.
804 *
805 * Return part of @param r1 not in @param r2.
806 *
807 * @param r1 Main range.
808 * @param r2 Queried range.
809 * @param out Place to store resulting range.
810 *
811 * @return true if output range is non-empty, else false.
812 */
813static bool hr_stripe_range_non_extension(const range_t *r1, const range_t *r2,
814 range_t *out)
815{
816 if (r1->end < r2->start) {
817 *out = *r1;
818 return true;
819 }
820
821 if (r1->start > r2->end) {
822 *out = *r1;
823 return true;
824 }
825
826 if (r1->start < r2->start && r1->end >= r2->start) {
827 out->start = r1->start;
828 out->end = r2->start - 1;
829 return out->start <= out->end;
830 }
831
832 if (r1->start <= r2->end && r1->end > r2->end) {
833 out->start = r2->end + 1;
834 out->end = r1->end;
835 return out->start <= out->end;
836 }
837
838 return false;
839}
840
841/** Merge adjascent or overlapping extent spans.
842 *
843 * @param s Stripe.
844 * @param extent_no Number of extents.
845 * @param out Place to store resulting ranges.
846 *
847 * @return Number of resulting ranges.
848 */
849static size_t hr_stripe_merge_extent_spans(hr_stripe_t *s, size_t extent_no,
850 range_t out[2])
851{
852 size_t out_count = 0;
853
854 for (size_t i = 0; i < extent_no; i++) {
855 if (s->extent_span[i].cnt == 0)
856 continue;
857 const range_t *r = &s->extent_span[i].range;
858 bool merged = false;
859
860 for (size_t j = 0; j < out_count; j++) {
861 if (hr_ranges_overlap(&out[j], r, NULL)) {
862 hr_stripe_extend_range(&out[j], r);
863 merged = true;
864
865 if (out_count == 2 &&
866 hr_ranges_overlap(&out[0], &out[1], NULL)) {
867 hr_stripe_extend_range(&out[0], &out[1]);
868 out_count = 1;
869 }
870
871 break;
872 }
873 }
874
875 if (!merged) {
876 assert(out_count < 2);
877 out[out_count++] = *r;
878 }
879 }
880
881 return out_count;
882}
883
884/** Extend a range.
885 *
886 * @param r1 Output range.
887 * @param r2 Range to extend the output one with.
888 *
889 */
890static void hr_stripe_extend_range(range_t *r1, const range_t *r2)
891{
892 if (r2->start < r1->start)
893 r1->start = r2->start;
894 if (r2->end > r1->end)
895 r1->end = r2->end;
896}
897
898static bool hr_ranges_overlap(const range_t *a, const range_t *b, range_t *out)
899{
900 uint64_t start = a->start > b->start ? a->start : b->start;
901 uint64_t end = a->end < b->end ? a->end : b->end;
902
903 if (start <= end) {
904 if (out != NULL) {
905 out->start = start;
906 out->end = end;
907 }
908
909 return true;
910 }
911
912 return false;
913}
914
915/** @}
916 */
Note: See TracBrowser for help on using the repository browser.