source: mainline/uspace/srv/bd/hr/raid1.c@ e2b417f

Last change on this file since e2b417f was e2b417f, checked in by Miroslav Cimerman <mc@…>, 6 months ago

hr: RAID1: use update_svc_id() wrappers from util

  • Property mode set to 100644
File size: 19.4 KB
Line 
1/*
2 * Copyright (c) 2025 Miroslav Cimerman
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup hr
30 * @{
31 */
32/**
33 * @file
34 */
35
36#include <bd_srv.h>
37#include <block.h>
38#include <errno.h>
39#include <hr.h>
40#include <io/log.h>
41#include <ipc/hr.h>
42#include <ipc/services.h>
43#include <loc.h>
44#include <task.h>
45#include <stdatomic.h>
46#include <stdio.h>
47#include <stdlib.h>
48#include <str_error.h>
49
50#include "fge.h"
51#include "io.h"
52#include "superblock.h"
53#include "util.h"
54#include "var.h"
55
56extern loc_srv_t *hr_srv;
57
58static void process_deferred_invalidations(hr_volume_t *);
59static void hr_raid1_update_vol_status(hr_volume_t *);
60static void hr_raid1_ext_state_callback(hr_volume_t *, size_t, errno_t);
61static size_t hr_raid1_count_good_extents(hr_volume_t *, uint64_t, size_t,
62 uint64_t);
63static errno_t hr_raid1_bd_op(hr_bd_op_type_t, bd_srv_t *, aoff64_t, size_t,
64 void *, const void *, size_t);
65static errno_t swap_hs(hr_volume_t *, size_t, size_t);
66static errno_t init_rebuild(hr_volume_t *, size_t *);
67static errno_t hr_raid1_rebuild(void *);
68
69/* bdops */
70static errno_t hr_raid1_bd_open(bd_srvs_t *, bd_srv_t *);
71static errno_t hr_raid1_bd_close(bd_srv_t *);
72static errno_t hr_raid1_bd_read_blocks(bd_srv_t *, aoff64_t, size_t, void *,
73 size_t);
74static errno_t hr_raid1_bd_sync_cache(bd_srv_t *, aoff64_t, size_t);
75static errno_t hr_raid1_bd_write_blocks(bd_srv_t *, aoff64_t, size_t,
76 const void *, size_t);
77static errno_t hr_raid1_bd_get_block_size(bd_srv_t *, size_t *);
78static errno_t hr_raid1_bd_get_num_blocks(bd_srv_t *, aoff64_t *);
79
80static bd_ops_t hr_raid1_bd_ops = {
81 .open = hr_raid1_bd_open,
82 .close = hr_raid1_bd_close,
83 .sync_cache = hr_raid1_bd_sync_cache,
84 .read_blocks = hr_raid1_bd_read_blocks,
85 .write_blocks = hr_raid1_bd_write_blocks,
86 .get_block_size = hr_raid1_bd_get_block_size,
87 .get_num_blocks = hr_raid1_bd_get_num_blocks
88};
89
90errno_t hr_raid1_create(hr_volume_t *new_volume)
91{
92 errno_t rc;
93
94 assert(new_volume->level == HR_LVL_1);
95
96 if (new_volume->extent_no < 2) {
97 HR_ERROR("RAID 1 array needs at least 2 devices\n");
98 return EINVAL;
99 }
100
101 bd_srvs_init(&new_volume->hr_bds);
102 new_volume->hr_bds.ops = &hr_raid1_bd_ops;
103 new_volume->hr_bds.sarg = new_volume;
104
105 hr_raid1_update_vol_status(new_volume);
106 if (new_volume->status == HR_VOL_FAULTY)
107 return EINVAL;
108
109 rc = hr_register_volume(new_volume);
110
111 return rc;
112}
113
114errno_t hr_raid1_init(hr_volume_t *vol)
115{
116 errno_t rc;
117 size_t bsize;
118 uint64_t total_blkno;
119
120 assert(vol->level == HR_LVL_1);
121
122 rc = hr_check_devs(vol, &total_blkno, &bsize);
123 if (rc != EOK)
124 return rc;
125
126 vol->nblocks = total_blkno / vol->extent_no;
127 vol->bsize = bsize;
128 vol->data_offset = HR_DATA_OFF;
129 vol->data_blkno = vol->nblocks - vol->data_offset;
130 vol->strip_size = 0;
131
132 return EOK;
133}
134
135void hr_raid1_status_event(hr_volume_t *vol)
136{
137 hr_raid1_update_vol_status(vol);
138}
139
/** Add a hotspare extent to a RAID1 volume.
 *
 * Appends the hotspare under hotspare_lock and, when the volume is
 * currently DEGRADED, spawns a detached rebuild fibril immediately.
 *
 * @param vol		Volume to add the hotspare to.
 * @param hotspare	Service id of the hotspare block device.
 *
 * @return EOK on success, ELIMIT when the hotspare table is full,
 *	   ENOMEM when the rebuild fibril cannot be created (NOTE:
 *	   in that case the hotspare HAS already been added).
 */
errno_t hr_raid1_add_hotspare(hr_volume_t *vol, service_id_t hotspare)
{
	HR_DEBUG("hr_raid1_add_hotspare()\n");

	errno_t rc = EOK;

	fibril_mutex_lock(&vol->hotspare_lock);

	if (vol->hotspare_no >= HR_MAX_HOTSPARES) {
		HR_ERROR("hr_raid1_add_hotspare(): cannot add more hotspares "
		    "to \"%s\"\n", vol->devname);
		rc = ELIMIT;
		goto error;
	}

	size_t hs_idx = vol->hotspare_no;

	vol->hotspare_no++;

	hr_update_hotspare_svc_id(vol, hs_idx, hotspare);
	hr_update_hotspare_status(vol, hs_idx, HR_EXT_HOTSPARE);

	/*
	 * If the volume is degraded, start rebuild right away.
	 *
	 * NOTE(review): vol->status is read here without holding
	 * states_lock, unlike the other readers in this file —
	 * presumably a benign race (worst case the rebuild starts
	 * via the next status update instead); confirm intent.
	 */
	if (vol->status == HR_VOL_DEGRADED) {
		HR_DEBUG("hr_raid1_add_hotspare(): volume in DEGRADED state, "
		    "spawning new rebuild fibril\n");
		fid_t fib = fibril_create(hr_raid1_rebuild, vol);
		if (fib == 0) {
			rc = ENOMEM;
			goto error;
		}
		fibril_start(fib);
		/* fire-and-forget: the rebuild fibril cleans up itself */
		fibril_detach(fib);
	}

error:
	fibril_mutex_unlock(&vol->hotspare_lock);

	return rc;
}
182
183static errno_t hr_raid1_bd_open(bd_srvs_t *bds, bd_srv_t *bd)
184{
185 HR_DEBUG("hr_bd_open()\n");
186 return EOK;
187}
188
189static errno_t hr_raid1_bd_close(bd_srv_t *bd)
190{
191 HR_DEBUG("hr_bd_close()\n");
192 return EOK;
193}
194
195static errno_t hr_raid1_bd_sync_cache(bd_srv_t *bd, aoff64_t ba, size_t cnt)
196{
197 return hr_raid1_bd_op(HR_BD_SYNC, bd, ba, cnt, NULL, NULL, 0);
198}
199
200static errno_t hr_raid1_bd_read_blocks(bd_srv_t *bd, aoff64_t ba, size_t cnt,
201 void *buf, size_t size)
202{
203 return hr_raid1_bd_op(HR_BD_READ, bd, ba, cnt, buf, NULL, size);
204}
205
206static errno_t hr_raid1_bd_write_blocks(bd_srv_t *bd, aoff64_t ba, size_t cnt,
207 const void *data, size_t size)
208{
209 return hr_raid1_bd_op(HR_BD_WRITE, bd, ba, cnt, NULL, data, size);
210}
211
212static errno_t hr_raid1_bd_get_block_size(bd_srv_t *bd, size_t *rsize)
213{
214 hr_volume_t *vol = bd->srvs->sarg;
215
216 *rsize = vol->bsize;
217 return EOK;
218}
219
220static errno_t hr_raid1_bd_get_num_blocks(bd_srv_t *bd, aoff64_t *rnb)
221{
222 hr_volume_t *vol = bd->srvs->sarg;
223
224 *rnb = vol->data_blkno;
225 return EOK;
226}
227
/** Turn deferred-invalidated extents into hotspares.
 *
 * Extents that were marked HR_EXT_INVALID by the state callback (when
 * memory pressure prevented immediate handling) are moved to the
 * hotspare table and their extent slots set to HR_EXT_MISSING, then the
 * deferred list is drained.
 *
 * Caller must hold vol->deferred_list_lock (see
 * hr_raid1_update_vol_status()). Takes, in order: halt_lock,
 * extents_lock (W), states_lock (W), hotspare_lock — the same ordering
 * as init_rebuild(); do not reorder.
 */
static void process_deferred_invalidations(hr_volume_t *vol)
{
	HR_DEBUG("hr_raid1_update_vol_status(): deferred invalidations\n");

	/* halt_please makes busy I/O fibrils yield so we can get the locks */
	fibril_mutex_lock(&vol->halt_lock);
	vol->halt_please = true;
	fibril_rwlock_write_lock(&vol->extents_lock);
	fibril_rwlock_write_lock(&vol->states_lock);
	fibril_mutex_lock(&vol->hotspare_lock);

	list_foreach(vol->deferred_invalidations_list, link,
	    hr_deferred_invalidation_t, di) {
		assert(vol->extents[di->index].status == HR_EXT_INVALID);

		HR_DEBUG("moving invalidated extent no. %lu to hotspares\n",
		    di->index);

		block_fini(di->svc_id);

		size_t hs_idx = vol->hotspare_no;

		vol->hotspare_no++;

		/* the invalidated device becomes a hotspare candidate */
		hr_update_hotspare_svc_id(vol, hs_idx, di->svc_id);
		hr_update_hotspare_status(vol, hs_idx, HR_EXT_HOTSPARE);

		/* and its extent slot becomes vacant */
		hr_update_ext_svc_id(vol, di->index, 0);
		hr_update_ext_status(vol, di->index, HR_EXT_MISSING);

		assert(vol->hotspare_no < HR_MAX_HOTSPARES + HR_MAX_EXTENTS);
	}

	/* drain the deferred list; svc_id == 0 marks a free slot */
	for (size_t i = 0; i < HR_MAX_EXTENTS; i++) {
		hr_deferred_invalidation_t *di = &vol->deferred_inval[i];
		if (di->svc_id != 0) {
			list_remove(&di->link);
			di->svc_id = 0;
		}
	}

	fibril_mutex_unlock(&vol->hotspare_lock);
	fibril_rwlock_write_unlock(&vol->states_lock);
	fibril_rwlock_write_unlock(&vol->extents_lock);
	vol->halt_please = false;
	fibril_mutex_unlock(&vol->halt_lock);
}
274
/** Recompute the volume state from current extent states.
 *
 * First processes any deferred invalidations, then maps the number of
 * ONLINE extents to a volume state:
 *   0 healthy          -> FAULTY
 *   0 < healthy < all  -> DEGRADED (and, if hotspares are available,
 *                         a rebuild fibril is spawned)
 *   all healthy        -> ONLINE
 *
 * The healthy count is taken under read locks and may be stale by the
 * time the write lock is acquired; the state transitions are
 * idempotent, so a repeated call converges.
 */
static void hr_raid1_update_vol_status(hr_volume_t *vol)
{
	fibril_mutex_lock(&vol->deferred_list_lock);

	if (list_count(&vol->deferred_invalidations_list) > 0)
		process_deferred_invalidations(vol);

	fibril_mutex_unlock(&vol->deferred_list_lock);

	fibril_rwlock_read_lock(&vol->extents_lock);
	fibril_rwlock_read_lock(&vol->states_lock);

	hr_vol_status_t old_state = vol->status;
	size_t healthy = hr_count_extents(vol, HR_EXT_ONLINE);

	fibril_rwlock_read_unlock(&vol->states_lock);
	fibril_rwlock_read_unlock(&vol->extents_lock);

	if (healthy == 0) {
		if (old_state != HR_VOL_FAULTY) {
			fibril_rwlock_write_lock(&vol->states_lock);
			hr_update_vol_status(vol, HR_VOL_FAULTY);
			fibril_rwlock_write_unlock(&vol->states_lock);
		}
	} else if (healthy < vol->extent_no) {
		/* a rebuild already in progress keeps its REBUILD state */
		if (old_state != HR_VOL_REBUILD) {
			if (old_state != HR_VOL_DEGRADED) {
				fibril_rwlock_write_lock(&vol->states_lock);
				hr_update_vol_status(vol, HR_VOL_DEGRADED);
				fibril_rwlock_write_unlock(&vol->states_lock);
			}

			if (vol->hotspare_no > 0) {
				/* detached fibril; failure to create is
				 * silently dropped — a later status event
				 * will retry */
				fid_t fib = fibril_create(hr_raid1_rebuild,
				    vol);
				if (fib == 0)
					return;
				fibril_start(fib);
				fibril_detach(fib);
			}
		}
	} else {
		if (old_state != HR_VOL_ONLINE) {
			fibril_rwlock_write_lock(&vol->states_lock);
			hr_update_vol_status(vol, HR_VOL_ONLINE);
			fibril_rwlock_write_unlock(&vol->states_lock);
		}
	}
}
324
325static void hr_raid1_ext_state_callback(hr_volume_t *vol, size_t extent,
326 errno_t rc)
327{
328 if (rc == EOK)
329 return;
330
331 assert(fibril_rwlock_is_locked(&vol->extents_lock));
332
333 fibril_rwlock_write_lock(&vol->states_lock);
334
335 switch (rc) {
336 case ENOMEM:
337 fibril_mutex_lock(&vol->deferred_list_lock);
338
339 service_id_t invalid_svc_id = vol->extents[extent].svc_id;
340
341 list_foreach(vol->deferred_invalidations_list, link,
342 hr_deferred_invalidation_t, di) {
343 if (di->svc_id == invalid_svc_id) {
344 assert(vol->extents[extent].status ==
345 HR_EXT_INVALID);
346 goto done;
347 }
348 }
349
350 assert(vol->extents[extent].svc_id != HR_EXT_INVALID);
351
352 hr_update_ext_status(vol, extent, HR_EXT_INVALID);
353
354 size_t i = list_count(&vol->deferred_invalidations_list);
355 vol->deferred_inval[i].svc_id = invalid_svc_id;
356 vol->deferred_inval[i].index = extent;
357
358 list_append(&vol->deferred_inval[i].link,
359 &vol->deferred_invalidations_list);
360 done:
361 fibril_mutex_unlock(&vol->deferred_list_lock);
362 break;
363 case ENOENT:
364 hr_update_ext_status(vol, extent, HR_EXT_MISSING);
365 break;
366 default:
367 hr_update_ext_status(vol, extent, HR_EXT_FAILED);
368 }
369
370 fibril_rwlock_write_unlock(&vol->states_lock);
371}
372
373static size_t hr_raid1_count_good_extents(hr_volume_t *vol, uint64_t ba,
374 size_t cnt, uint64_t rebuild_blk)
375{
376 assert(fibril_rwlock_is_locked(&vol->extents_lock));
377 assert(fibril_rwlock_is_locked(&vol->states_lock));
378
379 size_t count = 0;
380 for (size_t i = 0; i < vol->extent_no; i++) {
381 if (vol->extents[i].status == HR_EXT_ONLINE ||
382 (vol->extents[i].status == HR_EXT_REBUILD &&
383 ba < rebuild_blk)) {
384 count++;
385 }
386 }
387
388 return count;
389
390}
391
/** Common dispatcher for READ / WRITE / SYNC requests on a RAID1 volume.
 *
 * READ: tries extents in order until one succeeds (a rebuilding extent
 * is eligible only when the whole range lies below rebuild_blk).
 * WRITE/SYNC: fans the operation out to all usable extents through a
 * fibril group; WRITE additionally takes a range lock so a concurrent
 * rebuild cannot interleave with it.
 *
 * Returns EOK when at least one extent serviced the request, EIO when
 * none did (or the volume is FAULTY), EINVAL on a short buffer or bad
 * op type, ENOMEM on allocation failure, or a range-check error.
 */
static errno_t hr_raid1_bd_op(hr_bd_op_type_t type, bd_srv_t *bd, aoff64_t ba,
    size_t cnt, void *data_read, const void *data_write, size_t size)
{
	hr_volume_t *vol = bd->srvs->sarg;
	hr_range_lock_t *rl = NULL;
	errno_t rc;
	size_t i;
	uint64_t rebuild_blk;

	fibril_rwlock_read_lock(&vol->states_lock);
	hr_vol_status_t vol_state = vol->status;
	fibril_rwlock_read_unlock(&vol->states_lock);

	if (vol_state == HR_VOL_FAULTY)
		return EIO;

	/* caller's buffer must cover the whole transfer */
	if (type == HR_BD_READ || type == HR_BD_WRITE)
		if (size < cnt * vol->bsize)
			return EINVAL;

	rc = hr_check_ba_range(vol, cnt, ba);
	if (rc != EOK)
		return rc;

	/* allow full dev sync */
	if (type != HR_BD_SYNC || ba != 0)
		hr_add_ba_offset(vol, &ba);

	/*
	 * this is to allow adding hotspare or start a rebuild on
	 * very busy array, because of how rwlocks are implemented
	 * in HelenOS (no writer priority, so if there are multiple
	 * continuous readers, writer will never own the lock)
	 */
	if (vol->halt_please) {
		fibril_mutex_lock(&vol->halt_lock);
		fibril_mutex_unlock(&vol->halt_lock);
	}

	/*
	 * extent order has to be locked for the whole IO duration,
	 * so that workers have consistent targets
	 */
	fibril_rwlock_read_lock(&vol->extents_lock);

	size_t successful = 0;
	switch (type) {
	case HR_BD_READ:
		rebuild_blk = atomic_load_explicit(&vol->rebuild_blk,
		    memory_order_relaxed);

		for (i = 0; i < vol->extent_no; i++) {
			fibril_rwlock_read_lock(&vol->states_lock);
			hr_ext_status_t state = vol->extents[i].status;
			fibril_rwlock_read_unlock(&vol->states_lock);

			/* skip extents that cannot serve this range */
			if (state != HR_EXT_ONLINE &&
			    (state != HR_EXT_REBUILD ||
			    ba + cnt - 1 >= rebuild_blk)) {
				continue;
			}

			rc = block_read_direct(vol->extents[i].svc_id, ba, cnt,
			    data_read);

			/* ENOMEM on the last candidate: give up */
			if (rc == ENOMEM && i + 1 == vol->extent_no)
				goto end;

			/* ENOMEM is transient; try the next extent
			 * without demoting this one */
			if (rc == ENOMEM)
				continue;

			if (rc != EOK) {
				hr_raid1_ext_state_callback(vol, i, rc);
			} else {
				successful++;
				break;
			}
		}
		break;
	case HR_BD_SYNC:
	case HR_BD_WRITE:
		if (type == HR_BD_WRITE) {
			rl = hr_range_lock_acquire(vol, ba, cnt);
			if (rl == NULL) {
				rc = ENOMEM;
				goto end;
			}
		}

		fibril_rwlock_read_lock(&vol->states_lock);

		rebuild_blk = atomic_load_explicit(&vol->rebuild_blk,
		    memory_order_relaxed);

		size_t good = hr_raid1_count_good_extents(vol, ba, cnt,
		    rebuild_blk);

		hr_fgroup_t *group = hr_fgroup_create(vol->fge, good);
		if (group == NULL) {
			if (type == HR_BD_WRITE)
				hr_range_lock_release(rl);
			rc = ENOMEM;
			fibril_rwlock_read_unlock(&vol->states_lock);
			goto end;
		}

		for (i = 0; i < vol->extent_no; i++) {
			if (vol->extents[i].status != HR_EXT_ONLINE &&
			    (vol->extents[i].status != HR_EXT_REBUILD ||
			    ba >= rebuild_blk)) {
				/*
				 * When the extent is being rebuilt,
				 * we only write to the part that is already
				 * rebuilt. If IO starts after vol->rebuild_blk
				 * we do not proceed, the write is going to
				 * be replicated later in the rebuild.
				 */
				continue;
			}

			hr_io_t *io = hr_fgroup_alloc(group);
			io->extent = i;
			io->data_write = data_write;
			io->data_read = data_read;
			io->ba = ba;
			io->cnt = cnt;
			io->type = type;
			io->vol = vol;
			io->state_callback = hr_raid1_ext_state_callback;

			hr_fgroup_submit(group, hr_io_worker, io);
		}

		fibril_rwlock_read_unlock(&vol->states_lock);

		/* wait for all workers; count of successes is enough */
		(void)hr_fgroup_wait(group, &successful, NULL);

		if (type == HR_BD_WRITE)
			hr_range_lock_release(rl);

		break;
	default:
		rc = EINVAL;
		goto end;
	}

	if (successful > 0)
		rc = EOK;
	else
		rc = EIO;

end:
	fibril_rwlock_read_unlock(&vol->extents_lock);

	/* extent states may have changed; refresh the volume state */
	hr_raid1_update_vol_status(vol);

	return rc;
}
550
551static errno_t swap_hs(hr_volume_t *vol, size_t bad, size_t hs)
552{
553 HR_DEBUG("hr_raid1_rebuild(): swapping in hotspare\n");
554
555 service_id_t faulty_svc_id = vol->extents[bad].svc_id;
556 service_id_t hs_svc_id = vol->hotspares[hs].svc_id;
557
558 errno_t rc = block_init(hs_svc_id);
559 if (rc != EOK) {
560 HR_ERROR("hr_raid1_rebuild(): initing hotspare (%lu) failed\n",
561 hs_svc_id);
562 return rc;
563 }
564
565 hr_update_ext_svc_id(vol, bad, hs_svc_id);
566 hr_update_ext_status(vol, bad, HR_EXT_HOTSPARE);
567
568 hr_update_hotspare_svc_id(vol, hs, 0);
569 hr_update_hotspare_status(vol, hs, HR_EXT_INVALID);
570
571 vol->hotspare_no--;
572
573 if (faulty_svc_id != 0)
574 block_fini(faulty_svc_id);
575
576 return EOK;
577}
578
/** Prepare a rebuild: pick a bad extent, swap in the last hotspare.
 *
 * On success the chosen extent is in HR_EXT_REBUILD state, the volume
 * is HR_VOL_REBUILD, rebuild_blk is reset to 0 and *rebuild_idx holds
 * the extent index to rebuild.
 *
 * Takes, in order: halt_lock, extents_lock (W), states_lock (W),
 * hotspare_lock — same ordering as process_deferred_invalidations().
 *
 * @return EOK on success, EINVAL when there is no hotspare, no bad
 *	   extent, or the hotspare is in an unexpected state, or an
 *	   error from swap_hs().
 */
static errno_t init_rebuild(hr_volume_t *vol, size_t *rebuild_idx)
{
	errno_t rc = EOK;

	/* halt_please makes busy I/O fibrils yield so we can get the locks */
	fibril_mutex_lock(&vol->halt_lock);
	vol->halt_please = true;
	fibril_rwlock_write_lock(&vol->extents_lock);
	fibril_rwlock_write_lock(&vol->states_lock);
	fibril_mutex_lock(&vol->hotspare_lock);

	if (vol->hotspare_no == 0) {
		HR_WARN("hr_raid1_rebuild(): no free hotspares on \"%s\", "
		    "aborting rebuild\n", vol->devname);
		rc = EINVAL;
		goto error;
	}

	/* first extent that is not ONLINE is the rebuild target */
	size_t bad = vol->extent_no;
	for (size_t i = 0; i < vol->extent_no; i++) {
		if (vol->extents[i].status != HR_EXT_ONLINE) {
			bad = i;
			break;
		}
	}

	if (bad == vol->extent_no) {
		HR_WARN("hr_raid1_rebuild(): no bad extent on \"%s\", "
		    "aborting rebuild\n", vol->devname);
		rc = EINVAL;
		goto error;
	}

	/* use the last hotspare in the table */
	size_t hotspare_idx = vol->hotspare_no - 1;

	hr_ext_status_t hs_state = vol->hotspares[hotspare_idx].status;
	if (hs_state != HR_EXT_HOTSPARE) {
		HR_ERROR("hr_raid1_rebuild(): invalid hotspare state \"%s\", "
		    "aborting rebuild\n", hr_get_ext_status_msg(hs_state));
		rc = EINVAL;
		goto error;
	}

	rc = swap_hs(vol, bad, hotspare_idx);
	if (rc != EOK) {
		HR_ERROR("hr_raid1_rebuild(): swapping hotspare failed, "
		    "aborting rebuild\n");
		goto error;
	}

	hr_extent_t *rebuild_ext = &vol->extents[bad];

	HR_DEBUG("hr_raid1_rebuild(): starting REBUILD on extent no. %lu (%lu)"
	    "\n", bad, rebuild_ext->svc_id);

	/* rebuild starts from block 0; readers consult this watermark */
	atomic_store_explicit(&vol->rebuild_blk, 0, memory_order_relaxed);

	hr_update_ext_status(vol, bad, HR_EXT_REBUILD);
	hr_update_vol_status(vol, HR_VOL_REBUILD);

	*rebuild_idx = bad;
error:
	fibril_mutex_unlock(&vol->hotspare_lock);
	fibril_rwlock_write_unlock(&vol->states_lock);
	fibril_rwlock_write_unlock(&vol->extents_lock);
	vol->halt_please = false;
	fibril_mutex_unlock(&vol->halt_lock);

	return rc;
}
648
649static errno_t hr_raid1_restore_blocks(hr_volume_t *vol, size_t rebuild_idx,
650 uint64_t ba, size_t cnt, void *buf)
651{
652 HR_DEBUG("REBUILD restoring blocks (ba: %lu, cnt: %lu)\n", ba, cnt);
653
654 assert(fibril_rwlock_is_locked(&vol->extents_lock));
655
656 errno_t rc = ENOENT;
657 hr_extent_t *ext, *rebuild_ext = &vol->extents[rebuild_idx];
658
659 for (size_t i = 0; i < vol->extent_no; i++) {
660 fibril_rwlock_read_lock(&vol->states_lock);
661
662 ext = &vol->extents[i];
663 if (ext->status != HR_EXT_ONLINE)
664 continue;
665
666 fibril_rwlock_read_unlock(&vol->states_lock);
667
668 rc = block_read_direct(ext->svc_id, ba, cnt, buf);
669 if (rc == EOK)
670 break;
671
672 if (rc != ENOMEM)
673 hr_raid1_ext_state_callback(vol, i, rc);
674
675 if (i + 1 >= vol->extent_no) {
676 HR_ERROR("rebuild on \"%s\" (%lu), failed due to "
677 "too many failed extents\n",
678 vol->devname, vol->svc_id);
679
680 return rc;
681 }
682 }
683
684 rc = block_write_direct(rebuild_ext->svc_id, ba, cnt, buf);
685 if (rc != EOK) {
686 if (rc != ENOMEM)
687 hr_raid1_ext_state_callback(vol, rebuild_idx, rc);
688
689 HR_ERROR("rebuild on \"%s\" (%lu), failed due to "
690 "the rebuilt extent no. %lu WRITE (rc: %s)\n",
691 vol->devname, vol->svc_id, rebuild_idx, str_error(rc));
692
693 return rc;
694 }
695
696 return EOK;
697}
698
699/*
700 * Put the last HOTSPARE extent in place
701 * of first that != ONLINE, and start the rebuild.
702 */
703static errno_t hr_raid1_rebuild(void *arg)
704{
705 HR_DEBUG("hr_raid1_rebuild()\n");
706
707 hr_volume_t *vol = arg;
708 void *buf = NULL;
709 size_t rebuild_idx;
710 errno_t rc;
711
712 rc = init_rebuild(vol, &rebuild_idx);
713 if (rc != EOK)
714 return rc;
715
716 size_t left = vol->data_blkno;
717 size_t max_blks = DATA_XFER_LIMIT / vol->bsize;
718 buf = malloc(max_blks * vol->bsize);
719
720 size_t cnt;
721 uint64_t ba = 0;
722 hr_add_ba_offset(vol, &ba);
723
724 fibril_rwlock_read_lock(&vol->extents_lock);
725
726 hr_range_lock_t *rl = NULL;
727
728 while (left != 0) {
729 if (vol->halt_please) {
730 fibril_rwlock_read_unlock(&vol->extents_lock);
731 fibril_mutex_lock(&vol->halt_lock);
732 fibril_mutex_unlock(&vol->halt_lock);
733 fibril_rwlock_read_lock(&vol->extents_lock);
734 }
735
736 cnt = min(max_blks, left);
737
738 rl = hr_range_lock_acquire(vol, ba, cnt);
739 if (rl == NULL) {
740 rc = ENOMEM;
741 goto end;
742 }
743
744 atomic_store_explicit(&vol->rebuild_blk, ba,
745 memory_order_relaxed);
746
747 rc = hr_raid1_restore_blocks(vol, rebuild_idx, ba, cnt, buf);
748
749 hr_range_lock_release(rl);
750
751 if (rc != EOK)
752 goto end;
753
754 ba += cnt;
755 left -= cnt;
756 }
757
758 HR_DEBUG("hr_raid1_rebuild(): rebuild finished on \"%s\" (%lu), "
759 "extent no. %lu\n", vol->devname, vol->svc_id, rebuild_idx);
760
761 fibril_rwlock_write_lock(&vol->states_lock);
762 hr_update_ext_status(vol, rebuild_idx, HR_EXT_ONLINE);
763 fibril_rwlock_write_unlock(&vol->states_lock);
764
765 /*
766 * For now write metadata at the end, because
767 * we don't sync metada accross extents yet.
768 */
769 hr_write_meta_to_ext(vol, rebuild_idx);
770end:
771 if (rc != EOK) {
772 fibril_rwlock_write_lock(&vol->states_lock);
773 hr_update_vol_status(vol, HR_VOL_DEGRADED);
774 fibril_rwlock_write_unlock(&vol->states_lock);
775 }
776
777 fibril_rwlock_read_unlock(&vol->extents_lock);
778
779 hr_raid1_update_vol_status(vol);
780
781 if (buf != NULL)
782 free(buf);
783
784 return rc;
785}
786
787/** @}
788 */
Note: See TracBrowser for help on using the repository browser.