source: mainline/uspace/srv/bd/hr/util.c@ 2de7c1f

Last change on this file since 2de7c1f was aa9bad8, checked in by Miroslav Cimerman <mc@…>, 4 months ago

hr: let each format implement own probe

This will allow metadata to have superblocks in
different locations across versions.

This also greatly reduces the format handling interface:
functions previously used by the probing done publicly
in superblock.c are now used only inside the specific
metadata format code.

  • Property mode set to 100644
File size: 27.3 KB
Line 
1/*
2 * Copyright (c) 2025 Miroslav Cimerman
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup hr
30 * @{
31 */
32/**
33 * @file
34 */
35
36#include <adt/list.h>
37#include <block.h>
38#include <errno.h>
39#include <fibril_synch.h>
40#include <hr.h>
41#include <inttypes.h>
42#include <io/log.h>
43#include <loc.h>
44#include <mem.h>
45#include <stdatomic.h>
46#include <stdlib.h>
47#include <stdio.h>
48#include <str.h>
49#include <str_error.h>
50#include <vbd.h>
51
52#include "io.h"
53#include "superblock.h"
54#include "util.h"
55#include "var.h"
56
57static bool hr_range_lock_overlap(hr_range_lock_t *, hr_range_lock_t *);
58static errno_t hr_add_svc_linked_to_list(list_t *, service_id_t, bool, void *);
59static void free_dev_list_member(struct dev_list_member *);
60static void free_svc_id_list(list_t *);
61static errno_t hr_fill_disk_part_svcs_list(list_t *);
62static errno_t block_init_dev_list(list_t *);
63static void block_fini_dev_list(list_t *);
64static errno_t hr_util_get_matching_md_svcs_list(list_t *, list_t *,
65 service_id_t, hr_metadata_type_t, void *);
66static errno_t hr_util_assemble_from_matching_list(list_t *,
67 hr_metadata_type_t);
68static errno_t hr_fill_svcs_list_from_cfg(hr_config_t *, list_t *);
69static errno_t hr_swap_hs(hr_volume_t *, size_t, size_t);
70
71#define HR_RL_LIST_LOCK(vol) (fibril_mutex_lock(&(vol)->range_lock_list_lock))
72#define HR_RL_LIST_UNLOCK(vol) \
73 (fibril_mutex_unlock(&(vol)->range_lock_list_lock))
74
75extern loc_srv_t *hr_srv;
76extern list_t hr_volumes;
77extern fibril_rwlock_t hr_volumes_lock;
78
79/*
80 * malloc() wrapper that behaves like
81 * FreeBSD malloc(9) with M_WAITOK flag.
82 *
83 * Return value is never NULL.
84 */
void *hr_malloc_waitok(size_t size)
{
	void *p = malloc(size);

	/* By contract this never returns NULL: retry until memory frees up. */
	while (p == NULL) {
		fibril_usleep(MSEC2USEC(250)); /* sleep 250ms */
		p = malloc(size);
	}

	return p;
}
93
void *hr_calloc_waitok(size_t nmemb, size_t size)
{
	void *p = calloc(nmemb, size);

	/* Like hr_malloc_waitok(): block until the allocation succeeds. */
	while (p == NULL) {
		fibril_usleep(MSEC2USEC(250)); /* sleep 250ms */
		p = calloc(nmemb, size);
	}

	return p;
}
102
103errno_t hr_create_vol_struct(hr_volume_t **rvol, hr_level_t level,
104 const char *devname, hr_metadata_type_t metadata_type)
105{
106 HR_DEBUG("%s()", __func__);
107
108 errno_t rc;
109
110 hr_volume_t *vol = calloc(1, sizeof(hr_volume_t));
111 if (vol == NULL)
112 return ENOMEM;
113
114 str_cpy(vol->devname, HR_DEVNAME_LEN, devname);
115 vol->level = level;
116
117 vol->meta_ops = hr_get_meta_type_ops(metadata_type);
118
119 uint8_t meta_flags = vol->meta_ops->get_flags();
120
121 switch (level) {
122 case HR_LVL_0:
123 vol->hr_ops.create = hr_raid0_create;
124 vol->hr_ops.init = hr_raid0_init;
125 vol->hr_ops.vol_state_eval = hr_raid0_vol_state_eval;
126 vol->hr_ops.ext_state_cb = hr_raid0_ext_state_cb;
127 break;
128 case HR_LVL_1:
129 vol->hr_ops.create = hr_raid1_create;
130 vol->hr_ops.init = hr_raid1_init;
131 vol->hr_ops.vol_state_eval = hr_raid1_vol_state_eval;
132 vol->hr_ops.ext_state_cb = hr_raid1_ext_state_cb;
133 if (meta_flags & HR_METADATA_HOTSPARE_SUPPORT)
134 vol->hr_ops.add_hotspare = hr_raid1_add_hotspare;
135 break;
136 case HR_LVL_4:
137 case HR_LVL_5:
138 vol->hr_ops.create = hr_raid5_create;
139 vol->hr_ops.init = hr_raid5_init;
140 vol->hr_ops.vol_state_eval = hr_raid5_vol_state_eval;
141 vol->hr_ops.ext_state_cb = hr_raid5_ext_state_cb;
142 if (meta_flags & HR_METADATA_HOTSPARE_SUPPORT)
143 vol->hr_ops.add_hotspare = hr_raid5_add_hotspare;
144 break;
145 default:
146 HR_DEBUG("unkown level: %d, aborting\n", vol->level);
147 rc = EINVAL;
148 goto error;
149 }
150
151 if (level == HR_LVL_4 || level == HR_LVL_5)
152 vol->fge = hr_fpool_create(16, 32, sizeof(hr_io_raid5_t));
153 else
154 vol->fge = hr_fpool_create(16, 32, sizeof(hr_io_t));
155
156 if (vol->fge == NULL) {
157 rc = ENOMEM;
158 goto error;
159 }
160
161 vol->state = HR_VOL_NONE;
162
163 fibril_mutex_initialize(&vol->md_lock);
164
165 fibril_rwlock_initialize(&vol->extents_lock);
166 fibril_rwlock_initialize(&vol->states_lock);
167
168 fibril_mutex_initialize(&vol->hotspare_lock);
169
170 list_initialize(&vol->range_lock_list);
171 fibril_mutex_initialize(&vol->range_lock_list_lock);
172
173 atomic_init(&vol->state_dirty, false);
174 atomic_init(&vol->first_write, false);
175 atomic_init(&vol->rebuild_blk, 0);
176 atomic_init(&vol->open_cnt, 0);
177
178 *rvol = vol;
179
180 return EOK;
181error:
182 free(vol);
183 return rc;
184}
185
186void hr_destroy_vol_struct(hr_volume_t *vol)
187{
188 HR_DEBUG("%s()", __func__);
189
190 if (vol == NULL)
191 return;
192
193 hr_fpool_destroy(vol->fge);
194 hr_fini_devs(vol);
195 free(vol->in_mem_md);
196 free(vol);
197}
198
199errno_t hr_get_volume_svcs(size_t *rcnt, service_id_t **rsvcs)
200{
201 size_t i;
202 service_id_t *vol_svcs;
203
204 if (rcnt == NULL || rsvcs == NULL)
205 return EINVAL;
206
207 fibril_rwlock_read_lock(&hr_volumes_lock);
208
209 size_t vol_cnt = list_count(&hr_volumes);
210 vol_svcs = malloc(vol_cnt * sizeof(service_id_t));
211 if (vol_svcs == NULL) {
212 fibril_rwlock_read_unlock(&hr_volumes_lock);
213 return ENOMEM;
214 }
215
216 i = 0;
217 list_foreach(hr_volumes, lvolumes, hr_volume_t, iter)
218 vol_svcs[i++] = iter->svc_id;
219
220 fibril_rwlock_read_unlock(&hr_volumes_lock);
221
222 *rcnt = vol_cnt;
223 *rsvcs = vol_svcs;
224
225 return EOK;
226}
227
228hr_volume_t *hr_get_volume(service_id_t svc_id)
229{
230 HR_DEBUG("%s()", __func__);
231
232 hr_volume_t *rvol = NULL;
233
234 fibril_rwlock_read_lock(&hr_volumes_lock);
235 list_foreach(hr_volumes, lvolumes, hr_volume_t, iter) {
236 if (iter->svc_id == svc_id) {
237 rvol = iter;
238 break;
239 }
240 }
241 fibril_rwlock_read_unlock(&hr_volumes_lock);
242
243 return rvol;
244}
245
/**
 * Deactivate and remove a volume.
 *
 * Refuses with EBUSY while the volume still has open block device
 * connections. On success the metadata is written back, the volume
 * structure destroyed and its location service unregistered.
 *
 * @param svc_id	Service id of the volume to remove.
 *
 * @return EOK on success, ENOENT if no such volume, EBUSY if still open,
 *         or a loc_service_unregister() error.
 */
errno_t hr_remove_volume(service_id_t svc_id)
{
	HR_DEBUG("%s()", __func__);

	hr_volume_t *vol = hr_get_volume(svc_id);
	if (vol == NULL)
		return ENOENT;

	fibril_rwlock_write_lock(&hr_volumes_lock);

	/* relaxed load suffices; the write lock orders this operation */
	int open_cnt = atomic_load_explicit(&vol->open_cnt,
	    memory_order_relaxed);

	/*
	 * The atomicity of this if condition (and this whole
	 * operation) is provided by the write lock - no new
	 * bd connection can come, because we need to get the
	 * bd_srvs_t from the volume, which we get from the list.
	 * (see hr_client_conn() in hr.c)
	 */
	if (open_cnt > 0) {
		fibril_rwlock_write_unlock(&hr_volumes_lock);
		return EBUSY;
	}

	list_remove(&vol->lvolumes);

	fibril_rwlock_write_unlock(&hr_volumes_lock);

	/* save metadata, but we don't care about states anymore */
	vol->meta_ops->save(vol, NO_STATE_CALLBACK);

	HR_NOTE("deactivating volume \"%s\"\n", vol->devname);

	hr_destroy_vol_struct(vol);

	errno_t rc = loc_service_unregister(hr_srv, svc_id);
	return rc;
}
285
/**
 * Open and validate the extent devices listed in a configuration.
 *
 * Opens a block session on every configured device, requires all
 * of them to share one block size, and sets the volume's usable
 * block count to the smallest device (truncated_blkno). All
 * hotspare slots start out HR_EXT_MISSING.
 *
 * On failure every already-opened extent is closed again.
 *
 * @param vol	Volume whose extents are initialized (assumed zeroed,
 *		so unused extent slots have svc_id == 0).
 * @param cfg	Configuration holding cfg->dev_no device service ids.
 *
 * @return EOK on success, EINVAL or a block_* error otherwise.
 */
errno_t hr_init_extents_from_cfg(hr_volume_t *vol, hr_config_t *cfg)
{
	HR_DEBUG("%s()", __func__);

	errno_t rc;
	uint64_t blkno, smallest_blkno = ~0ULL;
	size_t i, bsize;
	size_t last_bsize = 0;

	for (i = 0; i < cfg->dev_no; i++) {
		service_id_t svc_id = cfg->devs[i];
		if (svc_id == 0) {
			rc = EINVAL;
			goto error;
		}

		HR_DEBUG("%s(): block_init() on (%" PRIun ")\n", __func__,
		    svc_id);
		rc = block_init(svc_id);
		if (rc != EOK) {
			HR_DEBUG("%s(): initing (%" PRIun ") failed, "
			    "aborting\n", __func__, svc_id);
			goto error;
		}

		rc = block_get_nblocks(svc_id, &blkno);
		if (rc != EOK)
			goto error;

		rc = block_get_bsize(svc_id, &bsize);
		if (rc != EOK)
			goto error;

		/* all extents must agree on the block size */
		if (last_bsize != 0 && bsize != last_bsize) {
			HR_DEBUG("block sizes differ\n");
			rc = EINVAL;
			goto error;
		}

		vol->extents[i].svc_id = svc_id;
		vol->extents[i].state = HR_EXT_ONLINE;

		if (blkno < smallest_blkno)
			smallest_blkno = blkno;
		last_bsize = bsize;
	}

	vol->bsize = last_bsize;
	vol->extent_no = cfg->dev_no;
	vol->truncated_blkno = smallest_blkno;

	for (i = 0; i < HR_MAX_HOTSPARES; i++)
		vol->hotspares[i].state = HR_EXT_MISSING;

	return EOK;

error:
	/*
	 * Close everything opened so far; extents[i].svc_id is only set
	 * after a successful block_init(), and unused slots are zero.
	 */
	for (i = 0; i < HR_MAX_EXTENTS; i++) {
		if (vol->extents[i].svc_id != 0)
			block_fini(vol->extents[i].svc_id);
	}

	return rc;
}
350
351void hr_fini_devs(hr_volume_t *vol)
352{
353 HR_DEBUG("%s()", __func__);
354
355 size_t i;
356
357 for (i = 0; i < vol->extent_no; i++) {
358 if (vol->extents[i].svc_id != 0) {
359 HR_DEBUG("hr_fini_devs(): block_fini() on "
360 "(%" PRIun ")\n", vol->extents[i].svc_id);
361 block_fini(vol->extents[i].svc_id);
362 }
363 }
364
365 for (i = 0; i < vol->hotspare_no; i++) {
366 if (vol->hotspares[i].svc_id != 0) {
367 HR_DEBUG("hr_fini_devs(): block_fini() on "
368 "(%" PRIun ")\n",
369 vol->hotspares[i].svc_id);
370 block_fini(vol->hotspares[i].svc_id);
371 }
372 }
373}
374
375errno_t hr_register_volume(hr_volume_t *vol)
376{
377 HR_DEBUG("%s()", __func__);
378
379 errno_t rc;
380 service_id_t new_id;
381 category_id_t cat_id;
382 const char *devname = vol->devname;
383
384 rc = loc_service_register(hr_srv, devname, fallback_port_id, &new_id);
385 if (rc != EOK) {
386 HR_ERROR("unable to register device \"%s\": %s\n",
387 devname, str_error(rc));
388 return rc;
389 }
390
391 rc = loc_category_get_id("raid", &cat_id, IPC_FLAG_BLOCKING);
392 if (rc != EOK) {
393 HR_ERROR("failed resolving category \"raid\": %s\n",
394 str_error(rc));
395 goto error;
396 }
397
398 rc = loc_service_add_to_cat(hr_srv, new_id, cat_id);
399 if (rc != EOK) {
400 HR_ERROR("failed adding \"%s\" to category \"raid\": %s\n",
401 devname, str_error(rc));
402 goto error;
403 }
404
405 vol->svc_id = new_id;
406 return EOK;
407error:
408 rc = loc_service_unregister(hr_srv, new_id);
409 return rc;
410}
411
412errno_t hr_check_ba_range(hr_volume_t *vol, size_t cnt, uint64_t ba)
413{
414 if (ba + cnt > vol->data_blkno)
415 return ERANGE;
416 return EOK;
417}
418
419void hr_add_data_offset(hr_volume_t *vol, uint64_t *ba)
420{
421 *ba = *ba + vol->data_offset;
422}
423
424void hr_sub_data_offset(hr_volume_t *vol, uint64_t *ba)
425{
426 *ba = *ba - vol->data_offset;
427}
428
429void hr_update_ext_state(hr_volume_t *vol, size_t ext_idx, hr_ext_state_t s)
430{
431 if (vol->level != HR_LVL_0)
432 assert(fibril_rwlock_is_locked(&vol->extents_lock));
433
434 assert(fibril_rwlock_is_write_locked(&vol->states_lock));
435
436 assert(ext_idx < vol->extent_no);
437
438 hr_ext_state_t old = vol->extents[ext_idx].state;
439 HR_DEBUG("\"%s\": changing extent %zu state: %s -> %s\n",
440 vol->devname, ext_idx, hr_get_ext_state_str(old),
441 hr_get_ext_state_str(s));
442 vol->extents[ext_idx].state = s;
443}
444
445void hr_update_hotspare_state(hr_volume_t *vol, size_t hs_idx,
446 hr_ext_state_t s)
447{
448 assert(fibril_mutex_is_locked(&vol->hotspare_lock));
449
450 assert(hs_idx < vol->hotspare_no);
451
452 hr_ext_state_t old = vol->hotspares[hs_idx].state;
453 HR_DEBUG("\"%s\": changing hotspare %zu state: %s -> %s\n",
454 vol->devname, hs_idx, hr_get_ext_state_str(old),
455 hr_get_ext_state_str(s));
456 vol->hotspares[hs_idx].state = s;
457}
458
459void hr_update_vol_state(hr_volume_t *vol, hr_vol_state_t new)
460{
461 assert(fibril_rwlock_is_write_locked(&vol->states_lock));
462
463 HR_NOTE("\"%s\": volume state changed: %s -> %s\n", vol->devname,
464 hr_get_vol_state_str(vol->state), hr_get_vol_state_str(new));
465 vol->state = new;
466}
467
468void hr_update_ext_svc_id(hr_volume_t *vol, size_t ext_idx, service_id_t new)
469{
470 if (vol->level != HR_LVL_0)
471 assert(fibril_rwlock_is_write_locked(&vol->extents_lock));
472
473 assert(ext_idx < vol->extent_no);
474
475 service_id_t old = vol->extents[ext_idx].svc_id;
476 HR_DEBUG("\"%s\": changing extent no. %zu svc_id: (%" PRIun ") -> "
477 "(%" PRIun ")\n", vol->devname, ext_idx, old, new);
478 vol->extents[ext_idx].svc_id = new;
479}
480
481void hr_update_hotspare_svc_id(hr_volume_t *vol, size_t hs_idx,
482 service_id_t new)
483{
484 assert(fibril_mutex_is_locked(&vol->hotspare_lock));
485
486 assert(hs_idx < vol->hotspare_no);
487
488 service_id_t old = vol->hotspares[hs_idx].svc_id;
489 HR_DEBUG("\"%s\": changing hotspare no. %zu svc_id: (%" PRIun ") -> "
490 "(%" PRIun ")\n", vol->devname, hs_idx, old, new);
491 vol->hotspares[hs_idx].svc_id = new;
492}
493
494size_t hr_count_extents(hr_volume_t *vol, hr_ext_state_t state)
495{
496 if (vol->level != HR_LVL_0)
497 assert(fibril_rwlock_is_locked(&vol->extents_lock));
498 assert(fibril_rwlock_is_locked(&vol->states_lock));
499
500 size_t count = 0;
501 for (size_t i = 0; i < vol->extent_no; i++)
502 if (vol->extents[i].state == state)
503 count++;
504
505 return count;
506}
507
/**
 * Acquire a lock on the block range [ba, ba + cnt).
 *
 * Blocks until no other active (non-ignored) range lock on the volume
 * overlaps the requested range. Never fails; the allocation uses the
 * waiting malloc wrapper. Release with hr_range_lock_release().
 *
 * @param vol	Volume the range belongs to.
 * @param ba	First block address of the range.
 * @param cnt	Number of blocks in the range.
 *
 * @return The held range lock.
 */
hr_range_lock_t *hr_range_lock_acquire(hr_volume_t *vol, uint64_t ba,
    uint64_t cnt)
{
	hr_range_lock_t *rl = hr_malloc_waitok(sizeof(hr_range_lock_t));

	rl->vol = vol;
	rl->off = ba;
	rl->len = cnt;

	/* our own reference; dropped in hr_range_lock_release() */
	rl->pending = 1;
	rl->ignore = false;

	link_initialize(&rl->link);
	fibril_mutex_initialize(&rl->lock);

	/* held for the lifetime of the lock; waiters sleep on it */
	fibril_mutex_lock(&rl->lock);

again:
	HR_RL_LIST_LOCK(vol);
	list_foreach(vol->range_lock_list, link, hr_range_lock_t, rlp) {
		if (rlp->ignore)
			continue;
		if (hr_range_lock_overlap(rlp, rl)) {
			rlp->pending++;

			HR_RL_LIST_UNLOCK(vol);

			/* sleep until the overlapping holder releases */
			fibril_mutex_lock(&rlp->lock);

			HR_RL_LIST_LOCK(vol);

			rlp->pending--;

			/*
			 * when ignore is set, after HR_RL_LIST_UNLOCK(),
			 * no one new is going to be able to start sleeping
			 * on the ignored range lock, only already waiting
			 * IOs will come through here
			 */
			rlp->ignore = true;

			fibril_mutex_unlock(&rlp->lock);

			/* the last waiter out frees the released lock */
			if (rlp->pending == 0) {
				list_remove(&rlp->link);
				free(rlp);
			}

			HR_RL_LIST_UNLOCK(vol);
			/* the list may have changed while we slept */
			goto again;
		}
	}

	list_append(&rl->link, &vol->range_lock_list);

	HR_RL_LIST_UNLOCK(vol);
	return rl;
}
566
/**
 * Release a range lock acquired with hr_range_lock_acquire().
 *
 * Unlocking the embedded mutex lets one sleeping waiter proceed.
 * The structure is freed by whoever observes pending reaching zero -
 * either here, or the last waiter in the acquire path.
 *
 * @param rl	Range lock to release (NULL is a no-op).
 */
void hr_range_lock_release(hr_range_lock_t *rl)
{
	if (rl == NULL)
		return;

	HR_RL_LIST_LOCK(rl->vol);

	/* drop our own reference taken at acquire time */
	rl->pending--;

	fibril_mutex_unlock(&rl->lock);

	if (rl->pending == 0) {
		list_remove(&rl->link);
		free(rl);
	}

	HR_RL_LIST_UNLOCK(rl->vol);
}
585
586static bool hr_range_lock_overlap(hr_range_lock_t *rl1, hr_range_lock_t *rl2)
587{
588 uint64_t rl1_start = rl1->off;
589 uint64_t rl1_end = rl1->off + rl1->len - 1;
590 uint64_t rl2_start = rl2->off;
591 uint64_t rl2_end = rl2->off + rl2->len - 1;
592
593 /* one ends before the other starts */
594 if (rl1_end < rl2_start || rl2_end < rl1_start)
595 return false;
596
597 return true;
598}
599
600void hr_mark_vol_state_dirty(hr_volume_t *vol)
601{
602 atomic_store(&vol->state_dirty, true);
603}
604
605static errno_t hr_add_svc_linked_to_list(list_t *list, service_id_t svc_id,
606 bool inited, void *md)
607{
608 HR_DEBUG("%s()", __func__);
609
610 errno_t rc = EOK;
611 struct dev_list_member *to_add;
612
613 if (list == NULL)
614 return EINVAL;
615
616 to_add = malloc(sizeof(struct dev_list_member));
617 if (to_add == NULL) {
618 rc = ENOMEM;
619 goto error;
620 }
621
622 to_add->svc_id = svc_id;
623 to_add->inited = inited;
624
625 if (md != NULL) {
626 to_add->md = md;
627 to_add->md_present = true;
628 } else {
629 to_add->md_present = false;
630 }
631
632 list_append(&to_add->link, list);
633
634error:
635 return rc;
636}
637
638static void free_dev_list_member(struct dev_list_member *p)
639{
640 HR_DEBUG("%s()", __func__);
641
642 if (p->md_present)
643 free(p->md);
644 free(p);
645}
646
647static void free_svc_id_list(list_t *list)
648{
649 HR_DEBUG("%s()", __func__);
650
651 struct dev_list_member *dev_id;
652 while (!list_empty(list)) {
653 dev_id = list_pop(list, struct dev_list_member, link);
654
655 free_dev_list_member(dev_id);
656 }
657}
658
/**
 * Collect candidate block devices for metadata probing.
 *
 * Asks vbd for all disks; for labeled disks every partition is
 * added to @a list, for label-less disks (or bogus labels with no
 * partitions) the whole disk is added. Members are added with
 * inited = false (no block session open yet).
 *
 * On failure @a list is drained and freed.
 *
 * @param list	Output list of struct dev_list_member.
 *
 * @return EOK on success, a vbd_* or ENOMEM error otherwise.
 */
static errno_t hr_fill_disk_part_svcs_list(list_t *list)
{
	HR_DEBUG("%s()", __func__);

	errno_t rc;
	size_t disk_count;
	service_id_t *disk_svcs = NULL;
	vbd_t *vbd = NULL;

	rc = vbd_create(&vbd);
	if (rc != EOK)
		goto error;

	rc = vbd_get_disks(vbd, &disk_svcs, &disk_count);
	if (rc != EOK)
		goto error;

	for (size_t i = 0; i < disk_count; i++) {
		vbd_disk_info_t disk_info;
		rc = vbd_disk_info(vbd, disk_svcs[i], &disk_info);
		if (rc != EOK)
			goto error;

		if (disk_info.ltype != lt_none) {
			/* labeled disk: add each partition separately */
			size_t part_count;
			service_id_t *part_ids = NULL;
			rc = vbd_label_get_parts(vbd, disk_svcs[i], &part_ids,
			    &part_count);
			if (rc != EOK)
				goto error;

			for (size_t j = 0; j < part_count; j++) {
				vbd_part_info_t part_info;
				rc = vbd_part_get_info(vbd, part_ids[j],
				    &part_info);
				if (rc != EOK) {
					free(part_ids);
					goto error;
				}

				rc = hr_add_svc_linked_to_list(list,
				    part_info.svc_id, false, NULL);
				if (rc != EOK) {
					free(part_ids);
					goto error;
				}
			}

			free(part_ids);

			/*
			 * vbd can detect some bogus label type, but
			 * no partitions. In that case we handle the
			 * svc_id as a label-less disk.
			 *
			 * This can happen when creating an exfat fs
			 * in FreeBSD for example.
			 */
			if (part_count == 0)
				disk_info.ltype = lt_none;
		}

		if (disk_info.ltype == lt_none) {
			/* label-less disk: add the disk itself */
			rc = hr_add_svc_linked_to_list(list, disk_svcs[i],
			    false, NULL);
			if (rc != EOK)
				goto error;
		}
	}

	free(disk_svcs);
	vbd_destroy(vbd);
	return EOK;
error:
	free_svc_id_list(list);
	if (disk_svcs != NULL)
		free(disk_svcs);
	vbd_destroy(vbd);

	return rc;
}
740
/**
 * Open a block session for every not-yet-inited member of @a list.
 *
 * Devices already used as an extent of an active volume
 * (block_init() == EEXIST) are silently dropped from the list.
 * Members inited here get fini = true, marking that the session is
 * owned by the list and closed by block_fini_dev_list().
 *
 * @param list	List of struct dev_list_member.
 *
 * @return EOK on success, the first other block_init() error otherwise.
 */
static errno_t block_init_dev_list(list_t *list)
{
	HR_DEBUG("%s()", __func__);

	list_foreach_safe(*list, cur_link, next_link) {
		struct dev_list_member *iter;
		iter = list_get_instance(cur_link, struct dev_list_member,
		    link);

		if (iter->inited)
			continue;

		errno_t rc = block_init(iter->svc_id);

		/* already used as an extent of active volume */
		/* XXX: figure out how it is with hotspares too */
		if (rc == EEXIST) {
			list_remove(cur_link);
			free_dev_list_member(iter);
			continue;
		}

		if (rc != EOK)
			return rc;

		iter->inited = true;
		iter->fini = true;
	}

	return EOK;
}
772
773static void block_fini_dev_list(list_t *list)
774{
775 HR_DEBUG("%s()", __func__);
776
777 list_foreach(*list, link, struct dev_list_member, iter) {
778 if (iter->inited && iter->fini) {
779 block_fini(iter->svc_id);
780 iter->inited = false;
781 iter->fini = false;
782 }
783 }
784}
785
786static errno_t hr_util_get_matching_md_svcs_list(list_t *rlist, list_t *list,
787 service_id_t svc_id, hr_metadata_type_t type_main,
788 void *metadata_struct_main)
789{
790 HR_DEBUG("%s()", __func__);
791
792 errno_t rc = EOK;
793
794 hr_superblock_ops_t *meta_ops = hr_get_meta_type_ops(type_main);
795
796 list_foreach(*list, link, struct dev_list_member, iter) {
797 if (iter->svc_id == svc_id)
798 continue;
799
800 void *metadata_struct;
801 hr_metadata_type_t type;
802
803 rc = hr_find_metadata(iter->svc_id, &metadata_struct, &type);
804 if (rc == ENOFS)
805 continue;
806 if (rc != EOK)
807 goto error;
808
809 if (type != type_main) {
810 free(metadata_struct);
811 continue;
812 }
813
814 if (!meta_ops->compare_uuids(metadata_struct_main,
815 metadata_struct)) {
816 free(metadata_struct);
817 continue;
818 }
819
820 rc = hr_add_svc_linked_to_list(rlist, iter->svc_id, true,
821 metadata_struct);
822 if (rc != EOK)
823 goto error;
824 }
825
826 return EOK;
827error:
828 free_svc_id_list(rlist);
829 return rc;
830}
831
/**
 * Assemble and register one volume from a list of matching extents.
 *
 * The first member's metadata determines the level and device name;
 * the list must be non-empty and all members must carry metadata of
 * the same type/UUID (see hr_util_get_matching_md_svcs_list()).
 *
 * NOTE(review): the return value of init_meta2vol() is ignored -
 * confirm whether it can fail and should abort the assembly.
 *
 * @return EOK on success, error code otherwise.
 */
static errno_t hr_util_assemble_from_matching_list(list_t *list,
    hr_metadata_type_t type)
{
	HR_DEBUG("%s()", __func__);

	errno_t rc = EOK;

	hr_superblock_ops_t *meta_ops = hr_get_meta_type_ops(type);

	link_t *memb_l = list_first(list);
	struct dev_list_member *memb = list_get_instance(memb_l,
	    struct dev_list_member, link);

	hr_level_t level = meta_ops->get_level(memb->md);
	const char *devname = meta_ops->get_devname(memb->md);

	hr_volume_t *vol;
	rc = hr_create_vol_struct(&vol, level, devname, type);
	if (rc != EOK)
		return rc;

	/* populate the volume from the on-disk metadata */
	meta_ops->init_meta2vol(list, vol);

	rc = vol->hr_ops.create(vol);
	if (rc != EOK)
		goto error;

	rc = hr_register_volume(vol);
	if (rc != EOK)
		goto error;

	fibril_rwlock_write_lock(&hr_volumes_lock);
	list_append(&vol->lvolumes, &hr_volumes);
	fibril_rwlock_write_unlock(&hr_volumes_lock);

	HR_NOTE("assembled volume \"%s\"\n", vol->devname);

	return EOK;
error:
	hr_destroy_vol_struct(vol);
	return rc;
}
874
875static errno_t hr_fill_svcs_list_from_cfg(hr_config_t *cfg, list_t *list)
876{
877 HR_DEBUG("%s()", __func__);
878
879 errno_t rc = EOK;
880 for (size_t i = 0; i < cfg->dev_no; ++i) {
881 rc = hr_add_svc_linked_to_list(list, cfg->devs[i], false,
882 NULL);
883 if (rc != EOK)
884 goto error;
885 }
886
887 return EOK;
888error:
889 free_svc_id_list(list);
890 return rc;
891}
892
/**
 * Try to assemble volumes from on-disk metadata.
 *
 * Scans either the devices given in @a cfg, or (when @a cfg is NULL)
 * all disks/partitions found via vbd. Devices carrying valid metadata
 * are grouped by metadata type + UUID and each group is assembled
 * into a volume.
 *
 * @param cfg		Optional explicit device list; NULL scans all.
 * @param rassembled_cnt Optional output: number of volumes assembled.
 *
 * @return EOK on success (even if zero volumes assembled), error otherwise.
 */
errno_t hr_util_try_assemble(hr_config_t *cfg, size_t *rassembled_cnt)
{
	HR_DEBUG("%s()", __func__);

	/*
	 * scan partitions or disks:
	 *
	 * When we find a metadata block with valid
	 * magic, take UUID and try to find other matching
	 * UUIDs.
	 *
	 * We ignore extents that are a part of already
	 * active volumes. (even when the counter is lower
	 * on active volumes... XXX: use timestamp as initial counter value
	 * when assembling, or writing dirty metadata?)
	 */

	size_t asm_cnt = 0;
	errno_t rc;
	list_t dev_id_list;

	list_initialize(&dev_id_list);

	if (cfg == NULL)
		rc = hr_fill_disk_part_svcs_list(&dev_id_list);
	else
		rc = hr_fill_svcs_list_from_cfg(cfg, &dev_id_list);

	if (rc != EOK)
		goto error;

	rc = block_init_dev_list(&dev_id_list);
	if (rc != EOK)
		goto error;

	/*
	 * NOTE(review): the popped iter member itself is never freed on
	 * the successful path below (only its svc_id/metadata are copied
	 * into matching_svcs_list) - looks like a small per-volume leak;
	 * verify.
	 */
	struct dev_list_member *iter;
	while (!list_empty(&dev_id_list)) {
		iter = list_pop(&dev_id_list, struct dev_list_member, link);

		void *metadata_struct_main;
		hr_metadata_type_t type;

		rc = hr_find_metadata(iter->svc_id, &metadata_struct_main, &type);
		if (rc == ENOFS) {
			/* no metadata here; drop the device and carry on */
			block_fini(iter->svc_id);
			free_dev_list_member(iter);
			rc = EOK;
			continue;
		}

		if (rc != EOK)
			goto error;

		char *svc_name = NULL;
		rc = loc_service_get_name(iter->svc_id, &svc_name);
		if (rc != EOK)
			goto error;
		HR_DEBUG("found valid metadata on %s (type = %s), matching "
		    "other extents\n",
		    svc_name, hr_get_metadata_type_str(type));
		free(svc_name);

		list_t matching_svcs_list;
		list_initialize(&matching_svcs_list);

		rc = hr_util_get_matching_md_svcs_list(&matching_svcs_list,
		    &dev_id_list, iter->svc_id, type, metadata_struct_main);
		if (rc != EOK)
			goto error;

		/* add current iter to list as well */
		rc = hr_add_svc_linked_to_list(&matching_svcs_list,
		    iter->svc_id, true, metadata_struct_main);
		if (rc != EOK) {
			free_svc_id_list(&matching_svcs_list);
			goto error;
		}

		/* remove matching list members from dev_id_list */
		list_foreach(matching_svcs_list, link, struct dev_list_member,
		    iter2) {
			struct dev_list_member *to_remove;
			list_foreach_safe(dev_id_list, cur_link, next_link) {
				to_remove = list_get_instance(cur_link,
				    struct dev_list_member, link);
				if (to_remove->svc_id == iter2->svc_id) {
					list_remove(cur_link);
					free_dev_list_member(to_remove);
				}
			}
		}

		rc = hr_util_assemble_from_matching_list(&matching_svcs_list,
		    type);
		switch (rc) {
		case EOK:
			asm_cnt++;
			break;
		case ENOMEM:
			goto error;
		default:
			/* one failed group must not abort the whole scan */
			rc = EOK;
		}
		block_fini_dev_list(&matching_svcs_list);
		free_svc_id_list(&matching_svcs_list);
	}

error:
	if (rassembled_cnt != NULL)
		*rassembled_cnt = asm_cnt;

	block_fini_dev_list(&dev_id_list);
	free_svc_id_list(&dev_id_list);

	return rc;
}
1009
/**
 * Attach a hotspare device to a volume.
 *
 * Opens a block session on @a hotspare, verifies it is not already
 * used by the volume and that it is large enough, and appends it to
 * the hotspare array in HR_EXT_HOTSPARE state. Marks the volume
 * state dirty on success.
 *
 * @param vol		Volume to add the hotspare to.
 * @param hotspare	Service id of the hotspare device.
 *
 * @return EOK on success, ELIMIT/EEXIST/EINVAL or block_* error otherwise.
 */
errno_t hr_util_add_hotspare(hr_volume_t *vol, service_id_t hotspare)
{
	HR_DEBUG("%s()", __func__);

	errno_t rc = EOK;

	fibril_mutex_lock(&vol->hotspare_lock);

	if (vol->hotspare_no >= HR_MAX_HOTSPARES) {
		HR_ERROR("%s(): cannot add more hotspares "
		    "to \"%s\"\n", __func__, vol->devname);
		rc = ELIMIT;
		goto error;
	}

	for (size_t i = 0; i < vol->hotspare_no; i++) {
		if (vol->hotspares[i].svc_id == hotspare) {
			HR_ERROR("%s(): hotspare (%" PRIun ") already used in "
			    "%s\n", __func__, hotspare, vol->devname);
			rc = EEXIST;
			goto error;
		}
	}

	rc = block_init(hotspare);
	if (rc != EOK)
		goto error;

	uint64_t hs_blkno;
	rc = block_get_nblocks(hotspare, &hs_blkno);
	if (rc != EOK) {
		block_fini(hotspare);
		goto error;
	}

	/* the hotspare must be able to replace any (truncated) extent */
	if (hs_blkno < vol->truncated_blkno) {
		HR_ERROR("%s(): hotspare (%" PRIun ") doesn't have enough "
		    "blocks\n", __func__, hotspare);

		rc = EINVAL;
		block_fini(hotspare);
		goto error;
	}

	size_t hs_idx = vol->hotspare_no;

	vol->hotspare_no++;

	hr_update_hotspare_svc_id(vol, hs_idx, hotspare);
	hr_update_hotspare_state(vol, hs_idx, HR_EXT_HOTSPARE);

	hr_mark_vol_state_dirty(vol);
error:
	fibril_mutex_unlock(&vol->hotspare_lock);
	return rc;
}
1066
/**
 * XOR @a size bytes of @a src into @a dst (dst ^= src).
 *
 * Works word-at-a-time for the bulk of the buffer and finishes any
 * trailing bytes one by one (previously a size that was not a
 * multiple of 8 silently left the tail untouched).
 *
 * NOTE(review): the word loop assumes dst/src are suitably aligned
 * for uint64_t access, as the original did - confirm for all callers.
 *
 * @param dst	Destination buffer (updated in place).
 * @param src	Source buffer.
 * @param size	Number of bytes to XOR.
 */
void hr_raid5_xor(void *dst, const void *src, size_t size)
{
	size_t i;
	uint64_t *d = dst;
	const uint64_t *s = src;
	size_t words = size / sizeof(uint64_t);

	for (i = 0; i < words; i++)
		d[i] ^= s[i];

	/* byte-wise tail for sizes that are not a multiple of 8 */
	size_t done = words * sizeof(uint64_t);
	uint8_t *db = (uint8_t *)dst + done;
	const uint8_t *sb = (const uint8_t *)src + done;
	for (i = 0; i < size - done; i++)
		db[i] ^= sb[i];
}
1076
1077errno_t hr_sync_extents(hr_volume_t *vol)
1078{
1079 errno_t rc = EOK;
1080
1081 fibril_rwlock_read_lock(&vol->extents_lock);
1082 for (size_t e = 0; e < vol->extent_no; e++) {
1083 fibril_rwlock_read_lock(&vol->states_lock);
1084 hr_ext_state_t s = vol->extents[e].state;
1085 fibril_rwlock_read_unlock(&vol->states_lock);
1086
1087 service_id_t svc_id = vol->extents[e].svc_id;
1088
1089 if (s == HR_EXT_ONLINE || s == HR_EXT_REBUILD) {
1090 errno_t rc = hr_sync_cache(svc_id, 0, 0);
1091 if (rc != EOK && rc != ENOTSUP)
1092 vol->hr_ops.ext_state_cb(vol, e, rc);
1093 }
1094 }
1095 fibril_rwlock_read_unlock(&vol->extents_lock);
1096
1097 vol->hr_ops.vol_state_eval(vol);
1098
1099 fibril_rwlock_read_lock(&vol->states_lock);
1100 hr_vol_state_t s = vol->state;
1101 fibril_rwlock_read_unlock(&vol->states_lock);
1102
1103 if (s == HR_VOL_FAULTY)
1104 rc = EIO;
1105
1106 return rc;
1107}
1108
1109errno_t hr_init_rebuild(hr_volume_t *vol, size_t *rebuild_idx)
1110{
1111 errno_t rc = EOK;
1112 size_t bad = vol->extent_no;
1113
1114 if (vol->level == HR_LVL_0)
1115 return EINVAL;
1116
1117 fibril_rwlock_write_lock(&vol->extents_lock);
1118 fibril_rwlock_write_lock(&vol->states_lock);
1119 fibril_mutex_lock(&vol->hotspare_lock);
1120
1121 if (vol->state != HR_VOL_DEGRADED) {
1122 rc = EINVAL;
1123 goto error;
1124 }
1125
1126 size_t rebuild = vol->extent_no;
1127 for (size_t i = 0; i < vol->extent_no; i++) {
1128 if (vol->extents[i].state == HR_EXT_REBUILD) {
1129 rebuild = i;
1130 break;
1131 }
1132 }
1133
1134 if (rebuild < vol->extent_no) {
1135 bad = rebuild;
1136 goto init_rebuild;
1137 }
1138
1139 size_t invalid = vol->extent_no;
1140 for (size_t i = 0; i < vol->extent_no; i++) {
1141 if (vol->extents[i].state == HR_EXT_INVALID) {
1142 invalid = i;
1143 break;
1144 }
1145 }
1146
1147 if (invalid < vol->extent_no) {
1148 bad = invalid;
1149 goto init_rebuild;
1150 }
1151
1152 for (size_t i = 0; i < vol->extent_no; i++) {
1153 if (vol->extents[i].state != HR_EXT_ONLINE) {
1154 bad = i;
1155 break;
1156 }
1157 }
1158
1159 if (bad == vol->extent_no || vol->hotspare_no == 0) {
1160 rc = EINVAL;
1161 goto error;
1162 }
1163
1164 size_t hotspare_idx = vol->hotspare_no - 1;
1165
1166 hr_ext_state_t hs_state = vol->hotspares[hotspare_idx].state;
1167 if (hs_state != HR_EXT_HOTSPARE) {
1168 HR_ERROR("hr_raid1_rebuild(): invalid hotspare"
1169 "state \"%s\", aborting rebuild\n",
1170 hr_get_ext_state_str(hs_state));
1171 rc = EINVAL;
1172 goto error;
1173 }
1174
1175 rc = hr_swap_hs(vol, bad, hotspare_idx);
1176 if (rc != EOK) {
1177 HR_ERROR("hr_raid1_rebuild(): swapping "
1178 "hotspare failed, aborting rebuild\n");
1179 goto error;
1180 }
1181
1182 hr_extent_t *rebuild_ext = &vol->extents[bad];
1183
1184 HR_DEBUG("hr_raid1_rebuild(): starting REBUILD on extent no. %zu "
1185 "(%" PRIun ")\n", bad, rebuild_ext->svc_id);
1186
1187init_rebuild:
1188 hr_update_ext_state(vol, bad, HR_EXT_REBUILD);
1189 hr_update_vol_state(vol, HR_VOL_REBUILD);
1190
1191 *rebuild_idx = bad;
1192error:
1193 fibril_mutex_unlock(&vol->hotspare_lock);
1194 fibril_rwlock_write_unlock(&vol->states_lock);
1195 fibril_rwlock_write_unlock(&vol->extents_lock);
1196
1197 return rc;
1198}
1199
/**
 * Swap a hotspare into the place of a bad extent.
 *
 * The extent slot takes over the hotspare's service id (state
 * HR_EXT_HOTSPARE until the rebuild starts), the hotspare slot is
 * emptied, and the faulty device's block session is closed.
 *
 * Caller must hold extents_lock (write), states_lock (write) and
 * hotspare_lock - see hr_init_rebuild().
 *
 * @param vol	Volume to operate on.
 * @param bad	Index of the bad extent.
 * @param hs	Index of the hotspare to swap in.
 *
 * @return EOK (currently cannot fail).
 */
static errno_t hr_swap_hs(hr_volume_t *vol, size_t bad, size_t hs)
{
	HR_DEBUG("%s()", __func__);

	service_id_t faulty_svc_id = vol->extents[bad].svc_id;
	service_id_t hs_svc_id = vol->hotspares[hs].svc_id;

	hr_update_ext_svc_id(vol, bad, hs_svc_id);
	hr_update_ext_state(vol, bad, HR_EXT_HOTSPARE);

	hr_update_hotspare_svc_id(vol, hs, 0);
	hr_update_hotspare_state(vol, hs, HR_EXT_MISSING);

	vol->hotspare_no--;

	if (faulty_svc_id != 0)
		block_fini(faulty_svc_id);

	return EOK;
}
1220
1221/** @}
1222 */
Note: See TracBrowser for help on using the repository browser.