source: mainline/uspace/srv/bd/hr/util.c@ 78433bb

Last change on this file since 78433bb was 78433bb, checked in by Miroslav Cimerman <mc@…>, 7 weeks ago

hr: allocate in-memory metadata in format code

This will allow some formats to store in-memory
copy of metadata for each extent.

  • Property mode set to 100644
File size: 27.3 KB
Line 
1/*
2 * Copyright (c) 2025 Miroslav Cimerman
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup hr
30 * @{
31 */
32/**
33 * @file
34 */
35
36#include <adt/list.h>
37#include <block.h>
38#include <errno.h>
39#include <fibril_synch.h>
40#include <hr.h>
41#include <inttypes.h>
42#include <io/log.h>
43#include <loc.h>
44#include <mem.h>
45#include <stdatomic.h>
46#include <stdlib.h>
47#include <stdio.h>
48#include <str.h>
49#include <str_error.h>
50#include <vbd.h>
51
52#include "io.h"
53#include "superblock.h"
54#include "util.h"
55#include "var.h"
56
57static bool hr_range_lock_overlap(hr_range_lock_t *, hr_range_lock_t *);
58static errno_t hr_add_svc_linked_to_list(list_t *, service_id_t, bool, void *);
59static void free_dev_list_member(struct dev_list_member *);
60static void free_svc_id_list(list_t *);
61static errno_t hr_fill_disk_part_svcs_list(list_t *);
62static errno_t block_init_dev_list(list_t *);
63static void block_fini_dev_list(list_t *);
64static errno_t hr_util_get_matching_md_svcs_list(list_t *, list_t *,
65 service_id_t, hr_metadata_type_t, void *);
66static errno_t hr_util_assemble_from_matching_list(list_t *,
67 hr_metadata_type_t);
68static errno_t hr_fill_svcs_list_from_cfg(hr_config_t *, list_t *);
69static errno_t hr_swap_hs(hr_volume_t *, size_t, size_t);
70
71#define HR_RL_LIST_LOCK(vol) (fibril_mutex_lock(&(vol)->range_lock_list_lock))
72#define HR_RL_LIST_UNLOCK(vol) \
73 (fibril_mutex_unlock(&(vol)->range_lock_list_lock))
74
75extern loc_srv_t *hr_srv;
76extern list_t hr_volumes;
77extern fibril_rwlock_t hr_volumes_lock;
78
/*
 * malloc() wrapper that behaves like
 * FreeBSD malloc(9) with M_WAITOK flag.
 *
 * Return value is never NULL.
 */
void *hr_malloc_waitok(size_t size)
{
	void *ptr = malloc(size);

	/* Retry until the allocator succeeds. */
	while (ptr == NULL) {
		fibril_usleep(MSEC2USEC(250)); /* back off for 250ms */
		ptr = malloc(size);
	}

	return ptr;
}
93
/*
 * calloc() wrapper that never returns NULL,
 * sleeping until the allocation succeeds
 * (companion to hr_malloc_waitok()).
 */
void *hr_calloc_waitok(size_t nmemb, size_t size)
{
	void *ptr = calloc(nmemb, size);

	while (ptr == NULL) {
		fibril_usleep(MSEC2USEC(250)); /* back off for 250ms */
		ptr = calloc(nmemb, size);
	}

	return ptr;
}
102
103errno_t hr_create_vol_struct(hr_volume_t **rvol, hr_level_t level,
104 const char *devname, hr_metadata_type_t metadata_type)
105{
106 HR_DEBUG("%s()", __func__);
107
108 errno_t rc;
109
110 hr_volume_t *vol = calloc(1, sizeof(hr_volume_t));
111 if (vol == NULL)
112 return ENOMEM;
113
114 str_cpy(vol->devname, HR_DEVNAME_LEN, devname);
115 vol->level = level;
116
117 vol->meta_ops = get_type_ops(metadata_type);
118
119 uint8_t meta_flags = vol->meta_ops->get_flags();
120
121 switch (level) {
122 case HR_LVL_0:
123 vol->hr_ops.create = hr_raid0_create;
124 vol->hr_ops.init = hr_raid0_init;
125 vol->hr_ops.vol_state_eval = hr_raid0_vol_state_eval;
126 vol->hr_ops.ext_state_cb = hr_raid0_ext_state_cb;
127 break;
128 case HR_LVL_1:
129 vol->hr_ops.create = hr_raid1_create;
130 vol->hr_ops.init = hr_raid1_init;
131 vol->hr_ops.vol_state_eval = hr_raid1_vol_state_eval;
132 vol->hr_ops.ext_state_cb = hr_raid1_ext_state_cb;
133 if (meta_flags & HR_METADATA_HOTSPARE_SUPPORT)
134 vol->hr_ops.add_hotspare = hr_raid1_add_hotspare;
135 break;
136 case HR_LVL_4:
137 case HR_LVL_5:
138 vol->hr_ops.create = hr_raid5_create;
139 vol->hr_ops.init = hr_raid5_init;
140 vol->hr_ops.vol_state_eval = hr_raid5_vol_state_eval;
141 vol->hr_ops.ext_state_cb = hr_raid5_ext_state_cb;
142 if (meta_flags & HR_METADATA_HOTSPARE_SUPPORT)
143 vol->hr_ops.add_hotspare = hr_raid5_add_hotspare;
144 break;
145 default:
146 HR_DEBUG("unkown level: %d, aborting\n", vol->level);
147 rc = EINVAL;
148 goto error;
149 }
150
151 if (level == HR_LVL_4 || level == HR_LVL_5)
152 vol->fge = hr_fpool_create(16, 32, sizeof(hr_io_raid5_t));
153 else
154 vol->fge = hr_fpool_create(16, 32, sizeof(hr_io_t));
155
156 if (vol->fge == NULL) {
157 rc = ENOMEM;
158 goto error;
159 }
160
161 vol->state = HR_VOL_NONE;
162
163 fibril_mutex_initialize(&vol->md_lock);
164
165 fibril_rwlock_initialize(&vol->extents_lock);
166 fibril_rwlock_initialize(&vol->states_lock);
167
168 fibril_mutex_initialize(&vol->hotspare_lock);
169
170 list_initialize(&vol->range_lock_list);
171 fibril_mutex_initialize(&vol->range_lock_list_lock);
172
173 atomic_init(&vol->state_dirty, false);
174 atomic_init(&vol->first_write, false);
175 atomic_init(&vol->rebuild_blk, 0);
176 atomic_init(&vol->open_cnt, 0);
177
178 *rvol = vol;
179
180 return EOK;
181error:
182 free(vol);
183 return rc;
184}
185
/** Free a volume structure and all resources it owns.
 *
 * Destroys the fibril pool, closes all block devices
 * (extents and hotspares), frees the in-memory metadata
 * copy and the structure itself. NULL is a no-op.
 */
void hr_destroy_vol_struct(hr_volume_t *vol)
{
	HR_DEBUG("%s()", __func__);

	if (vol == NULL)
		return;

	hr_fpool_destroy(vol->fge);
	hr_fini_devs(vol);
	free(vol->in_mem_md);
	free(vol);
}
198
199errno_t hr_get_volume_svcs(size_t *rcnt, service_id_t **rsvcs)
200{
201 size_t i;
202 service_id_t *vol_svcs;
203
204 if (rcnt == NULL || rsvcs == NULL)
205 return EINVAL;
206
207 fibril_rwlock_read_lock(&hr_volumes_lock);
208
209 size_t vol_cnt = list_count(&hr_volumes);
210 vol_svcs = malloc(vol_cnt * sizeof(service_id_t));
211 if (vol_svcs == NULL) {
212 fibril_rwlock_read_unlock(&hr_volumes_lock);
213 return ENOMEM;
214 }
215
216 i = 0;
217 list_foreach(hr_volumes, lvolumes, hr_volume_t, iter)
218 vol_svcs[i++] = iter->svc_id;
219
220 fibril_rwlock_read_unlock(&hr_volumes_lock);
221
222 *rcnt = vol_cnt;
223 *rsvcs = vol_svcs;
224
225 return EOK;
226}
227
228hr_volume_t *hr_get_volume(service_id_t svc_id)
229{
230 HR_DEBUG("%s()", __func__);
231
232 hr_volume_t *rvol = NULL;
233
234 fibril_rwlock_read_lock(&hr_volumes_lock);
235 list_foreach(hr_volumes, lvolumes, hr_volume_t, iter) {
236 if (iter->svc_id == svc_id) {
237 rvol = iter;
238 break;
239 }
240 }
241 fibril_rwlock_read_unlock(&hr_volumes_lock);
242
243 return rvol;
244}
245
/** Deactivate and unregister a volume.
 *
 * Refuses with EBUSY while any client still has the volume open.
 * On success flushes metadata, destroys the volume structure and
 * unregisters the location service.
 *
 * @param svc_id  Service id of the volume to remove.
 *
 * @return EOK on success, ENOENT / EBUSY or unregister error.
 */
errno_t hr_remove_volume(service_id_t svc_id)
{
	HR_DEBUG("%s()", __func__);

	hr_volume_t *vol = hr_get_volume(svc_id);
	if (vol == NULL)
		return ENOENT;

	fibril_rwlock_write_lock(&hr_volumes_lock);

	int open_cnt = atomic_load_explicit(&vol->open_cnt,
	    memory_order_relaxed);

	/*
	 * The atomicity of this if condition (and this whole
	 * operation) is provided by the write lock - no new
	 * bd connection can come, because we need to get the
	 * bd_srvs_t from the volume, which we get from the list.
	 * (see hr_client_conn() in hr.c)
	 */
	if (open_cnt > 0) {
		fibril_rwlock_write_unlock(&hr_volumes_lock);
		return EBUSY;
	}

	list_remove(&vol->lvolumes);

	fibril_rwlock_write_unlock(&hr_volumes_lock);

	/* save metadata, but we don't care about states anymore */
	vol->meta_ops->save(vol, NO_STATE_CALLBACK);

	HR_NOTE("deactivating volume \"%s\"\n", vol->devname);

	hr_destroy_vol_struct(vol);

	errno_t rc = loc_service_unregister(hr_srv, svc_id);
	return rc;
}
285
286errno_t hr_init_extents_from_cfg(hr_volume_t *vol, hr_config_t *cfg)
287{
288 HR_DEBUG("%s()", __func__);
289
290 errno_t rc;
291 uint64_t blkno, smallest_blkno = ~0ULL;
292 size_t i, bsize;
293 size_t last_bsize = 0;
294
295 for (i = 0; i < cfg->dev_no; i++) {
296 service_id_t svc_id = cfg->devs[i];
297 if (svc_id == 0) {
298 rc = EINVAL;
299 goto error;
300 }
301
302 HR_DEBUG("%s(): block_init() on (%" PRIun ")\n", __func__,
303 svc_id);
304 rc = block_init(svc_id);
305 if (rc != EOK) {
306 HR_DEBUG("%s(): initing (%" PRIun ") failed, "
307 "aborting\n", __func__, svc_id);
308 goto error;
309 }
310
311 rc = block_get_nblocks(svc_id, &blkno);
312 if (rc != EOK)
313 goto error;
314
315 rc = block_get_bsize(svc_id, &bsize);
316 if (rc != EOK)
317 goto error;
318
319 if (last_bsize != 0 && bsize != last_bsize) {
320 HR_DEBUG("block sizes differ\n");
321 rc = EINVAL;
322 goto error;
323 }
324
325 vol->extents[i].svc_id = svc_id;
326 vol->extents[i].state = HR_EXT_ONLINE;
327
328 if (blkno < smallest_blkno)
329 smallest_blkno = blkno;
330 last_bsize = bsize;
331 }
332
333 vol->bsize = last_bsize;
334 vol->extent_no = cfg->dev_no;
335 vol->truncated_blkno = smallest_blkno;
336
337 for (i = 0; i < HR_MAX_HOTSPARES; i++)
338 vol->hotspares[i].state = HR_EXT_MISSING;
339
340 return EOK;
341
342error:
343 for (i = 0; i < HR_MAX_EXTENTS; i++) {
344 if (vol->extents[i].svc_id != 0)
345 block_fini(vol->extents[i].svc_id);
346 }
347
348 return rc;
349}
350
351void hr_fini_devs(hr_volume_t *vol)
352{
353 HR_DEBUG("%s()", __func__);
354
355 size_t i;
356
357 for (i = 0; i < vol->extent_no; i++) {
358 if (vol->extents[i].svc_id != 0) {
359 HR_DEBUG("hr_fini_devs(): block_fini() on "
360 "(%" PRIun ")\n", vol->extents[i].svc_id);
361 block_fini(vol->extents[i].svc_id);
362 }
363 }
364
365 for (i = 0; i < vol->hotspare_no; i++) {
366 if (vol->hotspares[i].svc_id != 0) {
367 HR_DEBUG("hr_fini_devs(): block_fini() on "
368 "(%" PRIun ")\n",
369 vol->hotspares[i].svc_id);
370 block_fini(vol->hotspares[i].svc_id);
371 }
372 }
373}
374
375errno_t hr_register_volume(hr_volume_t *vol)
376{
377 HR_DEBUG("%s()", __func__);
378
379 errno_t rc;
380 service_id_t new_id;
381 category_id_t cat_id;
382 const char *devname = vol->devname;
383
384 rc = loc_service_register(hr_srv, devname, fallback_port_id, &new_id);
385 if (rc != EOK) {
386 HR_ERROR("unable to register device \"%s\": %s\n",
387 devname, str_error(rc));
388 return rc;
389 }
390
391 rc = loc_category_get_id("raid", &cat_id, IPC_FLAG_BLOCKING);
392 if (rc != EOK) {
393 HR_ERROR("failed resolving category \"raid\": %s\n",
394 str_error(rc));
395 goto error;
396 }
397
398 rc = loc_service_add_to_cat(hr_srv, new_id, cat_id);
399 if (rc != EOK) {
400 HR_ERROR("failed adding \"%s\" to category \"raid\": %s\n",
401 devname, str_error(rc));
402 goto error;
403 }
404
405 vol->svc_id = new_id;
406 return EOK;
407error:
408 rc = loc_service_unregister(hr_srv, new_id);
409 return rc;
410}
411
412errno_t hr_check_ba_range(hr_volume_t *vol, size_t cnt, uint64_t ba)
413{
414 if (ba + cnt > vol->data_blkno)
415 return ERANGE;
416 return EOK;
417}
418
419void hr_add_data_offset(hr_volume_t *vol, uint64_t *ba)
420{
421 *ba = *ba + vol->data_offset;
422}
423
424void hr_sub_data_offset(hr_volume_t *vol, uint64_t *ba)
425{
426 *ba = *ba - vol->data_offset;
427}
428
/** Set the state of one extent, logging the transition.
 *
 * Caller must write-hold states_lock and (except for RAID 0,
 * which has no extents_lock users) hold extents_lock.
 */
void hr_update_ext_state(hr_volume_t *vol, size_t ext_idx, hr_ext_state_t s)
{
	if (vol->level != HR_LVL_0)
		assert(fibril_rwlock_is_locked(&vol->extents_lock));

	assert(fibril_rwlock_is_write_locked(&vol->states_lock));

	assert(ext_idx < vol->extent_no);

	hr_ext_state_t old = vol->extents[ext_idx].state;
	HR_DEBUG("\"%s\": changing extent %zu state: %s -> %s\n",
	    vol->devname, ext_idx, hr_get_ext_state_str(old),
	    hr_get_ext_state_str(s));
	vol->extents[ext_idx].state = s;
}
444
/** Set the state of one hotspare, logging the transition.
 *
 * Caller must hold hotspare_lock.
 */
void hr_update_hotspare_state(hr_volume_t *vol, size_t hs_idx,
    hr_ext_state_t s)
{
	assert(fibril_mutex_is_locked(&vol->hotspare_lock));

	assert(hs_idx < vol->hotspare_no);

	hr_ext_state_t old = vol->hotspares[hs_idx].state;
	HR_DEBUG("\"%s\": changing hotspare %zu state: %s -> %s\n",
	    vol->devname, hs_idx, hr_get_ext_state_str(old),
	    hr_get_ext_state_str(s));
	vol->hotspares[hs_idx].state = s;
}
458
/** Set the volume state, logging the transition.
 *
 * Caller must write-hold states_lock.
 */
void hr_update_vol_state(hr_volume_t *vol, hr_vol_state_t new)
{
	assert(fibril_rwlock_is_write_locked(&vol->states_lock));

	HR_NOTE("\"%s\": volume state changed: %s -> %s\n", vol->devname,
	    hr_get_vol_state_str(vol->state), hr_get_vol_state_str(new));
	vol->state = new;
}
467
/** Replace the service id of one extent, logging the change.
 *
 * Caller must write-hold extents_lock (except for RAID 0).
 */
void hr_update_ext_svc_id(hr_volume_t *vol, size_t ext_idx, service_id_t new)
{
	if (vol->level != HR_LVL_0)
		assert(fibril_rwlock_is_write_locked(&vol->extents_lock));

	assert(ext_idx < vol->extent_no);

	service_id_t old = vol->extents[ext_idx].svc_id;
	HR_DEBUG("\"%s\": changing extent no. %zu svc_id: (%" PRIun ") -> "
	    "(%" PRIun ")\n", vol->devname, ext_idx, old, new);
	vol->extents[ext_idx].svc_id = new;
}
480
/** Replace the service id of one hotspare, logging the change.
 *
 * Caller must hold hotspare_lock.
 */
void hr_update_hotspare_svc_id(hr_volume_t *vol, size_t hs_idx,
    service_id_t new)
{
	assert(fibril_mutex_is_locked(&vol->hotspare_lock));

	assert(hs_idx < vol->hotspare_no);

	service_id_t old = vol->hotspares[hs_idx].svc_id;
	HR_DEBUG("\"%s\": changing hotspare no. %zu svc_id: (%" PRIun ") -> "
	    "(%" PRIun ")\n", vol->devname, hs_idx, old, new);
	vol->hotspares[hs_idx].svc_id = new;
}
493
494size_t hr_count_extents(hr_volume_t *vol, hr_ext_state_t state)
495{
496 if (vol->level != HR_LVL_0)
497 assert(fibril_rwlock_is_locked(&vol->extents_lock));
498 assert(fibril_rwlock_is_locked(&vol->states_lock));
499
500 size_t count = 0;
501 for (size_t i = 0; i < vol->extent_no; i++)
502 if (vol->extents[i].state == state)
503 count++;
504
505 return count;
506}
507
/** Acquire an exclusive lock on a block range of a volume.
 *
 * Blocks until no other range lock overlaps [ba, ba + cnt).
 * The returned lock is released with hr_range_lock_release().
 * Never fails - allocation uses the waitok malloc wrapper.
 *
 * @param vol  Volume the range belongs to.
 * @param ba   First block of the range.
 * @param cnt  Number of blocks in the range.
 *
 * @return The acquired range lock (never NULL).
 */
hr_range_lock_t *hr_range_lock_acquire(hr_volume_t *vol, uint64_t ba,
    uint64_t cnt)
{
	hr_range_lock_t *rl = hr_malloc_waitok(sizeof(hr_range_lock_t));

	rl->vol = vol;
	rl->off = ba;
	rl->len = cnt;

	/* pending counts the owner plus all waiters sleeping on rl->lock */
	rl->pending = 1;
	rl->ignore = false;

	link_initialize(&rl->link);
	fibril_mutex_initialize(&rl->lock);

	/* hold our own mutex so waiters block until we release */
	fibril_mutex_lock(&rl->lock);

again:
	HR_RL_LIST_LOCK(vol);
	list_foreach(vol->range_lock_list, link, hr_range_lock_t, rlp) {
		if (rlp->ignore)
			continue;
		if (hr_range_lock_overlap(rlp, rl)) {
			rlp->pending++;

			HR_RL_LIST_UNLOCK(vol);

			/* sleep until the overlapping lock is released */
			fibril_mutex_lock(&rlp->lock);

			HR_RL_LIST_LOCK(vol);

			rlp->pending--;

			/*
			 * when ignore is set, after HR_RL_LIST_UNLOCK(),
			 * noone new is going to be able to start sleeping
			 * on the ignored range lock, only already waiting
			 * IOs will come through here
			 */
			rlp->ignore = true;

			fibril_mutex_unlock(&rlp->lock);

			/* last waiter out frees the stale lock */
			if (rlp->pending == 0) {
				list_remove(&rlp->link);
				free(rlp);
			}

			HR_RL_LIST_UNLOCK(vol);
			/* list changed while we slept - rescan from start */
			goto again;
		}
	}

	list_append(&rl->link, &vol->range_lock_list);

	HR_RL_LIST_UNLOCK(vol);
	return rl;
}
566
/** Release a range lock acquired with hr_range_lock_acquire().
 *
 * Wakes up waiters sleeping on the lock's mutex; the lock itself
 * is freed by whichever fibril drops pending to zero (here, or
 * the last waiter inside hr_range_lock_acquire()). NULL is a no-op.
 */
void hr_range_lock_release(hr_range_lock_t *rl)
{
	if (rl == NULL)
		return;

	HR_RL_LIST_LOCK(rl->vol);

	rl->pending--;

	/* let the first waiter (if any) proceed */
	fibril_mutex_unlock(&rl->lock);

	if (rl->pending == 0) {
		list_remove(&rl->link);
		free(rl);
	}

	HR_RL_LIST_UNLOCK(rl->vol);
}
585
586static bool hr_range_lock_overlap(hr_range_lock_t *rl1, hr_range_lock_t *rl2)
587{
588 uint64_t rl1_start = rl1->off;
589 uint64_t rl1_end = rl1->off + rl1->len - 1;
590 uint64_t rl2_start = rl2->off;
591 uint64_t rl2_end = rl2->off + rl2->len - 1;
592
593 /* one ends before the other starts */
594 if (rl1_end < rl2_start || rl2_end < rl1_start)
595 return false;
596
597 return true;
598}
599
/** Flag the volume state as dirty, to be flushed to metadata later. */
void hr_mark_vol_state_dirty(hr_volume_t *vol)
{
	atomic_store(&vol->state_dirty, true);
}
604
605static errno_t hr_add_svc_linked_to_list(list_t *list, service_id_t svc_id,
606 bool inited, void *md)
607{
608 HR_DEBUG("%s()", __func__);
609
610 errno_t rc = EOK;
611 struct dev_list_member *to_add;
612
613 if (list == NULL)
614 return EINVAL;
615
616 to_add = malloc(sizeof(struct dev_list_member));
617 if (to_add == NULL) {
618 rc = ENOMEM;
619 goto error;
620 }
621
622 to_add->svc_id = svc_id;
623 to_add->inited = inited;
624
625 if (md != NULL) {
626 to_add->md = md;
627 to_add->md_present = true;
628 } else {
629 to_add->md_present = false;
630 }
631
632 list_append(&to_add->link, list);
633
634error:
635 return rc;
636}
637
/** Free a device list member and the metadata blob it owns (if any). */
static void free_dev_list_member(struct dev_list_member *p)
{
	HR_DEBUG("%s()", __func__);

	if (p->md_present)
		free(p->md);
	free(p);
}
646
647static void free_svc_id_list(list_t *list)
648{
649 HR_DEBUG("%s()", __func__);
650
651 struct dev_list_member *dev_id;
652 while (!list_empty(list)) {
653 dev_id = list_pop(list, struct dev_list_member, link);
654
655 free_dev_list_member(dev_id);
656 }
657}
658
/** Collect service ids of all disks and partitions in the system.
 *
 * Labeled disks contribute their partitions; label-less disks (and
 * disks whose label yields no partitions) are added as whole devices.
 * On failure the partially filled list is drained.
 *
 * @param list  Empty list to fill with dev_list_member entries.
 *
 * @return EOK on success, vbd/allocation error otherwise.
 */
static errno_t hr_fill_disk_part_svcs_list(list_t *list)
{
	HR_DEBUG("%s()", __func__);

	errno_t rc;
	size_t disk_count;
	service_id_t *disk_svcs = NULL;
	vbd_t *vbd = NULL;

	rc = vbd_create(&vbd);
	if (rc != EOK)
		goto error;

	rc = vbd_get_disks(vbd, &disk_svcs, &disk_count);
	if (rc != EOK)
		goto error;

	for (size_t i = 0; i < disk_count; i++) {
		vbd_disk_info_t disk_info;
		rc = vbd_disk_info(vbd, disk_svcs[i], &disk_info);
		if (rc != EOK)
			goto error;

		if (disk_info.ltype != lt_none) {
			/* labeled disk: add each partition separately */
			size_t part_count;
			service_id_t *part_ids = NULL;
			rc = vbd_label_get_parts(vbd, disk_svcs[i], &part_ids,
			    &part_count);
			if (rc != EOK)
				goto error;

			for (size_t j = 0; j < part_count; j++) {
				vbd_part_info_t part_info;
				rc = vbd_part_get_info(vbd, part_ids[j],
				    &part_info);
				if (rc != EOK) {
					free(part_ids);
					goto error;
				}

				rc = hr_add_svc_linked_to_list(list,
				    part_info.svc_id, false, NULL);
				if (rc != EOK) {
					free(part_ids);
					goto error;
				}
			}

			free(part_ids);

			/*
			 * vbd can detect some bogus label type, but
			 * no partitions. In that case we handle the
			 * svc_id as a label-less disk.
			 *
			 * This can happen when creating an exfat fs
			 * in FreeBSD for example.
			 */
			if (part_count == 0)
				disk_info.ltype = lt_none;
		}

		if (disk_info.ltype == lt_none) {
			/* label-less disk: add the whole device */
			rc = hr_add_svc_linked_to_list(list, disk_svcs[i],
			    false, NULL);
			if (rc != EOK)
				goto error;
		}
	}

	free(disk_svcs);
	vbd_destroy(vbd);
	return EOK;
error:
	free_svc_id_list(list);
	if (disk_svcs != NULL)
		free(disk_svcs);
	vbd_destroy(vbd);

	return rc;
}
740
/** Open a block session for every not-yet-inited member of a list.
 *
 * Members whose device is already claimed by an active volume
 * (block_init() returns EEXIST) are silently dropped from the
 * list. Members inited here get fini = true so that
 * block_fini_dev_list() closes only sessions this code opened.
 *
 * @return EOK on success, first block_init() error otherwise.
 */
static errno_t block_init_dev_list(list_t *list)
{
	HR_DEBUG("%s()", __func__);

	list_foreach_safe(*list, cur_link, next_link) {
		struct dev_list_member *iter;
		iter = list_get_instance(cur_link, struct dev_list_member,
		    link);

		if (iter->inited)
			continue;

		errno_t rc = block_init(iter->svc_id);

		/* already used as an extent of active volume */
		/* XXX: figure out how it is with hotspares too */
		if (rc == EEXIST) {
			list_remove(cur_link);
			free_dev_list_member(iter);
			continue;
		}

		if (rc != EOK)
			return rc;

		iter->inited = true;
		iter->fini = true;
	}

	return EOK;
}
772
773static void block_fini_dev_list(list_t *list)
774{
775 HR_DEBUG("%s()", __func__);
776
777 list_foreach(*list, link, struct dev_list_member, iter) {
778 if (iter->inited && iter->fini) {
779 block_fini(iter->svc_id);
780 iter->inited = false;
781 iter->fini = false;
782 }
783 }
784}
785
/** Find devices whose metadata matches a reference extent.
 *
 * Scans @p list for devices carrying metadata of the same type and
 * UUID as @p metadata_struct_main and appends them (with their parsed
 * metadata, ownership transferred) to @p rlist. The reference device
 * itself and devices with no/foreign metadata are skipped.
 *
 * @return EOK on success, error code otherwise (rlist is drained).
 */
static errno_t hr_util_get_matching_md_svcs_list(list_t *rlist, list_t *list,
    service_id_t svc_id, hr_metadata_type_t type_main,
    void *metadata_struct_main)
{
	HR_DEBUG("%s()", __func__);

	errno_t rc = EOK;

	hr_superblock_ops_t *meta_ops = get_type_ops(type_main);

	list_foreach(*list, link, struct dev_list_member, iter) {
		/* skip the reference device itself */
		if (iter->svc_id == svc_id)
			continue;

		void *metadata_struct;
		hr_metadata_type_t type;

		rc = find_metadata(iter->svc_id, &metadata_struct, &type);
		if (rc == ENOFS)
			continue; /* no metadata here - not an error */
		if (rc != EOK)
			goto error;

		if (type != type_main) {
			free(metadata_struct);
			continue;
		}

		if (!meta_ops->compare_uuids(metadata_struct_main,
		    metadata_struct)) {
			free(metadata_struct);
			continue;
		}

		rc = hr_add_svc_linked_to_list(rlist, iter->svc_id, true,
		    metadata_struct);
		if (rc != EOK)
			goto error;
	}

	return EOK;
error:
	free_svc_id_list(rlist);
	return rc;
}
831
832static errno_t hr_util_assemble_from_matching_list(list_t *list,
833 hr_metadata_type_t type)
834{
835 HR_DEBUG("%s()", __func__);
836
837 errno_t rc = EOK;
838
839 hr_superblock_ops_t *meta_ops = get_type_ops(type);
840
841 link_t *memb_l = list_first(list);
842 struct dev_list_member *memb = list_get_instance(memb_l,
843 struct dev_list_member, link);
844
845 hr_level_t level = meta_ops->get_level(memb->md);
846 const char *devname = meta_ops->get_devname(memb->md);
847
848 hr_volume_t *vol;
849 rc = hr_create_vol_struct(&vol, level, devname, type);
850 if (rc != EOK)
851 return rc;
852
853 meta_ops->init_meta2vol(list, vol);
854
855 rc = vol->hr_ops.create(vol);
856 if (rc != EOK)
857 goto error;
858
859 rc = hr_register_volume(vol);
860 if (rc != EOK)
861 goto error;
862
863 fibril_rwlock_write_lock(&hr_volumes_lock);
864 list_append(&vol->lvolumes, &hr_volumes);
865 fibril_rwlock_write_unlock(&hr_volumes_lock);
866
867 HR_NOTE("assembled volume \"%s\"\n", vol->devname);
868
869 return EOK;
870error:
871 hr_destroy_vol_struct(vol);
872 return rc;
873}
874
875static errno_t hr_fill_svcs_list_from_cfg(hr_config_t *cfg, list_t *list)
876{
877 HR_DEBUG("%s()", __func__);
878
879 errno_t rc = EOK;
880 for (size_t i = 0; i < cfg->dev_no; ++i) {
881 rc = hr_add_svc_linked_to_list(list, cfg->devs[i], false,
882 NULL);
883 if (rc != EOK)
884 goto error;
885 }
886
887 return EOK;
888error:
889 free_svc_id_list(list);
890 return rc;
891}
892
/** Try to automatically assemble volumes from on-disk metadata.
 *
 * Scans the devices from @p cfg (or, when cfg is NULL, all disks
 * and partitions in the system), groups devices with matching
 * metadata UUIDs and assembles one volume per group.
 *
 * @param cfg             Optional explicit device set, or NULL to scan.
 * @param rassembled_cnt  Optional place to store how many volumes
 *                        were assembled (always written when non-NULL).
 *
 * @return EOK on success, error code otherwise.
 */
errno_t hr_util_try_assemble(hr_config_t *cfg, size_t *rassembled_cnt)
{
	HR_DEBUG("%s()", __func__);

	/*
	 * scan partitions or disks:
	 *
	 * When we find a metadata block with valid
	 * magic, take UUID and try to find other matching
	 * UUIDs.
	 *
	 * We ignore extents that are a part of already
	 * active volumes. (even when the counter is lower
	 * on active volumes... XXX: use timestamp as initial counter value
	 * when assembling, or writing dirty metadata?)
	 */

	size_t asm_cnt = 0;
	errno_t rc;
	list_t dev_id_list;

	list_initialize(&dev_id_list);

	if (cfg == NULL)
		rc = hr_fill_disk_part_svcs_list(&dev_id_list);
	else
		rc = hr_fill_svcs_list_from_cfg(cfg, &dev_id_list);

	if (rc != EOK)
		goto error;

	rc = block_init_dev_list(&dev_id_list);
	if (rc != EOK)
		goto error;

	struct dev_list_member *iter;
	while (!list_empty(&dev_id_list)) {
		/* pop a candidate and use it as the reference extent */
		iter = list_pop(&dev_id_list, struct dev_list_member, link);

		void *metadata_struct_main;
		hr_metadata_type_t type;

		rc = find_metadata(iter->svc_id, &metadata_struct_main, &type);
		if (rc == ENOFS) {
			/* no metadata on this device - drop it and go on */
			block_fini(iter->svc_id);
			free_dev_list_member(iter);
			rc = EOK;
			continue;
		}

		if (rc != EOK)
			goto error;

		char *svc_name = NULL;
		rc = loc_service_get_name(iter->svc_id, &svc_name);
		if (rc != EOK)
			goto error;
		HR_DEBUG("found valid metadata on %s (type = %s), matching "
		    "other extents\n",
		    svc_name, hr_get_metadata_type_str(type));
		free(svc_name);

		list_t matching_svcs_list;
		list_initialize(&matching_svcs_list);

		rc = hr_util_get_matching_md_svcs_list(&matching_svcs_list,
		    &dev_id_list, iter->svc_id, type, metadata_struct_main);
		if (rc != EOK)
			goto error;

		/* add current iter to list as well */
		rc = hr_add_svc_linked_to_list(&matching_svcs_list,
		    iter->svc_id, true, metadata_struct_main);
		if (rc != EOK) {
			free_svc_id_list(&matching_svcs_list);
			goto error;
		}

		/* remove matching list members from dev_id_list */
		list_foreach(matching_svcs_list, link, struct dev_list_member,
		    iter2) {
			struct dev_list_member *to_remove;
			list_foreach_safe(dev_id_list, cur_link, next_link) {
				to_remove = list_get_instance(cur_link,
				    struct dev_list_member, link);
				if (to_remove->svc_id == iter2->svc_id) {
					list_remove(cur_link);
					free_dev_list_member(to_remove);
				}
			}
		}

		rc = hr_util_assemble_from_matching_list(&matching_svcs_list,
		    type);
		switch (rc) {
		case EOK:
			asm_cnt++;
			break;
		case ENOMEM:
			/* out of memory - no point trying further groups */
			goto error;
		default:
			/* this group failed; keep trying the rest */
			rc = EOK;
		}
		block_fini_dev_list(&matching_svcs_list);
		free_svc_id_list(&matching_svcs_list);
	}

error:
	if (rassembled_cnt != NULL)
		*rassembled_cnt = asm_cnt;

	block_fini_dev_list(&dev_id_list);
	free_svc_id_list(&dev_id_list);

	return rc;
}
1009
/** Attach a device to a volume as a hotspare.
 *
 * Validates capacity (must cover the volume's truncated size),
 * rejects duplicates, opens a block session and records the
 * device in the hotspare table.
 *
 * @param vol       Volume to add the hotspare to.
 * @param hotspare  Service id of the hotspare device.
 *
 * @return EOK on success, ELIMIT / EEXIST / EINVAL or block error.
 */
errno_t hr_util_add_hotspare(hr_volume_t *vol, service_id_t hotspare)
{
	HR_DEBUG("%s()", __func__);

	errno_t rc = EOK;

	fibril_mutex_lock(&vol->hotspare_lock);

	if (vol->hotspare_no >= HR_MAX_HOTSPARES) {
		HR_ERROR("%s(): cannot add more hotspares "
		    "to \"%s\"\n", __func__, vol->devname);
		rc = ELIMIT;
		goto error;
	}

	/* reject a device that is already a hotspare of this volume */
	for (size_t i = 0; i < vol->hotspare_no; i++) {
		if (vol->hotspares[i].svc_id == hotspare) {
			HR_ERROR("%s(): hotspare (%" PRIun ") already used in "
			    "%s\n", __func__, hotspare, vol->devname);
			rc = EEXIST;
			goto error;
		}
	}

	rc = block_init(hotspare);
	if (rc != EOK)
		goto error;

	uint64_t hs_blkno;
	rc = block_get_nblocks(hotspare, &hs_blkno);
	if (rc != EOK) {
		block_fini(hotspare);
		goto error;
	}

	/* the hotspare must be able to replace any extent */
	if (hs_blkno < vol->truncated_blkno) {
		HR_ERROR("%s(): hotspare (%" PRIun ") doesn't have enough "
		    "blocks\n", __func__, hotspare);

		rc = EINVAL;
		block_fini(hotspare);
		goto error;
	}

	size_t hs_idx = vol->hotspare_no;

	vol->hotspare_no++;

	hr_update_hotspare_svc_id(vol, hs_idx, hotspare);
	hr_update_hotspare_state(vol, hs_idx, HR_EXT_HOTSPARE);

	hr_mark_vol_state_dirty(vol);
error:
	fibril_mutex_unlock(&vol->hotspare_lock);
	return rc;
}
1066
/** XOR @p src into @p dst, 64 bits at a time.
 *
 * Processes size / 8 full words; a trailing partial word
 * (size not a multiple of 8) is left untouched. Buffers are
 * expected to be suitably aligned for uint64_t access.
 */
void hr_raid5_xor(void *dst, const void *src, size_t size)
{
	uint64_t *d = dst;
	const uint64_t *s = src;
	size_t words = size / sizeof(uint64_t);

	for (size_t w = 0; w < words; w++)
		d[w] ^= s[w];
}
1076
1077errno_t hr_sync_extents(hr_volume_t *vol)
1078{
1079 errno_t rc = EOK;
1080
1081 fibril_rwlock_read_lock(&vol->extents_lock);
1082 for (size_t e = 0; e < vol->extent_no; e++) {
1083 fibril_rwlock_read_lock(&vol->states_lock);
1084 hr_ext_state_t s = vol->extents[e].state;
1085 fibril_rwlock_read_unlock(&vol->states_lock);
1086
1087 service_id_t svc_id = vol->extents[e].svc_id;
1088
1089 if (s == HR_EXT_ONLINE || s == HR_EXT_REBUILD) {
1090 errno_t rc = hr_sync_cache(svc_id, 0, 0);
1091 if (rc != EOK && rc != ENOTSUP)
1092 vol->hr_ops.ext_state_cb(vol, e, rc);
1093 }
1094 }
1095 fibril_rwlock_read_unlock(&vol->extents_lock);
1096
1097 vol->hr_ops.vol_state_eval(vol);
1098
1099 fibril_rwlock_read_lock(&vol->states_lock);
1100 hr_vol_state_t s = vol->state;
1101 fibril_rwlock_read_unlock(&vol->states_lock);
1102
1103 if (s == HR_VOL_FAULTY)
1104 rc = EIO;
1105
1106 return rc;
1107}
1108
/** Pick an extent to rebuild and transition the volume to REBUILD.
 *
 * Selection priority: an extent already in REBUILD (resume), then an
 * INVALID extent, then any non-ONLINE extent replaced by swapping in
 * the last hotspare. Only valid on a DEGRADED, non-RAID-0 volume.
 *
 * @param vol          Volume to start rebuilding.
 * @param rebuild_idx  Place to store the chosen extent index.
 *
 * @return EOK on success, EINVAL otherwise.
 */
errno_t hr_init_rebuild(hr_volume_t *vol, size_t *rebuild_idx)
{
	errno_t rc = EOK;
	size_t bad = vol->extent_no;

	/* RAID 0 has no redundancy, nothing to rebuild from */
	if (vol->level == HR_LVL_0)
		return EINVAL;

	fibril_rwlock_write_lock(&vol->extents_lock);
	fibril_rwlock_write_lock(&vol->states_lock);
	fibril_mutex_lock(&vol->hotspare_lock);

	if (vol->state != HR_VOL_DEGRADED) {
		rc = EINVAL;
		goto error;
	}

	/* 1st choice: resume an interrupted rebuild */
	size_t rebuild = vol->extent_no;
	for (size_t i = 0; i < vol->extent_no; i++) {
		if (vol->extents[i].state == HR_EXT_REBUILD) {
			rebuild = i;
			break;
		}
	}

	if (rebuild < vol->extent_no) {
		bad = rebuild;
		goto init_rebuild;
	}

	/* 2nd choice: an INVALID extent (device present, data stale) */
	size_t invalid = vol->extent_no;
	for (size_t i = 0; i < vol->extent_no; i++) {
		if (vol->extents[i].state == HR_EXT_INVALID) {
			invalid = i;
			break;
		}
	}

	if (invalid < vol->extent_no) {
		bad = invalid;
		goto init_rebuild;
	}

	/* last choice: replace any non-ONLINE extent with a hotspare */
	for (size_t i = 0; i < vol->extent_no; i++) {
		if (vol->extents[i].state != HR_EXT_ONLINE) {
			bad = i;
			break;
		}
	}

	if (bad == vol->extent_no || vol->hotspare_no == 0) {
		rc = EINVAL;
		goto error;
	}

	size_t hotspare_idx = vol->hotspare_no - 1;

	hr_ext_state_t hs_state = vol->hotspares[hotspare_idx].state;
	if (hs_state != HR_EXT_HOTSPARE) {
		HR_ERROR("hr_raid1_rebuild(): invalid hotspare"
		    "state \"%s\", aborting rebuild\n",
		    hr_get_ext_state_str(hs_state));
		rc = EINVAL;
		goto error;
	}

	rc = hr_swap_hs(vol, bad, hotspare_idx);
	if (rc != EOK) {
		HR_ERROR("hr_raid1_rebuild(): swapping "
		    "hotspare failed, aborting rebuild\n");
		goto error;
	}

	hr_extent_t *rebuild_ext = &vol->extents[bad];

	HR_DEBUG("hr_raid1_rebuild(): starting REBUILD on extent no. %zu "
	    "(%" PRIun ")\n", bad, rebuild_ext->svc_id);

init_rebuild:
	hr_update_ext_state(vol, bad, HR_EXT_REBUILD);
	hr_update_vol_state(vol, HR_VOL_REBUILD);

	*rebuild_idx = bad;
error:
	fibril_mutex_unlock(&vol->hotspare_lock);
	fibril_rwlock_write_unlock(&vol->states_lock);
	fibril_rwlock_write_unlock(&vol->extents_lock);

	return rc;
}
1199
/** Swap a hotspare into the place of a bad extent.
 *
 * The bad extent's device (if any) is closed; the hotspare slot
 * is cleared and the hotspare count decremented. Caller must hold
 * extents_lock (write), states_lock (write) and hotspare_lock.
 *
 * @param vol  Volume to operate on.
 * @param bad  Index of the extent being replaced.
 * @param hs   Index of the hotspare to swap in.
 *
 * @return EOK (currently cannot fail).
 */
static errno_t hr_swap_hs(hr_volume_t *vol, size_t bad, size_t hs)
{
	HR_DEBUG("%s()", __func__);

	service_id_t faulty_svc_id = vol->extents[bad].svc_id;
	service_id_t hs_svc_id = vol->hotspares[hs].svc_id;

	hr_update_ext_svc_id(vol, bad, hs_svc_id);
	hr_update_ext_state(vol, bad, HR_EXT_HOTSPARE);

	hr_update_hotspare_svc_id(vol, hs, 0);
	hr_update_hotspare_state(vol, hs, HR_EXT_MISSING);

	vol->hotspare_no--;

	if (faulty_svc_id != 0)
		block_fini(faulty_svc_id);

	return EOK;
}
1220
1221/** @}
1222 */
Note: See TracBrowser for help on using the repository browser.