source: mainline/uspace/srv/bd/hr/util.c@ a3486f2

Last change on this file since a3486f2 was a3486f2, checked in by Miroslav Cimerman <mc@…>, 6 weeks ago

hr: parallel RAID 5

  • Property mode set to 100644
File size: 24.7 KB
Line 
1/*
2 * Copyright (c) 2025 Miroslav Cimerman
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup hr
30 * @{
31 */
32/**
33 * @file
34 */
35
36#include <adt/list.h>
37#include <block.h>
38#include <errno.h>
39#include <fibril_synch.h>
40#include <hr.h>
41#include <inttypes.h>
42#include <io/log.h>
43#include <loc.h>
44#include <mem.h>
45#include <stdatomic.h>
46#include <stdlib.h>
47#include <stdio.h>
48#include <str.h>
49#include <str_error.h>
50#include <vbd.h>
51
52#include "io.h"
53#include "superblock.h"
54#include "util.h"
55#include "var.h"
56
57static hr_range_lock_t *hr_range_lock_acquire_internal(hr_range_lock_t *,
58 hr_volume_t *, uint64_t, uint64_t);
59static bool hr_range_lock_overlap(hr_range_lock_t *, hr_range_lock_t *);
60static errno_t hr_add_svc_linked_to_list(list_t *, service_id_t, bool, void *);
61static void free_dev_list_member(struct dev_list_member *);
62static void free_svc_id_list(list_t *);
63static errno_t hr_fill_disk_part_svcs_list(list_t *);
64static errno_t block_init_dev_list(list_t *);
65static void block_fini_dev_list(list_t *);
66static errno_t hr_util_get_matching_md_svcs_list(list_t *, list_t *,
67 service_id_t, hr_metadata_type_t, void *);
68static errno_t hr_util_assemble_from_matching_list(list_t *,
69 hr_metadata_type_t);
70static errno_t hr_fill_svcs_list_from_cfg(hr_config_t *, list_t *);
71
72#define HR_RL_LIST_LOCK(vol) (fibril_mutex_lock(&(vol)->range_lock_list_lock))
73#define HR_RL_LIST_UNLOCK(vol) \
74 (fibril_mutex_unlock(&(vol)->range_lock_list_lock))
75
76extern loc_srv_t *hr_srv;
77extern list_t hr_volumes;
78extern fibril_rwlock_t hr_volumes_lock;
79
80/*
81 * malloc() wrapper that behaves like
82 * FreeBSD malloc(9) with M_WAITOK flag.
83 *
84 * Return value is never NULL.
85 */
86void *malloc_waitok(size_t size)
87{
88 void *ret;
89 while ((ret = malloc(size)) == NULL)
90 fibril_usleep(MSEC2USEC(250)); /* sleep 250ms */
91
92 return ret;
93}
94
/*
 * calloc() wrapper that never fails: retries the
 * allocation until it succeeds (see malloc_waitok()).
 *
 * Never returns NULL.
 */
void *calloc_waitok(size_t nmemb, size_t size)
{
	void *p = calloc(nmemb, size);

	while (p == NULL) {
		/* back off for 250ms and retry */
		fibril_usleep(MSEC2USEC(250));
		p = calloc(nmemb, size);
	}

	return p;
}
103
104errno_t hr_create_vol_struct(hr_volume_t **rvol, hr_level_t level,
105 const char *devname, hr_metadata_type_t metadata_type)
106{
107 HR_DEBUG("%s()", __func__);
108
109 errno_t rc;
110
111 hr_volume_t *vol = calloc(1, sizeof(hr_volume_t));
112 if (vol == NULL)
113 return ENOMEM;
114
115 str_cpy(vol->devname, HR_DEVNAME_LEN, devname);
116 vol->level = level;
117
118 vol->meta_ops = get_type_ops(metadata_type);
119
120 uint8_t meta_flags = vol->meta_ops->get_flags();
121
122 switch (level) {
123 case HR_LVL_0:
124 vol->hr_ops.create = hr_raid0_create;
125 vol->hr_ops.init = hr_raid0_init;
126 vol->hr_ops.vol_state_eval = hr_raid0_vol_state_eval;
127 vol->hr_ops.ext_state_cb = hr_raid0_ext_state_cb;
128 break;
129 case HR_LVL_1:
130 vol->hr_ops.create = hr_raid1_create;
131 vol->hr_ops.init = hr_raid1_init;
132 vol->hr_ops.vol_state_eval = hr_raid1_vol_state_eval;
133 vol->hr_ops.ext_state_cb = hr_raid1_ext_state_cb;
134 if (meta_flags & HR_METADATA_HOTSPARE_SUPPORT)
135 vol->hr_ops.add_hotspare = hr_raid1_add_hotspare;
136 break;
137 case HR_LVL_4:
138 case HR_LVL_5:
139 vol->hr_ops.create = hr_raid5_create;
140 vol->hr_ops.init = hr_raid5_init;
141 vol->hr_ops.vol_state_eval = hr_raid5_vol_state_eval;
142 vol->hr_ops.ext_state_cb = hr_raid5_ext_state_cb;
143 if (meta_flags & HR_METADATA_HOTSPARE_SUPPORT)
144 vol->hr_ops.add_hotspare = hr_raid5_add_hotspare;
145 break;
146 default:
147 HR_DEBUG("unkown level: %d, aborting\n", vol->level);
148 rc = EINVAL;
149 goto error;
150 }
151
152 if (level == HR_LVL_4 || level == HR_LVL_5)
153 vol->fge = hr_fpool_create(16, 32, sizeof(hr_io_raid5_t));
154 else
155 vol->fge = hr_fpool_create(16, 32, sizeof(hr_io_t));
156
157 if (vol->fge == NULL) {
158 rc = ENOMEM;
159 goto error;
160 }
161
162 vol->in_mem_md = vol->meta_ops->alloc_struct();
163 if (vol->in_mem_md == NULL) {
164 free(vol->fge);
165 rc = ENOMEM;
166 goto error;
167 }
168
169 vol->state = HR_VOL_NONE;
170
171 fibril_mutex_initialize(&vol->md_lock);
172
173 fibril_rwlock_initialize(&vol->extents_lock);
174 fibril_rwlock_initialize(&vol->states_lock);
175
176 fibril_mutex_initialize(&vol->hotspare_lock);
177
178 list_initialize(&vol->range_lock_list);
179 fibril_mutex_initialize(&vol->range_lock_list_lock);
180
181 atomic_init(&vol->state_dirty, false);
182 atomic_init(&vol->first_write, false);
183 atomic_init(&vol->rebuild_blk, 0);
184 atomic_init(&vol->open_cnt, 0);
185
186 *rvol = vol;
187
188 return EOK;
189error:
190 free(vol);
191 return rc;
192}
193
/**
 * Destroy a volume structure created by hr_create_vol_struct().
 *
 * Safe to call with NULL. Ordering is deliberate: the fibril pool is
 * destroyed first (no more in-flight group IO), then the block devices
 * are finalized, then the metadata and the volume itself are freed.
 *
 * @param vol Volume to destroy (may be NULL).
 */
void hr_destroy_vol_struct(hr_volume_t *vol)
{
	HR_DEBUG("%s()", __func__);

	if (vol == NULL)
		return;

	hr_fpool_destroy(vol->fge);
	hr_fini_devs(vol);
	free(vol->in_mem_md);
	free(vol);
}
206
207errno_t hr_get_volume_svcs(size_t *rcnt, service_id_t **rsvcs)
208{
209 size_t i;
210 service_id_t *vol_svcs;
211
212 if (rcnt == NULL || rsvcs == NULL)
213 return EINVAL;
214
215 fibril_rwlock_read_lock(&hr_volumes_lock);
216
217 size_t vol_cnt = list_count(&hr_volumes);
218 vol_svcs = malloc(vol_cnt * sizeof(service_id_t));
219 if (vol_svcs == NULL) {
220 fibril_rwlock_read_unlock(&hr_volumes_lock);
221 return ENOMEM;
222 }
223
224 i = 0;
225 list_foreach(hr_volumes, lvolumes, hr_volume_t, iter)
226 vol_svcs[i++] = iter->svc_id;
227
228 fibril_rwlock_read_unlock(&hr_volumes_lock);
229
230 *rcnt = vol_cnt;
231 *rsvcs = vol_svcs;
232
233 return EOK;
234}
235
236hr_volume_t *hr_get_volume(service_id_t svc_id)
237{
238 HR_DEBUG("%s()", __func__);
239
240 hr_volume_t *rvol = NULL;
241
242 fibril_rwlock_read_lock(&hr_volumes_lock);
243 list_foreach(hr_volumes, lvolumes, hr_volume_t, iter) {
244 if (iter->svc_id == svc_id) {
245 rvol = iter;
246 break;
247 }
248 }
249 fibril_rwlock_read_unlock(&hr_volumes_lock);
250
251 return rvol;
252}
253
/**
 * Deactivate and remove a volume.
 *
 * Fails with EBUSY when the volume is still open by a client.
 * Metadata is flushed before the volume structure is destroyed.
 *
 * NOTE(review): the lookup via hr_get_volume() happens under the read
 * lock and the write lock is only taken afterwards; presumably a
 * concurrent removal of the same svc_id in that window cannot happen
 * (single control connection?) — TODO confirm.
 *
 * @param svc_id Service ID of the volume to remove.
 *
 * @return EOK on success, ENOENT if not found, EBUSY if still open.
 */
errno_t hr_remove_volume(service_id_t svc_id)
{
	HR_DEBUG("%s()", __func__);

	hr_volume_t *vol = hr_get_volume(svc_id);
	if (vol == NULL)
		return ENOENT;

	fibril_rwlock_write_lock(&hr_volumes_lock);

	int open_cnt = atomic_load_explicit(&vol->open_cnt,
	    memory_order_relaxed);

	/*
	 * The atomicity of this if condition (and this whole
	 * operation) is provided by the write lock - no new
	 * bd connection can come, because we need to get the
	 * bd_srvs_t from the volume, which we get from the list.
	 * (see hr_client_conn() in hr.c)
	 */
	if (open_cnt > 0) {
		fibril_rwlock_write_unlock(&hr_volumes_lock);
		return EBUSY;
	}

	list_remove(&vol->lvolumes);

	fibril_rwlock_write_unlock(&hr_volumes_lock);

	/* save metadata, but we don't care about states anymore */
	(void)vol->meta_ops->save(vol, NO_STATE_CALLBACK);

	HR_NOTE("deactivating volume \"%s\"\n", vol->devname);

	hr_destroy_vol_struct(vol);

	errno_t rc = loc_service_unregister(hr_srv, svc_id);
	return rc;
}
293
294errno_t hr_init_extents_from_cfg(hr_volume_t *vol, hr_config_t *cfg)
295{
296 HR_DEBUG("%s()", __func__);
297
298 errno_t rc;
299 uint64_t blkno, smallest_blkno = ~0ULL;
300 size_t i, bsize;
301 size_t last_bsize = 0;
302
303 for (i = 0; i < cfg->dev_no; i++) {
304 service_id_t svc_id = cfg->devs[i];
305 if (svc_id == 0) {
306 rc = EINVAL;
307 goto error;
308 }
309
310 HR_DEBUG("%s(): block_init() on (%" PRIun ")\n", __func__,
311 svc_id);
312 rc = block_init(svc_id);
313 if (rc != EOK) {
314 HR_DEBUG("%s(): initing (%" PRIun ") failed, "
315 "aborting\n", __func__, svc_id);
316 goto error;
317 }
318
319 rc = block_get_nblocks(svc_id, &blkno);
320 if (rc != EOK)
321 goto error;
322
323 rc = block_get_bsize(svc_id, &bsize);
324 if (rc != EOK)
325 goto error;
326
327 if (last_bsize != 0 && bsize != last_bsize) {
328 HR_DEBUG("block sizes differ\n");
329 rc = EINVAL;
330 goto error;
331 }
332
333 vol->extents[i].svc_id = svc_id;
334 vol->extents[i].state = HR_EXT_ONLINE;
335
336 if (blkno < smallest_blkno)
337 smallest_blkno = blkno;
338 last_bsize = bsize;
339 }
340
341 vol->bsize = last_bsize;
342 vol->extent_no = cfg->dev_no;
343 vol->truncated_blkno = smallest_blkno;
344
345 for (i = 0; i < HR_MAX_HOTSPARES; i++)
346 vol->hotspares[i].state = HR_EXT_MISSING;
347
348 return EOK;
349
350error:
351 for (i = 0; i < HR_MAX_EXTENTS; i++) {
352 if (vol->extents[i].svc_id != 0)
353 block_fini(vol->extents[i].svc_id);
354 }
355
356 return rc;
357}
358
359void hr_fini_devs(hr_volume_t *vol)
360{
361 HR_DEBUG("%s()", __func__);
362
363 size_t i;
364
365 for (i = 0; i < vol->extent_no; i++) {
366 if (vol->extents[i].svc_id != 0) {
367 HR_DEBUG("hr_fini_devs(): block_fini() on "
368 "(%" PRIun ")\n", vol->extents[i].svc_id);
369 block_fini(vol->extents[i].svc_id);
370 }
371 }
372
373 for (i = 0; i < vol->hotspare_no; i++) {
374 if (vol->hotspares[i].svc_id != 0) {
375 HR_DEBUG("hr_fini_devs(): block_fini() on "
376 "(%" PRIun ")\n",
377 vol->hotspares[i].svc_id);
378 block_fini(vol->hotspares[i].svc_id);
379 }
380 }
381}
382
383errno_t hr_register_volume(hr_volume_t *vol)
384{
385 HR_DEBUG("%s()", __func__);
386
387 errno_t rc;
388 service_id_t new_id;
389 category_id_t cat_id;
390 const char *devname = vol->devname;
391
392 rc = loc_service_register(hr_srv, devname, &new_id);
393 if (rc != EOK) {
394 HR_ERROR("unable to register device \"%s\": %s\n",
395 devname, str_error(rc));
396 return rc;
397 }
398
399 rc = loc_category_get_id("raid", &cat_id, IPC_FLAG_BLOCKING);
400 if (rc != EOK) {
401 HR_ERROR("failed resolving category \"raid\": %s\n",
402 str_error(rc));
403 goto error;
404 }
405
406 rc = loc_service_add_to_cat(hr_srv, new_id, cat_id);
407 if (rc != EOK) {
408 HR_ERROR("failed adding \"%s\" to category \"raid\": %s\n",
409 devname, str_error(rc));
410 goto error;
411 }
412
413 vol->svc_id = new_id;
414 return EOK;
415error:
416 rc = loc_service_unregister(hr_srv, new_id);
417 return rc;
418}
419
420errno_t hr_check_ba_range(hr_volume_t *vol, size_t cnt, uint64_t ba)
421{
422 if (ba + cnt > vol->data_blkno)
423 return ERANGE;
424 return EOK;
425}
426
427void hr_add_data_offset(hr_volume_t *vol, uint64_t *ba)
428{
429 *ba = *ba + vol->data_offset;
430}
431
432void hr_sub_data_offset(hr_volume_t *vol, uint64_t *ba)
433{
434 *ba = *ba - vol->data_offset;
435}
436
437void hr_update_ext_state(hr_volume_t *vol, size_t ext_idx, hr_ext_state_t s)
438{
439 if (vol->level != HR_LVL_0)
440 assert(fibril_rwlock_is_locked(&vol->extents_lock));
441
442 assert(fibril_rwlock_is_write_locked(&vol->states_lock));
443
444 assert(ext_idx < vol->extent_no);
445
446 hr_ext_state_t old = vol->extents[ext_idx].state;
447 HR_NOTE("\"%s\": changing extent %zu state: %s -> %s\n",
448 vol->devname, ext_idx, hr_get_ext_state_str(old),
449 hr_get_ext_state_str(s));
450 vol->extents[ext_idx].state = s;
451}
452
453void hr_update_hotspare_state(hr_volume_t *vol, size_t hs_idx,
454 hr_ext_state_t s)
455{
456 assert(fibril_mutex_is_locked(&vol->hotspare_lock));
457
458 assert(hs_idx < vol->hotspare_no);
459
460 hr_ext_state_t old = vol->hotspares[hs_idx].state;
461 HR_NOTE("\"%s\": changing hotspare %zu state: %s -> %s\n",
462 vol->devname, hs_idx, hr_get_ext_state_str(old),
463 hr_get_ext_state_str(s));
464 vol->hotspares[hs_idx].state = s;
465}
466
467void hr_update_vol_state(hr_volume_t *vol, hr_vol_state_t new)
468{
469 assert(fibril_rwlock_is_write_locked(&vol->states_lock));
470
471 HR_NOTE("\"%s\": changing volume state: %s -> %s\n", vol->devname,
472 hr_get_vol_state_str(vol->state), hr_get_vol_state_str(new));
473 vol->state = new;
474}
475
476void hr_update_ext_svc_id(hr_volume_t *vol, size_t ext_idx, service_id_t new)
477{
478 if (vol->level != HR_LVL_0)
479 assert(fibril_rwlock_is_write_locked(&vol->extents_lock));
480
481 assert(ext_idx < vol->extent_no);
482
483 service_id_t old = vol->extents[ext_idx].svc_id;
484 HR_NOTE("\"%s\": changing extent no. %zu svc_id: (%" PRIun ") -> "
485 "(%" PRIun ")\n", vol->devname, ext_idx, old, new);
486 vol->extents[ext_idx].svc_id = new;
487}
488
489void hr_update_hotspare_svc_id(hr_volume_t *vol, size_t hs_idx,
490 service_id_t new)
491{
492 assert(fibril_mutex_is_locked(&vol->hotspare_lock));
493
494 assert(hs_idx < vol->hotspare_no);
495
496 service_id_t old = vol->hotspares[hs_idx].svc_id;
497 HR_NOTE("\"%s\": changing hotspare no. %zu svc_id: (%" PRIun ") -> "
498 "(%" PRIun ")\n", vol->devname, hs_idx, old, new);
499 vol->hotspares[hs_idx].svc_id = new;
500}
501
502size_t hr_count_extents(hr_volume_t *vol, hr_ext_state_t state)
503{
504 if (vol->level != HR_LVL_0)
505 assert(fibril_rwlock_is_locked(&vol->extents_lock));
506 assert(fibril_rwlock_is_locked(&vol->states_lock));
507
508 size_t count = 0;
509 for (size_t i = 0; i < vol->extent_no; i++)
510 if (vol->extents[i].state == state)
511 count++;
512
513 return count;
514}
515
516void hr_range_lock_acquire_noalloc(hr_range_lock_t *rl, hr_volume_t *vol,
517 uint64_t ba, uint64_t cnt)
518{
519 assert(rl != NULL);
520 (void)hr_range_lock_acquire_internal(rl, vol, ba, cnt);
521}
522
523hr_range_lock_t *hr_range_lock_acquire(hr_volume_t *vol, uint64_t ba,
524 uint64_t cnt)
525{
526 hr_range_lock_t *rl = malloc(sizeof(hr_range_lock_t));
527 if (rl == NULL)
528 return NULL;
529
530 return hr_range_lock_acquire_internal(rl, vol, ba, cnt);
531}
532
533static hr_range_lock_t *hr_range_lock_acquire_internal(hr_range_lock_t *rl,
534 hr_volume_t *vol, uint64_t ba, uint64_t cnt)
535{
536 printf("hr_range_lock_acquire_internal got: 0x%p\n", rl);
537
538 rl->vol = vol;
539 rl->off = ba;
540 rl->len = cnt;
541
542 rl->pending = 1;
543 rl->ignore = false;
544
545 link_initialize(&rl->link);
546 fibril_mutex_initialize(&rl->lock);
547
548 fibril_mutex_lock(&rl->lock);
549
550again:
551 HR_RL_LIST_LOCK(vol);
552 list_foreach(vol->range_lock_list, link, hr_range_lock_t, rlp) {
553 if (rlp->ignore)
554 continue;
555 if (hr_range_lock_overlap(rlp, rl)) {
556 rlp->pending++;
557
558 HR_RL_LIST_UNLOCK(vol);
559
560 fibril_mutex_lock(&rlp->lock);
561
562 HR_RL_LIST_LOCK(vol);
563
564 rlp->pending--;
565
566 /*
567 * when ignore is set, after HR_RL_LIST_UNLOCK(),
568 * noone new is going to be able to start sleeping
569 * on the ignored range lock, only already waiting
570 * IOs will come through here
571 */
572 rlp->ignore = true;
573
574 fibril_mutex_unlock(&rlp->lock);
575
576 if (rlp->pending == 0) {
577 list_remove(&rlp->link);
578 free(rlp);
579 }
580
581 HR_RL_LIST_UNLOCK(vol);
582 goto again;
583 }
584 }
585
586 list_append(&rl->link, &vol->range_lock_list);
587
588 HR_RL_LIST_UNLOCK(vol);
589 return rl;
590}
591
/**
 * Release a range lock acquired with hr_range_lock_acquire() or
 * hr_range_lock_acquire_noalloc().
 *
 * Safe to call with NULL. The owner's mutex is unlocked while still
 * holding the list lock, so waiters in
 * hr_range_lock_acquire_internal() cannot observe the lock in a
 * half-released state; the lock is freed only when no fibril is
 * still pending on it.
 *
 * @param rl Range lock to release (may be NULL).
 */
void hr_range_lock_release(hr_range_lock_t *rl)
{
	if (rl == NULL)
		return;

	HR_RL_LIST_LOCK(rl->vol);

	rl->pending--;

	fibril_mutex_unlock(&rl->lock);

	/* last user: unlink and free (also frees noalloc?-no, caller's) */
	if (rl->pending == 0) {
		list_remove(&rl->link);
		free(rl);
	}

	HR_RL_LIST_UNLOCK(rl->vol);
}
610
611static bool hr_range_lock_overlap(hr_range_lock_t *rl1, hr_range_lock_t *rl2)
612{
613 uint64_t rl1_start = rl1->off;
614 uint64_t rl1_end = rl1->off + rl1->len - 1;
615 uint64_t rl2_start = rl2->off;
616 uint64_t rl2_end = rl2->off + rl2->len - 1;
617
618 /* one ends before the other starts */
619 if (rl1_end < rl2_start || rl2_end < rl1_start)
620 return false;
621
622 return true;
623}
624
/** Flag the volume's state as dirty, so it gets written out later. */
void hr_mark_vol_state_dirty(hr_volume_t *vol)
{
	atomic_store(&vol->state_dirty, true);
}
629
630static errno_t hr_add_svc_linked_to_list(list_t *list, service_id_t svc_id,
631 bool inited, void *md)
632{
633 HR_DEBUG("%s()", __func__);
634
635 errno_t rc = EOK;
636 struct dev_list_member *to_add;
637
638 if (list == NULL)
639 return EINVAL;
640
641 to_add = malloc(sizeof(struct dev_list_member));
642 if (to_add == NULL) {
643 rc = ENOMEM;
644 goto error;
645 }
646
647 to_add->svc_id = svc_id;
648 to_add->inited = inited;
649
650 if (md != NULL) {
651 to_add->md = md;
652 to_add->md_present = true;
653 } else {
654 to_add->md_present = false;
655 }
656
657 list_append(&to_add->link, list);
658
659error:
660 return rc;
661}
662
663static void free_dev_list_member(struct dev_list_member *p)
664{
665 HR_DEBUG("%s()", __func__);
666
667 if (p->md_present)
668 free(p->md);
669 free(p);
670}
671
672static void free_svc_id_list(list_t *list)
673{
674 HR_DEBUG("%s()", __func__);
675
676 struct dev_list_member *dev_id;
677 while (!list_empty(list)) {
678 dev_id = list_pop(list, struct dev_list_member, link);
679
680 free_dev_list_member(dev_id);
681 }
682}
683
684static errno_t hr_fill_disk_part_svcs_list(list_t *list)
685{
686 HR_DEBUG("%s()", __func__);
687
688 errno_t rc;
689 size_t disk_count;
690 service_id_t *disk_svcs = NULL;
691 vbd_t *vbd = NULL;
692
693 rc = vbd_create(&vbd);
694 if (rc != EOK)
695 goto error;
696
697 rc = vbd_get_disks(vbd, &disk_svcs, &disk_count);
698 if (rc != EOK)
699 goto error;
700
701 for (size_t i = 0; i < disk_count; i++) {
702 vbd_disk_info_t disk_info;
703 rc = vbd_disk_info(vbd, disk_svcs[i], &disk_info);
704 if (rc != EOK)
705 goto error;
706
707 if (disk_info.ltype != lt_none) {
708 size_t part_count;
709 service_id_t *part_ids = NULL;
710 rc = vbd_label_get_parts(vbd, disk_svcs[i], &part_ids,
711 &part_count);
712 if (rc != EOK)
713 goto error;
714
715 for (size_t j = 0; j < part_count; j++) {
716 vbd_part_info_t part_info;
717 rc = vbd_part_get_info(vbd, part_ids[j],
718 &part_info);
719 if (rc != EOK) {
720 free(part_ids);
721 goto error;
722 }
723
724 rc = hr_add_svc_linked_to_list(list,
725 part_info.svc_id, false, NULL);
726 if (rc != EOK) {
727 free(part_ids);
728 goto error;
729 }
730 }
731
732 free(part_ids);
733
734 /*
735 * vbd can detect some bogus label type, but
736 * no partitions. In that case we handle the
737 * svc_id as a label-less disk.
738 *
739 * This can happen when creating an exfat fs
740 * in FreeBSD for example.
741 */
742 if (part_count == 0)
743 disk_info.ltype = lt_none;
744 }
745
746 if (disk_info.ltype == lt_none) {
747 rc = hr_add_svc_linked_to_list(list, disk_svcs[i],
748 false, NULL);
749 if (rc != EOK)
750 goto error;
751 }
752 }
753
754 free(disk_svcs);
755 vbd_destroy(vbd);
756 return EOK;
757error:
758 free_svc_id_list(list);
759 if (disk_svcs != NULL)
760 free(disk_svcs);
761 vbd_destroy(vbd);
762
763 return rc;
764}
765
/**
 * Call block_init() on every not-yet-initialized member of the list.
 *
 * Members whose device is already open elsewhere (block_init() returns
 * EEXIST - e.g. an extent of an already active volume) are removed from
 * the list and freed; the safe-iteration variant is used because the
 * loop mutates the list.
 *
 * Members opened here get fini = true, marking that this module is
 * responsible for their eventual block_fini().
 *
 * @param list List of dev_list_member entries.
 *
 * @return EOK on success, otherwise the first block_init() error.
 */
static errno_t block_init_dev_list(list_t *list)
{
	HR_DEBUG("%s()", __func__);

	list_foreach_safe(*list, cur_link, next_link) {
		struct dev_list_member *iter;
		iter = list_get_instance(cur_link, struct dev_list_member,
		    link);

		if (iter->inited)
			continue;

		errno_t rc = block_init(iter->svc_id);

		/* already used as an extent of active volume */
		/* XXX: figure out how it is with hotspares too */
		if (rc == EEXIST) {
			list_remove(cur_link);
			free_dev_list_member(iter);
			continue;
		}

		if (rc != EOK)
			return rc;

		iter->inited = true;
		iter->fini = true;
	}

	return EOK;
}
797
798static void block_fini_dev_list(list_t *list)
799{
800 HR_DEBUG("%s()", __func__);
801
802 list_foreach(*list, link, struct dev_list_member, iter) {
803 if (iter->inited && iter->fini) {
804 block_fini(iter->svc_id);
805 iter->inited = false;
806 iter->fini = false;
807 }
808 }
809}
810
811static errno_t hr_util_get_matching_md_svcs_list(list_t *rlist, list_t *list,
812 service_id_t svc_id, hr_metadata_type_t type_main,
813 void *metadata_struct_main)
814{
815 HR_DEBUG("%s()", __func__);
816
817 errno_t rc = EOK;
818
819 hr_superblock_ops_t *meta_ops = get_type_ops(type_main);
820
821 list_foreach(*list, link, struct dev_list_member, iter) {
822 if (iter->svc_id == svc_id)
823 continue;
824
825 void *metadata_struct;
826 hr_metadata_type_t type;
827
828 rc = find_metadata(iter->svc_id, &metadata_struct, &type);
829 if (rc == ENOFS)
830 continue;
831 if (rc != EOK)
832 goto error;
833
834 if (type != type_main) {
835 free(metadata_struct);
836 continue;
837 }
838
839 if (!meta_ops->compare_uuids(metadata_struct_main,
840 metadata_struct)) {
841 free(metadata_struct);
842 continue;
843 }
844
845 rc = hr_add_svc_linked_to_list(rlist, iter->svc_id, true,
846 metadata_struct);
847 if (rc != EOK)
848 goto error;
849 }
850
851 return EOK;
852error:
853 free_svc_id_list(rlist);
854 return rc;
855}
856
857static errno_t hr_util_assemble_from_matching_list(list_t *list,
858 hr_metadata_type_t type)
859{
860 HR_DEBUG("%s()", __func__);
861
862 errno_t rc = EOK;
863
864 hr_superblock_ops_t *meta_ops = get_type_ops(type);
865
866 link_t *memb_l = list_first(list);
867 struct dev_list_member *memb = list_get_instance(memb_l,
868 struct dev_list_member, link);
869
870 hr_level_t level = meta_ops->get_level(memb->md);
871 const char *devname = meta_ops->get_devname(memb->md);
872
873 hr_volume_t *vol;
874 rc = hr_create_vol_struct(&vol, level, devname, type);
875 if (rc != EOK)
876 return rc;
877
878 meta_ops->init_meta2vol(list, vol);
879
880 rc = vol->hr_ops.create(vol);
881 if (rc != EOK)
882 goto error;
883
884 rc = hr_register_volume(vol);
885 if (rc != EOK)
886 goto error;
887
888 fibril_rwlock_write_lock(&hr_volumes_lock);
889 list_append(&vol->lvolumes, &hr_volumes);
890 fibril_rwlock_write_unlock(&hr_volumes_lock);
891
892 HR_NOTE("assembled volume \"%s\"\n", vol->devname);
893
894 return EOK;
895error:
896 hr_destroy_vol_struct(vol);
897 return rc;
898}
899
900static errno_t hr_fill_svcs_list_from_cfg(hr_config_t *cfg, list_t *list)
901{
902 HR_DEBUG("%s()", __func__);
903
904 errno_t rc = EOK;
905 for (size_t i = 0; i < cfg->dev_no; ++i) {
906 rc = hr_add_svc_linked_to_list(list, cfg->devs[i], false,
907 NULL);
908 if (rc != EOK)
909 goto error;
910 }
911
912 return EOK;
913error:
914 free_svc_id_list(list);
915 return rc;
916}
917
/**
 * Try to assemble volumes from devices carrying valid metadata.
 *
 * With cfg == NULL all disks/partitions in the system are scanned;
 * otherwise only the devices listed in cfg are considered. For each
 * device with valid metadata, all other devices with a matching UUID
 * are collected and an assembly is attempted.
 *
 * @param cfg            Optional configuration (may be NULL).
 * @param rassembled_cnt Optional out: number of volumes assembled.
 *
 * @return EOK on success (even if nothing was assembled), otherwise
 *         an error code.
 */
errno_t hr_util_try_assemble(hr_config_t *cfg, size_t *rassembled_cnt)
{
	HR_DEBUG("%s()", __func__);

	/*
	 * scan partitions or disks:
	 *
	 * When we find a metadata block with valid
	 * magic, take UUID and try to find other matching
	 * UUIDs.
	 *
	 * We ignore extents that are a part of already
	 * active volumes. (even when the counter is lower
	 * on active volumes... XXX: use timestamp as initial counter value
	 * when assembling, or writing dirty metadata?)
	 */

	size_t asm_cnt = 0;
	errno_t rc;
	list_t dev_id_list;

	list_initialize(&dev_id_list);

	if (cfg == NULL)
		rc = hr_fill_disk_part_svcs_list(&dev_id_list);
	else
		rc = hr_fill_svcs_list_from_cfg(cfg, &dev_id_list);

	if (rc != EOK)
		goto error;

	rc = block_init_dev_list(&dev_id_list);
	if (rc != EOK)
		goto error;

	struct dev_list_member *iter;
	while (!list_empty(&dev_id_list)) {
		/* iter is popped off the list; we own it from here on */
		iter = list_pop(&dev_id_list, struct dev_list_member, link);

		void *metadata_struct_main;
		hr_metadata_type_t type;

		rc = find_metadata(iter->svc_id, &metadata_struct_main, &type);
		if (rc == ENOFS) {
			/* no metadata here; close and drop the device */
			block_fini(iter->svc_id);
			free_dev_list_member(iter);
			rc = EOK;
			continue;
		}

		/*
		 * NOTE(review): on this path iter (and its opened block
		 * device) appears to be leaked - confirm and clean up.
		 */
		if (rc != EOK)
			goto error;

		char *svc_name = NULL;
		/*
		 * NOTE(review): on failure here metadata_struct_main and
		 * iter appear to be leaked - confirm.
		 */
		rc = loc_service_get_name(iter->svc_id, &svc_name);
		if (rc != EOK)
			goto error;
		HR_DEBUG("found valid metadata on %s (type = %s), matching "
		    "other extents\n",
		    svc_name, hr_get_metadata_type_str(type));
		free(svc_name);

		list_t matching_svcs_list;
		list_initialize(&matching_svcs_list);

		rc = hr_util_get_matching_md_svcs_list(&matching_svcs_list,
		    &dev_id_list, iter->svc_id, type, metadata_struct_main);
		if (rc != EOK)
			goto error;

		/* add current iter to list as well */
		rc = hr_add_svc_linked_to_list(&matching_svcs_list,
		    iter->svc_id, true, metadata_struct_main);
		if (rc != EOK) {
			free_svc_id_list(&matching_svcs_list);
			goto error;
		}

		/* remove matching list members from dev_id_list */
		list_foreach(matching_svcs_list, link, struct dev_list_member,
		    iter2) {
			struct dev_list_member *to_remove;
			list_foreach_safe(dev_id_list, cur_link, next_link) {
				to_remove = list_get_instance(cur_link,
				    struct dev_list_member, link);
				if (to_remove->svc_id == iter2->svc_id) {
					list_remove(cur_link);
					free_dev_list_member(to_remove);
				}
			}
		}

		/*
		 * NOTE(review): iter itself is never freed after its
		 * svc_id/metadata were handed to matching_svcs_list -
		 * looks like a leak of the member struct; confirm.
		 */
		rc = hr_util_assemble_from_matching_list(&matching_svcs_list,
		    type);
		switch (rc) {
		case EOK:
			asm_cnt++;
			break;
		case ENOMEM:
			goto error;
		default:
			/* a failed assembly of one group is not fatal */
			rc = EOK;
		}
		block_fini_dev_list(&matching_svcs_list);
		free_svc_id_list(&matching_svcs_list);
	}

error:
	if (rassembled_cnt != NULL)
		*rassembled_cnt = asm_cnt;

	block_fini_dev_list(&dev_id_list);
	free_svc_id_list(&dev_id_list);

	return rc;
}
1034
1035errno_t hr_util_add_hotspare(hr_volume_t *vol, service_id_t hotspare)
1036{
1037 HR_DEBUG("%s()", __func__);
1038
1039 errno_t rc = EOK;
1040
1041 fibril_mutex_lock(&vol->hotspare_lock);
1042
1043 if (vol->hotspare_no >= HR_MAX_HOTSPARES) {
1044 HR_ERROR("%s(): cannot add more hotspares "
1045 "to \"%s\"\n", __func__, vol->devname);
1046 rc = ELIMIT;
1047 goto error;
1048 }
1049
1050 for (size_t i = 0; i < vol->hotspare_no; i++) {
1051 if (vol->hotspares[i].svc_id == hotspare) {
1052 HR_ERROR("%s(): hotspare (%" PRIun ") already used in "
1053 "%s\n", __func__, hotspare, vol->devname);
1054 rc = EEXIST;
1055 goto error;
1056 }
1057 }
1058
1059 rc = block_init(hotspare);
1060 if (rc != EOK)
1061 goto error;
1062
1063 uint64_t hs_blkno;
1064 rc = block_get_nblocks(hotspare, &hs_blkno);
1065 if (rc != EOK) {
1066 block_fini(hotspare);
1067 goto error;
1068 }
1069
1070 if (hs_blkno < vol->truncated_blkno) {
1071 HR_ERROR("%s(): hotspare (%" PRIun ") doesn't have enough "
1072 "blocks\n", __func__, hotspare);
1073
1074 rc = EINVAL;
1075 block_fini(hotspare);
1076 goto error;
1077 }
1078
1079 size_t hs_idx = vol->hotspare_no;
1080
1081 vol->hotspare_no++;
1082
1083 hr_update_hotspare_svc_id(vol, hs_idx, hotspare);
1084 hr_update_hotspare_state(vol, hs_idx, HR_EXT_HOTSPARE);
1085
1086 hr_mark_vol_state_dirty(vol);
1087error:
1088 fibril_mutex_unlock(&vol->hotspare_lock);
1089 return rc;
1090}
1091
/**
 * XOR @p src into @p dst, 64 bits at a time.
 *
 * Both buffers must be suitably aligned; any trailing bytes beyond
 * the last full uint64_t word are ignored.
 *
 * @param dst  Destination buffer (updated in place).
 * @param src  Source buffer.
 * @param size Number of bytes to process.
 */
void hr_raid5_xor(void *dst, const void *src, size_t size)
{
	uint64_t *d = dst;
	const uint64_t *s = src;
	const size_t words = size / sizeof(uint64_t);

	for (size_t i = 0; i < words; i++)
		d[i] ^= s[i];
}
1101
1102/** @}
1103 */
Note: See TracBrowser for help on using the repository browser.