source: mainline/uspace/srv/bd/hr/util.c

Last change on this file was 73a2780, checked in by Miroslav Cimerman <mc@…>, 11 days ago

hr: remove old comments

  • Property mode set to 100644
File size: 28.2 KB
Line 
1/*
2 * Copyright (c) 2025 Miroslav Cimerman
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup hr
30 * @{
31 */
32/**
33 * @file
34 */
35
36#include <adt/list.h>
37#include <block.h>
38#include <errno.h>
39#include <fibril_synch.h>
40#include <hr.h>
41#include <inttypes.h>
42#include <io/log.h>
43#include <loc.h>
44#include <mem.h>
45#include <stdatomic.h>
46#include <stdlib.h>
47#include <stdio.h>
48#include <str.h>
49#include <str_error.h>
50#include <vbd.h>
51
52#include "io.h"
53#include "superblock.h"
54#include "util.h"
55#include "var.h"
56
/* Helpers local to this file (definitions below). */
static bool hr_range_lock_overlap(hr_range_lock_t *, hr_range_lock_t *);
static errno_t hr_add_svc_linked_to_list(list_t *, service_id_t, bool, void *);
static void free_dev_list_member(struct dev_list_member *);
static void free_svc_id_list(list_t *);
static errno_t hr_fill_disk_part_svcs_list(list_t *);
static errno_t block_init_dev_list(list_t *);
static void block_fini_dev_list(list_t *);
static errno_t hr_util_get_matching_md_svcs_list(list_t *, list_t *,
    service_id_t, hr_metadata_type_t, void *);
static errno_t hr_util_assemble_from_matching_list(list_t *,
    hr_metadata_type_t, uint8_t);
static errno_t hr_fill_svcs_list_from_cfg(hr_config_t *, list_t *);
static errno_t hr_swap_hs(hr_volume_t *, size_t, size_t);

/* Convenience wrappers for the per-volume range-lock list mutex. */
#define HR_RL_LIST_LOCK(vol) (fibril_mutex_lock(&(vol)->range_lock_list_lock))
#define HR_RL_LIST_UNLOCK(vol) \
	(fibril_mutex_unlock(&(vol)->range_lock_list_lock))

/* Shared server state — defined elsewhere (presumably hr.c; confirm). */
extern loc_srv_t *hr_srv;
extern list_t hr_volumes;
extern fibril_rwlock_t hr_volumes_lock;
78
/*
 * malloc() wrapper that behaves like
 * FreeBSD malloc(9) with M_WAITOK flag.
 *
 * Return value is never NULL.
 */
void *hr_malloc_waitok(size_t size)
{
	void *ptr = malloc(size);

	/* Politely retry until the allocation succeeds. */
	while (ptr == NULL) {
		fibril_usleep(MSEC2USEC(250)); /* sleep 250ms */
		ptr = malloc(size);
	}

	return ptr;
}
93
/* calloc() variant of hr_malloc_waitok() — never returns NULL. */
void *hr_calloc_waitok(size_t nmemb, size_t size)
{
	void *ptr = calloc(nmemb, size);

	/* Politely retry until the zeroed allocation succeeds. */
	while (ptr == NULL) {
		fibril_usleep(MSEC2USEC(250)); /* sleep 250ms */
		ptr = calloc(nmemb, size);
	}

	return ptr;
}
102
103errno_t hr_create_vol_struct(hr_volume_t **rvol, hr_level_t level,
104 const char *devname, hr_metadata_type_t metadata_type, uint8_t vflags)
105{
106 HR_DEBUG("%s()", __func__);
107
108 errno_t rc;
109
110 hr_volume_t *vol = calloc(1, sizeof(hr_volume_t));
111 if (vol == NULL)
112 return ENOMEM;
113
114 str_cpy(vol->devname, HR_DEVNAME_LEN, devname);
115 vol->level = level;
116
117 vol->vflags = vflags;
118
119 vol->meta_ops = hr_get_meta_type_ops(metadata_type);
120
121 switch (level) {
122 case HR_LVL_0:
123 vol->hr_ops.create = hr_raid0_create;
124 vol->hr_ops.init = hr_raid0_init;
125 vol->hr_ops.vol_state_eval = hr_raid0_vol_state_eval;
126 vol->hr_ops.ext_state_cb = hr_raid0_ext_state_cb;
127 break;
128 case HR_LVL_1:
129 vol->hr_ops.create = hr_raid1_create;
130 vol->hr_ops.init = hr_raid1_init;
131 vol->hr_ops.vol_state_eval = hr_raid1_vol_state_eval;
132 vol->hr_ops.ext_state_cb = hr_raid1_ext_state_cb;
133 break;
134 case HR_LVL_4:
135 case HR_LVL_5:
136 vol->hr_ops.create = hr_raid5_create;
137 vol->hr_ops.init = hr_raid5_init;
138 vol->hr_ops.vol_state_eval = hr_raid5_vol_state_eval;
139 vol->hr_ops.ext_state_cb = hr_raid5_ext_state_cb;
140 break;
141 default:
142 HR_DEBUG("unkown level: %d, aborting\n", vol->level);
143 rc = EINVAL;
144 goto error;
145 }
146
147 if (level == HR_LVL_4 || level == HR_LVL_5)
148 vol->fge = hr_fpool_create(16, 32, sizeof(hr_io_raid5_t));
149 else
150 vol->fge = hr_fpool_create(16, 32, sizeof(hr_io_t));
151
152 if (vol->fge == NULL) {
153 rc = ENOMEM;
154 goto error;
155 }
156
157 vol->state = HR_VOL_NONE;
158
159 fibril_mutex_initialize(&vol->md_lock);
160
161 fibril_rwlock_initialize(&vol->extents_lock);
162 fibril_rwlock_initialize(&vol->states_lock);
163
164 fibril_mutex_initialize(&vol->hotspare_lock);
165
166 list_initialize(&vol->range_lock_list);
167 fibril_mutex_initialize(&vol->range_lock_list_lock);
168
169 atomic_init(&vol->state_dirty, false);
170 atomic_init(&vol->first_write, false);
171 for (size_t i = 0; i < HR_MAX_EXTENTS; i++)
172 atomic_init(&vol->last_ext_pos_arr[i], 0);
173 atomic_init(&vol->last_ext_used, 0);
174 atomic_init(&vol->rebuild_blk, 0);
175 atomic_init(&vol->open_cnt, 0);
176
177 *rvol = vol;
178
179 return EOK;
180error:
181 free(vol);
182 return rc;
183}
184
/** Free a volume structure created by hr_create_vol_struct().
 *
 * Destroys the fibril pool, closes all opened block devices
 * (extents and hotspares) and frees in-memory metadata.
 * Safe to call with NULL.
 */
void hr_destroy_vol_struct(hr_volume_t *vol)
{
	HR_DEBUG("%s()", __func__);

	if (vol == NULL)
		return;

	hr_fpool_destroy(vol->fge);
	hr_fini_devs(vol);
	free(vol->in_mem_md);
	free(vol);
}
197
198errno_t hr_get_volume_svcs(size_t *rcnt, service_id_t **rsvcs)
199{
200 size_t i;
201 service_id_t *vol_svcs;
202
203 if (rcnt == NULL || rsvcs == NULL)
204 return EINVAL;
205
206 fibril_rwlock_read_lock(&hr_volumes_lock);
207
208 size_t vol_cnt = list_count(&hr_volumes);
209 vol_svcs = malloc(vol_cnt * sizeof(service_id_t));
210 if (vol_svcs == NULL) {
211 fibril_rwlock_read_unlock(&hr_volumes_lock);
212 return ENOMEM;
213 }
214
215 i = 0;
216 list_foreach(hr_volumes, lvolumes, hr_volume_t, iter)
217 vol_svcs[i++] = iter->svc_id;
218
219 fibril_rwlock_read_unlock(&hr_volumes_lock);
220
221 *rcnt = vol_cnt;
222 *rsvcs = vol_svcs;
223
224 return EOK;
225}
226
227hr_volume_t *hr_get_volume(service_id_t svc_id)
228{
229 HR_DEBUG("%s()", __func__);
230
231 hr_volume_t *rvol = NULL;
232
233 fibril_rwlock_read_lock(&hr_volumes_lock);
234 list_foreach(hr_volumes, lvolumes, hr_volume_t, iter) {
235 if (iter->svc_id == svc_id) {
236 rvol = iter;
237 break;
238 }
239 }
240 fibril_rwlock_read_unlock(&hr_volumes_lock);
241
242 return rvol;
243}
244
/** Deactivate and unregister a volume.
 *
 * Refuses with EBUSY while any client still has the block device
 * open. Metadata is flushed one last time before teardown.
 *
 * @param svc_id Service id of the volume to remove.
 *
 * @return EOK on success, ENOENT if no such volume, EBUSY if still
 *         open, or an error from loc_service_unregister().
 */
errno_t hr_remove_volume(service_id_t svc_id)
{
	HR_DEBUG("%s()", __func__);

	hr_volume_t *vol = hr_get_volume(svc_id);
	if (vol == NULL)
		return ENOENT;

	fibril_rwlock_write_lock(&hr_volumes_lock);

	int open_cnt = atomic_load_explicit(&vol->open_cnt,
	    memory_order_relaxed);

	/*
	 * The atomicity of this if condition (and this whole
	 * operation) is provided by the write lock - no new
	 * bd connection can come, because we need to get the
	 * bd_srvs_t from the volume, which we get from the list.
	 * (see hr_client_conn() in hr.c)
	 */
	if (open_cnt > 0) {
		fibril_rwlock_write_unlock(&hr_volumes_lock);
		return EBUSY;
	}

	list_remove(&vol->lvolumes);

	fibril_rwlock_write_unlock(&hr_volumes_lock);

	/* save metadata, but we don't care about states anymore */
	vol->meta_ops->save(vol, NO_STATE_CALLBACK);

	HR_NOTE("deactivating volume \"%s\"\n", vol->devname);

	/* Volume is off the list, so no one else can reach it now. */
	hr_destroy_vol_struct(vol);

	errno_t rc = loc_service_unregister(hr_srv, svc_id);
	return rc;
}
284
285errno_t hr_init_extents_from_cfg(hr_volume_t *vol, hr_config_t *cfg)
286{
287 HR_DEBUG("%s()", __func__);
288
289 errno_t rc;
290 uint64_t blkno, smallest_blkno = ~0ULL;
291 size_t i, bsize;
292 size_t last_bsize = 0;
293
294 for (i = 0; i < cfg->dev_no; i++) {
295 service_id_t svc_id = cfg->devs[i];
296 if (svc_id == 0) {
297 rc = EINVAL;
298 goto error;
299 }
300
301 HR_DEBUG("%s(): block_init() on (%" PRIun ")\n", __func__,
302 svc_id);
303 rc = block_init(svc_id);
304 if (rc != EOK) {
305 HR_DEBUG("%s(): initing (%" PRIun ") failed, "
306 "aborting\n", __func__, svc_id);
307 goto error;
308 }
309
310 rc = block_get_nblocks(svc_id, &blkno);
311 if (rc != EOK)
312 goto error;
313
314 rc = block_get_bsize(svc_id, &bsize);
315 if (rc != EOK)
316 goto error;
317
318 if (last_bsize != 0 && bsize != last_bsize) {
319 HR_DEBUG("block sizes differ\n");
320 rc = EINVAL;
321 goto error;
322 }
323
324 vol->extents[i].svc_id = svc_id;
325 vol->extents[i].state = HR_EXT_ONLINE;
326
327 if (blkno < smallest_blkno)
328 smallest_blkno = blkno;
329 last_bsize = bsize;
330 }
331
332 vol->bsize = last_bsize;
333 vol->extent_no = cfg->dev_no;
334 vol->truncated_blkno = smallest_blkno;
335
336 for (i = 0; i < HR_MAX_HOTSPARES; i++)
337 vol->hotspares[i].state = HR_EXT_MISSING;
338
339 return EOK;
340
341error:
342 for (i = 0; i < HR_MAX_EXTENTS; i++) {
343 if (vol->extents[i].svc_id != 0)
344 block_fini(vol->extents[i].svc_id);
345 }
346
347 return rc;
348}
349
350void hr_fini_devs(hr_volume_t *vol)
351{
352 HR_DEBUG("%s()", __func__);
353
354 size_t i;
355
356 for (i = 0; i < vol->extent_no; i++) {
357 if (vol->extents[i].svc_id != 0) {
358 HR_DEBUG("hr_fini_devs(): block_fini() on "
359 "(%" PRIun ")\n", vol->extents[i].svc_id);
360 block_fini(vol->extents[i].svc_id);
361 }
362 }
363
364 for (i = 0; i < vol->hotspare_no; i++) {
365 if (vol->hotspares[i].svc_id != 0) {
366 HR_DEBUG("hr_fini_devs(): block_fini() on "
367 "(%" PRIun ")\n",
368 vol->hotspares[i].svc_id);
369 block_fini(vol->hotspares[i].svc_id);
370 }
371 }
372}
373
374errno_t hr_register_volume(hr_volume_t *vol)
375{
376 HR_DEBUG("%s()", __func__);
377
378 errno_t rc;
379 service_id_t new_id;
380 category_id_t cat_id;
381 const char *devname = vol->devname;
382
383 rc = loc_service_register(hr_srv, devname, fallback_port_id, &new_id);
384 if (rc != EOK) {
385 HR_ERROR("unable to register device \"%s\": %s\n",
386 devname, str_error(rc));
387 return rc;
388 }
389
390 rc = loc_category_get_id("raid", &cat_id, IPC_FLAG_BLOCKING);
391 if (rc != EOK) {
392 HR_ERROR("failed resolving category \"raid\": %s\n",
393 str_error(rc));
394 goto error;
395 }
396
397 rc = loc_service_add_to_cat(hr_srv, new_id, cat_id);
398 if (rc != EOK) {
399 HR_ERROR("failed adding \"%s\" to category \"raid\": %s\n",
400 devname, str_error(rc));
401 goto error;
402 }
403
404 vol->svc_id = new_id;
405 return EOK;
406error:
407 rc = loc_service_unregister(hr_srv, new_id);
408 return rc;
409}
410
411errno_t hr_check_ba_range(hr_volume_t *vol, size_t cnt, uint64_t ba)
412{
413 if (ba + cnt > vol->data_blkno)
414 return ERANGE;
415 return EOK;
416}
417
418void hr_add_data_offset(hr_volume_t *vol, uint64_t *ba)
419{
420 *ba = *ba + vol->data_offset;
421}
422
423void hr_sub_data_offset(hr_volume_t *vol, uint64_t *ba)
424{
425 *ba = *ba - vol->data_offset;
426}
427
/** Set the state of extent ext_idx, logging the transition.
 *
 * Caller must write-hold states_lock; extents_lock must be held for
 * all levels except RAID 0 (which is exempted here — presumably it
 * never swaps extents; confirm against the RAID 0 code).
 */
void hr_update_ext_state(hr_volume_t *vol, size_t ext_idx, hr_ext_state_t s)
{
	if (vol->level != HR_LVL_0)
		assert(fibril_rwlock_is_locked(&vol->extents_lock));

	assert(fibril_rwlock_is_write_locked(&vol->states_lock));

	assert(ext_idx < vol->extent_no);

	hr_ext_state_t old = vol->extents[ext_idx].state;
	HR_DEBUG("\"%s\": changing extent %zu state: %s -> %s\n",
	    vol->devname, ext_idx, hr_get_ext_state_str(old),
	    hr_get_ext_state_str(s));
	vol->extents[ext_idx].state = s;
}
443
/** Set the state of hotspare hs_idx, logging the transition.
 *
 * Caller must hold hotspare_lock.
 */
void hr_update_hotspare_state(hr_volume_t *vol, size_t hs_idx,
    hr_ext_state_t s)
{
	assert(fibril_mutex_is_locked(&vol->hotspare_lock));

	assert(hs_idx < vol->hotspare_no);

	hr_ext_state_t old = vol->hotspares[hs_idx].state;
	HR_DEBUG("\"%s\": changing hotspare %zu state: %s -> %s\n",
	    vol->devname, hs_idx, hr_get_ext_state_str(old),
	    hr_get_ext_state_str(s));
	vol->hotspares[hs_idx].state = s;
}
457
/** Set the overall volume state, logging the transition.
 *
 * Caller must write-hold states_lock.
 */
void hr_update_vol_state(hr_volume_t *vol, hr_vol_state_t new)
{
	assert(fibril_rwlock_is_write_locked(&vol->states_lock));

	HR_NOTE("\"%s\": volume state changed: %s -> %s\n", vol->devname,
	    hr_get_vol_state_str(vol->state), hr_get_vol_state_str(new));
	vol->state = new;
}
466
/** Replace the backing service id of extent ext_idx, logging the change.
 *
 * Caller must write-hold extents_lock (RAID 0 exempted, as above).
 */
void hr_update_ext_svc_id(hr_volume_t *vol, size_t ext_idx, service_id_t new)
{
	if (vol->level != HR_LVL_0)
		assert(fibril_rwlock_is_write_locked(&vol->extents_lock));

	assert(ext_idx < vol->extent_no);

	service_id_t old = vol->extents[ext_idx].svc_id;
	HR_DEBUG("\"%s\": changing extent no. %zu svc_id: (%" PRIun ") -> "
	    "(%" PRIun ")\n", vol->devname, ext_idx, old, new);
	vol->extents[ext_idx].svc_id = new;
}
479
/** Replace the backing service id of hotspare hs_idx, logging the change.
 *
 * Caller must hold hotspare_lock.
 */
void hr_update_hotspare_svc_id(hr_volume_t *vol, size_t hs_idx,
    service_id_t new)
{
	assert(fibril_mutex_is_locked(&vol->hotspare_lock));

	assert(hs_idx < vol->hotspare_no);

	service_id_t old = vol->hotspares[hs_idx].svc_id;
	HR_DEBUG("\"%s\": changing hotspare no. %zu svc_id: (%" PRIun ") -> "
	    "(%" PRIun ")\n", vol->devname, hs_idx, old, new);
	vol->hotspares[hs_idx].svc_id = new;
}
492
493size_t hr_count_extents(hr_volume_t *vol, hr_ext_state_t state)
494{
495 if (vol->level != HR_LVL_0)
496 assert(fibril_rwlock_is_locked(&vol->extents_lock));
497 assert(fibril_rwlock_is_locked(&vol->states_lock));
498
499 size_t count = 0;
500 for (size_t i = 0; i < vol->extent_no; i++)
501 if (vol->extents[i].state == state)
502 count++;
503
504 return count;
505}
506
/** Acquire an exclusive lock over the block range [ba, ba + cnt).
 *
 * Blocks until no overlapping range lock is held. Never returns
 * NULL (allocation uses hr_malloc_waitok()). Release with
 * hr_range_lock_release().
 */
hr_range_lock_t *hr_range_lock_acquire(hr_volume_t *vol, uint64_t ba,
    uint64_t cnt)
{
	hr_range_lock_t *rl = hr_malloc_waitok(sizeof(hr_range_lock_t));

	rl->vol = vol;
	rl->off = ba;
	rl->len = cnt;

	/* Our own reference; dropped in hr_range_lock_release(). */
	rl->pending = 1;
	rl->ignore = false;

	link_initialize(&rl->link);
	fibril_mutex_initialize(&rl->lock);

	/* Held for the lifetime of the lock; waiters sleep on it. */
	fibril_mutex_lock(&rl->lock);

again:
	HR_RL_LIST_LOCK(vol);
	list_foreach(vol->range_lock_list, link, hr_range_lock_t, rlp) {
		if (rlp->ignore)
			continue;
		if (hr_range_lock_overlap(rlp, rl)) {
			rlp->pending++;

			HR_RL_LIST_UNLOCK(vol);

			/* Sleep until the owner releases rlp->lock. */
			fibril_mutex_lock(&rlp->lock);

			HR_RL_LIST_LOCK(vol);

			rlp->pending--;

			/*
			 * when ignore is set, after HR_RL_LIST_UNLOCK(),
			 * noone new is going to be able to start sleeping
			 * on the ignored range lock, only already waiting
			 * IOs will come through here
			 */
			rlp->ignore = true;

			fibril_mutex_unlock(&rlp->lock);

			/* Last waiter out frees the stale lock. */
			if (rlp->pending == 0) {
				list_remove(&rlp->link);
				free(rlp);
			}

			HR_RL_LIST_UNLOCK(vol);
			/* Rescan: other overlaps may exist. */
			goto again;
		}
	}

	list_append(&rl->link, &vol->range_lock_list);

	HR_RL_LIST_UNLOCK(vol);
	return rl;
}
565
/** Release a range lock obtained from hr_range_lock_acquire().
 *
 * Wakes any fibrils sleeping on the lock; the structure is freed by
 * whichever party drops the last pending reference. Safe with NULL.
 */
void hr_range_lock_release(hr_range_lock_t *rl)
{
	if (rl == NULL)
		return;

	HR_RL_LIST_LOCK(rl->vol);

	rl->pending--;

	/* Wake up the first waiter (if any). */
	fibril_mutex_unlock(&rl->lock);

	if (rl->pending == 0) {
		list_remove(&rl->link);
		free(rl);
	}

	HR_RL_LIST_UNLOCK(rl->vol);
}
584
585static bool hr_range_lock_overlap(hr_range_lock_t *rl1, hr_range_lock_t *rl2)
586{
587 uint64_t rl1_start = rl1->off;
588 uint64_t rl1_end = rl1->off + rl1->len - 1;
589 uint64_t rl2_start = rl2->off;
590 uint64_t rl2_end = rl2->off + rl2->len - 1;
591
592 /* one ends before the other starts */
593 if (rl1_end < rl2_start || rl2_end < rl1_start)
594 return false;
595
596 return true;
597}
598
/** Flag the volume's state as needing a metadata write-back. */
void hr_mark_vol_state_dirty(hr_volume_t *vol)
{
	atomic_store(&vol->state_dirty, true);
}
603
604static errno_t hr_add_svc_linked_to_list(list_t *list, service_id_t svc_id,
605 bool inited, void *md)
606{
607 HR_DEBUG("%s()", __func__);
608
609 errno_t rc = EOK;
610 struct dev_list_member *to_add;
611
612 if (list == NULL)
613 return EINVAL;
614
615 to_add = malloc(sizeof(struct dev_list_member));
616 if (to_add == NULL) {
617 rc = ENOMEM;
618 goto error;
619 }
620
621 to_add->svc_id = svc_id;
622 to_add->inited = inited;
623 to_add->fini = true;
624
625 if (md != NULL) {
626 to_add->md = md;
627 to_add->md_present = true;
628 } else {
629 to_add->md_present = false;
630 }
631
632 list_append(&to_add->link, list);
633
634error:
635 return rc;
636}
637
/** Free a device-list member together with its metadata blob (if any). */
static void free_dev_list_member(struct dev_list_member *p)
{
	HR_DEBUG("%s()", __func__);

	if (p->md_present)
		free(p->md);
	free(p);
}
646
647static void free_svc_id_list(list_t *list)
648{
649 HR_DEBUG("%s()", __func__);
650
651 struct dev_list_member *dev_id;
652 while (!list_empty(list)) {
653 dev_id = list_pop(list, struct dev_list_member, link);
654
655 free_dev_list_member(dev_id);
656 }
657}
658
/** Populate list with all disk partitions (and label-less disks).
 *
 * Queries vbd for every disk; labeled disks contribute their
 * partitions, unlabeled disks contribute themselves. On failure
 * the list is freed.
 *
 * @return EOK on success, or an error from the vbd layer / ENOMEM.
 */
static errno_t hr_fill_disk_part_svcs_list(list_t *list)
{
	HR_DEBUG("%s()", __func__);

	errno_t rc;
	size_t disk_count;
	service_id_t *disk_svcs = NULL;
	vbd_t *vbd = NULL;

	rc = vbd_create(&vbd);
	if (rc != EOK)
		goto error;

	rc = vbd_get_disks(vbd, &disk_svcs, &disk_count);
	if (rc != EOK)
		goto error;

	for (size_t i = 0; i < disk_count; i++) {
		vbd_disk_info_t disk_info;
		rc = vbd_disk_info(vbd, disk_svcs[i], &disk_info);
		if (rc != EOK)
			goto error;

		if (disk_info.ltype != lt_none) {
			/* Labeled disk: add each partition. */
			size_t part_count;
			service_id_t *part_ids = NULL;
			rc = vbd_label_get_parts(vbd, disk_svcs[i], &part_ids,
			    &part_count);
			if (rc != EOK)
				goto error;

			for (size_t j = 0; j < part_count; j++) {
				vbd_part_info_t part_info;
				rc = vbd_part_get_info(vbd, part_ids[j],
				    &part_info);
				if (rc != EOK) {
					free(part_ids);
					goto error;
				}

				rc = hr_add_svc_linked_to_list(list,
				    part_info.svc_id, false, NULL);
				if (rc != EOK) {
					free(part_ids);
					goto error;
				}
			}

			free(part_ids);

			/*
			 * vbd can detect some bogus label type, but
			 * no partitions. In that case we handle the
			 * svc_id as a label-less disk.
			 *
			 * This can happen when creating an exfat fs
			 * in FreeBSD for example.
			 */
			if (part_count == 0)
				disk_info.ltype = lt_none;
		}

		if (disk_info.ltype == lt_none) {
			rc = hr_add_svc_linked_to_list(list, disk_svcs[i],
			    false, NULL);
			if (rc != EOK)
				goto error;
		}
	}

	free(disk_svcs);
	vbd_destroy(vbd);
	return EOK;
error:
	free_svc_id_list(list);
	if (disk_svcs != NULL)
		free(disk_svcs);
	vbd_destroy(vbd);

	return rc;
}
740
/** block_init() every not-yet-inited member of list.
 *
 * Members whose device is already open elsewhere (EEXIST —
 * presumably part of an already active volume; confirm) are
 * silently dropped from the list. Any other error aborts.
 *
 * @return EOK on success, or the first hard block_init() error.
 */
static errno_t block_init_dev_list(list_t *list)
{
	HR_DEBUG("%s()", __func__);

	list_foreach_safe(*list, cur_link, next_link) {
		struct dev_list_member *iter;
		iter = list_get_instance(cur_link, struct dev_list_member,
		    link);

		if (iter->inited)
			continue;

		errno_t rc = block_init(iter->svc_id);

		/* already used in an active volume, remove from list */
		if (rc == EEXIST) {
			list_remove(cur_link);
			free_dev_list_member(iter);
			continue;
		}

		if (rc != EOK)
			return rc;

		iter->inited = true;
		iter->fini = true;
	}

	return EOK;
}
770
/** block_fini() every inited member of list that still wants it.
 *
 * Members with fini == false (their device was handed over to an
 * assembled volume) are left open.
 */
static void block_fini_dev_list(list_t *list)
{
	HR_DEBUG("%s()", __func__);

	list_foreach(*list, link, struct dev_list_member, iter) {
		if (iter->inited && iter->fini) {
			block_fini(iter->svc_id);
			iter->inited = false;
			iter->fini = false;
		}
	}
}
783
/** Collect devices from list whose metadata matches a reference device.
 *
 * A device matches when it carries metadata of the same type and the
 * same UUID as metadata_struct_main. Matches (with their parsed
 * metadata) are appended to rlist; svc_id itself is skipped.
 *
 * @param rlist                List to receive matching members.
 * @param list                 Candidate devices to scan.
 * @param svc_id               Reference device (excluded from scan).
 * @param type_main            Metadata type of the reference.
 * @param metadata_struct_main Parsed metadata of the reference.
 *
 * @return EOK on success; on error rlist is freed.
 */
static errno_t hr_util_get_matching_md_svcs_list(list_t *rlist, list_t *list,
    service_id_t svc_id, hr_metadata_type_t type_main,
    void *metadata_struct_main)
{
	HR_DEBUG("%s()", __func__);

	errno_t rc = EOK;

	hr_superblock_ops_t *meta_ops = hr_get_meta_type_ops(type_main);

	list_foreach(*list, link, struct dev_list_member, iter) {
		if (iter->svc_id == svc_id)
			continue;

		void *metadata_struct;
		hr_metadata_type_t type;

		rc = hr_find_metadata(iter->svc_id, &metadata_struct, &type);
		/* ENOFS: no recognizable metadata on this device. */
		if (rc == ENOFS)
			continue;
		if (rc != EOK)
			goto error;

		if (type != type_main) {
			free(metadata_struct);
			continue;
		}

		if (!meta_ops->compare_uuids(metadata_struct_main,
		    metadata_struct)) {
			free(metadata_struct);
			continue;
		}

		/* Ownership of metadata_struct moves into rlist. */
		rc = hr_add_svc_linked_to_list(rlist, iter->svc_id, true,
		    metadata_struct);
		if (rc != EOK)
			goto error;
	}

	return EOK;
error:
	free_svc_id_list(rlist);
	return rc;
}
829
830static errno_t hr_util_assemble_from_matching_list(list_t *list,
831 hr_metadata_type_t type, uint8_t vflags)
832{
833 HR_DEBUG("%s()", __func__);
834
835 errno_t rc = EOK;
836
837 hr_superblock_ops_t *meta_ops = hr_get_meta_type_ops(type);
838
839 link_t *memb_l = list_first(list);
840 struct dev_list_member *memb = list_get_instance(memb_l,
841 struct dev_list_member, link);
842
843 hr_level_t level = meta_ops->get_level(memb->md);
844 const char *devname = meta_ops->get_devname(memb->md);
845
846 hr_volume_t *vol;
847 rc = hr_create_vol_struct(&vol, level, devname, type, vflags);
848 if (rc != EOK)
849 return rc;
850
851 meta_ops->init_meta2vol(list, vol);
852 if (rc != EOK)
853 goto error;
854
855 rc = vol->hr_ops.create(vol);
856 if (rc != EOK)
857 goto error;
858
859 for (size_t e = 0; e < vol->extent_no; e++) {
860 if (vol->extents[e].svc_id == 0)
861 continue;
862 list_foreach(*list, link, struct dev_list_member, iter) {
863 if (iter->svc_id == vol->extents[e].svc_id)
864 iter->fini = false;
865 }
866 }
867
868 rc = hr_register_volume(vol);
869 if (rc != EOK)
870 goto error;
871
872 fibril_rwlock_write_lock(&hr_volumes_lock);
873 list_append(&vol->lvolumes, &hr_volumes);
874 fibril_rwlock_write_unlock(&hr_volumes_lock);
875
876 HR_NOTE("assembled volume \"%s\"\n", vol->devname);
877
878 return EOK;
879error:
880 /* let the caller fini the block svc list */
881 for (size_t e = 0; e < vol->extent_no; e++)
882 vol->extents[e].svc_id = 0;
883
884 hr_destroy_vol_struct(vol);
885
886 return rc;
887}
888
889static errno_t hr_fill_svcs_list_from_cfg(hr_config_t *cfg, list_t *list)
890{
891 HR_DEBUG("%s()", __func__);
892
893 errno_t rc = EOK;
894 for (size_t i = 0; i < cfg->dev_no; ++i) {
895 rc = hr_add_svc_linked_to_list(list, cfg->devs[i], false,
896 NULL);
897 if (rc != EOK)
898 goto error;
899 }
900
901 return EOK;
902error:
903 free_svc_id_list(list);
904 return rc;
905}
906
/** Try to assemble volumes from devices with on-disk metadata.
 *
 * With cfg == NULL all disks/partitions in the system are scanned;
 * otherwise only the devices named in cfg. Each device with valid
 * metadata seeds a volume; other devices with the same UUID are
 * pulled in, and assembly is attempted per group.
 *
 * @param cfg            Optional restriction to a device set.
 * @param rassembled_cnt If not NULL, receives the number of volumes
 *                       assembled (set even on error).
 *
 * @return EOK (individual assembly failures other than ENOMEM are
 *         swallowed), or a hard error.
 */
errno_t hr_util_try_assemble(hr_config_t *cfg, size_t *rassembled_cnt)
{
	HR_DEBUG("%s()", __func__);

	/*
	 * scan partitions or disks:
	 *
	 * When we find a metadata block with valid
	 * magic, take UUID and try to find other matching
	 * UUIDs.
	 *
	 * We ignore extents that are a part of already
	 * active volumes. (even when the counter is lower
	 * on active volumes... XXX: use timestamp as initial counter value
	 * when assembling, or writing dirty metadata?)
	 */

	size_t asm_cnt = 0;
	errno_t rc;
	list_t dev_id_list;
	uint8_t vflags = 0;

	list_initialize(&dev_id_list);

	if (cfg == NULL) {
		rc = hr_fill_disk_part_svcs_list(&dev_id_list);
	} else {
		rc = hr_fill_svcs_list_from_cfg(cfg, &dev_id_list);
		vflags = cfg->vol_flags;
	}

	if (rc != EOK)
		goto error;

	rc = block_init_dev_list(&dev_id_list);
	if (rc != EOK)
		goto error;

	struct dev_list_member *iter;
	while (!list_empty(&dev_id_list)) {
		iter = list_pop(&dev_id_list, struct dev_list_member, link);

		void *metadata_struct_main;
		hr_metadata_type_t type;

		/* ENOFS: no metadata here, not an error — skip device. */
		rc = hr_find_metadata(iter->svc_id, &metadata_struct_main, &type);
		if (rc == ENOFS) {
			block_fini(iter->svc_id);
			free_dev_list_member(iter);
			rc = EOK;
			continue;
		}

		if (rc != EOK) {
			block_fini(iter->svc_id);
			free_dev_list_member(iter);
			goto error;
		}

		char *svc_name = NULL;
		rc = loc_service_get_name(iter->svc_id, &svc_name);
		if (rc != EOK) {
			block_fini(iter->svc_id);
			free_dev_list_member(iter);
			goto error;
		}
		HR_DEBUG("found valid metadata on %s (type = %s), matching "
		    "other extents\n",
		    svc_name, hr_get_metadata_type_str(type));
		free(svc_name);

		/* Gather all devices sharing this volume's UUID. */
		list_t matching_svcs_list;
		list_initialize(&matching_svcs_list);

		rc = hr_util_get_matching_md_svcs_list(&matching_svcs_list,
		    &dev_id_list, iter->svc_id, type, metadata_struct_main);
		if (rc != EOK) {
			block_fini(iter->svc_id);
			free_dev_list_member(iter);
			goto error;
		}

		/* add current iter to list as well */
		rc = hr_add_svc_linked_to_list(&matching_svcs_list,
		    iter->svc_id, true, metadata_struct_main);
		if (rc != EOK) {
			block_fini(iter->svc_id);
			free_svc_id_list(&matching_svcs_list);
			goto error;
		}

		free_dev_list_member(iter);

		/* remove matching list members from dev_id_list */
		list_foreach(matching_svcs_list, link, struct dev_list_member,
		    iter2) {
			struct dev_list_member *to_remove;
			list_foreach_safe(dev_id_list, cur_link, next_link) {
				to_remove = list_get_instance(cur_link,
				    struct dev_list_member, link);
				if (to_remove->svc_id == iter2->svc_id) {
					list_remove(cur_link);
					free_dev_list_member(to_remove);
				}
			}
		}

		rc = hr_util_assemble_from_matching_list(&matching_svcs_list,
		    type, vflags);
		switch (rc) {
		case EOK:
			asm_cnt++;
			break;
		case ENOMEM:
			goto error;
		default:
			/* Assembly failure of one group is not fatal. */
			rc = EOK;
		}
		block_fini_dev_list(&matching_svcs_list);
		free_svc_id_list(&matching_svcs_list);
	}

error:
	if (rassembled_cnt != NULL)
		*rassembled_cnt = asm_cnt;

	block_fini_dev_list(&dev_id_list);
	free_svc_id_list(&dev_id_list);

	return rc;
}
1038
/** Attach a new hotspare device to the volume.
 *
 * Opens the device, verifies capacity and registers it in the
 * hotspare array; the volume state is marked dirty on success.
 *
 * @param vol      Volume to extend.
 * @param hotspare Service id of the hotspare device.
 *
 * @return EOK on success, ELIMIT if the hotspare array is full,
 *         EEXIST if already attached, EINVAL if too small, or an
 *         error from the block layer.
 */
errno_t hr_util_add_hotspare(hr_volume_t *vol, service_id_t hotspare)
{
	HR_DEBUG("%s()", __func__);

	errno_t rc = EOK;

	fibril_mutex_lock(&vol->hotspare_lock);

	if (vol->hotspare_no >= HR_MAX_HOTSPARES) {
		HR_ERROR("%s(): cannot add more hotspares "
		    "to \"%s\"\n", __func__, vol->devname);
		rc = ELIMIT;
		goto error;
	}

	for (size_t i = 0; i < vol->hotspare_no; i++) {
		if (vol->hotspares[i].svc_id == hotspare) {
			HR_ERROR("%s(): hotspare (%" PRIun ") already used in "
			    "%s\n", __func__, hotspare, vol->devname);
			rc = EEXIST;
			goto error;
		}
	}

	rc = block_init(hotspare);
	if (rc != EOK)
		goto error;

	uint64_t hs_blkno;
	rc = block_get_nblocks(hotspare, &hs_blkno);
	if (rc != EOK) {
		block_fini(hotspare);
		goto error;
	}

	/* Must cover the volume's (truncated) member size. */
	if (hs_blkno < vol->truncated_blkno) {
		HR_ERROR("%s(): hotspare (%" PRIun ") doesn't have enough "
		    "blocks\n", __func__, hotspare);

		rc = EINVAL;
		block_fini(hotspare);
		goto error;
	}

	size_t hs_idx = vol->hotspare_no;

	vol->hotspare_no++;

	hr_update_hotspare_svc_id(vol, hs_idx, hotspare);
	hr_update_hotspare_state(vol, hs_idx, HR_EXT_HOTSPARE);

	hr_mark_vol_state_dirty(vol);
error:
	fibril_mutex_unlock(&vol->hotspare_lock);
	return rc;
}
1095
/** XOR size bytes of src into dst, 64 bits at a time.
 *
 * A tail shorter than sizeof(uint64_t) is ignored — callers are
 * expected to pass multiples of 8 (block-sized buffers).
 */
void hr_raid5_xor(void *dst, const void *src, size_t size)
{
	uint64_t *d = dst;
	const uint64_t *s = src;
	size_t words = size / sizeof(uint64_t);

	for (size_t w = 0; w < words; w++)
		d[w] ^= s[w];
}
1105
/** Flush device caches of all usable extents and re-evaluate state.
 *
 * Sync failures (other than ENOTSUP) are reported through the
 * per-level ext_state_cb. Returns EIO if the volume ends up FAULTY.
 */
errno_t hr_sync_extents(hr_volume_t *vol)
{
	errno_t rc = EOK;

	fibril_rwlock_read_lock(&vol->extents_lock);
	for (size_t e = 0; e < vol->extent_no; e++) {
		fibril_rwlock_read_lock(&vol->states_lock);
		hr_ext_state_t s = vol->extents[e].state;
		fibril_rwlock_read_unlock(&vol->states_lock);

		service_id_t svc_id = vol->extents[e].svc_id;

		if (s == HR_EXT_ONLINE || s == HR_EXT_REBUILD) {
			/* NOTE: this inner rc deliberately shadows the
			 * outer one — per-extent errors only feed the
			 * state callback. */
			errno_t rc = hr_sync_cache(svc_id, 0, 0);
			if (rc != EOK && rc != ENOTSUP)
				vol->hr_ops.ext_state_cb(vol, e, rc);
		}
	}
	fibril_rwlock_read_unlock(&vol->extents_lock);

	vol->hr_ops.vol_state_eval(vol);

	fibril_rwlock_read_lock(&vol->states_lock);
	hr_vol_state_t s = vol->state;
	fibril_rwlock_read_unlock(&vol->states_lock);

	if (s == HR_VOL_FAULTY)
		rc = EIO;

	return rc;
}
1137
1138errno_t hr_init_rebuild(hr_volume_t *vol, size_t *rebuild_idx)
1139{
1140 HR_DEBUG("%s()", __func__);
1141
1142 errno_t rc = EOK;
1143 size_t bad = vol->extent_no;
1144
1145 if (vol->level == HR_LVL_0)
1146 return EINVAL;
1147
1148 fibril_rwlock_read_lock(&vol->states_lock);
1149 if (vol->state != HR_VOL_DEGRADED) {
1150 fibril_rwlock_read_unlock(&vol->states_lock);
1151 return EINVAL;
1152 }
1153 fibril_rwlock_read_unlock(&vol->states_lock);
1154
1155 fibril_rwlock_write_lock(&vol->extents_lock);
1156 fibril_rwlock_write_lock(&vol->states_lock);
1157 fibril_mutex_lock(&vol->hotspare_lock);
1158
1159 size_t rebuild = vol->extent_no;
1160 for (size_t i = 0; i < vol->extent_no; i++) {
1161 if (vol->extents[i].state == HR_EXT_REBUILD) {
1162 rebuild = i;
1163 break;
1164 }
1165 }
1166
1167 if (rebuild < vol->extent_no) {
1168 bad = rebuild;
1169 goto init_rebuild;
1170 }
1171
1172 size_t invalid = vol->extent_no;
1173 for (size_t i = 0; i < vol->extent_no; i++) {
1174 if (vol->extents[i].state == HR_EXT_INVALID) {
1175 invalid = i;
1176 break;
1177 }
1178 }
1179
1180 if (invalid < vol->extent_no) {
1181 bad = invalid;
1182 goto init_rebuild;
1183 }
1184
1185 for (size_t i = 0; i < vol->extent_no; i++) {
1186 if (vol->extents[i].state != HR_EXT_ONLINE) {
1187 bad = i;
1188 break;
1189 }
1190 }
1191
1192 if (bad == vol->extent_no || vol->hotspare_no == 0) {
1193 rc = EINVAL;
1194 goto error;
1195 }
1196
1197 size_t hotspare_idx = vol->hotspare_no - 1;
1198
1199 hr_ext_state_t hs_state = vol->hotspares[hotspare_idx].state;
1200 if (hs_state != HR_EXT_HOTSPARE) {
1201 HR_ERROR("hr_raid1_rebuild(): invalid hotspare"
1202 "state \"%s\", aborting rebuild\n",
1203 hr_get_ext_state_str(hs_state));
1204 rc = EINVAL;
1205 goto error;
1206 }
1207
1208 rc = hr_swap_hs(vol, bad, hotspare_idx);
1209 if (rc != EOK) {
1210 HR_ERROR("hr_raid1_rebuild(): swapping "
1211 "hotspare failed, aborting rebuild\n");
1212 goto error;
1213 }
1214
1215 hr_extent_t *rebuild_ext = &vol->extents[bad];
1216
1217 HR_DEBUG("hr_raid1_rebuild(): starting REBUILD on extent no. %zu "
1218 "(%" PRIun ")\n", bad, rebuild_ext->svc_id);
1219
1220init_rebuild:
1221 hr_update_ext_state(vol, bad, HR_EXT_REBUILD);
1222 hr_update_vol_state(vol, HR_VOL_REBUILD);
1223
1224 *rebuild_idx = bad;
1225error:
1226 fibril_mutex_unlock(&vol->hotspare_lock);
1227 fibril_rwlock_write_unlock(&vol->states_lock);
1228 fibril_rwlock_write_unlock(&vol->extents_lock);
1229
1230 return rc;
1231}
1232
/** Swap hotspare hs into the place of failed extent bad.
 *
 * The failed extent's device is closed; the hotspare slot is cleared
 * and the hotspare count decremented. Caller holds extents_lock
 * (write), states_lock (write) and hotspare_lock.
 *
 * @return EOK (currently cannot fail).
 */
static errno_t hr_swap_hs(hr_volume_t *vol, size_t bad, size_t hs)
{
	HR_DEBUG("%s()", __func__);

	service_id_t faulty_svc_id = vol->extents[bad].svc_id;
	service_id_t hs_svc_id = vol->hotspares[hs].svc_id;

	hr_update_ext_svc_id(vol, bad, hs_svc_id);
	hr_update_ext_state(vol, bad, HR_EXT_HOTSPARE);

	hr_update_hotspare_svc_id(vol, hs, 0);
	hr_update_hotspare_state(vol, hs, HR_EXT_MISSING);

	vol->hotspare_no--;

	if (faulty_svc_id != 0)
		block_fini(faulty_svc_id);

	return EOK;
}
1253
/** Return the largest power of two that is <= n (0 for n == 0). */
uint32_t hr_closest_pow2(uint32_t n)
{
	if (n == 0)
		return 0;

	/* Walk up to the position of the most significant set bit. */
	uint32_t pow = 1;
	while ((n >>= 1) != 0)
		pow <<= 1;

	return pow;
}
1266
1267/** @}
1268 */
Note: See TracBrowser for help on using the repository browser.