source: mainline/uspace/srv/bd/hr/util.c

Last change on this file was 73a2780, checked in by Miroslav Cimerman <mc@…>, 9 days ago

hr: remove old comments

  • Property mode set to 100644
File size: 28.2 KB
RevLine 
[da5c257]1/*
[36661772]2 * Copyright (c) 2025 Miroslav Cimerman
[da5c257]3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/** @addtogroup hr
30 * @{
31 */
32/**
33 * @file
34 */
35
[38e3c0a7]36#include <adt/list.h>
[da5c257]37#include <block.h>
38#include <errno.h>
[38e3c0a7]39#include <fibril_synch.h>
[da5c257]40#include <hr.h>
[ca7fa5b]41#include <inttypes.h>
[da5c257]42#include <io/log.h>
43#include <loc.h>
[8b51009]44#include <mem.h>
[d6fe2a1]45#include <stdatomic.h>
[44ea48e]46#include <stdlib.h>
47#include <stdio.h>
[baa4929]48#include <str.h>
[da5c257]49#include <str_error.h>
[8b51009]50#include <vbd.h>
[da5c257]51
[8b51009]52#include "io.h"
53#include "superblock.h"
[da5c257]54#include "util.h"
[b0f1366]55#include "var.h"
[da5c257]56
[6f13257]57static bool hr_range_lock_overlap(hr_range_lock_t *, hr_range_lock_t *);
58static errno_t hr_add_svc_linked_to_list(list_t *, service_id_t, bool, void *);
59static void free_dev_list_member(struct dev_list_member *);
60static void free_svc_id_list(list_t *);
61static errno_t hr_fill_disk_part_svcs_list(list_t *);
62static errno_t block_init_dev_list(list_t *);
63static void block_fini_dev_list(list_t *);
64static errno_t hr_util_get_matching_md_svcs_list(list_t *, list_t *,
[b883aa8]65 service_id_t, hr_metadata_type_t, void *);
[6f13257]66static errno_t hr_util_assemble_from_matching_list(list_t *,
[95ca19d]67 hr_metadata_type_t, uint8_t);
[6f13257]68static errno_t hr_fill_svcs_list_from_cfg(hr_config_t *, list_t *);
[09c195e8]69static errno_t hr_swap_hs(hr_volume_t *, size_t, size_t);
[6d0fc11]70
/*
 * Take / drop the mutex guarding a volume's list of range locks
 * (vol->range_lock_list_lock).
 */
#define HR_RL_LIST_LOCK(vol) \
	(fibril_mutex_lock(&(vol)->range_lock_list_lock))
#define HR_RL_LIST_UNLOCK(vol) \
	(fibril_mutex_unlock(&(vol)->range_lock_list_lock))
[38e3c0a7]74
[da5c257]75extern loc_srv_t *hr_srv;
[8b51009]76extern list_t hr_volumes;
77extern fibril_rwlock_t hr_volumes_lock;
78
/*
 * Allocation wrapper modelled on FreeBSD malloc(9) called with M_WAITOK:
 * instead of reporting failure, the call is retried until it succeeds.
 *
 * The returned pointer is never NULL.
 */
void *hr_malloc_waitok(size_t size)
{
	for (;;) {
		void *mem = malloc(size);
		if (mem != NULL)
			return mem;

		fibril_usleep(MSEC2USEC(250)); /* back off for 250ms */
	}
}
93
/*
 * calloc() counterpart of hr_malloc_waitok(): retries the zeroed
 * allocation every 250ms until it succeeds, so it never returns NULL.
 */
void *hr_calloc_waitok(size_t nmemb, size_t size)
{
	for (;;) {
		void *mem = calloc(nmemb, size);
		if (mem != NULL)
			return mem;

		fibril_usleep(MSEC2USEC(250)); /* back off for 250ms */
	}
}
102
[baa4929]103errno_t hr_create_vol_struct(hr_volume_t **rvol, hr_level_t level,
[95ca19d]104 const char *devname, hr_metadata_type_t metadata_type, uint8_t vflags)
[8b51009]105{
[59ec1c50]106 HR_DEBUG("%s()", __func__);
107
[8b51009]108 errno_t rc;
109
110 hr_volume_t *vol = calloc(1, sizeof(hr_volume_t));
111 if (vol == NULL)
112 return ENOMEM;
113
[baa4929]114 str_cpy(vol->devname, HR_DEVNAME_LEN, devname);
[8b51009]115 vol->level = level;
116
[95ca19d]117 vol->vflags = vflags;
118
[aa9bad8]119 vol->meta_ops = hr_get_meta_type_ops(metadata_type);
[50603405]120
[8b51009]121 switch (level) {
122 case HR_LVL_0:
123 vol->hr_ops.create = hr_raid0_create;
124 vol->hr_ops.init = hr_raid0_init;
[da80de9]125 vol->hr_ops.vol_state_eval = hr_raid0_vol_state_eval;
126 vol->hr_ops.ext_state_cb = hr_raid0_ext_state_cb;
[8b51009]127 break;
[50603405]128 case HR_LVL_1:
129 vol->hr_ops.create = hr_raid1_create;
130 vol->hr_ops.init = hr_raid1_init;
[da80de9]131 vol->hr_ops.vol_state_eval = hr_raid1_vol_state_eval;
132 vol->hr_ops.ext_state_cb = hr_raid1_ext_state_cb;
[50603405]133 break;
[8b51009]134 case HR_LVL_4:
135 case HR_LVL_5:
136 vol->hr_ops.create = hr_raid5_create;
137 vol->hr_ops.init = hr_raid5_init;
[da80de9]138 vol->hr_ops.vol_state_eval = hr_raid5_vol_state_eval;
139 vol->hr_ops.ext_state_cb = hr_raid5_ext_state_cb;
[8b51009]140 break;
141 default:
142 HR_DEBUG("unkown level: %d, aborting\n", vol->level);
143 rc = EINVAL;
144 goto error;
145 }
146
[a3486f2]147 if (level == HR_LVL_4 || level == HR_LVL_5)
148 vol->fge = hr_fpool_create(16, 32, sizeof(hr_io_raid5_t));
149 else
150 vol->fge = hr_fpool_create(16, 32, sizeof(hr_io_t));
151
[8b51009]152 if (vol->fge == NULL) {
153 rc = ENOMEM;
154 goto error;
155 }
156
[56602e0]157 vol->state = HR_VOL_NONE;
[8b51009]158
[800d188]159 fibril_mutex_initialize(&vol->md_lock);
160
[8b51009]161 fibril_rwlock_initialize(&vol->extents_lock);
162 fibril_rwlock_initialize(&vol->states_lock);
163
164 fibril_mutex_initialize(&vol->hotspare_lock);
165
166 list_initialize(&vol->range_lock_list);
167 fibril_mutex_initialize(&vol->range_lock_list_lock);
168
169 atomic_init(&vol->state_dirty, false);
[234212a]170 atomic_init(&vol->first_write, false);
[974f9ba]171 for (size_t i = 0; i < HR_MAX_EXTENTS; i++)
172 atomic_init(&vol->last_ext_pos_arr[i], 0);
173 atomic_init(&vol->last_ext_used, 0);
[7fba146]174 atomic_init(&vol->rebuild_blk, 0);
[8b51009]175 atomic_init(&vol->open_cnt, 0);
176
177 *rvol = vol;
178
179 return EOK;
180error:
181 free(vol);
182 return rc;
183}
184
185void hr_destroy_vol_struct(hr_volume_t *vol)
186{
[d1d355f]187 HR_DEBUG("%s()", __func__);
188
[8b51009]189 if (vol == NULL)
190 return;
191
192 hr_fpool_destroy(vol->fge);
193 hr_fini_devs(vol);
194 free(vol->in_mem_md);
195 free(vol);
196}
197
[9a3eec1]198errno_t hr_get_volume_svcs(size_t *rcnt, service_id_t **rsvcs)
199{
200 size_t i;
201 service_id_t *vol_svcs;
202
203 if (rcnt == NULL || rsvcs == NULL)
204 return EINVAL;
205
206 fibril_rwlock_read_lock(&hr_volumes_lock);
207
208 size_t vol_cnt = list_count(&hr_volumes);
209 vol_svcs = malloc(vol_cnt * sizeof(service_id_t));
210 if (vol_svcs == NULL) {
211 fibril_rwlock_read_unlock(&hr_volumes_lock);
212 return ENOMEM;
213 }
214
215 i = 0;
216 list_foreach(hr_volumes, lvolumes, hr_volume_t, iter)
217 vol_svcs[i++] = iter->svc_id;
218
219 fibril_rwlock_read_unlock(&hr_volumes_lock);
220
221 *rcnt = vol_cnt;
222 *rsvcs = vol_svcs;
223
224 return EOK;
225}
226
[8b51009]227hr_volume_t *hr_get_volume(service_id_t svc_id)
228{
[d1d355f]229 HR_DEBUG("%s()", __func__);
[8b51009]230
231 hr_volume_t *rvol = NULL;
232
233 fibril_rwlock_read_lock(&hr_volumes_lock);
234 list_foreach(hr_volumes, lvolumes, hr_volume_t, iter) {
235 if (iter->svc_id == svc_id) {
236 rvol = iter;
237 break;
238 }
239 }
240 fibril_rwlock_read_unlock(&hr_volumes_lock);
[c9ce6d22]241
[8b51009]242 return rvol;
243}
244
[c9ce6d22]245errno_t hr_remove_volume(service_id_t svc_id)
[8b51009]246{
[d1d355f]247 HR_DEBUG("%s()", __func__);
[8b51009]248
[c9ce6d22]249 hr_volume_t *vol = hr_get_volume(svc_id);
250 if (vol == NULL)
251 return ENOENT;
252
[8b51009]253 fibril_rwlock_write_lock(&hr_volumes_lock);
[800d188]254
[d1d355f]255 int open_cnt = atomic_load_explicit(&vol->open_cnt,
256 memory_order_relaxed);
[c9ce6d22]257
[d1d355f]258 /*
259 * The atomicity of this if condition (and this whole
260 * operation) is provided by the write lock - no new
261 * bd connection can come, because we need to get the
262 * bd_srvs_t from the volume, which we get from the list.
263 * (see hr_client_conn() in hr.c)
264 */
265 if (open_cnt > 0) {
266 fibril_rwlock_write_unlock(&hr_volumes_lock);
267 return EBUSY;
[8b51009]268 }
269
[d1d355f]270 list_remove(&vol->lvolumes);
271
[8b51009]272 fibril_rwlock_write_unlock(&hr_volumes_lock);
[d1d355f]273
274 /* save metadata, but we don't care about states anymore */
[e0695ce]275 vol->meta_ops->save(vol, NO_STATE_CALLBACK);
[d1d355f]276
277 HR_NOTE("deactivating volume \"%s\"\n", vol->devname);
278
279 hr_destroy_vol_struct(vol);
280
281 errno_t rc = loc_service_unregister(hr_srv, svc_id);
282 return rc;
[8b51009]283}
[da5c257]284
[baa4929]285errno_t hr_init_extents_from_cfg(hr_volume_t *vol, hr_config_t *cfg)
[da5c257]286{
[a57dde4]287 HR_DEBUG("%s()", __func__);
[da5c257]288
289 errno_t rc;
[80c760e]290 uint64_t blkno, smallest_blkno = ~0ULL;
[ca7fa5b]291 size_t i, bsize;
[baa4929]292 size_t last_bsize = 0;
[da5c257]293
[baa4929]294 for (i = 0; i < cfg->dev_no; i++) {
295 service_id_t svc_id = cfg->devs[i];
296 if (svc_id == 0) {
297 rc = EINVAL;
298 goto error;
[e47a032]299 }
[5d96f427]300
[ca7fa5b]301 HR_DEBUG("%s(): block_init() on (%" PRIun ")\n", __func__,
302 svc_id);
[baa4929]303 rc = block_init(svc_id);
[da5c257]304 if (rc != EOK) {
[14394141]305 HR_DEBUG("%s(): initing (%" PRIun ") failed, "
306 "aborting\n", __func__, svc_id);
[baa4929]307 goto error;
308 }
309
310 rc = block_get_nblocks(svc_id, &blkno);
311 if (rc != EOK)
312 goto error;
313
314 rc = block_get_bsize(svc_id, &bsize);
315 if (rc != EOK)
316 goto error;
317
318 if (last_bsize != 0 && bsize != last_bsize) {
319 HR_DEBUG("block sizes differ\n");
320 rc = EINVAL;
321 goto error;
[da5c257]322 }
[baa4929]323
324 vol->extents[i].svc_id = svc_id;
[56602e0]325 vol->extents[i].state = HR_EXT_ONLINE;
[baa4929]326
[80c760e]327 if (blkno < smallest_blkno)
328 smallest_blkno = blkno;
[baa4929]329 last_bsize = bsize;
[da5c257]330 }
331
[baa4929]332 vol->bsize = last_bsize;
333 vol->extent_no = cfg->dev_no;
[80c760e]334 vol->truncated_blkno = smallest_blkno;
[baa4929]335
[e494d7b]336 for (i = 0; i < HR_MAX_HOTSPARES; i++)
[56602e0]337 vol->hotspares[i].state = HR_EXT_MISSING;
[e494d7b]338
[baa4929]339 return EOK;
340
341error:
342 for (i = 0; i < HR_MAX_EXTENTS; i++) {
343 if (vol->extents[i].svc_id != 0)
344 block_fini(vol->extents[i].svc_id);
345 }
346
[da5c257]347 return rc;
348}
349
350void hr_fini_devs(hr_volume_t *vol)
351{
[a57dde4]352 HR_DEBUG("%s()", __func__);
[da5c257]353
354 size_t i;
355
[65706f1]356 for (i = 0; i < vol->extent_no; i++) {
[36661772]357 if (vol->extents[i].svc_id != 0) {
[14394141]358 HR_DEBUG("hr_fini_devs(): block_fini() on "
359 "(%" PRIun ")\n", vol->extents[i].svc_id);
[e47a032]360 block_fini(vol->extents[i].svc_id);
[5d96f427]361 }
362 }
[0437dd5]363
364 for (i = 0; i < vol->hotspare_no; i++) {
365 if (vol->hotspares[i].svc_id != 0) {
[14394141]366 HR_DEBUG("hr_fini_devs(): block_fini() on "
367 "(%" PRIun ")\n",
[0437dd5]368 vol->hotspares[i].svc_id);
369 block_fini(vol->hotspares[i].svc_id);
370 }
371 }
[da5c257]372}
373
[5d96f427]374errno_t hr_register_volume(hr_volume_t *vol)
[da5c257]375{
[a57dde4]376 HR_DEBUG("%s()", __func__);
[b0f1366]377
[da5c257]378 errno_t rc;
379 service_id_t new_id;
380 category_id_t cat_id;
[287b2ea]381 const char *devname = vol->devname;
[da5c257]382
[817cb83]383 rc = loc_service_register(hr_srv, devname, fallback_port_id, &new_id);
[da5c257]384 if (rc != EOK) {
[d199a6f]385 HR_ERROR("unable to register device \"%s\": %s\n",
[287b2ea]386 devname, str_error(rc));
[49de61c]387 return rc;
[da5c257]388 }
389
390 rc = loc_category_get_id("raid", &cat_id, IPC_FLAG_BLOCKING);
391 if (rc != EOK) {
[d199a6f]392 HR_ERROR("failed resolving category \"raid\": %s\n",
[5d96f427]393 str_error(rc));
[da5c257]394 goto error;
395 }
396
397 rc = loc_service_add_to_cat(hr_srv, new_id, cat_id);
398 if (rc != EOK) {
[d199a6f]399 HR_ERROR("failed adding \"%s\" to category \"raid\": %s\n",
[287b2ea]400 devname, str_error(rc));
[da5c257]401 goto error;
402 }
403
[5d96f427]404 vol->svc_id = new_id;
[49de61c]405 return EOK;
[da5c257]406error:
[49de61c]407 rc = loc_service_unregister(hr_srv, new_id);
[da5c257]408 return rc;
409}
410
[4a2a6b8b]411errno_t hr_check_ba_range(hr_volume_t *vol, size_t cnt, uint64_t ba)
[b0f1366]412{
[4a2a6b8b]413 if (ba + cnt > vol->data_blkno)
[b0f1366]414 return ERANGE;
[4a2a6b8b]415 return EOK;
416}
[b0f1366]417
[234212a]418void hr_add_data_offset(hr_volume_t *vol, uint64_t *ba)
[4a2a6b8b]419{
[b0f1366]420 *ba = *ba + vol->data_offset;
421}
422
[234212a]423void hr_sub_data_offset(hr_volume_t *vol, uint64_t *ba)
424{
425 *ba = *ba - vol->data_offset;
426}
427
[56602e0]428void hr_update_ext_state(hr_volume_t *vol, size_t ext_idx, hr_ext_state_t s)
[e47a032]429{
[36661772]430 if (vol->level != HR_LVL_0)
431 assert(fibril_rwlock_is_locked(&vol->extents_lock));
432
433 assert(fibril_rwlock_is_write_locked(&vol->states_lock));
434
[ca7fa5b]435 assert(ext_idx < vol->extent_no);
[7a3529a8]436
[56602e0]437 hr_ext_state_t old = vol->extents[ext_idx].state;
[a5a2dcf]438 HR_DEBUG("\"%s\": changing extent %zu state: %s -> %s\n",
[155d34f]439 vol->devname, ext_idx, hr_get_ext_state_str(old),
440 hr_get_ext_state_str(s));
[56602e0]441 vol->extents[ext_idx].state = s;
[e47a032]442}
443
[56602e0]444void hr_update_hotspare_state(hr_volume_t *vol, size_t hs_idx,
445 hr_ext_state_t s)
[a0c3080]446{
[36661772]447 assert(fibril_mutex_is_locked(&vol->hotspare_lock));
448
[ca7fa5b]449 assert(hs_idx < vol->hotspare_no);
[7a3529a8]450
[56602e0]451 hr_ext_state_t old = vol->hotspares[hs_idx].state;
[a5a2dcf]452 HR_DEBUG("\"%s\": changing hotspare %zu state: %s -> %s\n",
[155d34f]453 vol->devname, hs_idx, hr_get_ext_state_str(old),
454 hr_get_ext_state_str(s));
[56602e0]455 vol->hotspares[hs_idx].state = s;
[a0c3080]456}
457
[56602e0]458void hr_update_vol_state(hr_volume_t *vol, hr_vol_state_t new)
[a0c3080]459{
[36661772]460 assert(fibril_rwlock_is_write_locked(&vol->states_lock));
461
[a5a2dcf]462 HR_NOTE("\"%s\": volume state changed: %s -> %s\n", vol->devname,
[56602e0]463 hr_get_vol_state_str(vol->state), hr_get_vol_state_str(new));
464 vol->state = new;
[edc89bd8]465}
466
[ca7fa5b]467void hr_update_ext_svc_id(hr_volume_t *vol, size_t ext_idx, service_id_t new)
[edc89bd8]468{
469 if (vol->level != HR_LVL_0)
470 assert(fibril_rwlock_is_write_locked(&vol->extents_lock));
471
[ca7fa5b]472 assert(ext_idx < vol->extent_no);
[edc89bd8]473
[ca7fa5b]474 service_id_t old = vol->extents[ext_idx].svc_id;
[a5a2dcf]475 HR_DEBUG("\"%s\": changing extent no. %zu svc_id: (%" PRIun ") -> "
[ca7fa5b]476 "(%" PRIun ")\n", vol->devname, ext_idx, old, new);
477 vol->extents[ext_idx].svc_id = new;
[edc89bd8]478}
479
[ca7fa5b]480void hr_update_hotspare_svc_id(hr_volume_t *vol, size_t hs_idx,
481 service_id_t new)
[edc89bd8]482{
483 assert(fibril_mutex_is_locked(&vol->hotspare_lock));
484
[ca7fa5b]485 assert(hs_idx < vol->hotspare_no);
[edc89bd8]486
[ca7fa5b]487 service_id_t old = vol->hotspares[hs_idx].svc_id;
[a5a2dcf]488 HR_DEBUG("\"%s\": changing hotspare no. %zu svc_id: (%" PRIun ") -> "
[ca7fa5b]489 "(%" PRIun ")\n", vol->devname, hs_idx, old, new);
490 vol->hotspares[hs_idx].svc_id = new;
[a0c3080]491}
492
[56602e0]493size_t hr_count_extents(hr_volume_t *vol, hr_ext_state_t state)
[e76e12d8]494{
[36661772]495 if (vol->level != HR_LVL_0)
496 assert(fibril_rwlock_is_locked(&vol->extents_lock));
497 assert(fibril_rwlock_is_locked(&vol->states_lock));
498
[e76e12d8]499 size_t count = 0;
[38e3c0a7]500 for (size_t i = 0; i < vol->extent_no; i++)
[56602e0]501 if (vol->extents[i].state == state)
[e76e12d8]502 count++;
503
504 return count;
505}
506
[38e3c0a7]507hr_range_lock_t *hr_range_lock_acquire(hr_volume_t *vol, uint64_t ba,
508 uint64_t cnt)
509{
[81b4c795]510 hr_range_lock_t *rl = hr_malloc_waitok(sizeof(hr_range_lock_t));
[38e3c0a7]511
512 rl->vol = vol;
513 rl->off = ba;
514 rl->len = cnt;
515
516 rl->pending = 1;
517 rl->ignore = false;
518
519 link_initialize(&rl->link);
520 fibril_mutex_initialize(&rl->lock);
521
522 fibril_mutex_lock(&rl->lock);
523
524again:
525 HR_RL_LIST_LOCK(vol);
526 list_foreach(vol->range_lock_list, link, hr_range_lock_t, rlp) {
527 if (rlp->ignore)
528 continue;
529 if (hr_range_lock_overlap(rlp, rl)) {
530 rlp->pending++;
531
532 HR_RL_LIST_UNLOCK(vol);
533
534 fibril_mutex_lock(&rlp->lock);
535
536 HR_RL_LIST_LOCK(vol);
537
538 rlp->pending--;
539
540 /*
541 * when ignore is set, after HR_RL_LIST_UNLOCK(),
542 * noone new is going to be able to start sleeping
543 * on the ignored range lock, only already waiting
544 * IOs will come through here
545 */
546 rlp->ignore = true;
547
548 fibril_mutex_unlock(&rlp->lock);
549
550 if (rlp->pending == 0) {
551 list_remove(&rlp->link);
552 free(rlp);
553 }
554
555 HR_RL_LIST_UNLOCK(vol);
556 goto again;
557 }
558 }
559
560 list_append(&rl->link, &vol->range_lock_list);
561
562 HR_RL_LIST_UNLOCK(vol);
563 return rl;
564}
565
566void hr_range_lock_release(hr_range_lock_t *rl)
567{
[36661772]568 if (rl == NULL)
569 return;
570
[38e3c0a7]571 HR_RL_LIST_LOCK(rl->vol);
572
573 rl->pending--;
574
575 fibril_mutex_unlock(&rl->lock);
576
577 if (rl->pending == 0) {
578 list_remove(&rl->link);
579 free(rl);
580 }
581
582 HR_RL_LIST_UNLOCK(rl->vol);
583}
584
585static bool hr_range_lock_overlap(hr_range_lock_t *rl1, hr_range_lock_t *rl2)
586{
587 uint64_t rl1_start = rl1->off;
588 uint64_t rl1_end = rl1->off + rl1->len - 1;
589 uint64_t rl2_start = rl2->off;
590 uint64_t rl2_end = rl2->off + rl2->len - 1;
591
592 /* one ends before the other starts */
593 if (rl1_end < rl2_start || rl2_end < rl1_start)
594 return false;
595
596 return true;
597}
598
[d6fe2a1]599void hr_mark_vol_state_dirty(hr_volume_t *vol)
600{
[d2da1be]601 atomic_store(&vol->state_dirty, true);
[d6fe2a1]602}
603
[8b51009]604static errno_t hr_add_svc_linked_to_list(list_t *list, service_id_t svc_id,
[50603405]605 bool inited, void *md)
[8b51009]606{
[50603405]607 HR_DEBUG("%s()", __func__);
608
[8b51009]609 errno_t rc = EOK;
[50603405]610 struct dev_list_member *to_add;
[8b51009]611
[50603405]612 if (list == NULL)
613 return EINVAL;
614
615 to_add = malloc(sizeof(struct dev_list_member));
[8b51009]616 if (to_add == NULL) {
617 rc = ENOMEM;
618 goto error;
619 }
[50603405]620
[8b51009]621 to_add->svc_id = svc_id;
622 to_add->inited = inited;
[b127da2f]623 to_add->fini = true;
[8b51009]624
625 if (md != NULL) {
[50603405]626 to_add->md = md;
[8b51009]627 to_add->md_present = true;
628 } else {
629 to_add->md_present = false;
630 }
631
632 list_append(&to_add->link, list);
633
634error:
635 return rc;
636}
637
[50603405]638static void free_dev_list_member(struct dev_list_member *p)
[8b51009]639{
[50603405]640 HR_DEBUG("%s()", __func__);
641
[8b51009]642 if (p->md_present)
643 free(p->md);
644 free(p);
645}
646
647static void free_svc_id_list(list_t *list)
648{
[50603405]649 HR_DEBUG("%s()", __func__);
650
651 struct dev_list_member *dev_id;
[8b51009]652 while (!list_empty(list)) {
[50603405]653 dev_id = list_pop(list, struct dev_list_member, link);
[14394141]654
[50603405]655 free_dev_list_member(dev_id);
[8b51009]656 }
657}
658
659static errno_t hr_fill_disk_part_svcs_list(list_t *list)
660{
[50603405]661 HR_DEBUG("%s()", __func__);
662
[8b51009]663 errno_t rc;
664 size_t disk_count;
665 service_id_t *disk_svcs = NULL;
666 vbd_t *vbd = NULL;
667
668 rc = vbd_create(&vbd);
669 if (rc != EOK)
670 goto error;
671
672 rc = vbd_get_disks(vbd, &disk_svcs, &disk_count);
673 if (rc != EOK)
674 goto error;
675
676 for (size_t i = 0; i < disk_count; i++) {
677 vbd_disk_info_t disk_info;
678 rc = vbd_disk_info(vbd, disk_svcs[i], &disk_info);
679 if (rc != EOK)
680 goto error;
681
[e1ed6ec0]682 if (disk_info.ltype != lt_none) {
[8b51009]683 size_t part_count;
684 service_id_t *part_ids = NULL;
[50603405]685 rc = vbd_label_get_parts(vbd, disk_svcs[i], &part_ids,
686 &part_count);
[8b51009]687 if (rc != EOK)
688 goto error;
689
690 for (size_t j = 0; j < part_count; j++) {
691 vbd_part_info_t part_info;
[50603405]692 rc = vbd_part_get_info(vbd, part_ids[j],
693 &part_info);
[8b51009]694 if (rc != EOK) {
695 free(part_ids);
696 goto error;
697 }
698
699 rc = hr_add_svc_linked_to_list(list,
700 part_info.svc_id, false, NULL);
701 if (rc != EOK) {
702 free(part_ids);
703 goto error;
704 }
705 }
706
707 free(part_ids);
[e1ed6ec0]708
709 /*
710 * vbd can detect some bogus label type, but
711 * no partitions. In that case we handle the
712 * svc_id as a label-less disk.
713 *
714 * This can happen when creating an exfat fs
715 * in FreeBSD for example.
716 */
717 if (part_count == 0)
718 disk_info.ltype = lt_none;
719 }
720
721 if (disk_info.ltype == lt_none) {
722 rc = hr_add_svc_linked_to_list(list, disk_svcs[i],
723 false, NULL);
724 if (rc != EOK)
725 goto error;
[8b51009]726 }
727 }
728
729 free(disk_svcs);
730 vbd_destroy(vbd);
731 return EOK;
732error:
733 free_svc_id_list(list);
734 if (disk_svcs != NULL)
735 free(disk_svcs);
736 vbd_destroy(vbd);
737
738 return rc;
739}
740
741static errno_t block_init_dev_list(list_t *list)
742{
[50603405]743 HR_DEBUG("%s()", __func__);
744
[8b51009]745 list_foreach_safe(*list, cur_link, next_link) {
[50603405]746 struct dev_list_member *iter;
747 iter = list_get_instance(cur_link, struct dev_list_member,
748 link);
[8b51009]749
750 if (iter->inited)
751 continue;
752
753 errno_t rc = block_init(iter->svc_id);
754
755 if (rc == EEXIST) {
756 list_remove(cur_link);
[50603405]757 free_dev_list_member(iter);
[8b51009]758 continue;
759 }
760
761 if (rc != EOK)
762 return rc;
763
764 iter->inited = true;
[80c760e]765 iter->fini = true;
[8b51009]766 }
767
768 return EOK;
769}
770
771static void block_fini_dev_list(list_t *list)
772{
[50603405]773 HR_DEBUG("%s()", __func__);
774
775 list_foreach(*list, link, struct dev_list_member, iter) {
[80c760e]776 if (iter->inited && iter->fini) {
[8b51009]777 block_fini(iter->svc_id);
778 iter->inited = false;
[80c760e]779 iter->fini = false;
[8b51009]780 }
781 }
782}
783
[50603405]784static errno_t hr_util_get_matching_md_svcs_list(list_t *rlist, list_t *list,
[b883aa8]785 service_id_t svc_id, hr_metadata_type_t type_main,
786 void *metadata_struct_main)
[8b51009]787{
[50603405]788 HR_DEBUG("%s()", __func__);
789
[8b51009]790 errno_t rc = EOK;
791
[aa9bad8]792 hr_superblock_ops_t *meta_ops = hr_get_meta_type_ops(type_main);
[58c43d4]793
[50603405]794 list_foreach(*list, link, struct dev_list_member, iter) {
[8b51009]795 if (iter->svc_id == svc_id)
796 continue;
797
[50603405]798 void *metadata_struct;
[b883aa8]799 hr_metadata_type_t type;
[8b51009]800
[aa9bad8]801 rc = hr_find_metadata(iter->svc_id, &metadata_struct, &type);
[50603405]802 if (rc == ENOFS)
[8b51009]803 continue;
[50603405]804 if (rc != EOK)
805 goto error;
[8b51009]806
[58c43d4]807 if (type != type_main) {
808 free(metadata_struct);
809 continue;
810 }
[8b51009]811
[50603405]812 if (!meta_ops->compare_uuids(metadata_struct_main,
813 metadata_struct)) {
814 free(metadata_struct);
815 continue;
816 }
[8b51009]817
[50603405]818 rc = hr_add_svc_linked_to_list(rlist, iter->svc_id, true,
819 metadata_struct);
[8b51009]820 if (rc != EOK)
821 goto error;
822 }
823
824 return EOK;
825error:
826 free_svc_id_list(rlist);
827 return rc;
828}
829
[50603405]830static errno_t hr_util_assemble_from_matching_list(list_t *list,
[95ca19d]831 hr_metadata_type_t type, uint8_t vflags)
[8b51009]832{
833 HR_DEBUG("%s()", __func__);
834
835 errno_t rc = EOK;
836
[aa9bad8]837 hr_superblock_ops_t *meta_ops = hr_get_meta_type_ops(type);
[8b51009]838
[50603405]839 link_t *memb_l = list_first(list);
840 struct dev_list_member *memb = list_get_instance(memb_l,
841 struct dev_list_member, link);
[8b51009]842
[50603405]843 hr_level_t level = meta_ops->get_level(memb->md);
844 const char *devname = meta_ops->get_devname(memb->md);
[8b51009]845
846 hr_volume_t *vol;
[95ca19d]847 rc = hr_create_vol_struct(&vol, level, devname, type, vflags);
[8b51009]848 if (rc != EOK)
[59ec1c50]849 return rc;
[8b51009]850
[50603405]851 meta_ops->init_meta2vol(list, vol);
[93ea452]852 if (rc != EOK)
853 goto error;
[44da6c8]854
[8b51009]855 rc = vol->hr_ops.create(vol);
856 if (rc != EOK)
857 goto error;
858
[b127da2f]859 for (size_t e = 0; e < vol->extent_no; e++) {
860 if (vol->extents[e].svc_id == 0)
861 continue;
862 list_foreach(*list, link, struct dev_list_member, iter) {
863 if (iter->svc_id == vol->extents[e].svc_id)
864 iter->fini = false;
865 }
866 }
867
[8a65373]868 rc = hr_register_volume(vol);
[d1d355f]869 if (rc != EOK)
[8a65373]870 goto error;
[8b51009]871
[d1d355f]872 fibril_rwlock_write_lock(&hr_volumes_lock);
[8b51009]873 list_append(&vol->lvolumes, &hr_volumes);
874 fibril_rwlock_write_unlock(&hr_volumes_lock);
875
[d1d355f]876 HR_NOTE("assembled volume \"%s\"\n", vol->devname);
877
[8b51009]878 return EOK;
879error:
[b127da2f]880 /* let the caller fini the block svc list */
881 for (size_t e = 0; e < vol->extent_no; e++)
882 vol->extents[e].svc_id = 0;
883
[8b51009]884 hr_destroy_vol_struct(vol);
[b127da2f]885
[8b51009]886 return rc;
887}
888
[d082801]889static errno_t hr_fill_svcs_list_from_cfg(hr_config_t *cfg, list_t *list)
890{
[50603405]891 HR_DEBUG("%s()", __func__);
892
[d082801]893 errno_t rc = EOK;
894 for (size_t i = 0; i < cfg->dev_no; ++i) {
[50603405]895 rc = hr_add_svc_linked_to_list(list, cfg->devs[i], false,
896 NULL);
[d082801]897 if (rc != EOK)
898 goto error;
899 }
900
901 return EOK;
902error:
903 free_svc_id_list(list);
904 return rc;
905}
906
907errno_t hr_util_try_assemble(hr_config_t *cfg, size_t *rassembled_cnt)
[8b51009]908{
909 HR_DEBUG("%s()", __func__);
910
911 /*
912 * scan partitions or disks:
913 *
914 * When we find a metadata block with valid
915 * magic, take UUID and try to find other matching
916 * UUIDs.
917 *
918 * We ignore extents that are a part of already
919 * active volumes. (even when the counter is lower
920 * on active volumes... XXX: use timestamp as initial counter value
921 * when assembling, or writing dirty metadata?)
922 */
923
924 size_t asm_cnt = 0;
925 errno_t rc;
926 list_t dev_id_list;
[95ca19d]927 uint8_t vflags = 0;
[8b51009]928
929 list_initialize(&dev_id_list);
[d082801]930
[95ca19d]931 if (cfg == NULL) {
[d082801]932 rc = hr_fill_disk_part_svcs_list(&dev_id_list);
[95ca19d]933 } else {
[d082801]934 rc = hr_fill_svcs_list_from_cfg(cfg, &dev_id_list);
[95ca19d]935 vflags = cfg->vol_flags;
936 }
[d082801]937
[8b51009]938 if (rc != EOK)
939 goto error;
940
941 rc = block_init_dev_list(&dev_id_list);
942 if (rc != EOK)
943 goto error;
944
[50603405]945 struct dev_list_member *iter;
[8b51009]946 while (!list_empty(&dev_id_list)) {
[50603405]947 iter = list_pop(&dev_id_list, struct dev_list_member, link);
[8b51009]948
[50603405]949 void *metadata_struct_main;
[b883aa8]950 hr_metadata_type_t type;
[8b51009]951
[aa9bad8]952 rc = hr_find_metadata(iter->svc_id, &metadata_struct_main, &type);
[50603405]953 if (rc == ENOFS) {
[8b51009]954 block_fini(iter->svc_id);
[50603405]955 free_dev_list_member(iter);
956 rc = EOK;
[8b51009]957 continue;
958 }
959
[b127da2f]960 if (rc != EOK) {
961 block_fini(iter->svc_id);
962 free_dev_list_member(iter);
[50603405]963 goto error;
[b127da2f]964 }
[8b51009]965
966 char *svc_name = NULL;
967 rc = loc_service_get_name(iter->svc_id, &svc_name);
[b127da2f]968 if (rc != EOK) {
969 block_fini(iter->svc_id);
970 free_dev_list_member(iter);
[8b51009]971 goto error;
[b127da2f]972 }
[b883aa8]973 HR_DEBUG("found valid metadata on %s (type = %s), matching "
974 "other extents\n",
975 svc_name, hr_get_metadata_type_str(type));
[8b51009]976 free(svc_name);
977
978 list_t matching_svcs_list;
979 list_initialize(&matching_svcs_list);
980
981 rc = hr_util_get_matching_md_svcs_list(&matching_svcs_list,
[50603405]982 &dev_id_list, iter->svc_id, type, metadata_struct_main);
[b127da2f]983 if (rc != EOK) {
984 block_fini(iter->svc_id);
985 free_dev_list_member(iter);
[8b51009]986 goto error;
[b127da2f]987 }
[8b51009]988
989 /* add current iter to list as well */
990 rc = hr_add_svc_linked_to_list(&matching_svcs_list,
[50603405]991 iter->svc_id, true, metadata_struct_main);
[8b51009]992 if (rc != EOK) {
[b127da2f]993 block_fini(iter->svc_id);
[8b51009]994 free_svc_id_list(&matching_svcs_list);
995 goto error;
996 }
997
[b127da2f]998 free_dev_list_member(iter);
999
[8b51009]1000 /* remove matching list members from dev_id_list */
[50603405]1001 list_foreach(matching_svcs_list, link, struct dev_list_member,
[8b51009]1002 iter2) {
[50603405]1003 struct dev_list_member *to_remove;
[8b51009]1004 list_foreach_safe(dev_id_list, cur_link, next_link) {
1005 to_remove = list_get_instance(cur_link,
[50603405]1006 struct dev_list_member, link);
[8b51009]1007 if (to_remove->svc_id == iter2->svc_id) {
1008 list_remove(cur_link);
[50603405]1009 free_dev_list_member(to_remove);
[8b51009]1010 }
1011 }
1012 }
1013
[50603405]1014 rc = hr_util_assemble_from_matching_list(&matching_svcs_list,
[95ca19d]1015 type, vflags);
[8b51009]1016 switch (rc) {
1017 case EOK:
1018 asm_cnt++;
1019 break;
[80c760e]1020 case ENOMEM:
[8b51009]1021 goto error;
[80c760e]1022 default:
1023 rc = EOK;
[8b51009]1024 }
[80c760e]1025 block_fini_dev_list(&matching_svcs_list);
[8b51009]1026 free_svc_id_list(&matching_svcs_list);
1027 }
1028
1029error:
1030 if (rassembled_cnt != NULL)
1031 *rassembled_cnt = asm_cnt;
1032
1033 block_fini_dev_list(&dev_id_list);
1034 free_svc_id_list(&dev_id_list);
1035
1036 return rc;
1037}
1038
[56214383]1039errno_t hr_util_add_hotspare(hr_volume_t *vol, service_id_t hotspare)
1040{
1041 HR_DEBUG("%s()", __func__);
1042
1043 errno_t rc = EOK;
1044
1045 fibril_mutex_lock(&vol->hotspare_lock);
1046
1047 if (vol->hotspare_no >= HR_MAX_HOTSPARES) {
1048 HR_ERROR("%s(): cannot add more hotspares "
1049 "to \"%s\"\n", __func__, vol->devname);
1050 rc = ELIMIT;
[baa4929]1051 goto error;
[56214383]1052 }
1053
[c2f0160]1054 for (size_t i = 0; i < vol->hotspare_no; i++) {
1055 if (vol->hotspares[i].svc_id == hotspare) {
1056 HR_ERROR("%s(): hotspare (%" PRIun ") already used in "
1057 "%s\n", __func__, hotspare, vol->devname);
1058 rc = EEXIST;
1059 goto error;
1060 }
1061 }
1062
[56214383]1063 rc = block_init(hotspare);
1064 if (rc != EOK)
[baa4929]1065 goto error;
1066
[ca7fa5b]1067 uint64_t hs_blkno;
[baa4929]1068 rc = block_get_nblocks(hotspare, &hs_blkno);
1069 if (rc != EOK) {
1070 block_fini(hotspare);
1071 goto error;
1072 }
1073
[2f21cd4]1074 if (hs_blkno < vol->truncated_blkno) {
[c2f0160]1075 HR_ERROR("%s(): hotspare (%" PRIun ") doesn't have enough "
1076 "blocks\n", __func__, hotspare);
1077
[baa4929]1078 rc = EINVAL;
1079 block_fini(hotspare);
1080 goto error;
1081 }
[56214383]1082
1083 size_t hs_idx = vol->hotspare_no;
1084
1085 vol->hotspare_no++;
1086
1087 hr_update_hotspare_svc_id(vol, hs_idx, hotspare);
[56602e0]1088 hr_update_hotspare_state(vol, hs_idx, HR_EXT_HOTSPARE);
[56214383]1089
1090 hr_mark_vol_state_dirty(vol);
[baa4929]1091error:
[56214383]1092 fibril_mutex_unlock(&vol->hotspare_lock);
1093 return rc;
1094}
1095
/** XOR a source buffer into a destination buffer.
 *
 * Computes dst[i] ^= src[i] for every one of the @a size bytes. The
 * bulk is processed in 64-bit words; any remaining bytes (when
 * @a size is not a multiple of 8) are processed one at a time.
 *
 * NOTE(review): as before, the word loop accesses both buffers through
 * uint64_t pointers, so they are expected to be suitably aligned -
 * confirm against callers.
 *
 * @param dst  Destination buffer, updated in place.
 * @param src  Source buffer.
 * @param size Number of bytes to process.
 */
void hr_raid5_xor(void *dst, const void *src, size_t size)
{
	size_t i;
	size_t words = size / sizeof(uint64_t);
	uint64_t *d = dst;
	const uint64_t *s = src;

	for (i = 0; i < words; ++i)
		*d++ ^= *s++;

	/* trailing bytes that do not fill a whole word */
	uint8_t *dtail = (uint8_t *) d;
	const uint8_t *stail = (const uint8_t *) s;

	for (i = words * sizeof(uint64_t); i < size; ++i)
		*dtail++ ^= *stail++;
}
1105
[137f7cf5]1106errno_t hr_sync_extents(hr_volume_t *vol)
1107{
1108 errno_t rc = EOK;
1109
1110 fibril_rwlock_read_lock(&vol->extents_lock);
1111 for (size_t e = 0; e < vol->extent_no; e++) {
1112 fibril_rwlock_read_lock(&vol->states_lock);
1113 hr_ext_state_t s = vol->extents[e].state;
1114 fibril_rwlock_read_unlock(&vol->states_lock);
1115
1116 service_id_t svc_id = vol->extents[e].svc_id;
1117
1118 if (s == HR_EXT_ONLINE || s == HR_EXT_REBUILD) {
1119 errno_t rc = hr_sync_cache(svc_id, 0, 0);
1120 if (rc != EOK && rc != ENOTSUP)
1121 vol->hr_ops.ext_state_cb(vol, e, rc);
1122 }
1123 }
1124 fibril_rwlock_read_unlock(&vol->extents_lock);
1125
1126 vol->hr_ops.vol_state_eval(vol);
1127
1128 fibril_rwlock_read_lock(&vol->states_lock);
1129 hr_vol_state_t s = vol->state;
1130 fibril_rwlock_read_unlock(&vol->states_lock);
1131
1132 if (s == HR_VOL_FAULTY)
1133 rc = EIO;
1134
1135 return rc;
1136}
1137
[09c195e8]1138errno_t hr_init_rebuild(hr_volume_t *vol, size_t *rebuild_idx)
1139{
[6aafb48]1140 HR_DEBUG("%s()", __func__);
1141
[09c195e8]1142 errno_t rc = EOK;
[e0695ce]1143 size_t bad = vol->extent_no;
[09c195e8]1144
1145 if (vol->level == HR_LVL_0)
1146 return EINVAL;
1147
[6aafb48]1148 fibril_rwlock_read_lock(&vol->states_lock);
1149 if (vol->state != HR_VOL_DEGRADED) {
1150 fibril_rwlock_read_unlock(&vol->states_lock);
1151 return EINVAL;
1152 }
1153 fibril_rwlock_read_unlock(&vol->states_lock);
1154
[09c195e8]1155 fibril_rwlock_write_lock(&vol->extents_lock);
1156 fibril_rwlock_write_lock(&vol->states_lock);
1157 fibril_mutex_lock(&vol->hotspare_lock);
1158
[e0695ce]1159 size_t rebuild = vol->extent_no;
[09c195e8]1160 for (size_t i = 0; i < vol->extent_no; i++) {
[e0695ce]1161 if (vol->extents[i].state == HR_EXT_REBUILD) {
1162 rebuild = i;
[09c195e8]1163 break;
1164 }
1165 }
1166
[e0695ce]1167 if (rebuild < vol->extent_no) {
1168 bad = rebuild;
1169 goto init_rebuild;
1170 }
[09c195e8]1171
1172 size_t invalid = vol->extent_no;
1173 for (size_t i = 0; i < vol->extent_no; i++) {
1174 if (vol->extents[i].state == HR_EXT_INVALID) {
1175 invalid = i;
1176 break;
1177 }
1178 }
1179
[e0695ce]1180 if (invalid < vol->extent_no) {
[09c195e8]1181 bad = invalid;
[e0695ce]1182 goto init_rebuild;
1183 }
[09c195e8]1184
[e0695ce]1185 for (size_t i = 0; i < vol->extent_no; i++) {
1186 if (vol->extents[i].state != HR_EXT_ONLINE) {
1187 bad = i;
1188 break;
1189 }
1190 }
[09c195e8]1191
[e0695ce]1192 if (bad == vol->extent_no || vol->hotspare_no == 0) {
1193 rc = EINVAL;
[09c195e8]1194 goto error;
[e0695ce]1195 }
[09c195e8]1196
[e0695ce]1197 size_t hotspare_idx = vol->hotspare_no - 1;
[09c195e8]1198
[e0695ce]1199 hr_ext_state_t hs_state = vol->hotspares[hotspare_idx].state;
1200 if (hs_state != HR_EXT_HOTSPARE) {
1201 HR_ERROR("hr_raid1_rebuild(): invalid hotspare"
1202 "state \"%s\", aborting rebuild\n",
1203 hr_get_ext_state_str(hs_state));
1204 rc = EINVAL;
1205 goto error;
1206 }
[09c195e8]1207
[e0695ce]1208 rc = hr_swap_hs(vol, bad, hotspare_idx);
1209 if (rc != EOK) {
1210 HR_ERROR("hr_raid1_rebuild(): swapping "
1211 "hotspare failed, aborting rebuild\n");
1212 goto error;
[09c195e8]1213 }
1214
1215 hr_extent_t *rebuild_ext = &vol->extents[bad];
1216
1217 HR_DEBUG("hr_raid1_rebuild(): starting REBUILD on extent no. %zu "
1218 "(%" PRIun ")\n", bad, rebuild_ext->svc_id);
1219
[e0695ce]1220init_rebuild:
[09c195e8]1221 hr_update_ext_state(vol, bad, HR_EXT_REBUILD);
1222 hr_update_vol_state(vol, HR_VOL_REBUILD);
1223
1224 *rebuild_idx = bad;
1225error:
1226 fibril_mutex_unlock(&vol->hotspare_lock);
1227 fibril_rwlock_write_unlock(&vol->states_lock);
1228 fibril_rwlock_write_unlock(&vol->extents_lock);
1229
1230 return rc;
1231}
1232
1233static errno_t hr_swap_hs(hr_volume_t *vol, size_t bad, size_t hs)
1234{
1235 HR_DEBUG("%s()", __func__);
1236
1237 service_id_t faulty_svc_id = vol->extents[bad].svc_id;
1238 service_id_t hs_svc_id = vol->hotspares[hs].svc_id;
1239
1240 hr_update_ext_svc_id(vol, bad, hs_svc_id);
1241 hr_update_ext_state(vol, bad, HR_EXT_HOTSPARE);
1242
1243 hr_update_hotspare_svc_id(vol, hs, 0);
1244 hr_update_hotspare_state(vol, hs, HR_EXT_MISSING);
1245
1246 vol->hotspare_no--;
1247
1248 if (faulty_svc_id != 0)
1249 block_fini(faulty_svc_id);
1250
1251 return EOK;
1252}
1253
/** Round a 32-bit value down to a power of two.
 *
 * @param n Input value.
 *
 * @return The largest power of two that is less than or equal to @a n,
 *         or 0 when @a n is 0.
 */
uint32_t hr_closest_pow2(uint32_t n)
{
	if (n == 0)
		return 0;

	/* smear the most significant set bit into all lower positions */
	for (unsigned shift = 1; shift < 32; shift <<= 1)
		n |= n >> shift;

	/* n is now 2^(k+1) - 1; keep only the top bit */
	return n ^ (n >> 1);
}
1266
[da5c257]1267/** @}
1268 */
Note: See TracBrowser for help on using the repository browser.