Changeset 58d82fa in mainline
- Timestamp: 2025-01-12T16:53:10Z (5 months ago)
- Children: 5ee041e
- Parents: ee47537
- git-author: Miroslav Cimerman <mc@…> (2025-01-12 16:49:51)
- git-committer: Miroslav Cimerman <mc@…> (2025-01-12 16:53:10)
- Location: uspace/srv/bd/hr
- Files: 3 edited
uspace/srv/bd/hr/hr.c (ee47537 → 58d82fa)

@@ -44 +44 @@
 #include <loc.h>
 #include <task.h>
+#include <stdatomic.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -254 +255 @@
     fibril_rwlock_initialize(&new_volume->states_lock);

+    fibril_mutex_initialize(&new_volume->hotspare_lock);
+
     list_initialize(&new_volume->range_lock_list);
     fibril_mutex_initialize(&new_volume->range_lock_list_lock);
+
+    atomic_init(&new_volume->rebuild_blk, 0);

     rc = new_volume->hr_ops.create(new_volume);
@@ -310 +315 @@
         rc = loc_service_unregister(hr_srv, svc_id);
     } else {
-        /* fibril safe for now */
-        fibril_mutex_lock(&vol->lock);
+        fibril_rwlock_write_lock(&vol->states_lock);
+        fibril_rwlock_read_lock(&vol->extents_lock);
         hr_update_ext_status(vol, fail_extent, HR_EXT_FAILED);
-        fibril_mutex_unlock(&vol->lock);
+        fibril_rwlock_read_unlock(&vol->extents_lock);
+        fibril_rwlock_write_unlock(&vol->states_lock);

         vol->hr_ops.status_event(vol);
uspace/srv/bd/hr/raid1.c (ee47537 → 58d82fa)

@@ -1 +1 @@
 /*
- * Copyright (c) 2024 Miroslav Cimerman
+ * Copyright (c) 2025 Miroslav Cimerman
  * All rights reserved.
  *
@@ -43 +43 @@
 #include <loc.h>
 #include <task.h>
+#include <stdatomic.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <str_error.h>

+#include "fge.h"
+#include "io.h"
 #include "superblock.h"
 #include "util.h"
@@ -53 +56 @@
 extern loc_srv_t *hr_srv;

-static errno_t hr_raid1_check_vol_status(hr_volume_t *);
-static errno_t hr_raid1_update_vol_status(hr_volume_t *);
-static void hr_raid1_handle_extent_error(hr_volume_t *, size_t, errno_t);
+static void hr_raid1_update_vol_status(hr_volume_t *);
+static void hr_raid1_ext_state_callback(hr_volume_t *, size_t, errno_t);
+static size_t hr_raid1_count_good_extents(hr_volume_t *, uint64_t, size_t,
+    uint64_t);
 static errno_t hr_raid1_bd_op(hr_bd_op_type_t, bd_srv_t *, aoff64_t, size_t,
     void *, const void *, size_t);
+static errno_t swap_hs(hr_volume_t *, size_t, size_t);
+static errno_t init_rebuild(hr_volume_t *, size_t *);
 static errno_t hr_raid1_rebuild(void *);
@@ -92 +98 @@
     }

-    rc = hr_raid1_update_vol_status(new_volume);
-    if (rc != EOK)
-        return rc;
-
     bd_srvs_init(&new_volume->hr_bds);
     new_volume->hr_bds.ops = &hr_raid1_bd_ops;
     new_volume->hr_bds.sarg = new_volume;
+
+    hr_raid1_update_vol_status(new_volume);
+    if (new_volume->status == HR_VOL_FAULTY)
+        return EINVAL;

     rc = hr_register_volume(new_volume);
@@ -128 +134 @@
 void hr_raid1_status_event(hr_volume_t *vol)
 {
-    fibril_mutex_lock(&vol->lock);
-    (void)hr_raid1_update_vol_status(vol);
-    fibril_mutex_unlock(&vol->lock);
+    hr_raid1_update_vol_status(vol);
 }

@@ -137 +141 @@
     HR_DEBUG("hr_raid1_add_hotspare()\n");

-    fibril_mutex_lock(&vol->lock);
+    errno_t rc = EOK;
+
+    fibril_mutex_lock(&vol->hotspare_lock);

     if (vol->hotspare_no >= HR_MAX_HOTSPARES) {
         HR_ERROR("hr_raid1_add_hotspare(): cannot add more hotspares "
             "to \"%s\"\n", vol->devname);
-        fibril_mutex_unlock(&vol->lock);
-        return ELIMIT;
-    }
-
-    vol->hotspares[vol->hotspare_no].svc_id = hotspare;
-    hr_update_hotspare_status(vol, vol->hotspare_no, HR_EXT_HOTSPARE);
+        rc = ELIMIT;
+        goto error;
+    }
+
+    size_t hs_idx = vol->hotspare_no;

     vol->hotspare_no++;
+
+    vol->hotspares[hs_idx].svc_id = hotspare;
+    hr_update_hotspare_status(vol, hs_idx, HR_EXT_HOTSPARE);

     /*
@@ -158 +166 @@
             "spawning new rebuild fibril\n");
         fid_t fib = fibril_create(hr_raid1_rebuild, vol);
-        if (fib == 0)
-            return ENOMEM;
+        if (fib == 0) {
+            rc = ENOMEM;
+            goto error;
+        }
         fibril_start(fib);
         fibril_detach(fib);
     }

-    fibril_mutex_unlock(&vol->lock);
-
-    return EOK;
+error:
+    fibril_mutex_unlock(&vol->hotspare_lock);
+
+    return rc;
 }

@@ -214 +225 @@
 }

-static errno_t hr_raid1_check_vol_status(hr_volume_t *vol)
-{
-    if (vol->status == HR_VOL_ONLINE ||
-        vol->status == HR_VOL_DEGRADED ||
-        vol->status == HR_VOL_REBUILD)
-        return EOK;
-    return EIO;
-}
-
-/*
- * Update vol->status and return EOK if volume
- * is usable
- */
-static errno_t hr_raid1_update_vol_status(hr_volume_t *vol)
-{
+static void hr_raid1_update_vol_status(hr_volume_t *vol)
+{
+    fibril_rwlock_read_lock(&vol->extents_lock);
+    fibril_rwlock_read_lock(&vol->states_lock);
+
     hr_vol_status_t old_state = vol->status;
     size_t healthy = hr_count_extents(vol, HR_EXT_ONLINE);

+    fibril_rwlock_read_unlock(&vol->states_lock);
+    fibril_rwlock_read_unlock(&vol->extents_lock);
+
     if (healthy == 0) {
-        if (old_state != HR_VOL_FAULTY)
+        if (old_state != HR_VOL_FAULTY) {
+            fibril_rwlock_write_lock(&vol->states_lock);
             hr_update_vol_status(vol, HR_VOL_FAULTY);
-        return EIO;
+            fibril_rwlock_write_unlock(&vol->states_lock);
+        }
     } else if (healthy < vol->extent_no) {
-        if (old_state != HR_VOL_DEGRADED &&
-            old_state != HR_VOL_REBUILD) {
-
-            hr_update_vol_status(vol, HR_VOL_DEGRADED);
+        if (old_state != HR_VOL_REBUILD) {
+            if (old_state != HR_VOL_DEGRADED) {
+                fibril_rwlock_write_lock(&vol->states_lock);
+                hr_update_vol_status(vol, HR_VOL_DEGRADED);
+                fibril_rwlock_write_unlock(&vol->states_lock);
+            }

             if (vol->hotspare_no > 0) {
@@ -246 +254 @@
                     vol);
                 if (fib == 0)
-                    return ENOMEM;
+                    return;
                 fibril_start(fib);
                 fibril_detach(fib);
             }
         }
-        return EOK;
     } else {
-        if (old_state != HR_VOL_ONLINE)
+        if (old_state != HR_VOL_ONLINE) {
+            fibril_rwlock_write_lock(&vol->states_lock);
             hr_update_vol_status(vol, HR_VOL_ONLINE);
-        return EOK;
-    }
-}
-
-static void hr_raid1_handle_extent_error(hr_volume_t *vol, size_t extent,
+            fibril_rwlock_write_unlock(&vol->states_lock);
+        }
+    }
+}
+
+static void hr_raid1_ext_state_callback(hr_volume_t *vol, size_t extent,
     errno_t rc)
 {
-    if (rc == ENOENT)
+    if (rc == EOK)
+        return;
+
+    assert(fibril_rwlock_is_locked(&vol->extents_lock));
+
+    fibril_rwlock_write_lock(&vol->states_lock);
+
+    switch (rc) {
+    case ENOENT:
         hr_update_ext_status(vol, extent, HR_EXT_MISSING);
-    else if (rc != EOK)
+        break;
+    default:
         hr_update_ext_status(vol, extent, HR_EXT_FAILED);
+    }
+
+    fibril_rwlock_write_unlock(&vol->states_lock);
+}
+
+static size_t hr_raid1_count_good_extents(hr_volume_t *vol, uint64_t ba,
+    size_t cnt, uint64_t rebuild_blk)
+{
+    assert(fibril_rwlock_is_locked(&vol->extents_lock));
+    assert(fibril_rwlock_is_locked(&vol->states_lock));
+
+    size_t count = 0;
+    for (size_t i = 0; i < vol->extent_no; i++) {
+        if (vol->extents[i].status == HR_EXT_ONLINE ||
+            (vol->extents[i].status == HR_EXT_REBUILD &&
+            ba < rebuild_blk)) {
+            count++;
+        }
+    }
+
+    return count;
+
 }

@@ -272 +312 @@
 {
     hr_volume_t *vol = bd->srvs->sarg;
+    hr_range_lock_t *rl = NULL;
     errno_t rc;
     size_t i;
+    uint64_t rebuild_blk;
+
+    fibril_rwlock_read_lock(&vol->states_lock);
+    hr_vol_status_t vol_state = vol->status;
+    fibril_rwlock_read_unlock(&vol->states_lock);
+
+    if (vol_state == HR_VOL_FAULTY)
+        return EIO;

     if (type == HR_BD_READ || type == HR_BD_WRITE)
@@ -283 +332 @@
         return rc;

-    hr_add_ba_offset(vol, &ba);
-
-    fibril_mutex_lock(&vol->lock);
-
-    rc = hr_raid1_check_vol_status(vol);
-    if (rc != EOK)
-        goto end;
+    /* allow full dev sync */
+    if (type != HR_BD_SYNC || ba != 0)
+        hr_add_ba_offset(vol, &ba);
+
+    /*
+     * this is to allow adding hotspare or start a rebuild on
+     * very busy array, because of how rwlocks are implemented
+     * in HelenOS (no writer priority, so if there are multiple
+     * continuos readers, writer will never own the lock)
+     */
+    if (vol->halt_please) {
+        fibril_mutex_lock(&vol->halt_lock);
+        fibril_mutex_unlock(&vol->halt_lock);
+    }
+
+    /*
+     * extent order has to be locked for the whole IO duration,
+     * so that workers have consistent targets
+     */
+    fibril_rwlock_read_lock(&vol->extents_lock);

     size_t successful = 0;
     switch (type) {
-    case HR_BD_SYNC:
+    case HR_BD_READ:
+        rebuild_blk = atomic_load_explicit(&vol->rebuild_blk,
+            memory_order_relaxed);
+
         for (i = 0; i < vol->extent_no; i++) {
-            if (vol->extents[i].status != HR_EXT_ONLINE)
+            fibril_rwlock_read_lock(&vol->states_lock);
+            hr_ext_status_t state = vol->extents[i].status;
+            fibril_rwlock_read_unlock(&vol->states_lock);
+
+            if (state != HR_EXT_ONLINE &&
+                (state != HR_EXT_REBUILD ||
+                ba + cnt - 1 >= rebuild_blk)) {
                 continue;
-            rc = block_sync_cache(vol->extents[i].svc_id, ba, cnt);
-            if (rc != EOK && rc != ENOTSUP)
-                hr_raid1_handle_extent_error(vol, i, rc);
-            else
-                successful++;
-        }
-        break;
-    case HR_BD_READ:
-        for (i = 0; i < vol->extent_no; i++) {
-            if (vol->extents[i].status != HR_EXT_ONLINE)
-                continue;
+            }
+
             rc = block_read_direct(vol->extents[i].svc_id, ba, cnt,
                 data_read);
+
+            if (rc == ENOMEM && i + 1 == vol->extent_no)
+                goto end;
+
+            if (rc == ENOMEM)
+                continue;
+
             if (rc != EOK) {
-                hr_raid1_handle_extent_error(vol, i, rc);
+                hr_raid1_ext_state_callback(vol, i, rc);
             } else {
                 successful++;
@@ -318 +387 @@
             }
             break;
+    case HR_BD_SYNC:
     case HR_BD_WRITE:
+        if (type == HR_BD_WRITE) {
+            rl = hr_range_lock_acquire(vol, ba, cnt);
+            if (rl == NULL) {
+                rc = ENOMEM;
+                goto end;
+            }
+        }
+
+        fibril_rwlock_read_lock(&vol->states_lock);
+
+        rebuild_blk = atomic_load_explicit(&vol->rebuild_blk,
+            memory_order_relaxed);
+
+        size_t good = hr_raid1_count_good_extents(vol, ba, cnt,
+            rebuild_blk);
+
+        hr_fgroup_t *group = hr_fgroup_create(vol->fge, good);
+        if (group == NULL) {
+            if (type == HR_BD_WRITE)
+                hr_range_lock_release(rl);
+            rc = ENOMEM;
+            fibril_rwlock_read_unlock(&vol->states_lock);
+            goto end;
+        }
+
         for (i = 0; i < vol->extent_no; i++) {
-            if (vol->extents[i].status != HR_EXT_ONLINE ||
-                (vol->extents[i].status == HR_EXT_REBUILD &&
-                ba >= vol->rebuild_blk))
+            if (vol->extents[i].status != HR_EXT_ONLINE &&
+                (vol->extents[i].status != HR_EXT_REBUILD ||
+                ba >= rebuild_blk)) {
                 /*
                  * When the extent is being rebuilt,
                  * we only write to the part that is already
-                 * rebuilt. If ba is more than vol->rebuild_blk,
-                 * the write is going to be replicated later
-                 * in the rebuild. TODO: test
+                 * rebuilt. If IO starts after vol->rebuild_blk
+                 * we do not proceed, the write is going to
+                 * be replicated later in the rebuild.
                  */
                 continue;
-            rc = block_write_direct(vol->extents[i].svc_id, ba, cnt,
-                data_write);
-            if (rc != EOK)
-                hr_raid1_handle_extent_error(vol, i, rc);
-            else
-                successful++;
-        }
+            }
+
+            hr_io_t *io = hr_fgroup_alloc(group);
+            io->extent = i;
+            io->data_write = data_write;
+            io->data_read = data_read;
+            io->ba = ba;
+            io->cnt = cnt;
+            io->type = type;
+            io->vol = vol;
+            io->state_callback = hr_raid1_ext_state_callback;
+
+            hr_fgroup_submit(group, hr_io_worker, io);
+        }
+
+        fibril_rwlock_read_unlock(&vol->states_lock);
+
+        (void)hr_fgroup_wait(group, &successful, NULL);
+
+        if (type == HR_BD_WRITE)
+            hr_range_lock_release(rl);
+
         break;
     default:
@@ -350 +460 @@

 end:
-    (void)hr_raid1_update_vol_status(vol);
-    fibril_mutex_unlock(&vol->lock);
+    fibril_rwlock_read_unlock(&vol->extents_lock);
+
+    hr_raid1_update_vol_status(vol);
+
     return rc;
 }

-/*
- * Put the last HOTSPARE extent in place
- * of first DEGRADED, and start the rebuild.
- */
-static errno_t hr_raid1_rebuild(void *arg)
-{
-    HR_DEBUG("hr_raid1_rebuild()\n");
-
-    hr_volume_t *vol = arg;
-    void *buf = NULL;
+static errno_t swap_hs(hr_volume_t *vol, size_t bad, size_t hs)
+{
+    HR_DEBUG("hr_raid1_rebuild(): swapping in hotspare\n");
+
+    service_id_t faulty_svc_id = vol->extents[bad].svc_id;
+    service_id_t hs_svc_id = vol->hotspares[hs].svc_id;
+
+    errno_t rc = block_init(hs_svc_id);
+    if (rc != EOK) {
+        HR_ERROR("hr_raid1_rebuild(): initing hotspare (%lu) failed\n",
+            hs_svc_id);
+        return rc;
+    }
+
+    vol->extents[bad].svc_id = hs_svc_id;
+    hr_update_ext_status(vol, bad, HR_EXT_HOTSPARE);
+
+    vol->hotspares[hs].svc_id = 0;
+    hr_update_hotspare_status(vol, hs, HR_EXT_INVALID);
+
+    vol->hotspare_no--;
+
+    if (faulty_svc_id != 0)
+        block_fini(faulty_svc_id);
+
+    return EOK;
+}
+
+static errno_t init_rebuild(hr_volume_t *vol, size_t *rebuild_idx)
+{
     errno_t rc = EOK;

-    fibril_mutex_lock(&vol->lock);
+    fibril_mutex_lock(&vol->halt_lock);
+    vol->halt_please = true;
+    fibril_rwlock_write_lock(&vol->extents_lock);
+    fibril_rwlock_write_lock(&vol->states_lock);
+    fibril_mutex_lock(&vol->hotspare_lock);

     if (vol->hotspare_no == 0) {
         HR_WARN("hr_raid1_rebuild(): no free hotspares on \"%s\", "
             "aborting rebuild\n", vol->devname);
-        /* retval isn't checked for now */
-        goto end;
+        rc = EINVAL;
+        goto error;
     }

     size_t bad = vol->extent_no;
     for (size_t i = 0; i < vol->extent_no; i++) {
-        if (vol->extents[i].status == HR_EXT_FAILED) {
+        if (vol->extents[i].status != HR_EXT_ONLINE) {
             bad = i;
             break;
@@ -387 +523 @@
         HR_WARN("hr_raid1_rebuild(): no bad extent on \"%s\", "
             "aborting rebuild\n", vol->devname);
-        /* retval isn't checked for now */
-        goto end;
+        rc = EINVAL;
+        goto error;
     }

@@ -398 +534 @@
             "aborting rebuild\n", hr_get_ext_status_msg(hs_state));
         rc = EINVAL;
-        goto end;
-    }
-
-    HR_DEBUG("hr_raid1_rebuild(): swapping in hotspare\n");
-
-    block_fini(vol->extents[bad].svc_id);
-
-    vol->extents[bad].svc_id = vol->hotspares[hotspare_idx].svc_id;
-    hr_update_ext_status(vol, bad, HR_EXT_HOTSPARE);
-
-    vol->hotspares[hotspare_idx].svc_id = 0;
-    hr_update_hotspare_status(vol, hotspare_idx, HR_EXT_MISSING);
-
-    vol->hotspare_no--;
+        goto error;
+    }
+
+    rc = swap_hs(vol, bad, hotspare_idx);
+    if (rc != EOK) {
+        HR_ERROR("hr_raid1_rebuild(): swapping hotspare failed, "
+            "aborting rebuild\n");
+        goto error;
+    }

     hr_extent_t *rebuild_ext = &vol->extents[bad];

-    rc = block_init(rebuild_ext->svc_id);
-    if (rc != EOK) {
-        HR_ERROR("hr_raid1_rebuild(): initing (%lu) failed, "
-            "aborting rebuild\n", rebuild_ext->svc_id);
-        goto end;
-    }
-
-    HR_DEBUG("hr_raid1_rebuild(): starting rebuild on (%lu)\n",
-        rebuild_ext->svc_id);
+    HR_DEBUG("hr_raid1_rebuild(): starting REBUILD on extent no. %lu (%lu)"
+        "\n", bad, rebuild_ext->svc_id);
+
+    atomic_store_explicit(&vol->rebuild_blk, 0, memory_order_relaxed);

     hr_update_ext_status(vol, bad, HR_EXT_REBUILD);
     hr_update_vol_status(vol, HR_VOL_REBUILD);
+
+    *rebuild_idx = bad;
+error:
+    fibril_mutex_unlock(&vol->hotspare_lock);
+    fibril_rwlock_write_unlock(&vol->states_lock);
+    fibril_rwlock_write_unlock(&vol->extents_lock);
+    vol->halt_please = false;
+    fibril_mutex_unlock(&vol->halt_lock);
+
+    return rc;
+}
+
+static errno_t hr_raid1_restore_blocks(hr_volume_t *vol, size_t rebuild_idx,
+    uint64_t ba, size_t cnt, void *buf)
+{
+    HR_DEBUG("REBUILD restoring blocks (ba: %lu, cnt: %lu)\n", ba, cnt);
+
+    assert(fibril_rwlock_is_locked(&vol->extents_lock));
+
+    errno_t rc = ENOENT;
+    hr_extent_t *ext, *rebuild_ext = &vol->extents[rebuild_idx];
+
+    for (size_t i = 0; i < vol->extent_no; i++) {
+        fibril_rwlock_read_lock(&vol->states_lock);
+
+        ext = &vol->extents[i];
+        if (ext->status != HR_EXT_ONLINE)
+            continue;
+
+        fibril_rwlock_read_unlock(&vol->states_lock);
+
+        rc = block_read_direct(ext->svc_id, ba, cnt, buf);
+        if (rc == EOK)
+            break;
+
+        if (rc != ENOMEM)
+            hr_raid1_ext_state_callback(vol, i, rc);
+
+        if (i + 1 >= vol->extent_no) {
+            HR_ERROR("rebuild on \"%s\" (%lu), failed due to "
+                "too many failed extents\n",
+                vol->devname, vol->svc_id);
+
+            return rc;
+        }
+    }
+
+    rc = block_write_direct(rebuild_ext->svc_id, ba, cnt, buf);
+    if (rc != EOK) {
+        if (rc != ENOMEM)
+            hr_raid1_ext_state_callback(vol, rebuild_idx, rc);
+
+        HR_ERROR("rebuild on \"%s\" (%lu), failed due to "
+            "the rebuilt extent no. %lu WRITE (rc: %s)\n",
+            vol->devname, vol->svc_id, rebuild_idx, str_error(rc));
+
+        return rc;
+    }
+
+    return EOK;
+}
+
+/*
+ * Put the last HOTSPARE extent in place
+ * of first that != ONLINE, and start the rebuild.
+ */
+static errno_t hr_raid1_rebuild(void *arg)
+{
+    HR_DEBUG("hr_raid1_rebuild()\n");
+
+    hr_volume_t *vol = arg;
+    void *buf = NULL;
+    size_t rebuild_idx;
+    errno_t rc;
+
+    rc = init_rebuild(vol, &rebuild_idx);
+    if (rc != EOK)
+        return rc;

     size_t left = vol->data_blkno;
@@ -432 +636 @@
     buf = malloc(max_blks * vol->bsize);

-    hr_extent_t *ext;
-
-    vol->rebuild_blk = 0;
-
     size_t cnt;
     uint64_t ba = 0;
     hr_add_ba_offset(vol, &ba);

+    fibril_rwlock_read_lock(&vol->extents_lock);
+
+    hr_range_lock_t *rl = NULL;
+
     while (left != 0) {
-        vol->rebuild_blk = ba;
+        if (vol->halt_please) {
+            fibril_rwlock_read_unlock(&vol->extents_lock);
+            fibril_mutex_lock(&vol->halt_lock);
+            fibril_mutex_unlock(&vol->halt_lock);
+            fibril_rwlock_read_lock(&vol->extents_lock);
+        }
+
         cnt = min(max_blks, left);
-        for (size_t i = 0; i < vol->extent_no; i++) {
-            ext = &vol->extents[i];
-            if (ext->status == HR_EXT_ONLINE) {
-                rc = block_read_direct(ext->svc_id, ba, cnt,
-                    buf);
-                if (rc != EOK) {
-                    hr_raid1_handle_extent_error(vol, i, rc);
-                    if (i + 1 < vol->extent_no) {
-                        /* still might have one ONLINE */
-                        continue;
-                    } else {
-                        HR_ERROR("rebuild on \"%s\" (%lu), failed due to "
-                            "too many failed extents\n",
-                            vol->devname, vol->svc_id);
-                        goto end;
-                    }
-                }
-                break;
-            }
-        }
-
-        rc = block_write_direct(rebuild_ext->svc_id, ba, cnt, buf);
-        if (rc != EOK) {
-            hr_raid1_handle_extent_error(vol, bad, rc);
-            HR_ERROR("rebuild on \"%s\" (%lu), failed due to "
-                "the rebuilt extent number %lu failing\n",
-                vol->devname, vol->svc_id, bad);
+
+        rl = hr_range_lock_acquire(vol, ba, cnt);
+        if (rl == NULL) {
+            rc = ENOMEM;
             goto end;
-
-        }
+        }
+
+        atomic_store_explicit(&vol->rebuild_blk, ba,
+            memory_order_relaxed);
+
+        rc = hr_raid1_restore_blocks(vol, rebuild_idx, ba, cnt, buf);
+
+        hr_range_lock_release(rl);
+
+        if (rc != EOK)
+            goto end;

         ba += cnt;
         left -= cnt;
-
-        /*
-         * Let other IO requests be served
-         * during rebuild.
-         */
-        fibril_mutex_unlock(&vol->lock);
-        fibril_mutex_lock(&vol->lock);
     }

     HR_DEBUG("hr_raid1_rebuild(): rebuild finished on \"%s\" (%lu), "
-        "extent number %lu\n", vol->devname, vol->svc_id, hotspare_idx);
+        "extent no. %lu\n", vol->devname, vol->svc_id, rebuild_idx);

-    hr_update_ext_status(vol, bad, HR_EXT_ONLINE);
+    fibril_rwlock_write_lock(&vol->states_lock);
+    hr_update_ext_status(vol, rebuild_idx, HR_EXT_ONLINE);
+    fibril_rwlock_write_unlock(&vol->states_lock);
+
     /*
      * For now write metadata at the end, because
      * we don't sync metada accross extents yet.
      */
-    hr_write_meta_to_ext(vol, bad);
+    hr_write_meta_to_ext(vol, rebuild_idx);
 end:
-    (void)hr_raid1_update_vol_status(vol);
-    fibril_mutex_unlock(&vol->lock);
+    if (rc != EOK) {
+        fibril_rwlock_write_lock(&vol->states_lock);
+        hr_update_vol_status(vol, HR_VOL_DEGRADED);
+        fibril_rwlock_write_unlock(&vol->states_lock);
+    }
+
+    fibril_rwlock_read_unlock(&vol->extents_lock);
+
+    hr_raid1_update_vol_status(vol);

     if (buf != NULL)
         free(buf);

-    /* retval isn't checked anywhere for now */
     return rc;
 }
uspace/srv/bd/hr/var.h (ee47537 → 58d82fa)

@@ -49 +49 @@
 #define HR_STRIP_SIZE DATA_XFER_LIMIT

+struct hr_volume;
 typedef struct hr_volume hr_volume_t;

@@ -87 +88 @@
     char devname[HR_DEVNAME_LEN];

-    hr_extent_t extents[HR_MAX_EXTENTS];
     size_t hotspare_no;
     hr_extent_t hotspares[HR_MAX_HOTSPARES];

-    /* protects ordering (hr_extent_t.svc_id, hotspares) */
+    /* protects hotspares (hotspares.{svc_id,status}, hotspare_no) */
+    fibril_mutex_t hotspare_lock;
+
+    hr_extent_t extents[HR_MAX_EXTENTS];
+    /* protects extents ordering (extents.svc_id) */
     fibril_rwlock_t extents_lock;
-
-    /* protects states (hr_extent_t.status, hr_vol_status_t.status) */
+    /* protects states (extents.status, hr_volume_t.status) */
     fibril_rwlock_t states_lock;

@@ -101 +104 @@
     fibril_mutex_t halt_lock;

-    uint64_t rebuild_blk;
+    _Atomic uint64_t rebuild_blk;
     uint64_t counter; /* metadata syncing */
     hr_vol_status_t status;