Changeset 35f2a877 in mainline
- Timestamp:
- 2025-01-12T21:53:55Z (5 months ago)
- Children:
- 23df41b
- Parents:
- d773bea9
- git-author:
- Miroslav Cimerman <mc@…> (2025-01-12 20:34:12)
- git-committer:
- Miroslav Cimerman <mc@…> (2025-01-12 21:53:55)
- File:
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
uspace/srv/bd/hr/raid1.c
rd773bea9 r35f2a877 56 56 extern loc_srv_t *hr_srv; 57 57 58 static void process_deferred_invalidations(hr_volume_t *);59 58 static void hr_raid1_update_vol_status(hr_volume_t *); 60 59 static void hr_raid1_ext_state_callback(hr_volume_t *, size_t, errno_t); … … 63 62 static errno_t hr_raid1_bd_op(hr_bd_op_type_t, bd_srv_t *, aoff64_t, size_t, 64 63 void *, const void *, size_t); 64 static errno_t hr_raid1_rebuild(void *); 65 static errno_t init_rebuild(hr_volume_t *, size_t *); 65 66 static errno_t swap_hs(hr_volume_t *, size_t, size_t); 66 static errno_t init_rebuild(hr_volume_t *, size_t *); 67 static errno_t hr_raid1_rebuild(void *); 67 static errno_t hr_raid1_restore_blocks(hr_volume_t *, size_t, uint64_t, size_t, 68 void *); 69 static void hr_process_deferred_invalidations(hr_volume_t *); 68 70 69 71 /* bdops */ … … 216 218 } 217 219 218 static void process_deferred_invalidations(hr_volume_t *vol)219 {220 HR_DEBUG("hr_raid1_update_vol_status(): deferred invalidations\n");221 222 fibril_mutex_lock(&vol->halt_lock);223 vol->halt_please = true;224 fibril_rwlock_write_lock(&vol->extents_lock);225 fibril_rwlock_write_lock(&vol->states_lock);226 fibril_mutex_lock(&vol->hotspare_lock);227 228 list_foreach(vol->deferred_invalidations_list, link,229 hr_deferred_invalidation_t, di) {230 assert(vol->extents[di->index].status == HR_EXT_INVALID);231 232 HR_DEBUG("moving invalidated extent no. 
%lu to hotspares\n",233 di->index);234 235 block_fini(di->svc_id);236 237 size_t hs_idx = vol->hotspare_no;238 239 vol->hotspare_no++;240 241 hr_update_hotspare_svc_id(vol, hs_idx, di->svc_id);242 hr_update_hotspare_status(vol, hs_idx, HR_EXT_HOTSPARE);243 244 hr_update_ext_svc_id(vol, di->index, 0);245 hr_update_ext_status(vol, di->index, HR_EXT_MISSING);246 247 assert(vol->hotspare_no < HR_MAX_HOTSPARES + HR_MAX_EXTENTS);248 }249 250 for (size_t i = 0; i < HR_MAX_EXTENTS; i++) {251 hr_deferred_invalidation_t *di = &vol->deferred_inval[i];252 if (di->svc_id != 0) {253 list_remove(&di->link);254 di->svc_id = 0;255 }256 }257 258 fibril_mutex_unlock(&vol->hotspare_lock);259 fibril_rwlock_write_unlock(&vol->states_lock);260 fibril_rwlock_write_unlock(&vol->extents_lock);261 vol->halt_please = false;262 fibril_mutex_unlock(&vol->halt_lock);263 }264 265 220 static void hr_raid1_update_vol_status(hr_volume_t *vol) 266 221 { … … 273 228 false)) { 274 229 fibril_mutex_lock(&vol->deferred_list_lock); 275 process_deferred_invalidations(vol);230 hr_process_deferred_invalidations(vol); 276 231 fibril_mutex_unlock(&vol->deferred_list_lock); 277 232 } … … 548 503 549 504 return rc; 550 }551 552 static errno_t swap_hs(hr_volume_t *vol, size_t bad, size_t hs)553 {554 HR_DEBUG("hr_raid1_rebuild(): swapping in hotspare\n");555 556 service_id_t faulty_svc_id = vol->extents[bad].svc_id;557 service_id_t hs_svc_id = vol->hotspares[hs].svc_id;558 559 errno_t rc = block_init(hs_svc_id);560 if (rc != EOK) {561 HR_ERROR("hr_raid1_rebuild(): initing hotspare (%lu) failed\n",562 hs_svc_id);563 return rc;564 }565 566 hr_update_ext_svc_id(vol, bad, hs_svc_id);567 hr_update_ext_status(vol, bad, HR_EXT_HOTSPARE);568 569 hr_update_hotspare_svc_id(vol, hs, 0);570 hr_update_hotspare_status(vol, hs, HR_EXT_INVALID);571 572 vol->hotspare_no--;573 574 if (faulty_svc_id != 0)575 block_fini(faulty_svc_id);576 577 return EOK;578 }579 580 static errno_t init_rebuild(hr_volume_t *vol, size_t 
*rebuild_idx)581 {582 errno_t rc = EOK;583 584 fibril_mutex_lock(&vol->halt_lock);585 vol->halt_please = true;586 fibril_rwlock_write_lock(&vol->extents_lock);587 fibril_rwlock_write_lock(&vol->states_lock);588 fibril_mutex_lock(&vol->hotspare_lock);589 590 if (vol->hotspare_no == 0) {591 HR_WARN("hr_raid1_rebuild(): no free hotspares on \"%s\", "592 "aborting rebuild\n", vol->devname);593 rc = EINVAL;594 goto error;595 }596 597 size_t bad = vol->extent_no;598 for (size_t i = 0; i < vol->extent_no; i++) {599 if (vol->extents[i].status != HR_EXT_ONLINE) {600 bad = i;601 break;602 }603 }604 605 if (bad == vol->extent_no) {606 HR_WARN("hr_raid1_rebuild(): no bad extent on \"%s\", "607 "aborting rebuild\n", vol->devname);608 rc = EINVAL;609 goto error;610 }611 612 size_t hotspare_idx = vol->hotspare_no - 1;613 614 hr_ext_status_t hs_state = vol->hotspares[hotspare_idx].status;615 if (hs_state != HR_EXT_HOTSPARE) {616 HR_ERROR("hr_raid1_rebuild(): invalid hotspare state \"%s\", "617 "aborting rebuild\n", hr_get_ext_status_msg(hs_state));618 rc = EINVAL;619 goto error;620 }621 622 rc = swap_hs(vol, bad, hotspare_idx);623 if (rc != EOK) {624 HR_ERROR("hr_raid1_rebuild(): swapping hotspare failed, "625 "aborting rebuild\n");626 goto error;627 }628 629 hr_extent_t *rebuild_ext = &vol->extents[bad];630 631 HR_DEBUG("hr_raid1_rebuild(): starting REBUILD on extent no. 
%lu (%lu)"632 "\n", bad, rebuild_ext->svc_id);633 634 atomic_store_explicit(&vol->rebuild_blk, 0, memory_order_relaxed);635 636 hr_update_ext_status(vol, bad, HR_EXT_REBUILD);637 hr_update_vol_status(vol, HR_VOL_REBUILD);638 639 *rebuild_idx = bad;640 error:641 fibril_mutex_unlock(&vol->hotspare_lock);642 fibril_rwlock_write_unlock(&vol->states_lock);643 fibril_rwlock_write_unlock(&vol->extents_lock);644 vol->halt_please = false;645 fibril_mutex_unlock(&vol->halt_lock);646 647 return rc;648 }649 650 static errno_t hr_raid1_restore_blocks(hr_volume_t *vol, size_t rebuild_idx,651 uint64_t ba, size_t cnt, void *buf)652 {653 HR_DEBUG("REBUILD restoring blocks (ba: %lu, cnt: %lu)\n", ba, cnt);654 655 assert(fibril_rwlock_is_locked(&vol->extents_lock));656 657 errno_t rc = ENOENT;658 hr_extent_t *ext, *rebuild_ext = &vol->extents[rebuild_idx];659 660 for (size_t i = 0; i < vol->extent_no; i++) {661 fibril_rwlock_read_lock(&vol->states_lock);662 663 ext = &vol->extents[i];664 if (ext->status != HR_EXT_ONLINE)665 continue;666 667 fibril_rwlock_read_unlock(&vol->states_lock);668 669 rc = block_read_direct(ext->svc_id, ba, cnt, buf);670 if (rc == EOK)671 break;672 673 if (rc != ENOMEM)674 hr_raid1_ext_state_callback(vol, i, rc);675 676 if (i + 1 >= vol->extent_no) {677 if (rc != ENOMEM) {678 HR_ERROR("rebuild on \"%s\" (%lu), failed due "679 "to too many failed extents\n",680 vol->devname, vol->svc_id);681 }682 683 /* for now we have to invalidate the rebuild extent */684 if (rc == ENOMEM) {685 HR_ERROR("rebuild on \"%s\" (%lu), failed due "686 "to too many failed reads, because of not "687 "enough memory\n",688 vol->devname, vol->svc_id);689 hr_raid1_ext_state_callback(vol, rebuild_idx,690 ENOMEM);691 }692 693 return rc;694 }695 }696 697 rc = block_write_direct(rebuild_ext->svc_id, ba, cnt, buf);698 if (rc != EOK) {699 /*700 * Here we dont handle ENOMEM, because maybe in the701 * future, there is going to be M_WAITOK, or we are702 * going to wait for more memory, so that we 
don't703 * have to invalidate it...704 *705 * XXX: for now we do706 */707 hr_raid1_ext_state_callback(vol, rebuild_idx, rc);708 709 HR_ERROR("rebuild on \"%s\" (%lu), failed due to "710 "the rebuilt extent no. %lu WRITE (rc: %s)\n",711 vol->devname, vol->svc_id, rebuild_idx, str_error(rc));712 713 return rc;714 }715 716 return EOK;717 505 } 718 506 … … 823 611 } 824 612 613 static errno_t init_rebuild(hr_volume_t *vol, size_t *rebuild_idx) 614 { 615 errno_t rc = EOK; 616 617 fibril_mutex_lock(&vol->halt_lock); 618 vol->halt_please = true; 619 fibril_rwlock_write_lock(&vol->extents_lock); 620 fibril_rwlock_write_lock(&vol->states_lock); 621 fibril_mutex_lock(&vol->hotspare_lock); 622 623 if (vol->hotspare_no == 0) { 624 HR_WARN("hr_raid1_rebuild(): no free hotspares on \"%s\", " 625 "aborting rebuild\n", vol->devname); 626 rc = EINVAL; 627 goto error; 628 } 629 630 size_t bad = vol->extent_no; 631 for (size_t i = 0; i < vol->extent_no; i++) { 632 if (vol->extents[i].status != HR_EXT_ONLINE) { 633 bad = i; 634 break; 635 } 636 } 637 638 if (bad == vol->extent_no) { 639 HR_WARN("hr_raid1_rebuild(): no bad extent on \"%s\", " 640 "aborting rebuild\n", vol->devname); 641 rc = EINVAL; 642 goto error; 643 } 644 645 size_t hotspare_idx = vol->hotspare_no - 1; 646 647 hr_ext_status_t hs_state = vol->hotspares[hotspare_idx].status; 648 if (hs_state != HR_EXT_HOTSPARE) { 649 HR_ERROR("hr_raid1_rebuild(): invalid hotspare state \"%s\", " 650 "aborting rebuild\n", hr_get_ext_status_msg(hs_state)); 651 rc = EINVAL; 652 goto error; 653 } 654 655 rc = swap_hs(vol, bad, hotspare_idx); 656 if (rc != EOK) { 657 HR_ERROR("hr_raid1_rebuild(): swapping hotspare failed, " 658 "aborting rebuild\n"); 659 goto error; 660 } 661 662 hr_extent_t *rebuild_ext = &vol->extents[bad]; 663 664 HR_DEBUG("hr_raid1_rebuild(): starting REBUILD on extent no. 
%lu (%lu)" 665 "\n", bad, rebuild_ext->svc_id); 666 667 atomic_store_explicit(&vol->rebuild_blk, 0, memory_order_relaxed); 668 669 hr_update_ext_status(vol, bad, HR_EXT_REBUILD); 670 hr_update_vol_status(vol, HR_VOL_REBUILD); 671 672 *rebuild_idx = bad; 673 error: 674 fibril_mutex_unlock(&vol->hotspare_lock); 675 fibril_rwlock_write_unlock(&vol->states_lock); 676 fibril_rwlock_write_unlock(&vol->extents_lock); 677 vol->halt_please = false; 678 fibril_mutex_unlock(&vol->halt_lock); 679 680 return rc; 681 } 682 683 static errno_t swap_hs(hr_volume_t *vol, size_t bad, size_t hs) 684 { 685 HR_DEBUG("hr_raid1_rebuild(): swapping in hotspare\n"); 686 687 service_id_t faulty_svc_id = vol->extents[bad].svc_id; 688 service_id_t hs_svc_id = vol->hotspares[hs].svc_id; 689 690 /* TODO: if rc != EOK, try next hotspare */ 691 errno_t rc = block_init(hs_svc_id); 692 if (rc != EOK) { 693 HR_ERROR("hr_raid1_rebuild(): initing hotspare (%lu) failed\n", 694 hs_svc_id); 695 return rc; 696 } 697 698 hr_update_ext_svc_id(vol, bad, hs_svc_id); 699 hr_update_ext_status(vol, bad, HR_EXT_HOTSPARE); 700 701 hr_update_hotspare_svc_id(vol, hs, 0); 702 hr_update_hotspare_status(vol, hs, HR_EXT_INVALID); 703 704 vol->hotspare_no--; 705 706 if (faulty_svc_id != 0) 707 block_fini(faulty_svc_id); 708 709 return EOK; 710 } 711 712 static errno_t hr_raid1_restore_blocks(hr_volume_t *vol, size_t rebuild_idx, 713 uint64_t ba, size_t cnt, void *buf) 714 { 715 HR_DEBUG("REBUILD restoring blocks (ba: %lu, cnt: %lu)\n", ba, cnt); 716 717 assert(fibril_rwlock_is_locked(&vol->extents_lock)); 718 719 errno_t rc = ENOENT; 720 hr_extent_t *ext, *rebuild_ext = &vol->extents[rebuild_idx]; 721 722 for (size_t i = 0; i < vol->extent_no; i++) { 723 fibril_rwlock_read_lock(&vol->states_lock); 724 725 ext = &vol->extents[i]; 726 if (ext->status != HR_EXT_ONLINE) 727 continue; 728 729 fibril_rwlock_read_unlock(&vol->states_lock); 730 731 rc = block_read_direct(ext->svc_id, ba, cnt, buf); 732 if (rc == EOK) 733 break; 
734 735 if (rc != ENOMEM) 736 hr_raid1_ext_state_callback(vol, i, rc); 737 738 if (i + 1 >= vol->extent_no) { 739 if (rc != ENOMEM) { 740 HR_ERROR("rebuild on \"%s\" (%lu), failed due " 741 "to too many failed extents\n", 742 vol->devname, vol->svc_id); 743 } 744 745 /* for now we have to invalidate the rebuild extent */ 746 if (rc == ENOMEM) { 747 HR_ERROR("rebuild on \"%s\" (%lu), failed due " 748 "to too many failed reads, because of not " 749 "enough memory\n", 750 vol->devname, vol->svc_id); 751 hr_raid1_ext_state_callback(vol, rebuild_idx, 752 ENOMEM); 753 } 754 755 return rc; 756 } 757 } 758 759 rc = block_write_direct(rebuild_ext->svc_id, ba, cnt, buf); 760 if (rc != EOK) { 761 /* 762 * Here we dont handle ENOMEM, because maybe in the 763 * future, there is going to be M_WAITOK, or we are 764 * going to wait for more memory, so that we don't 765 * have to invalidate it... 766 * 767 * XXX: for now we do 768 */ 769 hr_raid1_ext_state_callback(vol, rebuild_idx, rc); 770 771 HR_ERROR("rebuild on \"%s\" (%lu), failed due to " 772 "the rebuilt extent no. %lu WRITE (rc: %s)\n", 773 vol->devname, vol->svc_id, rebuild_idx, str_error(rc)); 774 775 return rc; 776 } 777 778 return EOK; 779 } 780 781 static void hr_process_deferred_invalidations(hr_volume_t *vol) 782 { 783 HR_DEBUG("hr_raid1_update_vol_status(): deferred invalidations\n"); 784 785 fibril_mutex_lock(&vol->halt_lock); 786 vol->halt_please = true; 787 fibril_rwlock_write_lock(&vol->extents_lock); 788 fibril_rwlock_write_lock(&vol->states_lock); 789 fibril_mutex_lock(&vol->hotspare_lock); 790 791 list_foreach(vol->deferred_invalidations_list, link, 792 hr_deferred_invalidation_t, di) { 793 assert(vol->extents[di->index].status == HR_EXT_INVALID); 794 795 HR_DEBUG("moving invalidated extent no. 
%lu to hotspares\n", 796 di->index); 797 798 block_fini(di->svc_id); 799 800 size_t hs_idx = vol->hotspare_no; 801 802 vol->hotspare_no++; 803 804 hr_update_hotspare_svc_id(vol, hs_idx, di->svc_id); 805 hr_update_hotspare_status(vol, hs_idx, HR_EXT_HOTSPARE); 806 807 hr_update_ext_svc_id(vol, di->index, 0); 808 hr_update_ext_status(vol, di->index, HR_EXT_MISSING); 809 810 assert(vol->hotspare_no < HR_MAX_HOTSPARES + HR_MAX_EXTENTS); 811 } 812 813 for (size_t i = 0; i < HR_MAX_EXTENTS; i++) { 814 hr_deferred_invalidation_t *di = &vol->deferred_inval[i]; 815 if (di->svc_id != 0) { 816 list_remove(&di->link); 817 di->svc_id = 0; 818 } 819 } 820 821 fibril_mutex_unlock(&vol->hotspare_lock); 822 fibril_rwlock_write_unlock(&vol->states_lock); 823 fibril_rwlock_write_unlock(&vol->extents_lock); 824 vol->halt_please = false; 825 fibril_mutex_unlock(&vol->halt_lock); 826 } 827 825 828 /** @} 826 829 */
Note:
See TracChangeset
for help on using the changeset viewer.