Changeset aa7864b in mainline
- Timestamp: 2024-11-22T22:17:01Z (7 months ago)
- Children: bf0a791
- Parents: a56ac5d1
- Location: uspace/srv/bd/hr
- Files: 3 edited
Legend: lines prefixed with + are added, lines prefixed with - are removed, unprefixed lines are unmodified context; a lone … marks lines elided between hunks.
uspace/srv/bd/hr/hr.c
 		new_volume->hr_ops.init = hr_raid5_init;
 		new_volume->hr_ops.status_event = hr_raid5_status_event;
+		new_volume->hr_ops.add_hotspare = hr_raid5_add_hotspare;
 		break;
 	default:
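With the handler wired into hr_ops, callers can add a hotspare without knowing the volume's RAID level. A minimal sketch of that ops-table dispatch, assuming a hypothetical hr_add_hotspare() wrapper on the caller side (only the hr_ops.add_hotspare slot, its signature and its errno_t return come from this changeset):

    /* Hypothetical caller-side wrapper; not part of the changeset. */
    static errno_t hr_add_hotspare(hr_volume_t *vol, service_id_t hotspare)
    {
    	/* A level without hotspare support would leave the slot NULL. */
    	if (vol->hr_ops.add_hotspare == NULL)
    		return ENOTSUP;

    	/* RAID 1, 4 and 5 each plug their own handler into this slot. */
    	return vol->hr_ops.add_hotspare(vol, hotspare);
    }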
uspace/srv/bd/hr/raid5.c
 static ssize_t hr_raid5_get_bad_ext(hr_volume_t *);
 static errno_t hr_raid5_update_vol_status(hr_volume_t *);
+static void hr_raid5_handle_extent_error(hr_volume_t *, size_t, errno_t);
 static void xor(void *, const void *, size_t);
 static errno_t hr_raid5_read_degraded(hr_volume_t *, uint64_t, uint64_t,
…
 static errno_t hr_raid5_bd_op(hr_bd_op_type_t, bd_srv_t *, aoff64_t, size_t,
     void *, const void *, size_t);
+static errno_t hr_raid5_rebuild(void *);

 /* bdops */
…
 }

+errno_t hr_raid5_add_hotspare(hr_volume_t *vol, service_id_t hotspare)
+{
+	HR_DEBUG("hr_raid5_add_hotspare()\n");
+
+	fibril_mutex_lock(&vol->lock);
+
+	if (vol->hotspare_no >= HR_MAX_HOTSPARES) {
+		HR_ERROR("hr_raid5_add_hotspare(): cannot add more hotspares "
+		    "to \"%s\"\n", vol->devname);
+		fibril_mutex_unlock(&vol->lock);
+		return ELIMIT;
+	}
+
+	vol->hotspares[vol->hotspare_no].svc_id = hotspare;
+	vol->hotspares[vol->hotspare_no].status = HR_EXT_HOTSPARE;
+	vol->hotspare_no++;
+
+	/*
+	 * If the volume is degraded, start rebuild right away.
+	 */
+	if (vol->status == HR_VOL_DEGRADED) {
+		HR_DEBUG("hr_raid5_add_hotspare(): volume in DEGRADED state, "
+		    "spawning new rebuild fibril\n");
+		fid_t fib = fibril_create(hr_raid5_rebuild, vol);
+		if (fib == 0)
+			return EINVAL;
+		fibril_start(fib);
+		fibril_detach(fib);
+	}
+
+	fibril_mutex_unlock(&vol->lock);
+
+	return EOK;
+}
+
 static errno_t hr_raid5_bd_open(bd_srvs_t *bds, bd_srv_t *bd)
 {
…
 		return EOK;
 	case 1:
-		if (old_state != HR_VOL_DEGRADED) {
+		if (old_state != HR_VOL_DEGRADED &&
+		    old_state != HR_VOL_REBUILD) {
 			HR_WARN("RAID 5 array \"%s\" (%lu) has 1 extent "
 			    "inactive, marking as DEGRADED",
 			    vol->devname, vol->svc_id);
 			vol->status = HR_VOL_DEGRADED;
+			if (vol->hotspare_no > 0) {
+				fid_t fib = fibril_create(hr_raid5_rebuild,
+				    vol);
+				if (fib == 0) {
+					return EINVAL;
+				}
+				fibril_start(fib);
+				fibril_detach(fib);
+			}
 		}
 		return EOK;
…
 		return EINVAL;
 	}
+}
+
+static void hr_raid5_handle_extent_error(hr_volume_t *vol, size_t extent,
+    errno_t rc)
+{
+	if (rc == ENOENT)
+		hr_update_ext_status(vol, extent, HR_EXT_MISSING);
+	else if (rc != EOK)
+		hr_update_ext_status(vol, extent, HR_EXT_FAILED);
 }

…
 			goto error;

-		if (rc == ENOENT)
-			hr_update_ext_status(vol, extent, HR_EXT_MISSING);
-		else if (rc != EOK)
-			hr_update_ext_status(vol, extent, HR_EXT_FAILED);
+		hr_raid5_handle_extent_error(vol, extent, rc);

 		if (rc != EOK) {
…
 }

+static errno_t hr_raid5_rebuild(void *arg)
+{
+	HR_DEBUG("hr_raid5_rebuild()\n");
+
+	hr_volume_t *vol = arg;
+	errno_t rc = EOK;
+	void *buf = NULL, *xorbuf = NULL;
+
+	fibril_mutex_lock(&vol->lock);
+
+	if (vol->hotspare_no == 0) {
+		HR_WARN("hr_raid5_rebuild(): no free hotspares on \"%s\", "
+		    "aborting rebuild\n", vol->devname);
+		/* retval isn't checked for now */
+		goto end;
+	}
+
+	size_t bad = vol->dev_no;
+	for (size_t i = 0; i < vol->dev_no; i++) {
+		if (vol->extents[i].status == HR_EXT_FAILED) {
+			bad = i;
+			break;
+		}
+	}
+
+	if (bad == vol->dev_no) {
+		HR_WARN("hr_raid5_rebuild(): no bad extent on \"%s\", "
+		    "aborting rebuild\n", vol->devname);
+		/* retval isn't checked for now */
+		goto end;
+	}
+
+	block_fini(vol->extents[bad].svc_id);
+
+	size_t hotspare_idx = vol->hotspare_no - 1;
+
+	vol->extents[bad].svc_id = vol->hotspares[hotspare_idx].svc_id;
+	hr_update_ext_status(vol, bad, HR_EXT_REBUILD);
+
+	vol->hotspares[hotspare_idx].svc_id = 0;
+	vol->hotspares[hotspare_idx].status = HR_EXT_MISSING;
+	vol->hotspare_no--;
+
+	HR_WARN("hr_raid5_rebuild(): changing volume \"%s\" (%lu) state "
+	    "from %s to %s\n", vol->devname, vol->svc_id,
+	    hr_get_vol_status_msg(vol->status),
+	    hr_get_vol_status_msg(HR_VOL_REBUILD));
+	vol->status = HR_VOL_REBUILD;
+
+	hr_extent_t *hotspare = &vol->extents[bad];
+
+	HR_DEBUG("hr_raid5_rebuild(): initing (%lu)\n", hotspare->svc_id);
+
+	rc = block_init(hotspare->svc_id);
+	if (rc != EOK) {
+		HR_ERROR("hr_raid5_rebuild(): initing (%lu) failed, "
+		    "aborting rebuild\n", hotspare->svc_id);
+		goto end;
+	}
+
+	uint64_t max_blks = DATA_XFER_LIMIT / vol->bsize;
+	uint64_t left = vol->data_blkno / (vol->dev_no - 1);
+	buf = malloc(max_blks * vol->bsize);
+	xorbuf = malloc(max_blks * vol->bsize);
+
+	uint64_t ba = 0, cnt;
+	hr_add_ba_offset(vol, &ba);
+	while (left != 0) {
+		cnt = min(left, max_blks);
+
+		/*
+		 * Almost the same as read_degraded,
+		 * but we don't want to allocate a new
+		 * xorbuf for each rebuilt block batch.
+		 */
+		bool first = true;
+		for (size_t i = 0; i < vol->dev_no; i++) {
+			if (i == bad)
+				continue;
+			rc = block_read_direct(vol->extents[i].svc_id, ba, cnt,
+			    buf);
+			if (rc != EOK) {
+				hr_raid5_handle_extent_error(vol, i, rc);
+				HR_ERROR("rebuild on \"%s\" (%lu), failed due "
+				    "to a failed ONLINE extent, number %lu\n",
+				    vol->devname, vol->svc_id, i);
+				goto end;
+			}
+
+			if (first)
+				memcpy(xorbuf, buf, cnt * vol->bsize);
+			else
+				xor(xorbuf, buf, cnt * vol->bsize);
+
+			first = false;
+		}
+
+		rc = block_write_direct(hotspare->svc_id, ba, cnt, xorbuf);
+		if (rc != EOK) {
+			hr_raid5_handle_extent_error(vol, bad, rc);
+			HR_ERROR("rebuild on \"%s\" (%lu), failed due to "
+			    "the rebuilt extent number %lu failing\n",
+			    vol->devname, vol->svc_id, bad);
+			goto end;
+		}
+
+		ba += cnt;
+		left -= cnt;
+	}
+
+	HR_DEBUG("hr_raid5_rebuild(): rebuild finished on \"%s\" (%lu), "
+	    "extent number %lu\n", vol->devname, vol->svc_id, hotspare_idx);
+
+	hr_update_ext_status(vol, bad, HR_EXT_ONLINE);
+	/*
+	 * For now write metadata at the end, because
+	 * we don't sync metadata across extents yet.
+	 */
+	hr_write_meta_to_ext(vol, bad);
+end:
+	(void) hr_raid5_update_vol_status(vol);
+
+	fibril_mutex_unlock(&vol->lock);
+
+	if (buf != NULL)
+		free(buf);
+
+	if (xorbuf != NULL)
+		free(xorbuf);
+
+	return rc;
+}
+
 /** @}
  */
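The core of hr_raid5_rebuild() is its inner loop: each batch of blocks for the replaced extent is recovered by XOR-ing the corresponding blocks of every surviving extent, which is exactly the identity RAID 5 parity guarantees. A self-contained illustration of that identity in plain C (the array layout, buffer sizes and the xor_bufs() helper are invented for the demo and are not the driver's code):

    #include <stdint.h>
    #include <stdio.h>

    #define NDEV  4    /* extents in the demo array (data + parity) */
    #define BSIZE 8    /* bytes per demo "block" */

    /* XOR src into dst byte by byte, like the driver's static xor() helper. */
    static void xor_bufs(uint8_t *dst, const uint8_t *src, size_t len)
    {
    	for (size_t i = 0; i < len; i++)
    		dst[i] ^= src[i];
    }

    int main(void)
    {
    	uint8_t ext[NDEV][BSIZE] = { "data-A", "data-B", "data-C", { 0 } };

    	/* Compute parity over the data extents into the last extent. */
    	for (size_t i = 0; i < NDEV - 1; i++)
    		xor_bufs(ext[NDEV - 1], ext[i], BSIZE);

    	/* "Lose" extent 1, then rebuild it by XOR-ing all survivors. */
    	size_t bad = 1;
    	uint8_t rebuilt[BSIZE] = { 0 };
    	for (size_t i = 0; i < NDEV; i++) {
    		if (i == bad)
    			continue;
    		xor_bufs(rebuilt, ext[i], BSIZE);
    	}

    	printf("rebuilt extent %zu: %s\n", bad, (char *) rebuilt);
    	return 0;
    }

The rebuild fibril does the same per batch, except that it memcpy()s the first surviving extent into xorbuf and XORs the rest (avoiding a zero-fill of the buffer) and then writes the result to the hotspare with block_write_direct().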
uspace/srv/bd/hr/var.h
 extern errno_t hr_raid1_add_hotspare(hr_volume_t *, service_id_t);
 extern errno_t hr_raid4_add_hotspare(hr_volume_t *, service_id_t);
+extern errno_t hr_raid5_add_hotspare(hr_volume_t *, service_id_t);

 #endif