Changeset 30140c1b in mainline
- Timestamp:
- 2024-11-22T21:27:35Z (7 months ago)
- Children:
- a56ac5d1
- Parents:
- f81960c5
- Location:
- uspace/srv/bd/hr
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
uspace/srv/bd/hr/hr.c
rf81960c5 r30140c1b 202 202 new_volume->hr_ops.init = hr_raid4_init; 203 203 new_volume->hr_ops.status_event = hr_raid4_status_event; 204 new_volume->hr_ops.add_hotspare = hr_raid4_add_hotspare; 204 205 break; 205 206 case HR_LVL_5: -
uspace/srv/bd/hr/raid4.c
rf81960c5 r30140c1b 58 58 static ssize_t hr_raid4_get_bad_ext(hr_volume_t *); 59 59 static errno_t hr_raid4_update_vol_status(hr_volume_t *); 60 static void hr_raid4_handle_extent_error(hr_volume_t *, size_t, errno_t); 60 61 static void xor(void *, const void *, size_t); 61 62 static errno_t hr_raid4_read_degraded(hr_volume_t *, uint64_t, uint64_t, … … 67 68 static errno_t hr_raid4_bd_op(hr_bd_op_type_t, bd_srv_t *, aoff64_t, size_t, 68 69 void *, const void *, size_t); 70 static errno_t hr_raid4_rebuild(void *); 69 71 70 72 /* bdops */ … … 145 147 } 146 148 149 errno_t hr_raid4_add_hotspare(hr_volume_t *vol, service_id_t hotspare) 150 { 151 HR_DEBUG("hr_raid4_add_hotspare()\n"); 152 153 fibril_mutex_lock(&vol->lock); 154 155 if (vol->hotspare_no >= HR_MAX_HOTSPARES) { 156 HR_ERROR("hr_raid4_add_hotspare(): cannot add more hotspares " 157 "to \"%s\"\n", vol->devname); 158 fibril_mutex_unlock(&vol->lock); 159 return ELIMIT; 160 } 161 162 vol->hotspares[vol->hotspare_no].svc_id = hotspare; 163 vol->hotspares[vol->hotspare_no].status = HR_EXT_HOTSPARE; 164 vol->hotspare_no++; 165 166 /* 167 * If the volume is degraded, start rebuild right away. 
168 */ 169 if (vol->status == HR_VOL_DEGRADED) { 170 HR_DEBUG("hr_raid4_add_hotspare(): volume in DEGRADED state, " 171 "spawning new rebuild fibril\n"); 172 fid_t fib = fibril_create(hr_raid4_rebuild, vol); 173 if (fib == 0) 174 return EINVAL; 175 fibril_start(fib); 176 fibril_detach(fib); 177 } 178 179 fibril_mutex_unlock(&vol->lock); 180 181 return EOK; 182 } 183 147 184 static errno_t hr_raid4_bd_open(bd_srvs_t *bds, bd_srv_t *bd) 148 185 { … … 228 265 return EOK; 229 266 case 1: 230 if (old_state != HR_VOL_DEGRADED) { 267 if (old_state != HR_VOL_DEGRADED && 268 old_state != HR_VOL_REBUILD) { 231 269 HR_WARN("RAID 4 array \"%s\" (%lu) has 1 extent " 232 270 "inactive, marking as DEGRADED", 233 271 vol->devname, vol->svc_id); 234 272 vol->status = HR_VOL_DEGRADED; 273 if (vol->hotspare_no > 0) { 274 fid_t fib = fibril_create(hr_raid4_rebuild, 275 vol); 276 if (fib == 0) { 277 return EINVAL; 278 } 279 fibril_start(fib); 280 fibril_detach(fib); 281 } 235 282 } 236 283 return EOK; … … 238 285 if (old_state != HR_VOL_FAULTY) { 239 286 HR_WARN("RAID 4 array \"%s\" (%lu) has more " 240 "than one 1 extent inactive, marking as FAULTY",287 "than one 1 extent unusable, marking as FAULTY", 241 288 vol->devname, vol->svc_id); 242 289 vol->status = HR_VOL_FAULTY; … … 244 291 return EINVAL; 245 292 } 293 } 294 295 static void hr_raid4_handle_extent_error(hr_volume_t *vol, size_t extent, 296 errno_t rc) 297 { 298 if (rc == ENOENT) 299 hr_update_ext_status(vol, extent, HR_EXT_MISSING); 300 else if (rc != EOK) 301 hr_update_ext_status(vol, extent, HR_EXT_FAILED); 246 302 } 247 303 … … 512 568 goto error; 513 569 514 if (rc == ENOENT) 515 hr_update_ext_status(vol, extent, HR_EXT_MISSING); 516 else if (rc != EOK) 517 hr_update_ext_status(vol, extent, HR_EXT_FAILED); 570 hr_raid4_handle_extent_error(vol, extent, rc); 518 571 519 572 if (rc != EOK) { … … 552 605 } 553 606 607 static errno_t hr_raid4_rebuild(void *arg) 608 { 609 HR_DEBUG("hr_raid4_rebuild()\n"); 610 611 hr_volume_t 
*vol = arg; 612 errno_t rc = EOK; 613 void *buf = NULL, *xorbuf = NULL; 614 615 fibril_mutex_lock(&vol->lock); 616 617 if (vol->hotspare_no == 0) { 618 HR_WARN("hr_raid4_rebuild(): no free hotspares on \"%s\", " 619 "aborting rebuild\n", vol->devname); 620 /* retval isn't checked for now */ 621 goto end; 622 } 623 624 size_t bad = vol->dev_no; 625 for (size_t i = 0; i < vol->dev_no; i++) { 626 if (vol->extents[i].status == HR_EXT_FAILED) { 627 bad = i; 628 break; 629 } 630 } 631 632 if (bad == vol->dev_no) { 633 HR_WARN("hr_raid4_rebuild(): no bad extent on \"%s\", " 634 "aborting rebuild\n", vol->devname); 635 /* retval isn't checked for now */ 636 goto end; 637 } 638 639 block_fini(vol->extents[bad].svc_id); 640 641 size_t hotspare_idx = vol->hotspare_no - 1; 642 643 vol->extents[bad].svc_id = vol->hotspares[hotspare_idx].svc_id; 644 hr_update_ext_status(vol, bad, HR_EXT_REBUILD); 645 646 vol->hotspares[hotspare_idx].svc_id = 0; 647 vol->hotspares[hotspare_idx].status = HR_EXT_MISSING; 648 vol->hotspare_no--; 649 650 HR_WARN("hr_raid4_rebuild(): changing volume \"%s\" (%lu) state " 651 "from %s to %s\n", vol->devname, vol->svc_id, 652 hr_get_vol_status_msg(vol->status), 653 hr_get_vol_status_msg(HR_VOL_REBUILD)); 654 vol->status = HR_VOL_REBUILD; 655 656 hr_extent_t *hotspare = &vol->extents[bad]; 657 658 HR_DEBUG("hr_raid4_rebuild(): initing (%lu)\n", hotspare->svc_id); 659 660 rc = block_init(hotspare->svc_id); 661 if (rc != EOK) { 662 HR_ERROR("hr_raid4_rebuild(): initing (%lu) failed, " 663 "aborting rebuild\n", hotspare->svc_id); 664 goto end; 665 } 666 667 uint64_t max_blks = DATA_XFER_LIMIT / vol->bsize; 668 uint64_t left = vol->data_blkno / (vol->dev_no - 1); 669 buf = malloc(max_blks * vol->bsize); 670 xorbuf = malloc(max_blks * vol->bsize); 671 672 uint64_t ba = 0, cnt; 673 hr_add_ba_offset(vol, &ba); 674 while (left != 0) { 675 cnt = min(left, max_blks); 676 677 /* 678 * Almost the same as read_degraded, 679 * but we don't want to allocate new 680 * 
xorbuf each blk rebuild batch. 681 */ 682 bool first = true; 683 for (size_t i = 0; i < vol->dev_no; i++) { 684 if (i == bad) 685 continue; 686 rc = block_read_direct(vol->extents[i].svc_id, ba, cnt, 687 buf); 688 if (rc != EOK) { 689 hr_raid4_handle_extent_error(vol, i, rc); 690 HR_ERROR("rebuild on \"%s\" (%lu), failed due " 691 "to a failed ONLINE extent, number %lu\n", 692 vol->devname, vol->svc_id, i); 693 goto end; 694 } 695 696 if (first) 697 memcpy(xorbuf, buf, cnt * vol->bsize); 698 else 699 xor(xorbuf, buf, cnt * vol->bsize); 700 701 first = false; 702 } 703 704 rc = block_write_direct(hotspare->svc_id, ba, cnt, xorbuf); 705 if (rc != EOK) { 706 hr_raid4_handle_extent_error(vol, bad, rc); 707 HR_ERROR("rebuild on \"%s\" (%lu), failed due to " 708 "the rebuilt extent number %lu failing\n", 709 vol->devname, vol->svc_id, bad); 710 goto end; 711 } 712 713 ba += cnt; 714 left -= cnt; 715 } 716 717 HR_DEBUG("hr_raid4_rebuild(): rebuild finished on \"%s\" (%lu), " 718 "extent number %lu\n", vol->devname, vol->svc_id, hotspare_idx); 719 720 hr_update_ext_status(vol, bad, HR_EXT_ONLINE); 721 /* 722 * For now write metadata at the end, because 723 * we don't sync metadata across extents yet. 724 */ 725 hr_write_meta_to_ext(vol, bad); 726 end: 727 (void) hr_raid4_update_vol_status(vol); 728 729 fibril_mutex_unlock(&vol->lock); 730 731 if (buf != NULL) 732 free(buf); 733 734 if (xorbuf != NULL) 735 free(xorbuf); 736 737 return rc; 738 } 739 554 740 /** @} 555 741 */ -
uspace/srv/bd/hr/var.h
rf81960c5 r30140c1b 106 106 107 107 extern errno_t hr_raid1_add_hotspare(hr_volume_t *, service_id_t); 108 extern errno_t hr_raid4_add_hotspare(hr_volume_t *, service_id_t); 108 109 109 110 #endif
Note:
See TracChangeset
for help on using the changeset viewer.