Changeset da0570a in mainline
- Timestamp:
- 2024-11-13T12:56:47Z (7 months ago)
- Children:
- fde02a6
- Parents:
- d092d2c
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
uspace/srv/bd/hr/raid5.c
rd092d2c rda0570a 65 65 static errno_t hr_raid5_bd_get_num_blocks(bd_srv_t *, aoff64_t *); 66 66 67 static errno_t hr_raid5_write_parity(hr_volume_t *, uint64_t, uint64_t, 68 uint64_t, const void *, size_t); 69 67 70 static bd_ops_t hr_raid5_bd_ops = { 68 71 .open = hr_raid5_bd_open, … … 75 78 }; 76 79 80 static errno_t hr_raid5_vol_usable(hr_volume_t *vol) 81 { 82 if (vol->status == HR_VOL_ONLINE || 83 vol->status == HR_VOL_DEGRADED) 84 return EOK; 85 return EINVAL; 86 } 87 88 /* 89 * Returns (-1) if all extents are online, 90 * else returns index of first bad one. 91 */ 92 static ssize_t hr_raid5_get_bad_ext(hr_volume_t *vol) 93 { 94 for (size_t i = 0; i < vol->dev_no; i++) 95 if (vol->extents[i].status != HR_EXT_ONLINE) 96 return i; 97 return -1; 98 } 99 100 static errno_t hr_raid5_update_vol_status(hr_volume_t *vol) 101 { 102 hr_vol_status_t old_state = vol->status; 103 size_t bad = 0; 104 for (size_t i = 0; i < vol->dev_no; i++) 105 if (vol->extents[i].status != HR_EXT_ONLINE) 106 bad++; 107 108 switch (bad) { 109 case 0: 110 if (old_state != HR_VOL_ONLINE) { 111 log_msg(LOG_DEFAULT, LVL_ERROR, 112 "RAID 5 has all extents online, " 113 "marking \"%s\" (%lu) as ONLINE", 114 vol->devname, vol->svc_id); 115 vol->status = HR_VOL_ONLINE; 116 } 117 return EOK; 118 case 1: 119 if (old_state != HR_VOL_DEGRADED) { 120 log_msg(LOG_DEFAULT, LVL_ERROR, 121 "RAID 5 array \"%s\" (%lu) has 1 extent inactive, " 122 "marking as DEGRADED", 123 vol->devname, vol->svc_id); 124 vol->status = HR_VOL_DEGRADED; 125 } 126 return EOK; 127 default: 128 if (old_state != HR_VOL_FAULTY) { 129 log_msg(LOG_DEFAULT, LVL_ERROR, 130 "RAID 5 array \"%s\" (%lu) has more than one 1 " 131 "extent inactive, marking as FAULTY", 132 vol->devname, vol->svc_id); 133 vol->status = HR_VOL_FAULTY; 134 } 135 return EINVAL; 136 } 137 } 138 77 139 static void xor(void *dst, const void *src, size_t size) 78 140 { … … 85 147 } 86 148 87 static errno_t write_parity(hr_volume_t *vol, uint64_t p_extent,88 uint64_t extent, uint64_t block, constvoid *data, size_t cnt)149 static errno_t hr_raid5_read_degraded(hr_volume_t *vol, uint64_t bad, 150 uint64_t block, void *data, size_t cnt) 89 151 { 90 152 errno_t rc; 91 size_t i , j;153 size_t i; 92 154 void *xorbuf; 93 155 void *buf; 94 95 xorbuf = malloc(vol->bsize); 156 uint64_t len = vol->bsize * cnt; 157 158 xorbuf = malloc(len); 96 159 if (xorbuf == NULL) 97 160 return ENOMEM; 98 161 99 buf = malloc( vol->bsize);162 buf = malloc(len); 100 163 if (buf == NULL) { 101 164 free(xorbuf); … … 103 166 } 104 167 105 for (j = 0; j < cnt; j++) { 106 memset(xorbuf, 0, vol->bsize); 107 for (i = 0; i < vol->dev_no; i++) { 108 if (i == p_extent) 109 continue; 110 111 if (i == extent) { 112 xor(xorbuf, data, vol->bsize); 113 } else { 114 rc = block_read_direct(vol->extents[i].svc_id, 115 block, 1, buf); 116 if (rc != EOK) 117 goto end; 118 xor(xorbuf, buf, vol->bsize); 119 } 120 } 121 122 rc = block_write_direct(vol->extents[p_extent].svc_id, block, 1, 123 xorbuf); 124 if (rc != EOK) 125 goto end; 126 data = (void *) ((uintptr_t) data + vol->bsize); 127 block++; 128 } 129 168 /* read all other extents in the stripe */ 169 memset(xorbuf, 0, len); 170 for (i = 0; i < vol->dev_no; i++) { 171 if (i == bad) { 172 continue; 173 } else { 174 rc = block_read_direct(vol->extents[i].svc_id, block, 175 cnt, buf); 176 if (rc != EOK) 177 goto end; 178 xor(xorbuf, buf, len); 179 } 180 } 181 182 memcpy(data, xorbuf, len); 130 183 end: 131 184 free(xorbuf); … … 134 187 } 135 188 189 static errno_t hr_raid5_write(hr_volume_t *vol, uint64_t p_extent, 190 uint64_t extent, aoff64_t ba, const void *data, size_t cnt) 191 { 192 errno_t rc; 193 size_t i; 194 void *xorbuf; 195 void *buf; 196 uint64_t len = vol->bsize * cnt; 197 198 ssize_t bad = hr_raid5_get_bad_ext(vol); 199 if (bad == -1 || (size_t)bad == p_extent) { 200 rc = block_write_direct(vol->extents[extent].svc_id, ba, cnt, 201 data); 202 if (rc != EOK) 203 return rc; 204 /* 205 * DEGRADED parity - skip parity write 206 */ 207 if ((size_t)bad == p_extent) 208 return EOK; 209 210 rc = hr_raid5_write_parity(vol, p_extent, extent, ba, data, 211 cnt); 212 return rc; 213 } 214 215 xorbuf = malloc(len); 216 if (xorbuf == NULL) 217 return ENOMEM; 218 219 buf = malloc(len); 220 if (buf == NULL) { 221 free(xorbuf); 222 return ENOMEM; 223 } 224 225 if (extent == (size_t) bad) { 226 /* 227 * new parity = read other and xor in new data 228 * 229 * write new parity 230 */ 231 memset(xorbuf, 0, len); 232 for (i = 1; i < vol->dev_no; i++) { 233 if (i == (size_t) bad) { 234 continue; 235 } else { 236 rc = block_read_direct(vol->extents[i].svc_id, 237 ba, cnt, buf); 238 if (rc != EOK) 239 goto end; 240 xor(xorbuf, buf, len); 241 } 242 } 243 xor(xorbuf, data, len); 244 rc = block_write_direct(vol->extents[p_extent].svc_id, ba, cnt, 245 xorbuf); 246 if (rc != EOK) 247 goto end; 248 } else { 249 /* 250 * new parity = xor original data and old parity and new data 251 * 252 * write parity, new data 253 */ 254 rc = block_read_direct(vol->extents[extent].svc_id, ba, cnt, 255 xorbuf); 256 if (rc != EOK) 257 goto end; 258 rc = block_read_direct(vol->extents[p_extent].svc_id, ba, cnt, 259 buf); 260 if (rc != EOK) 261 goto end; 262 263 xor(xorbuf, buf, len); 264 265 xor(xorbuf, data, len); 266 267 rc = block_write_direct(vol->extents[p_extent].svc_id, ba, cnt, 268 xorbuf); 269 if (rc != EOK) 270 goto end; 271 rc = block_write_direct(vol->extents[extent].svc_id, ba, cnt, 272 data); 273 if (rc != EOK) 274 goto end; 275 } 276 end: 277 free(xorbuf); 278 free(buf); 279 return rc; 280 } 281 282 static errno_t hr_raid5_write_parity(hr_volume_t *vol, uint64_t p_extent, 283 uint64_t extent, uint64_t block, const void *data, size_t cnt) 284 { 285 errno_t rc; 286 size_t i; 287 void *xorbuf; 288 void *buf; 289 uint64_t len = vol->bsize * cnt; 290 291 xorbuf = malloc(len); 292 if (xorbuf == NULL) 293 return ENOMEM; 294 295 buf = malloc(len); 296 if (buf == NULL) { 297 free(xorbuf); 298 return ENOMEM; 299 } 300 301 memset(xorbuf, 0, len); 302 for (i = 0; i < vol->dev_no; i++) { 303 if (i == p_extent) 304 continue; 305 if (i == extent) { 306 xor(xorbuf, data, vol->bsize); 307 } else { 308 rc = block_read_direct(vol->extents[i].svc_id, 309 block, cnt, buf); 310 if (rc != EOK) 311 goto end; 312 xor(xorbuf, buf, vol->bsize); 313 } 314 } 315 316 rc = block_write_direct(vol->extents[p_extent].svc_id, block, cnt, 317 xorbuf); 318 end: 319 free(xorbuf); 320 free(buf); 321 return rc; 322 } 323 136 324 static errno_t hr_raid5_bd_open(bd_srvs_t *bds, bd_srv_t *bd) 137 325 { … … 147 335 148 336 static errno_t hr_raid5_bd_op(hr_bd_op_type_t type, bd_srv_t *bd, aoff64_t ba, 149 size_t cnt, void *d ata_read, const void *data_write, size_t size)337 size_t cnt, void *dst, const void *src, size_t size) 150 338 { 151 339 hr_volume_t *vol = bd->srvs->sarg; 152 340 errno_t rc; 153 uint64_t phys_block ;341 uint64_t phys_block, len; 154 342 size_t left; 343 const uint8_t *data_write = src; 344 uint8_t *data_read = dst; 345 346 /* propagate sync */ 347 if (type == HR_BD_SYNC && ba == 0 && cnt == 0) { 348 hr_sync_all_extents(vol); 349 rc = hr_raid5_update_vol_status(vol); 350 return rc; 351 } 155 352 156 353 if (type == HR_BD_READ || type == HR_BD_WRITE) … … 175 372 fibril_mutex_lock(&vol->lock); 176 373 374 rc = hr_raid5_vol_usable(vol); 375 if (rc != EOK) { 376 fibril_mutex_unlock(&vol->lock); 377 return EIO; 378 } 379 177 380 left = cnt; 178 381 while (left != 0) { 179 382 phys_block = ext_stripe * strip_size + strip_off; 180 383 cnt = min(left, strip_size - strip_off); 384 len = vol->bsize * cnt; 181 385 hr_add_ba_offset(vol, &phys_block); 182 386 switch (type) { 183 387 case HR_BD_SYNC: 388 if (vol->extents[extent].status != HR_EXT_ONLINE) 389 break; 184 390 rc = block_sync_cache(vol->extents[extent].svc_id, 185 391 phys_block, cnt); 392 /* allow unsupported sync */ 393 if (rc == ENOTSUP) 394 rc = EOK; 186 395 break; 187 396 case HR_BD_READ: 188 rc = block_read_direct(vol->extents[extent].svc_id, 189 phys_block, cnt, data_read); 190 data_read = (void *) ((uintptr_t) data_read + 191 (vol->bsize * cnt)); 397 retry_read: 398 ssize_t bad = hr_raid5_get_bad_ext(vol); 399 if (bad > 0 && extent == (size_t) bad) { 400 rc = hr_raid5_read_degraded(vol, bad, 401 phys_block, data_read, cnt); 402 } else { 403 rc = block_read_direct(vol->extents[extent].svc_id, 404 phys_block, cnt, data_read); 405 } 406 data_read += len; 192 407 break; 193 408 case HR_BD_WRITE: 194 rc = block_write_direct(vol->extents[extent].svc_id, 195 phys_block, cnt, data_write); 196 if (rc != EOK) 197 goto error; 198 rc = write_parity(vol, p_extent, extent, phys_block, 409 retry_write: 410 rc = hr_raid5_write(vol, p_extent, extent, phys_block, 199 411 data_write, cnt); 200 if (rc != EOK) 201 goto error; 202 data_write = (void *) ((uintptr_t) data_write + 203 (vol->bsize * cnt)); 412 data_write += len; 204 413 break; 205 414 default: 206 415 rc = EINVAL; 207 }208 209 if (rc != EOK)210 416 goto error; 417 } 418 419 if (rc == ENOMEM) 420 goto error; 421 422 if (rc == ENOENT) 423 hr_update_ext_status(vol, extent, HR_EXT_MISSING); 424 else if (rc != EOK) 425 hr_update_ext_status(vol, extent, HR_EXT_FAILED); 426 427 if (rc != EOK) { 428 rc = hr_raid5_update_vol_status(vol); 429 if (rc == EOK) { 430 /* 431 * State changed from ONLINE -> DEGRADED, 432 * rewind and retry 433 */ 434 if (type == HR_BD_WRITE) { 435 data_write -= len; 436 goto retry_write; 437 } else if (type == HR_BD_WRITE) { 438 data_read -= len; 439 goto retry_read; 440 } 441 } else { 442 rc = EIO; 443 goto error; 444 } 445 } 211 446 212 447 left -= cnt; … … 224 459 225 460 error: 461 (void) hr_raid5_update_vol_status(vol); 226 462 fibril_mutex_unlock(&vol->lock); 227 463 return rc; … … 272 508 return EINVAL; 273 509 } 510 511 rc = hr_raid5_update_vol_status(new_volume); 512 if (rc != EOK) 513 return rc; 274 514 275 515 bd_srvs_init(&new_volume->hr_bds);
Note:
See TracChangeset
for help on using the changeset viewer.