Changeset 11111e4 in mainline
- Timestamp:
- 2024-11-10T21:29:06Z (6 months ago)
- Children:
- 90eec9c0
- Parents:
- 13ce552
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
uspace/srv/bd/hr/raid4.c
r13ce552 r11111e4 65 65 static errno_t hr_raid4_bd_get_num_blocks(bd_srv_t *, aoff64_t *); 66 66 67 static errno_t hr_raid4_write_parity(hr_volume_t *, uint64_t, uint64_t, 68 const void *, size_t); 69 67 70 static bd_ops_t hr_raid4_bd_ops = { 68 71 .open = hr_raid4_bd_open, … … 75 78 }; 76 79 80 static errno_t hr_raid4_vol_usable(hr_volume_t *vol) 81 { 82 if (vol->status == HR_VOL_ONLINE || 83 vol->status == HR_VOL_DEGRADED) 84 return EOK; 85 return EINVAL; 86 } 87 88 /* 89 * Return first bad extent 90 */ 91 static ssize_t hr_raid4_get_bad_ext(hr_volume_t *vol) 92 { 93 for (size_t i = 0; i < vol->dev_no; i++) 94 if (vol->extents[i].status != HR_EXT_ONLINE) 95 return i; 96 return -1; 97 } 98 99 static errno_t hr_raid4_update_vol_status(hr_volume_t *vol) 100 { 101 hr_vol_status_t old_state = vol->status; 102 size_t bad = 0; 103 for (size_t i = 0; i < vol->dev_no; i++) 104 if (vol->extents[i].status != HR_EXT_ONLINE) 105 bad++; 106 107 switch (bad) { 108 case 0: 109 if (old_state != HR_VOL_ONLINE) { 110 log_msg(LOG_DEFAULT, LVL_ERROR, 111 "RAID 4 has all extents online, " 112 "marking \"%s\" (%lu) as ONLINE", 113 vol->devname, vol->svc_id); 114 vol->status = HR_VOL_ONLINE; 115 } 116 return EOK; 117 case 1: 118 if (old_state != HR_VOL_DEGRADED) { 119 log_msg(LOG_DEFAULT, LVL_ERROR, 120 "RAID 4 array \"%s\" (%lu) has 1 extent inactive, " 121 "marking as DEGRADED", 122 vol->devname, vol->svc_id); 123 vol->status = HR_VOL_DEGRADED; 124 } 125 return EOK; 126 default: 127 if (old_state != HR_VOL_FAULTY) { 128 log_msg(LOG_DEFAULT, LVL_ERROR, 129 "RAID 4 array \"%s\" (%lu) has more than one 1 " 130 "extent inactive, marking as FAULTY", 131 vol->devname, vol->svc_id); 132 vol->status = HR_VOL_FAULTY; 133 } 134 return EINVAL; 135 } 136 } 137 77 138 static void xor(void *dst, const void *src, size_t size) 78 139 { … … 85 146 } 86 147 87 static errno_t write_parity(hr_volume_t *vol, uint64_t extent, uint64_t block, 148 static errno_t hr_raid4_read_degraded(hr_volume_t *vol, uint64_t bad, 149 uint64_t block, void *data, size_t cnt) 150 { 151 errno_t rc; 152 size_t i, j; 153 void *xorbuf; 154 void *buf; 155 156 xorbuf = malloc(vol->bsize); 157 if (xorbuf == NULL) 158 return ENOMEM; 159 160 buf = malloc(vol->bsize); 161 if (buf == NULL) { 162 free(xorbuf); 163 return ENOMEM; 164 } 165 166 /* read all other extents in stripe */ 167 for (j = 0; j < cnt; j++) { 168 memset(xorbuf, 0, vol->bsize); 169 for (i = 0; i < vol->dev_no; i++) { 170 if (i == bad) { 171 continue; 172 } else { 173 rc = block_read_direct(vol->extents[i].svc_id, 174 block, 1, buf); 175 if (rc != EOK) 176 goto end; 177 xor(xorbuf, buf, vol->bsize); 178 } 179 } 180 memcpy(data, xorbuf, vol->bsize); 181 data = (void *) ((uintptr_t) data + vol->bsize); 182 block++; 183 } 184 end: 185 free(xorbuf); 186 free(buf); 187 return rc; 188 } 189 190 static errno_t hr_raid4_write(hr_volume_t *vol, uint64_t extent, aoff64_t ba, 88 191 const void *data, size_t cnt) 192 { 193 errno_t rc; 194 size_t i, j; 195 void *xorbuf; 196 void *buf; 197 198 ssize_t bad = hr_raid4_get_bad_ext(vol); 199 if (bad < 1) { 200 rc = block_write_direct(vol->extents[extent].svc_id, ba, cnt, 201 data); 202 if (rc != EOK) 203 return rc; 204 /* 205 * DEGRADED parity - skip parity write 206 */ 207 if (bad == 0) 208 return EOK; 209 210 rc = hr_raid4_write_parity(vol, extent, ba, data, cnt); 211 return rc; 212 } 213 214 xorbuf = malloc(vol->bsize); 215 if (xorbuf == NULL) 216 return ENOMEM; 217 218 buf = malloc(vol->bsize); 219 if (buf == NULL) { 220 free(xorbuf); 221 return ENOMEM; 222 } 223 224 if (extent == (size_t) bad) { 225 /* 226 * new parity = read other and xor in new data 227 * 228 * write new parity 229 */ 230 for (j = 0; j < cnt; j++) { 231 memset(xorbuf, 0, vol->bsize); 232 for (i = 1; i < vol->dev_no; i++) { 233 if (i == (size_t) bad) { 234 continue; 235 } else { 236 rc = block_read_direct(vol->extents[i].svc_id, 237 ba, 1, buf); 238 if (rc != EOK) 239 goto end; 240 xor(xorbuf, buf, vol->bsize); 241 } 242 } 243 xor(xorbuf, data, vol->bsize); 244 rc = block_write_direct(vol->extents[0].svc_id, ba, 1, 245 xorbuf); 246 if (rc != EOK) 247 goto end; 248 data = (void *) ((uintptr_t) data + vol->bsize); 249 ba++; 250 } 251 } else { 252 /* 253 * new parity = xor original data and old parity and new data 254 * 255 * write parity, new data 256 */ 257 for (j = 0; j < cnt; j++) { 258 rc = block_read_direct(vol->extents[extent].svc_id, ba, 259 1, xorbuf); 260 if (rc != EOK) 261 goto end; 262 rc = block_read_direct(vol->extents[0].svc_id, ba, 1, 263 buf); 264 if (rc != EOK) 265 goto end; 266 xor(xorbuf, buf, vol->bsize); 267 268 xor(xorbuf, data, vol->bsize); 269 270 rc = block_write_direct(vol->extents[0].svc_id, ba, 1, 271 xorbuf); 272 if (rc != EOK) 273 goto end; 274 rc = block_write_direct(vol->extents[extent].svc_id, 275 ba, 1, data); 276 if (rc != EOK) 277 goto end; 278 data = (void *) ((uintptr_t) data + vol->bsize); 279 ba++; 280 } 281 } 282 end: 283 free(xorbuf); 284 free(buf); 285 return rc; 286 } 287 288 static errno_t hr_raid4_write_parity(hr_volume_t *vol, uint64_t extent, 289 uint64_t block, const void *data, size_t cnt) 89 290 { 90 291 errno_t rc; … … 149 350 size_t left; 150 351 352 /* propagate sync */ 353 if (type == HR_BD_SYNC && ba == 0 && cnt == 0) { 354 hr_sync_all_extents(vol); 355 rc = hr_raid4_update_vol_status(vol); 356 return rc; 357 } 358 151 359 if (type == HR_BD_READ || type == HR_BD_WRITE) 152 360 if (size < cnt * vol->bsize) … … 164 372 165 373 fibril_mutex_lock(&vol->lock); 374 375 rc = hr_raid4_vol_usable(vol); 376 if (rc != EOK) { 377 fibril_mutex_unlock(&vol->lock); 378 return EIO; 379 } 166 380 167 381 left = cnt; … … 172 386 switch (type) { 173 387 case HR_BD_SYNC: 388 if (vol->extents[extent].status != HR_EXT_ONLINE) 389 break; 174 390 rc = block_sync_cache(vol->extents[extent].svc_id, 175 391 phys_block, cnt); 392 /* allow unsupported sync */ 393 if (rc == ENOTSUP) 394 rc = EOK; 176 395 break; 177 396 case HR_BD_READ: 178 rc = block_read_direct(vol->extents[extent].svc_id, 179 phys_block, cnt, data_read); 180 data_read = (void *) ((uintptr_t) data_read + 181 (vol->bsize * cnt)); 397 retry_read: 398 ssize_t bad = hr_raid4_get_bad_ext(vol); 399 if (bad > 0 && extent == (size_t) bad) { 400 rc = hr_raid4_read_degraded(vol, bad, 401 phys_block, data_read, cnt); 402 } else { 403 rc = block_read_direct(vol->extents[extent].svc_id, 404 phys_block, cnt, data_read); 405 } 406 407 data_read += vol->bsize * cnt; 182 408 break; 183 409 case HR_BD_WRITE: 184 rc = block_write_direct(vol->extents[extent].svc_id, 185 phys_block, cnt, data_write); 186 if (rc != EOK) 187 goto error; 188 rc = write_parity(vol, extent, phys_block, data_write, 189 cnt); 190 if (rc != EOK) 191 goto error; 192 data_write = (void *) ((uintptr_t) data_write + 193 (vol->bsize * cnt)); 410 retry_write: 411 rc = hr_raid4_write(vol, extent, phys_block, 412 data_write, cnt); 413 414 data_write += vol->bsize * cnt; 194 415 break; 195 416 default: 196 417 rc = EINVAL; 197 }198 199 if (rc != EOK)200 418 goto error; 419 } 420 421 if (rc == ENOENT) 422 hr_update_ext_status(vol, extent, HR_EXT_MISSING); 423 else if (rc != EOK) 424 hr_update_ext_status(vol, extent, HR_EXT_FAILED); 425 426 if (rc != EOK) { 427 rc = hr_raid4_update_vol_status(vol); 428 if (rc == EOK) { 429 /* 430 * State changed from ONLINE -> DEGRADED, 431 * rewind and retry 432 */ 433 if (type == HR_BD_WRITE) { 434 data_write -= vol->bsize * cnt; 435 goto retry_write; 436 } else if (type == HR_BD_WRITE) { 437 data_read -= vol->bsize * cnt; 438 goto retry_read; 439 } 440 } else { 441 rc = EIO; 442 goto error; 443 } 444 } 201 445 202 446 left -= cnt; … … 210 454 211 455 error: 456 (void) hr_raid4_update_vol_status(vol); 212 457 fibril_mutex_unlock(&vol->lock); 213 458 return rc; … … 258 503 return EINVAL; 259 504 } 505 506 rc = hr_raid4_update_vol_status(new_volume); 507 if (rc != EOK) 508 return rc; 260 509 261 510 bd_srvs_init(&new_volume->hr_bds);
Note:
See TracChangeset
for help on using the changeset viewer.