Changeset a3486f2 in mainline
- Timestamp:
- 2025-06-17T14:17:20Z (4 months ago)
- Children:
- 137f7cf5
- Parents:
- 9d1685b
- Location:
- uspace/srv/bd/hr
- Files:
-
- 2 added
- 7 edited
Legend:
- Unmodified
- Added
- Removed
-
uspace/srv/bd/hr/io.c
r9d1685b ra3486f2 39 39 #include <inttypes.h> 40 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <str.h> 41 43 #include <str_error.h> 42 44 43 45 #include "io.h" 46 #include "parity_stripe.h" 44 47 #include "util.h" 45 48 #include "var.h" … … 115 118 } 116 119 120 errno_t hr_io_raid5_basic_reader(void *arg) 121 { 122 errno_t rc; 123 124 hr_io_raid5_t *io = arg; 125 126 size_t ext_idx = io->extent; 127 hr_extent_t *extents = (hr_extent_t *)&io->vol->extents; 128 129 rc = hr_read_direct(extents[ext_idx].svc_id, io->ba, io->cnt, 130 io->data_read); 131 if (rc != EOK) 132 io->vol->hr_ops.ext_state_cb(io->vol, io->extent, rc); 133 134 return rc; 135 } 136 137 errno_t hr_io_raid5_reader(void *arg) 138 { 139 errno_t rc; 140 141 hr_io_raid5_t *io = arg; 142 hr_stripe_t *stripe = io->stripe; 143 144 size_t ext_idx = io->extent; 145 hr_extent_t *extents = (hr_extent_t *)&io->vol->extents; 146 147 rc = hr_read_direct(extents[ext_idx].svc_id, io->ba, io->cnt, 148 io->data_read); 149 if (rc != EOK) { 150 hr_stripe_parity_abort(stripe); 151 io->vol->hr_ops.ext_state_cb(io->vol, io->extent, rc); 152 } 153 154 hr_stripe_commit_parity(stripe, io->strip_off, io->data_read, 155 io->cnt * io->vol->bsize); 156 157 return rc; 158 } 159 160 errno_t hr_io_raid5_basic_writer(void *arg) 161 { 162 errno_t rc; 163 164 hr_io_raid5_t *io = arg; 165 166 size_t ext_idx = io->extent; 167 hr_extent_t *extents = (hr_extent_t *)&io->vol->extents; 168 169 rc = hr_write_direct(extents[ext_idx].svc_id, io->ba, io->cnt, 170 io->data_write); 171 if (rc != EOK) 172 io->vol->hr_ops.ext_state_cb(io->vol, io->extent, rc); 173 174 return rc; 175 } 176 177 errno_t hr_io_raid5_writer(void *arg) 178 { 179 errno_t rc; 180 181 hr_io_raid5_t *io = arg; 182 hr_stripe_t *stripe = io->stripe; 183 184 size_t ext_idx = io->extent; 185 hr_extent_t *extents = (hr_extent_t *)&io->vol->extents; 186 187 hr_stripe_commit_parity(stripe, io->strip_off, io->data_write, 188 io->cnt * io->vol->bsize); 189 190 
hr_stripe_wait_for_parity_commits(stripe); 191 if (stripe->abort) 192 return EAGAIN; 193 194 rc = hr_write_direct(extents[ext_idx].svc_id, io->ba, io->cnt, 195 io->data_write); 196 if (rc != EOK) 197 io->vol->hr_ops.ext_state_cb(io->vol, io->extent, rc); 198 199 return rc; 200 } 201 202 errno_t hr_io_raid5_noop_writer(void *arg) 203 { 204 hr_io_raid5_t *io = arg; 205 hr_stripe_t *stripe = io->stripe; 206 207 hr_stripe_commit_parity(stripe, io->strip_off, io->data_write, 208 io->cnt * io->vol->bsize); 209 210 return EOK; 211 } 212 213 errno_t hr_io_raid5_parity_getter(void *arg) 214 { 215 hr_io_raid5_t *io = arg; 216 hr_stripe_t *stripe = io->stripe; 217 size_t bsize = stripe->vol->bsize; 218 219 hr_stripe_wait_for_parity_commits(stripe); 220 if (stripe->abort) 221 return EAGAIN; 222 223 memcpy(io->data_read, stripe->parity + io->strip_off, io->cnt * bsize); 224 225 return EOK; 226 } 227 228 errno_t hr_io_raid5_subtract_writer(void *arg) 229 { 230 errno_t rc; 231 232 hr_io_raid5_t *io = arg; 233 hr_stripe_t *stripe = io->stripe; 234 235 size_t ext_idx = io->extent; 236 hr_extent_t *extents = (hr_extent_t *)&io->vol->extents; 237 238 uint8_t *data = malloc_waitok(io->cnt * io->vol->bsize); 239 240 rc = hr_read_direct(extents[ext_idx].svc_id, io->ba, io->cnt, data); 241 if (rc != EOK) { 242 io->vol->hr_ops.ext_state_cb(io->vol, io->extent, rc); 243 hr_stripe_parity_abort(stripe); 244 free(data); 245 return rc; 246 } 247 248 fibril_mutex_lock(&stripe->parity_lock); 249 250 hr_raid5_xor(stripe->parity + io->strip_off, data, 251 io->cnt * io->vol->bsize); 252 253 hr_raid5_xor(stripe->parity + io->strip_off, io->data_write, 254 io->cnt * io->vol->bsize); 255 256 stripe->ps_added++; 257 fibril_condvar_broadcast(&stripe->ps_added_cv); 258 fibril_mutex_unlock(&stripe->parity_lock); 259 260 hr_stripe_wait_for_parity_commits(stripe); 261 if (stripe->abort) 262 return EAGAIN; 263 264 rc = hr_write_direct(extents[ext_idx].svc_id, io->ba, io->cnt, 265 io->data_write); 266 if (rc 
!= EOK) 267 io->vol->hr_ops.ext_state_cb(io->vol, io->extent, rc); 268 269 free(data); 270 271 return rc; 272 } 273 274 errno_t hr_io_raid5_reconstruct_reader(void *arg) 275 { 276 errno_t rc; 277 278 hr_io_raid5_t *io = arg; 279 hr_stripe_t *stripe = io->stripe; 280 281 size_t ext_idx = io->extent; 282 hr_extent_t *extents = (hr_extent_t *)&io->vol->extents; 283 284 uint8_t *data = malloc_waitok(io->cnt * io->vol->bsize); 285 286 rc = hr_write_direct(extents[ext_idx].svc_id, io->ba, io->cnt, data); 287 if (rc != EOK) { 288 hr_stripe_parity_abort(stripe); 289 io->vol->hr_ops.ext_state_cb(io->vol, io->extent, rc); 290 free(data); 291 return rc; 292 } 293 294 hr_stripe_commit_parity(stripe, io->strip_off, data, 295 io->cnt * io->vol->bsize); 296 297 free(data); 298 299 return EOK; 300 } 301 302 errno_t hr_io_raid5_parity_writer(void *arg) 303 { 304 errno_t rc; 305 306 hr_io_raid5_t *io = arg; 307 hr_stripe_t *stripe = io->stripe; 308 309 hr_extent_t *extents = (hr_extent_t *)&io->vol->extents; 310 311 hr_stripe_wait_for_parity_commits(stripe); 312 313 if (stripe->abort) 314 return EAGAIN; 315 316 rc = hr_write_direct(extents[io->extent].svc_id, io->ba, io->cnt, 317 stripe->parity + io->strip_off); 318 if (rc != EOK) 319 io->vol->hr_ops.ext_state_cb(io->vol, stripe->p_extent, rc); 320 321 return rc; 322 } 323 117 324 static errno_t exec_io_op(hr_io_t *io) 118 325 { … … 154 361 break; 155 362 default: 156 return EINVAL;363 assert(0); 157 364 } 158 365 -
uspace/srv/bd/hr/io.h
r9d1685b ra3486f2 37 37 #define _HR_IO_H 38 38 39 #include "parity_stripe.h" 39 40 #include "var.h" 41 #include "util.h" 40 42 41 43 typedef struct hr_io { 42 hr_bd_op_type_t type; 44 hr_bd_op_type_t type; /* read/write/sync */ 43 45 uint64_t ba; 44 46 uint64_t cnt; 45 size_t extent;46 47 void *data_read; 47 48 const void *data_write; 48 hr_volume_t *vol; 49 size_t extent; /* extent index */ 50 hr_volume_t *vol; /* volume back-pointer */ 49 51 } hr_io_t; 50 52 51 errno_t hr_io_worker(void *); 52 errno_t hr_io_worker_basic(void *); 53 typedef struct hr_io_raid5 { 54 uint64_t ba; 55 uint64_t cnt; 56 void *data_read; 57 const void *data_write; 58 size_t extent; 59 uint64_t strip_off; /* needed for offseting parity commits */ 60 hr_stripe_t *stripe; 61 hr_volume_t *vol; 62 } hr_io_raid5_t; 53 63 54 64 extern errno_t hr_write_direct(service_id_t, uint64_t, size_t, const void *); … … 56 66 extern errno_t hr_sync_cache(service_id_t, uint64_t, size_t); 57 67 68 extern errno_t hr_io_worker(void *); 69 extern errno_t hr_io_worker_basic(void *); 70 71 extern errno_t hr_io_raid5_basic_reader(void *); 72 extern errno_t hr_io_raid5_reader(void *); 73 extern errno_t hr_io_raid5_basic_writer(void *); 74 extern errno_t hr_io_raid5_writer(void *); 75 extern errno_t hr_io_raid5_noop_writer(void *); 76 extern errno_t hr_io_raid5_parity_getter(void *); 77 extern errno_t hr_io_raid5_subtract_writer(void *); 78 extern errno_t hr_io_raid5_reconstruct_reader(void *); 79 extern errno_t hr_io_raid5_parity_writer(void *); 80 58 81 #endif 59 82 -
uspace/srv/bd/hr/meson.build
r9d1685b ra3486f2 37 37 'metadata/foreign/softraid/softraid.c', 38 38 'metadata/native.c', 39 'parity_stripe.c', 39 40 'raid0.c', 40 41 'raid1.c', -
uspace/srv/bd/hr/raid5.c
r9d1685b ra3486f2 50 50 #include <str_error.h> 51 51 52 #include "io.h" 53 #include "parity_stripe.h" 52 54 #include "superblock.h" 53 55 #include "util.h" 54 56 #include "var.h" 55 57 56 static errno_t hr_raid5_vol_usable(hr_volume_t *); 57 static ssize_t hr_raid5_get_bad_ext(hr_volume_t *); 58 static errno_t hr_raid5_update_vol_state(hr_volume_t *); 59 static void xor(void *, const void *, size_t); 60 61 static errno_t hr_raid5_read_degraded(hr_volume_t *, uint64_t, uint64_t, 62 void *, size_t); 63 static errno_t hr_raid5_write(hr_volume_t *, uint64_t, uint64_t, aoff64_t, 64 const void *, size_t); 65 static errno_t hr_raid5_write_parity(hr_volume_t *, uint64_t, uint64_t, 66 uint64_t, const void *, size_t); 67 static errno_t hr_raid5_bd_op(hr_bd_op_type_t, bd_srv_t *, aoff64_t, size_t, 68 void *, const void *, size_t); 58 static void hr_raid5_vol_state_eval_forced(hr_volume_t *); 59 60 static size_t hr_raid5_parity_extent(hr_level_t, hr_layout_t, size_t, 61 uint64_t); 62 static size_t hr_raid5_data_extent(hr_level_t, hr_layout_t, size_t, uint64_t, 63 uint64_t); 64 69 65 static errno_t hr_raid5_rebuild(void *); 70 66 … … 104 100 } 105 101 106 fibril_rwlock_write_lock(&new_volume->states_lock);107 108 errno_t rc = hr_raid5_update_vol_state(new_volume);109 if (rc != EOK) {110 HR_NOTE("\"%s\": unusable state, not creating\n",111 new_volume->devname);112 fibril_rwlock_write_unlock(&new_volume->states_lock);113 return rc;114 }115 116 102 bd_srvs_init(&new_volume->hr_bds); 117 103 new_volume->hr_bds.ops = &hr_raid5_bd_ops; 118 104 new_volume->hr_bds.sarg = new_volume; 119 105 120 fibril_rwlock_write_unlock(&new_volume->states_lock); 106 hr_raid5_vol_state_eval_forced(new_volume); 107 108 fibril_rwlock_read_lock(&new_volume->states_lock); 109 hr_vol_state_t state = new_volume->state; 110 fibril_rwlock_read_unlock(&new_volume->states_lock); 111 if (state == HR_VOL_FAULTY || state == HR_VOL_NONE) { 112 HR_NOTE("\"%s\": unusable state, not creating\n", 113 
new_volume->devname); 114 return EINVAL; 115 } 121 116 122 117 return EOK; … … 133 128 return EINVAL; 134 129 135 uint64_t total_blkno = vol->truncated_blkno * vol->extent_no;136 137 130 vol->data_offset = vol->meta_ops->get_data_offset(); 138 131 139 vol->data_blkno = total_blkno; 140 /* count md blocks */ 141 vol->data_blkno -= vol->meta_ops->get_size() * vol->extent_no; 142 vol->data_blkno -= vol->truncated_blkno; /* count parity */ 132 uint64_t single_sz = vol->truncated_blkno - vol->meta_ops->get_size(); 133 vol->data_blkno = single_sz * (vol->extent_no - 1); 143 134 144 135 vol->strip_size = HR_STRIP_SIZE; … … 154 145 void hr_raid5_vol_state_eval(hr_volume_t *vol) 155 146 { 156 fibril_mutex_lock(&vol->lock); 147 HR_DEBUG("%s()", __func__); 148 149 bool exp = true; 150 if (!atomic_compare_exchange_strong(&vol->state_dirty, &exp, false)) 151 return; 152 153 vol->meta_ops->inc_counter(vol); 154 (void)vol->meta_ops->save(vol, WITH_STATE_CALLBACK); 155 156 hr_raid5_vol_state_eval_forced(vol); 157 } 158 159 errno_t hr_raid5_add_hotspare(hr_volume_t *vol, service_id_t hotspare) 160 { 161 HR_DEBUG("%s()", __func__); 162 163 errno_t rc = hr_util_add_hotspare(vol, hotspare); 164 165 hr_raid5_vol_state_eval(vol); 166 167 return rc; 168 } 169 170 void hr_raid5_ext_state_cb(hr_volume_t *vol, size_t extent, errno_t rc) 171 { 172 HR_DEBUG("%s()", __func__); 173 174 assert(fibril_rwlock_is_locked(&vol->extents_lock)); 175 176 if (rc == EOK) 177 return; 178 157 179 fibril_rwlock_write_lock(&vol->states_lock); 158 (void)hr_raid5_update_vol_state(vol); 180 181 switch (rc) { 182 case ENOMEM: 183 hr_update_ext_state(vol, extent, HR_EXT_INVALID); 184 break; 185 case ENOENT: 186 hr_update_ext_state(vol, extent, HR_EXT_MISSING); 187 break; 188 default: 189 hr_update_ext_state(vol, extent, HR_EXT_FAILED); 190 } 191 192 hr_mark_vol_state_dirty(vol); 193 159 194 fibril_rwlock_write_unlock(&vol->states_lock); 160 fibril_mutex_unlock(&vol->lock); 161 } 162 163 errno_t 
hr_raid5_add_hotspare(hr_volume_t *vol, service_id_t hotspare) 164 { 165 HR_DEBUG("%s()", __func__); 166 167 fibril_mutex_lock(&vol->lock); 168 169 errno_t rc = hr_util_add_hotspare(vol, hotspare); 195 } 196 197 static errno_t hr_raid5_bd_open(bd_srvs_t *bds, bd_srv_t *bd) 198 { 199 HR_DEBUG("%s()\n", __func__); 200 201 hr_volume_t *vol = bd->srvs->sarg; 202 203 atomic_fetch_add_explicit(&vol->open_cnt, 1, memory_order_relaxed); 204 205 return EOK; 206 } 207 208 static errno_t hr_raid5_bd_close(bd_srv_t *bd) 209 { 210 HR_DEBUG("%s()\n", __func__); 211 212 hr_volume_t *vol = bd->srvs->sarg; 213 214 atomic_fetch_sub_explicit(&vol->open_cnt, 1, memory_order_relaxed); 215 216 return EOK; 217 } 218 219 static errno_t hr_raid5_bd_sync_cache(bd_srv_t *bd, aoff64_t ba, size_t cnt) 220 { 221 /* XXX */ 222 return EOK; 223 } 224 225 static errno_t hr_raid5_bd_read_blocks(bd_srv_t *bd, uint64_t ba, size_t cnt, 226 void *data_read, size_t size) 227 { 228 hr_volume_t *vol = bd->srvs->sarg; 229 errno_t rc; 230 231 if (size < cnt * vol->bsize) 232 return EINVAL; 233 234 fibril_rwlock_read_lock(&vol->states_lock); 235 hr_vol_state_t vol_state = vol->state; 236 fibril_rwlock_read_unlock(&vol->states_lock); 237 238 if (vol_state == HR_VOL_FAULTY || vol_state == HR_VOL_NONE) 239 return EIO; 240 241 rc = hr_check_ba_range(vol, cnt, ba); 170 242 if (rc != EOK) 243 return rc; 244 245 uint64_t strip_size = vol->strip_size / vol->bsize; /* in blocks */ 246 uint64_t strip_no = ba / strip_size; 247 248 /* calculate number of stripes touched */ 249 uint64_t last_ba = ba + cnt - 1; 250 uint64_t end_strip_no = last_ba / strip_size; 251 uint64_t start_stripe = strip_no / (vol->extent_no - 1); 252 uint64_t end_stripe = end_strip_no / (vol->extent_no - 1); 253 size_t stripes_cnt = end_stripe - start_stripe + 1; 254 255 hr_stripe_t *stripes = hr_create_stripes(vol, stripes_cnt, false); 256 if (stripes == NULL) 257 return ENOMEM; 258 259 /* 260 * Pre-allocate range locks, because after group 
creation and 261 * firing off IO requests there is no easy consistent ENOMEM error 262 * path. 263 */ 264 hr_range_lock_t **rlps = malloc_waitok(stripes_cnt * sizeof(*rlps)); 265 for (size_t i = 0; i < stripes_cnt; i++) 266 rlps[i] = malloc_waitok(sizeof(**rlps)); 267 268 /* 269 * extent order has to be locked for the whole IO duration, 270 * so that workers have consistent targets 271 */ 272 fibril_rwlock_read_lock(&vol->extents_lock); 273 274 for (uint64_t s = start_stripe; s <= end_stripe; s++) { 275 uint64_t relative = s - start_stripe; 276 hr_range_lock_acquire_noalloc(rlps[relative], vol, s, 1); 277 } 278 279 uint64_t phys_block, len; 280 size_t left; 281 282 hr_layout_t layout = vol->layout; 283 hr_level_t level = vol->level; 284 285 /* parity extent */ 286 size_t p_extent = hr_raid5_parity_extent(level, layout, 287 vol->extent_no, strip_no); 288 289 uint64_t strip_off = ba % strip_size; 290 291 left = cnt; 292 293 while (left != 0) { 294 if (level == HR_LVL_5) { 295 p_extent = hr_raid5_parity_extent(level, layout, 296 vol->extent_no, strip_no); 297 } 298 299 size_t extent = hr_raid5_data_extent(level, layout, 300 vol->extent_no, strip_no, p_extent); 301 302 uint64_t stripe_no = strip_no / (vol->extent_no - 1); 303 size_t relative_si = stripe_no - start_stripe; /* relative stripe index */ 304 hr_stripe_t *stripe = &stripes[relative_si]; 305 stripe->p_extent = p_extent; 306 307 stripe->strips_touched++; 308 309 phys_block = stripe_no * strip_size + strip_off; 310 cnt = min(left, strip_size - strip_off); 311 len = vol->bsize * cnt; 312 hr_add_data_offset(vol, &phys_block); 313 314 stripe->extent_span[extent].range.start = phys_block; 315 stripe->extent_span[extent].range.end = phys_block + cnt - 1; 316 stripe->extent_span[extent].cnt = cnt; 317 stripe->extent_span[extent].data_read = data_read; 318 stripe->extent_span[extent].strip_off = strip_off; 319 320 data_read += len; 321 left -= cnt; 322 strip_off = 0; 323 strip_no++; 324 } 325 326 retry: 327 size_t 
bad_extent = vol->extent_no; 328 329 uint64_t rebuild_pos = atomic_load_explicit(&vol->rebuild_blk, 330 memory_order_relaxed); 331 332 fibril_rwlock_read_lock(&vol->states_lock); 333 334 for (size_t e = 0; e < vol->extent_no; e++) { 335 hr_ext_state_t s = vol->extents[e].state; 336 if ((vol->state == HR_VOL_DEGRADED && s != HR_EXT_ONLINE) || 337 (s == HR_EXT_REBUILD && rebuild_pos < start_stripe)) { 338 bad_extent = e; 339 break; 340 } 341 } 342 343 fibril_rwlock_read_unlock(&vol->states_lock); 344 345 for (size_t s = 0; s < stripes_cnt; s++) { 346 if (stripes[s].done) 347 continue; 348 execute_stripe(&stripes[s], bad_extent); 349 } 350 351 for (size_t s = 0; s < stripes_cnt; s++) { 352 if (stripes[s].done) 353 continue; 354 wait_for_stripe(&stripes[s]); 355 } 356 357 hr_raid5_vol_state_eval(vol); 358 359 rc = EOK; 360 361 fibril_rwlock_read_lock(&vol->states_lock); 362 363 if (vol->state == HR_VOL_FAULTY) { 364 fibril_rwlock_read_unlock(&vol->states_lock); 365 rc = EIO; 171 366 goto end; 367 } 368 369 fibril_rwlock_read_unlock(&vol->states_lock); 370 371 for (size_t s = 0; s < stripes_cnt; s++) 372 if (stripes[s].rc == EAGAIN) 373 goto retry; 374 375 /* all stripes are done */ 376 end: 377 fibril_rwlock_read_unlock(&vol->extents_lock); 378 379 for (size_t i = 0; i < stripes_cnt; i++) 380 hr_range_lock_release(rlps[i]); 381 382 hr_destroy_stripes(stripes, stripes_cnt); 383 384 return rc; 385 } 386 387 static errno_t hr_raid5_bd_write_blocks(bd_srv_t *bd, aoff64_t ba, size_t cnt, 388 const void *data_write, size_t size) 389 { 390 hr_volume_t *vol = bd->srvs->sarg; 391 errno_t rc; 392 393 if (size < cnt * vol->bsize) 394 return EINVAL; 395 396 fibril_rwlock_read_lock(&vol->states_lock); 397 hr_vol_state_t vol_state = vol->state; 398 fibril_rwlock_read_unlock(&vol->states_lock); 399 400 if (vol_state == HR_VOL_FAULTY || vol_state == HR_VOL_NONE) 401 return EIO; 402 403 /* increment metadata counter only on first write */ 404 bool exp = false; 405 if 
(atomic_compare_exchange_strong(&vol->first_write, &exp, true)) { 406 vol->meta_ops->inc_counter(vol); 407 vol->meta_ops->save(vol, WITH_STATE_CALLBACK); 408 } 409 410 rc = hr_check_ba_range(vol, cnt, ba); 411 if (rc != EOK) 412 return rc; 413 414 uint64_t strip_size = vol->strip_size / vol->bsize; /* in blocks */ 415 uint64_t strip_no = ba / strip_size; 416 417 /* calculate number of stripes touched */ 418 uint64_t last_ba = ba + cnt - 1; 419 uint64_t end_strip_no = last_ba / strip_size; 420 uint64_t start_stripe = strip_no / (vol->extent_no - 1); 421 uint64_t end_stripe = end_strip_no / (vol->extent_no - 1); 422 size_t stripes_cnt = end_stripe - start_stripe + 1; 423 424 hr_stripe_t *stripes = hr_create_stripes(vol, stripes_cnt, true); 425 if (stripes == NULL) 426 return ENOMEM; 427 428 uint64_t stripe_size = strip_size * (vol->extent_no - 1); 429 430 for (uint64_t stripe = start_stripe; stripe <= end_stripe; stripe++) { 431 uint64_t relative_stripe = stripe - start_stripe; 432 433 uint64_t s_start = stripe * stripe_size; 434 uint64_t s_end = s_start + stripe_size - 1; 435 436 uint64_t overlap_start; 437 if (ba > s_start) 438 overlap_start = ba; 439 else 440 overlap_start = s_start; 441 442 uint64_t overlap_end; 443 if (last_ba < s_end) 444 overlap_end = last_ba; 445 else 446 overlap_end = s_end; 447 448 uint64_t start_strip_index = 449 (overlap_start - s_start) / strip_size; 450 uint64_t end_strip_index = (overlap_end - s_start) / strip_size; 451 size_t strips_touched = end_strip_index - start_strip_index + 1; 452 453 stripes[relative_stripe].strips_touched = strips_touched; 454 455 uint64_t first_offset = (overlap_start - s_start) % strip_size; 456 uint64_t last_offset = (overlap_end - s_start) % strip_size; 457 458 size_t partials = 0; 459 if (first_offset != 0) 460 partials++; 461 if (last_offset != strip_size - 1) 462 partials++; 463 if (start_strip_index == end_strip_index && partials == 2) 464 partials = 1; 465 466 stripes[relative_stripe].strips_touched = 
strips_touched; 467 stripes[relative_stripe].partial_strips_touched = partials; 468 469 if (strips_touched < (vol->extent_no - 1) / 2) 470 stripes[relative_stripe].subtract = true; 471 } 172 472 173 473 /* 174 * If the volume is degraded, start rebuild right away. 474 * Pre-allocate range locks, because after group creation and 475 * firing off IO requests there is no easy consistent ENOMEM error 476 * path. 175 477 */ 176 if (vol->state == HR_VOL_DEGRADED) { 177 HR_DEBUG("hr_raid5_add_hotspare(): volume in DEGRADED state, " 178 "spawning new rebuild fibril\n"); 179 fid_t fib = fibril_create(hr_raid5_rebuild, vol); 180 if (fib == 0) { 181 fibril_mutex_unlock(&vol->hotspare_lock); 182 fibril_mutex_unlock(&vol->lock); 183 return ENOMEM; 184 } 185 fibril_start(fib); 186 fibril_detach(fib); 187 } 188 478 hr_range_lock_t **rlps = malloc_waitok(stripes_cnt * sizeof(*rlps)); 479 for (size_t i = 0; i < stripes_cnt; i++) 480 rlps[i] = malloc_waitok(sizeof(**rlps)); 481 482 /* 483 * extent order has to be locked for the whole IO duration, 484 * so that workers have consistent targets 485 */ 486 fibril_rwlock_read_lock(&vol->extents_lock); 487 488 for (uint64_t s = start_stripe; s <= end_stripe; s++) { 489 uint64_t relative = s - start_stripe; 490 hr_range_lock_acquire_noalloc(rlps[relative], vol, s, 1); 491 } 492 493 uint64_t phys_block, len; 494 size_t left; 495 496 hr_layout_t layout = vol->layout; 497 hr_level_t level = vol->level; 498 499 /* parity extent */ 500 size_t p_extent = hr_raid5_parity_extent(level, layout, 501 vol->extent_no, strip_no); 502 503 uint64_t strip_off = ba % strip_size; 504 505 left = cnt; 506 507 while (left != 0) { 508 if (level == HR_LVL_5) { 509 p_extent = hr_raid5_parity_extent(level, layout, 510 vol->extent_no, strip_no); 511 } 512 513 size_t extent = hr_raid5_data_extent(level, layout, 514 vol->extent_no, strip_no, p_extent); 515 516 uint64_t stripe_no = strip_no / (vol->extent_no - 1); 517 size_t relative_si = stripe_no - start_stripe; /* 
relative stripe index */ 518 hr_stripe_t *stripe = &stripes[relative_si]; 519 stripe->p_extent = p_extent; 520 521 phys_block = stripe_no * strip_size + strip_off; 522 cnt = min(left, strip_size - strip_off); 523 len = vol->bsize * cnt; 524 hr_add_data_offset(vol, &phys_block); 525 526 stripe->extent_span[extent].range.start = phys_block; 527 stripe->extent_span[extent].range.end = phys_block + cnt - 1; 528 stripe->extent_span[extent].cnt = cnt; 529 stripe->extent_span[extent].data_write = data_write; 530 stripe->extent_span[extent].strip_off = strip_off; 531 532 data_write += len; 533 left -= cnt; 534 strip_off = 0; 535 strip_no++; 536 } 537 538 retry: 539 size_t bad_extent = vol->extent_no; 540 541 uint64_t rebuild_pos = atomic_load_explicit(&vol->rebuild_blk, 542 memory_order_relaxed); 543 544 fibril_rwlock_read_lock(&vol->states_lock); 545 546 for (size_t e = 0; e < vol->extent_no; e++) { 547 hr_ext_state_t s = vol->extents[e].state; 548 if ((vol->state == HR_VOL_DEGRADED && s != HR_EXT_ONLINE) || 549 (s == HR_EXT_REBUILD && rebuild_pos < start_stripe)) { 550 bad_extent = e; 551 break; 552 } 553 } 554 555 fibril_rwlock_read_unlock(&vol->states_lock); 556 557 for (size_t s = 0; s < stripes_cnt; s++) { 558 if (stripes[s].done) 559 continue; 560 execute_stripe(&stripes[s], bad_extent); 561 } 562 563 for (size_t s = 0; s < stripes_cnt; s++) { 564 if (stripes[s].done) 565 continue; 566 wait_for_stripe(&stripes[s]); 567 } 568 569 hr_raid5_vol_state_eval(vol); 570 571 rc = EOK; 572 573 fibril_rwlock_read_lock(&vol->states_lock); 574 575 if (vol->state == HR_VOL_FAULTY) { 576 fibril_rwlock_read_unlock(&vol->states_lock); 577 rc = EIO; 578 goto end; 579 } 580 581 fibril_rwlock_read_unlock(&vol->states_lock); 582 583 for (size_t s = 0; s < stripes_cnt; s++) 584 if (stripes[s].rc == EAGAIN) 585 goto retry; 586 587 /* all stripes are done */ 189 588 end: 190 fibril_mutex_unlock(&vol->lock); 589 fibril_rwlock_read_unlock(&vol->extents_lock); 590 591 for (size_t i = 0; i < 
stripes_cnt; i++) 592 hr_range_lock_release(rlps[i]); 593 594 hr_destroy_stripes(stripes, stripes_cnt); 191 595 192 596 return rc; 193 }194 195 void hr_raid5_ext_state_cb(hr_volume_t *vol, size_t extent,196 errno_t rc)197 {198 if (rc == ENOENT)199 hr_update_ext_state(vol, extent, HR_EXT_MISSING);200 else if (rc != EOK)201 hr_update_ext_state(vol, extent, HR_EXT_FAILED);202 }203 204 static errno_t hr_raid5_bd_open(bd_srvs_t *bds, bd_srv_t *bd)205 {206 HR_DEBUG("%s()\n", __func__);207 208 hr_volume_t *vol = bd->srvs->sarg;209 210 atomic_fetch_add_explicit(&vol->open_cnt, 1, memory_order_relaxed);211 212 return EOK;213 }214 215 static errno_t hr_raid5_bd_close(bd_srv_t *bd)216 {217 HR_DEBUG("%s()\n", __func__);218 219 hr_volume_t *vol = bd->srvs->sarg;220 221 atomic_fetch_sub_explicit(&vol->open_cnt, 1, memory_order_relaxed);222 223 return EOK;224 }225 226 static errno_t hr_raid5_bd_sync_cache(bd_srv_t *bd, aoff64_t ba, size_t cnt)227 {228 return hr_raid5_bd_op(HR_BD_SYNC, bd, ba, cnt, NULL, NULL, 0);229 }230 231 static errno_t hr_raid5_bd_read_blocks(bd_srv_t *bd, aoff64_t ba, size_t cnt,232 void *buf, size_t size)233 {234 return hr_raid5_bd_op(HR_BD_READ, bd, ba, cnt, buf, NULL, size);235 }236 237 static errno_t hr_raid5_bd_write_blocks(bd_srv_t *bd, aoff64_t ba, size_t cnt,238 const void *data, size_t size)239 {240 return hr_raid5_bd_op(HR_BD_WRITE, bd, ba, cnt, NULL, data, size);241 597 } 242 598 … … 257 613 } 258 614 259 static errno_t hr_raid5_vol_usable(hr_volume_t *vol) 260 { 261 if (vol->state == HR_VOL_ONLINE || 262 vol->state == HR_VOL_DEGRADED || 263 vol->state == HR_VOL_REBUILD) 264 return EOK; 265 return EIO; 266 } 267 268 /* 269 * Returns (-1) if all extents are online, 270 * else returns index of first bad one. 
271 */ 272 static ssize_t hr_raid5_get_bad_ext(hr_volume_t *vol) 273 { 274 for (size_t i = 0; i < vol->extent_no; i++) 275 if (vol->extents[i].state != HR_EXT_ONLINE) 276 return i; 277 return -1; 278 } 279 280 static errno_t hr_raid5_update_vol_state(hr_volume_t *vol) 281 { 282 hr_vol_state_t old_state = vol->state; 615 static void hr_raid5_vol_state_eval_forced(hr_volume_t *vol) 616 { 617 fibril_rwlock_read_lock(&vol->extents_lock); 618 fibril_rwlock_write_lock(&vol->states_lock); 619 620 hr_vol_state_t state = vol->state; 621 283 622 size_t bad = 0; 284 623 for (size_t i = 0; i < vol->extent_no; i++) … … 288 627 switch (bad) { 289 628 case 0: 290 if ( old_state != HR_VOL_ONLINE)629 if (state != HR_VOL_ONLINE) 291 630 hr_update_vol_state(vol, HR_VOL_ONLINE); 292 return EOK;631 break; 293 632 case 1: 294 if (old_state != HR_VOL_DEGRADED && 295 old_state != HR_VOL_REBUILD) { 296 633 if (state != HR_VOL_DEGRADED && state != HR_VOL_REBUILD) 297 634 hr_update_vol_state(vol, HR_VOL_DEGRADED); 298 635 299 if (vol->hotspare_no > 0) { 636 if (state != HR_VOL_REBUILD) { 637 /* XXX: allow REBUILD on INVALID extents */ 638 fibril_mutex_lock(&vol->hotspare_lock); 639 size_t hs_no = vol->hotspare_no; 640 fibril_mutex_unlock(&vol->hotspare_lock); 641 if (hs_no > 0) { 300 642 fid_t fib = fibril_create(hr_raid5_rebuild, 301 643 vol); 302 644 if (fib == 0) 303 return ENOMEM;645 break; 304 646 fibril_start(fib); 305 647 fibril_detach(fib); 306 648 } 307 649 } 308 return EOK;650 break; 309 651 default: 310 if ( old_state != HR_VOL_FAULTY)652 if (state != HR_VOL_FAULTY) 311 653 hr_update_vol_state(vol, HR_VOL_FAULTY); 312 return EIO; 313 } 654 break; 655 } 656 657 fibril_rwlock_write_unlock(&vol->states_lock); 658 fibril_rwlock_read_unlock(&vol->extents_lock); 314 659 } 315 660 … … 324 669 } 325 670 326 static errno_t hr_raid5_read_degraded(hr_volume_t *vol, uint64_t bad, 327 uint64_t block, void *data, size_t cnt) 328 { 329 errno_t rc; 330 size_t i; 331 void *xorbuf; 332 void *buf; 
333 uint64_t len = vol->bsize * cnt; 334 335 xorbuf = malloc(len); 336 if (xorbuf == NULL) 337 return ENOMEM; 338 339 buf = malloc(len); 340 if (buf == NULL) { 341 free(xorbuf); 342 return ENOMEM; 343 } 344 345 /* read all other extents in the stripe */ 346 bool first = true; 347 for (i = 0; i < vol->extent_no; i++) { 348 if (i == bad) 349 continue; 350 351 if (first) { 352 rc = block_read_direct(vol->extents[i].svc_id, block, 353 cnt, xorbuf); 354 if (rc != EOK) 355 goto end; 356 357 first = false; 358 } else { 359 rc = block_read_direct(vol->extents[i].svc_id, block, 360 cnt, buf); 361 if (rc != EOK) 362 goto end; 363 xor(xorbuf, buf, len); 364 } 365 } 366 367 memcpy(data, xorbuf, len); 368 end: 369 free(xorbuf); 370 free(buf); 371 return rc; 372 } 373 374 static errno_t hr_raid5_write(hr_volume_t *vol, uint64_t p_extent, 375 uint64_t extent, aoff64_t ba, const void *data, size_t cnt) 376 { 377 errno_t rc; 378 size_t i; 379 void *xorbuf; 380 void *buf; 381 uint64_t len = vol->bsize * cnt; 382 383 ssize_t bad = hr_raid5_get_bad_ext(vol); 384 if (bad == -1 || (size_t)bad == p_extent) { 385 rc = block_write_direct(vol->extents[extent].svc_id, ba, cnt, 386 data); 387 if (rc != EOK) 388 return rc; 389 /* 390 * DEGRADED parity - skip parity write 391 */ 392 if ((size_t)bad == p_extent) 393 return EOK; 394 395 rc = hr_raid5_write_parity(vol, p_extent, extent, ba, data, 396 cnt); 397 return rc; 398 } 399 400 xorbuf = malloc(len); 401 if (xorbuf == NULL) 402 return ENOMEM; 403 404 buf = malloc(len); 405 if (buf == NULL) { 406 free(xorbuf); 407 return ENOMEM; 408 } 409 410 if (extent == (size_t)bad) { 411 /* 412 * new parity = read other and xor in new data 413 * 414 * write new parity 415 */ 416 bool first = true; 417 for (i = 0; i < vol->extent_no; i++) { 418 if (i == (size_t)bad) 419 continue; 420 if (i == p_extent) 421 continue; 422 if (first) { 423 rc = block_read_direct(vol->extents[i].svc_id, 424 ba, cnt, xorbuf); 425 if (rc != EOK) 426 goto end; 427 428 first = 
false; 429 } else { 430 rc = block_read_direct(vol->extents[i].svc_id, 431 ba, cnt, buf); 432 if (rc != EOK) 433 goto end; 434 xor(xorbuf, buf, len); 435 } 436 } 437 xor(xorbuf, data, len); 438 rc = block_write_direct(vol->extents[p_extent].svc_id, ba, cnt, 439 xorbuf); 440 if (rc != EOK) 441 goto end; 442 } else { 443 /* 444 * new parity = xor original data and old parity and new data 445 * 446 * write parity, new data 447 */ 448 rc = block_read_direct(vol->extents[extent].svc_id, ba, cnt, 449 xorbuf); 450 if (rc != EOK) 451 goto end; 452 rc = block_read_direct(vol->extents[p_extent].svc_id, ba, cnt, 453 buf); 454 if (rc != EOK) 455 goto end; 456 457 xor(xorbuf, buf, len); 458 459 xor(xorbuf, data, len); 460 461 rc = block_write_direct(vol->extents[p_extent].svc_id, ba, cnt, 462 xorbuf); 463 if (rc != EOK) 464 goto end; 465 rc = block_write_direct(vol->extents[extent].svc_id, ba, cnt, 466 data); 467 if (rc != EOK) 468 goto end; 469 } 470 end: 471 free(xorbuf); 472 free(buf); 473 return rc; 474 } 475 476 static errno_t hr_raid5_write_parity(hr_volume_t *vol, uint64_t p_extent, 477 uint64_t extent, uint64_t block, const void *data, size_t cnt) 478 { 479 errno_t rc; 480 size_t i; 481 void *xorbuf; 482 void *buf; 483 uint64_t len = vol->bsize * cnt; 484 485 xorbuf = malloc(len); 486 if (xorbuf == NULL) 487 return ENOMEM; 488 489 buf = malloc(len); 490 if (buf == NULL) { 491 free(xorbuf); 492 return ENOMEM; 493 } 494 495 bool first = true; 496 for (i = 0; i < vol->extent_no; i++) { 497 if (i == p_extent) 498 continue; 499 500 if (first) { 501 if (i == extent) { 502 memcpy(xorbuf, data, len); 503 } else { 504 rc = block_read_direct(vol->extents[i].svc_id, 505 block, cnt, xorbuf); 506 if (rc != EOK) 507 goto end; 508 } 509 510 first = false; 511 } else { 512 if (i == extent) { 513 xor(xorbuf, data, len); 514 } else { 515 rc = block_read_direct(vol->extents[i].svc_id, 516 block, cnt, buf); 517 if (rc != EOK) 518 goto end; 519 520 xor(xorbuf, buf, len); 521 } 522 } 523 } 
524 525 rc = block_write_direct(vol->extents[p_extent].svc_id, block, cnt, 526 xorbuf); 527 end: 528 free(xorbuf); 529 free(buf); 530 return rc; 531 } 532 533 static errno_t hr_raid5_bd_op(hr_bd_op_type_t type, bd_srv_t *bd, aoff64_t ba, 534 size_t cnt, void *dst, const void *src, size_t size) 535 { 536 hr_volume_t *vol = bd->srvs->sarg; 537 errno_t rc; 538 uint64_t phys_block, len; 539 size_t left; 540 const uint8_t *data_write = src; 541 uint8_t *data_read = dst; 542 543 /* propagate sync */ 544 if (type == HR_BD_SYNC && ba == 0 && cnt == 0) { 545 hr_sync_all_extents(vol); 546 rc = hr_raid5_update_vol_state(vol); 547 return rc; 548 } 549 550 if (type == HR_BD_READ || type == HR_BD_WRITE) 551 if (size < cnt * vol->bsize) 552 return EINVAL; 553 554 rc = hr_check_ba_range(vol, cnt, ba); 555 if (rc != EOK) 556 return rc; 557 558 hr_layout_t layout = vol->layout; 559 hr_level_t level = vol->level; 560 561 uint64_t strip_size = vol->strip_size / vol->bsize; /* in blocks */ 562 uint64_t stripe = (ba / strip_size); /* stripe number */ 563 564 /* parity extent */ 565 uint64_t p_extent; 566 if (level == HR_LVL_4 && layout == HR_LAYOUT_RAID4_0) { 567 p_extent = 0; 568 } else if (level == HR_LVL_4 && layout == HR_LAYOUT_RAID4_N) { 569 p_extent = vol->extent_no - 1; 570 } else if (level == HR_LVL_5 && layout == HR_LAYOUT_RAID5_0R) { 571 p_extent = (stripe / (vol->extent_no - 1)) % vol->extent_no; 572 } else if (level == HR_LVL_5 && 573 (layout == HR_LAYOUT_RAID5_NR || layout == HR_LAYOUT_RAID5_NC)) { 574 p_extent = (vol->extent_no - 1) - 575 (stripe / (vol->extent_no - 1)) % vol->extent_no; 576 } else { 577 return EINVAL; 578 } 579 580 uint64_t extent; 581 if (level == HR_LVL_4 && layout == HR_LAYOUT_RAID4_0) { 582 extent = (stripe % (vol->extent_no - 1)) + 1; 583 } else if (level == HR_LVL_4 && layout == HR_LAYOUT_RAID4_N) { 584 extent = stripe % (vol->extent_no - 1); 585 } else if (level == HR_LVL_5 && 586 (layout == HR_LAYOUT_RAID5_0R || layout == HR_LAYOUT_RAID5_NR)) { 
587 if ((stripe % (vol->extent_no - 1)) < p_extent) 588 extent = stripe % (vol->extent_no - 1); 589 else 590 extent = (stripe % (vol->extent_no - 1)) + 1; 591 } else if (level == HR_LVL_5 && layout == HR_LAYOUT_RAID5_NC) { 592 extent = 593 ((stripe % (vol->extent_no - 1)) + p_extent + 1) % 594 vol->extent_no; 595 } else { 596 return EINVAL; 597 } 598 599 uint64_t ext_stripe = stripe / (vol->extent_no - 1); /* stripe level */ 600 uint64_t strip_off = ba % strip_size; /* strip offset */ 601 602 fibril_mutex_lock(&vol->lock); 603 604 rc = hr_raid5_vol_usable(vol); 605 if (rc != EOK) { 606 fibril_mutex_unlock(&vol->lock); 607 return EIO; 608 } 609 610 left = cnt; 611 612 fibril_rwlock_write_lock(&vol->states_lock); 613 while (left != 0) { 614 phys_block = ext_stripe * strip_size + strip_off; 615 cnt = min(left, strip_size - strip_off); 616 len = vol->bsize * cnt; 617 hr_add_ba_offset(vol, &phys_block); 618 switch (type) { 619 case HR_BD_SYNC: 620 if (vol->extents[extent].state != HR_EXT_ONLINE) 621 break; 622 rc = block_sync_cache(vol->extents[extent].svc_id, 623 phys_block, cnt); 624 /* allow unsupported sync */ 625 if (rc == ENOTSUP) 626 rc = EOK; 627 break; 628 case HR_BD_READ: 629 retry_read: 630 ssize_t bad = hr_raid5_get_bad_ext(vol); 631 if (bad > -1 && extent == (size_t)bad) { 632 rc = hr_raid5_read_degraded(vol, bad, 633 phys_block, data_read, cnt); 634 } else { 635 rc = block_read_direct(vol->extents[extent].svc_id, 636 phys_block, cnt, data_read); 637 } 638 data_read += len; 639 break; 640 case HR_BD_WRITE: 641 retry_write: 642 rc = hr_raid5_write(vol, p_extent, extent, phys_block, 643 data_write, cnt); 644 data_write += len; 645 break; 671 static size_t hr_raid5_parity_extent(hr_level_t level, 672 hr_layout_t layout, size_t extent_no, uint64_t strip_no) 673 { 674 switch (level) { 675 case HR_LVL_4: 676 switch (layout) { 677 case HR_LAYOUT_RAID4_0: 678 return (0); 679 case HR_LAYOUT_RAID4_N: 680 return (extent_no - 1); 646 681 default: 647 rc = EINVAL; 648 
goto error; 649 	} 650 651 	if (rc == ENOMEM) 652 	goto error; 653 654 	hr_raid5_ext_state_cb(vol, extent, rc); 655 656 	if (rc != EOK) { 657 	rc = hr_raid5_update_vol_state(vol); 658 	if (rc == EOK) { 659 	/* 660 	* State changed from ONLINE -> DEGRADED, 661 	* rewind and retry 662 	*/ 663 	if (type == HR_BD_WRITE) { 664 	data_write -= len; 665 	goto retry_write; 666 	} else if (type == HR_BD_READ) { 667 	data_read -= len; 668 	goto retry_read; 669 	} 670 	} else { 671 	rc = EIO; 672 	goto error; 673 	} 674 	} 675 676 	left -= cnt; 677 	strip_off = 0; 678 	stripe++; 679 680 	ext_stripe = stripe / (vol->extent_no - 1); /* stripe level */ 681 682 	if (level == HR_LVL_5 && layout == HR_LAYOUT_RAID5_0R) { 683 	p_extent = 684 	(stripe / (vol->extent_no - 1)) % vol->extent_no; 685 	} else if (level == HR_LVL_5 && 686 	(layout == HR_LAYOUT_RAID5_NR || layout == HR_LAYOUT_RAID5_NC)) { 687 	p_extent = (vol->extent_no - 1) - 688 	(stripe / (vol->extent_no - 1)) % vol->extent_no; 689 	} 690 691 	if (level == HR_LVL_4 && layout == HR_LAYOUT_RAID4_0) { 692 	extent = (stripe % (vol->extent_no - 1)) + 1; 693 	} else if (level == HR_LVL_4 && layout == HR_LAYOUT_RAID4_N) { 694 	extent = stripe % (vol->extent_no - 1); 695 	} else if (level == HR_LVL_5 && 696 	(layout == HR_LAYOUT_RAID5_0R || layout == HR_LAYOUT_RAID5_NR)) { 697 	if ((stripe % (vol->extent_no - 1)) < p_extent) 698 	extent = stripe % (vol->extent_no - 1); 682 	assert(0 && "invalid layout configuration"); 683 	} 684 	case HR_LVL_5: 685 	switch (layout) { 686 	case HR_LAYOUT_RAID5_0R: 687 	return ((strip_no / (extent_no - 1)) % extent_no); 688 	case HR_LAYOUT_RAID5_NR: 689 	case HR_LAYOUT_RAID5_NC: 690 	return ((extent_no - 1) - 691 	(strip_no / (extent_no - 1)) % extent_no); 692 	default: 693 	assert(0 && "invalid layout configuration"); 694 	} 695 	default: 696 	assert(0 && "invalid layout configuration"); 697 	} 698 } 699 700 static size_t hr_raid5_data_extent(hr_level_t level, 701 	hr_layout_t layout, size_t extent_no, uint64_t strip_no, size_t p_extent) 702 { 703 switch 
(level) { 704 case HR_LVL_4: 705 switch (layout) { 706 case HR_LAYOUT_RAID4_0: 707 return ((strip_no % (extent_no - 1)) + 1); 708 case HR_LAYOUT_RAID4_N: 709 return (strip_no % (extent_no - 1)); 710 default: 711 assert(0 && "invalid layout configuration"); 712 } 713 case HR_LVL_5: 714 switch (layout) { 715 case HR_LAYOUT_RAID5_0R: 716 case HR_LAYOUT_RAID5_NR: 717 if ((strip_no % (extent_no - 1)) < p_extent) 718 return (strip_no % (extent_no - 1)); 699 719 else 700 extent = (stripe % (vol->extent_no - 1)) + 1; 701 } else if (level == HR_LVL_5 && layout == HR_LAYOUT_RAID5_NC) { 702 extent = 703 ((stripe % (vol->extent_no - 1)) + p_extent + 1) % 704 vol->extent_no; 705 } 706 } 707 708 error: 709 (void)hr_raid5_update_vol_state(vol); 710 fibril_rwlock_write_unlock(&vol->states_lock); 711 fibril_mutex_unlock(&vol->lock); 712 return rc; 720 return ((strip_no % (extent_no - 1)) + 1); 721 case HR_LAYOUT_RAID5_NC: 722 return (((strip_no % (extent_no - 1)) + p_extent + 1) % 723 extent_no); 724 default: 725 assert(0 && "invalid layout configuration"); 726 } 727 default: 728 assert(0 && "invalid layout configuration"); 729 } 713 730 } 714 731 … … 721 738 void *buf = NULL, *xorbuf = NULL; 722 739 723 fibril_mutex_lock(&vol->lock);724 740 fibril_rwlock_read_lock(&vol->extents_lock); 725 741 fibril_rwlock_write_lock(&vol->states_lock); … … 785 801 786 802 uint64_t ba = 0, cnt; 787 hr_add_ ba_offset(vol, &ba);803 hr_add_data_offset(vol, &ba); 788 804 789 805 while (left != 0) { … … 852 868 fibril_rwlock_write_unlock(&vol->states_lock); 853 869 fibril_rwlock_read_unlock(&vol->extents_lock); 854 fibril_mutex_unlock(&vol->lock);855 870 856 871 rc = vol->meta_ops->save(vol, WITH_STATE_CALLBACK); 857 872 858 fibril_mutex_lock(&vol->lock);859 873 fibril_rwlock_read_lock(&vol->extents_lock); 860 874 fibril_rwlock_write_lock(&vol->states_lock); 861 875 862 876 end: 863 (void)hr_raid5_update_vol_state(vol);877 hr_raid5_vol_state_eval_forced(vol); 864 878 865 879 
fibril_rwlock_write_unlock(&vol->states_lock); 866 880 fibril_rwlock_read_unlock(&vol->extents_lock); 867 fibril_mutex_unlock(&vol->lock);868 881 869 882 if (buf != NULL) -
uspace/srv/bd/hr/util.c
r9d1685b ra3486f2 150 150 } 151 151 152 vol->fge = hr_fpool_create(16, 32, sizeof(hr_io_t)); 152 if (level == HR_LVL_4 || level == HR_LVL_5) 153 vol->fge = hr_fpool_create(16, 32, sizeof(hr_io_raid5_t)); 154 else 155 vol->fge = hr_fpool_create(16, 32, sizeof(hr_io_t)); 156 153 157 if (vol->fge == NULL) { 154 158 rc = ENOMEM; … … 164 168 165 169 vol->state = HR_VOL_NONE; 166 167 fibril_mutex_initialize(&vol->lock); /* XXX: will remove this */168 170 169 171 fibril_mutex_initialize(&vol->md_lock); … … 496 498 "(%" PRIun ")\n", vol->devname, hs_idx, old, new); 497 499 vol->hotspares[hs_idx].svc_id = new; 498 }499 500 /*501 * Do a whole sync (ba = 0, cnt = 0) across all extents,502 * and update extent state. *For now*, the caller has to503 * update volume state after the syncs.504 *505 * TODO: add update_vol_state fcn ptr for each raid506 */507 void hr_sync_all_extents(hr_volume_t *vol)508 {509 errno_t rc;510 511 fibril_mutex_lock(&vol->lock);512 for (size_t i = 0; i < vol->extent_no; i++) {513 if (vol->extents[i].state != HR_EXT_ONLINE)514 continue;515 rc = block_sync_cache(vol->extents[i].svc_id, 0, 0);516 if (rc == ENOMEM || rc == ENOTSUP)517 continue;518 if (rc != EOK) {519 if (rc == ENOENT)520 hr_update_ext_state(vol, i, HR_EXT_MISSING);521 else if (rc != EOK)522 hr_update_ext_state(vol, i, HR_EXT_FAILED);523 }524 }525 fibril_mutex_unlock(&vol->lock);526 500 } 527 501 … … 1116 1090 } 1117 1091 1092 void hr_raid5_xor(void *dst, const void *src, size_t size) 1093 { 1094 size_t i; 1095 uint64_t *d = dst; 1096 const uint64_t *s = src; 1097 1098 for (i = 0; i < size / sizeof(uint64_t); ++i) 1099 *d++ ^= *s++; 1100 } 1101 1118 1102 /** @} 1119 1103 */ -
uspace/srv/bd/hr/util.h
r9d1685b ra3486f2 110 110 extern errno_t hr_util_try_assemble(hr_config_t *, size_t *); 111 111 extern errno_t hr_util_add_hotspare(hr_volume_t *, service_id_t); 112 extern void hr_raid5_xor(void *, const void *, size_t); 112 113 113 114 #endif -
uspace/srv/bd/hr/var.h
r9d1685b ra3486f2 52 52 struct hr_volume; 53 53 typedef struct hr_volume hr_volume_t; 54 typedef struct hr_stripe hr_stripe_t; 54 55 typedef struct hr_metadata hr_metadata_t; 55 56 typedef struct hr_superblock_ops hr_superblock_ops_t; … … 69 70 service_id_t svc_id; /* service id */ 70 71 71 fibril_mutex_t lock; /* XXX: gone after para */72 72 list_t range_lock_list; /* list of range locks */ 73 73 fibril_mutex_t range_lock_list_lock; /* range locks list lock */ 74 74 75 hr_fpool_t *fge; /* fibril pool */ 75 76
Note: See TracChangeset for help on using the changeset viewer.