Context Navigation

source: mainline/uspace/srv/bd/hr/raid1.c@ 23df41b

Visit:

Last change on this file since 23df41b was 23df41b, checked in by Miroslav Cimerman <mc@…>, 7 months ago
hr: move hr_process_deferred_invalidations() to util
Property mode set to `100644`
File size: 19.6 KB

Line
1	/*
2	* Copyright (c) 2025 Miroslav Cimerman
3	* All rights reserved.
4	*
5	* Redistribution and use in source and binary forms, with or without
6	* modification, are permitted provided that the following conditions
7	* are met:
8	*
9	* - Redistributions of source code must retain the above copyright
10	* notice, this list of conditions and the following disclaimer.
11	* - Redistributions in binary form must reproduce the above copyright
12	* notice, this list of conditions and the following disclaimer in the
13	* documentation and/or other materials provided with the distribution.
14	* - The name of the author may not be used to endorse or promote products
15	* derived from this software without specific prior written permission.
16	*
17	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27	*/
28
29	/** @addtogroup hr
30	* @{
31	*/
32	/**
33	* @file
34	*/
35
36	#include <bd_srv.h>
37	#include <block.h>
38	#include <errno.h>
39	#include <hr.h>
40	#include <io/log.h>
41	#include <ipc/hr.h>
42	#include <ipc/services.h>
43	#include <loc.h>
44	#include <task.h>
45	#include <stdatomic.h>
46	#include <stdio.h>
47	#include <stdlib.h>
48	#include <str_error.h>
49
50	#include "fge.h"
51	#include "io.h"
52	#include "superblock.h"
53	#include "util.h"
54	#include "var.h"
55
56	extern loc_srv_t *hr_srv;
57
58	static void hr_raid1_update_vol_status(hr_volume_t *);
59	static void hr_raid1_ext_state_callback(hr_volume_t *, size_t, errno_t);
60	static size_t hr_raid1_count_good_extents(hr_volume_t *, uint64_t, size_t,
61	uint64_t);
62	static errno_t hr_raid1_bd_op(hr_bd_op_type_t, bd_srv_t *, aoff64_t, size_t,
63	void , const void , size_t);
64	static errno_t hr_raid1_rebuild(void *);
65	static errno_t init_rebuild(hr_volume_t , size_t );
66	static errno_t swap_hs(hr_volume_t *, size_t, size_t);
67	static errno_t hr_raid1_restore_blocks(hr_volume_t *, size_t, uint64_t, size_t,
68	void *);
69
70	/* bdops */
71	static errno_t hr_raid1_bd_open(bd_srvs_t , bd_srv_t );
72	static errno_t hr_raid1_bd_close(bd_srv_t *);
73	static errno_t hr_raid1_bd_read_blocks(bd_srv_t , aoff64_t, size_t, void ,
74	size_t);
75	static errno_t hr_raid1_bd_sync_cache(bd_srv_t *, aoff64_t, size_t);
76	static errno_t hr_raid1_bd_write_blocks(bd_srv_t *, aoff64_t, size_t,
77	const void *, size_t);
78	static errno_t hr_raid1_bd_get_block_size(bd_srv_t , size_t );
79	static errno_t hr_raid1_bd_get_num_blocks(bd_srv_t , aoff64_t );
80
81	static bd_ops_t hr_raid1_bd_ops = {
82	.open = hr_raid1_bd_open,
83	.close = hr_raid1_bd_close,
84	.sync_cache = hr_raid1_bd_sync_cache,
85	.read_blocks = hr_raid1_bd_read_blocks,
86	.write_blocks = hr_raid1_bd_write_blocks,
87	.get_block_size = hr_raid1_bd_get_block_size,
88	.get_num_blocks = hr_raid1_bd_get_num_blocks
89	};
90
91	errno_t hr_raid1_create(hr_volume_t *new_volume)
92	{
93	errno_t rc;
94
95	assert(new_volume->level == HR_LVL_1);
96
97	if (new_volume->extent_no < 2) {
98	HR_ERROR("RAID 1 array needs at least 2 devices\n");
99	return EINVAL;
100	}
101
102	bd_srvs_init(&new_volume->hr_bds);
103	new_volume->hr_bds.ops = &hr_raid1_bd_ops;
104	new_volume->hr_bds.sarg = new_volume;
105
106	/* force volume state update */
107	atomic_store(&new_volume->state_changed, true);
108	hr_raid1_update_vol_status(new_volume);
109
110	fibril_rwlock_read_lock(&new_volume->states_lock);
111	hr_vol_status_t state = new_volume->status;
112	fibril_rwlock_read_unlock(&new_volume->states_lock);
113	if (state == HR_VOL_FAULTY \|\| state == HR_VOL_INVALID)
114	return EINVAL;
115
116	rc = hr_register_volume(new_volume);
117
118	return rc;
119	}
120
121	errno_t hr_raid1_init(hr_volume_t *vol)
122	{
123	errno_t rc;
124	size_t bsize;
125	uint64_t total_blkno;
126
127	assert(vol->level == HR_LVL_1);
128
129	rc = hr_check_devs(vol, &total_blkno, &bsize);
130	if (rc != EOK)
131	return rc;
132
133	vol->nblocks = total_blkno / vol->extent_no;
134	vol->bsize = bsize;
135	vol->data_offset = HR_DATA_OFF;
136	vol->data_blkno = vol->nblocks - vol->data_offset;
137	vol->strip_size = 0;
138
139	return EOK;
140	}
141
142	void hr_raid1_status_event(hr_volume_t *vol)
143	{
144	hr_raid1_update_vol_status(vol);
145	}
146
147	errno_t hr_raid1_add_hotspare(hr_volume_t *vol, service_id_t hotspare)
148	{
149	HR_DEBUG("hr_raid1_add_hotspare()\n");
150
151	errno_t rc = EOK;
152
153	fibril_mutex_lock(&vol->hotspare_lock);
154
155	if (vol->hotspare_no >= HR_MAX_HOTSPARES) {
156	HR_ERROR("hr_raid1_add_hotspare(): cannot add more hotspares "
157	"to \"%s\"\n", vol->devname);
158	rc = ELIMIT;
159	goto error;
160	}
161
162	size_t hs_idx = vol->hotspare_no;
163
164	vol->hotspare_no++;
165
166	hr_update_hotspare_svc_id(vol, hs_idx, hotspare);
167	hr_update_hotspare_status(vol, hs_idx, HR_EXT_HOTSPARE);
168
169	atomic_store(&vol->state_changed, true);
170	error:
171	fibril_mutex_unlock(&vol->hotspare_lock);
172
173	hr_raid1_update_vol_status(vol);
174
175	return rc;
176	}
177
178	static errno_t hr_raid1_bd_open(bd_srvs_t bds, bd_srv_t bd)
179	{
180	HR_DEBUG("hr_bd_open()\n");
181	return EOK;
182	}
183
184	static errno_t hr_raid1_bd_close(bd_srv_t *bd)
185	{
186	HR_DEBUG("hr_bd_close()\n");
187	return EOK;
188	}
189
190	static errno_t hr_raid1_bd_sync_cache(bd_srv_t *bd, aoff64_t ba, size_t cnt)
191	{
192	return hr_raid1_bd_op(HR_BD_SYNC, bd, ba, cnt, NULL, NULL, 0);
193	}
194
195	static errno_t hr_raid1_bd_read_blocks(bd_srv_t *bd, aoff64_t ba, size_t cnt,
196	void *buf, size_t size)
197	{
198	return hr_raid1_bd_op(HR_BD_READ, bd, ba, cnt, buf, NULL, size);
199	}
200
201	static errno_t hr_raid1_bd_write_blocks(bd_srv_t *bd, aoff64_t ba, size_t cnt,
202	const void *data, size_t size)
203	{
204	return hr_raid1_bd_op(HR_BD_WRITE, bd, ba, cnt, NULL, data, size);
205	}
206
207	static errno_t hr_raid1_bd_get_block_size(bd_srv_t bd, size_t rsize)
208	{
209	hr_volume_t *vol = bd->srvs->sarg;
210
211	*rsize = vol->bsize;
212	return EOK;
213	}
214
215	static errno_t hr_raid1_bd_get_num_blocks(bd_srv_t bd, aoff64_t rnb)
216	{
217	hr_volume_t *vol = bd->srvs->sarg;
218
219	*rnb = vol->data_blkno;
220	return EOK;
221	}
222
223	static void hr_raid1_update_vol_status(hr_volume_t *vol)
224	{
225	bool exp = true;
226
227	if (!atomic_compare_exchange_strong(&vol->state_changed, &exp, false))
228	return;
229
230	if (atomic_compare_exchange_strong(&vol->pending_invalidation, &exp,
231	false)) {
232	fibril_mutex_lock(&vol->deferred_list_lock);
233	hr_process_deferred_invalidations(vol);
234	fibril_mutex_unlock(&vol->deferred_list_lock);
235	}
236
237	fibril_rwlock_read_lock(&vol->extents_lock);
238	fibril_rwlock_read_lock(&vol->states_lock);
239
240	hr_vol_status_t old_state = vol->status;
241	size_t healthy = hr_count_extents(vol, HR_EXT_ONLINE);
242
243	fibril_rwlock_read_unlock(&vol->states_lock);
244	fibril_rwlock_read_unlock(&vol->extents_lock);
245
246	if (healthy == 0) {
247	if (old_state != HR_VOL_FAULTY) {
248	fibril_rwlock_write_lock(&vol->states_lock);
249	hr_update_vol_status(vol, HR_VOL_FAULTY);
250	fibril_rwlock_write_unlock(&vol->states_lock);
251	}
252	} else if (healthy < vol->extent_no) {
253	if (old_state != HR_VOL_REBUILD &&
254	old_state != HR_VOL_DEGRADED) {
255	fibril_rwlock_write_lock(&vol->states_lock);
256	hr_update_vol_status(vol, HR_VOL_DEGRADED);
257	fibril_rwlock_write_unlock(&vol->states_lock);
258	}
259
260	if (old_state != HR_VOL_REBUILD) {
261	if (vol->hotspare_no > 0) {
262	fid_t fib = fibril_create(hr_raid1_rebuild,
263	vol);
264	if (fib == 0)
265	return;
266	fibril_start(fib);
267	fibril_detach(fib);
268	}
269	}
270	} else {
271	if (old_state != HR_VOL_ONLINE) {
272	fibril_rwlock_write_lock(&vol->states_lock);
273	hr_update_vol_status(vol, HR_VOL_ONLINE);
274	fibril_rwlock_write_unlock(&vol->states_lock);
275	}
276	}
277	}
278
279	static void hr_raid1_ext_state_callback(hr_volume_t *vol, size_t extent,
280	errno_t rc)
281	{
282	if (rc == EOK)
283	return;
284
285	assert(fibril_rwlock_is_locked(&vol->extents_lock));
286
287	fibril_rwlock_write_lock(&vol->states_lock);
288
289	switch (rc) {
290	case ENOMEM:
291	fibril_mutex_lock(&vol->deferred_list_lock);
292
293	service_id_t invalid_svc_id = vol->extents[extent].svc_id;
294
295	list_foreach(vol->deferred_invalidations_list, link,
296	hr_deferred_invalidation_t, di) {
297	if (di->svc_id == invalid_svc_id) {
298	assert(vol->extents[extent].status ==
299	HR_EXT_INVALID);
300	goto deferring_end;
301	}
302	}
303
304	assert(vol->extents[extent].svc_id != HR_EXT_INVALID);
305
306	hr_update_ext_status(vol, extent, HR_EXT_INVALID);
307
308	size_t i = list_count(&vol->deferred_invalidations_list);
309	vol->deferred_inval[i].svc_id = invalid_svc_id;
310	vol->deferred_inval[i].index = extent;
311
312	list_append(&vol->deferred_inval[i].link,
313	&vol->deferred_invalidations_list);
314
315	atomic_store(&vol->pending_invalidation, true);
316	deferring_end:
317
318	fibril_mutex_unlock(&vol->deferred_list_lock);
319	break;
320	case ENOENT:
321	hr_update_ext_status(vol, extent, HR_EXT_MISSING);
322	break;
323	default:
324	hr_update_ext_status(vol, extent, HR_EXT_FAILED);
325	}
326
327	atomic_store(&vol->state_changed, true);
328
329	fibril_rwlock_write_unlock(&vol->states_lock);
330	}
331
332	static size_t hr_raid1_count_good_extents(hr_volume_t *vol, uint64_t ba,
333	size_t cnt, uint64_t rebuild_blk)
334	{
335	assert(fibril_rwlock_is_locked(&vol->extents_lock));
336	assert(fibril_rwlock_is_locked(&vol->states_lock));
337
338	size_t count = 0;
339	for (size_t i = 0; i < vol->extent_no; i++) {
340	if (vol->extents[i].status == HR_EXT_ONLINE \|\|
341	(vol->extents[i].status == HR_EXT_REBUILD &&
342	ba < rebuild_blk)) {
343	count++;
344	}
345	}
346
347	return count;
348
349	}
350
351	static errno_t hr_raid1_bd_op(hr_bd_op_type_t type, bd_srv_t *bd, aoff64_t ba,
352	size_t cnt, void data_read, const void data_write, size_t size)
353	{
354	hr_volume_t *vol = bd->srvs->sarg;
355	hr_range_lock_t *rl = NULL;
356	errno_t rc;
357	size_t i;
358	uint64_t rebuild_blk;
359
360	fibril_rwlock_read_lock(&vol->states_lock);
361	hr_vol_status_t vol_state = vol->status;
362	fibril_rwlock_read_unlock(&vol->states_lock);
363
364	if (vol_state == HR_VOL_FAULTY \|\| vol_state == HR_VOL_INVALID)
365	return EIO;
366
367	if (type == HR_BD_READ \|\| type == HR_BD_WRITE)
368	if (size < cnt * vol->bsize)
369	return EINVAL;
370
371	rc = hr_check_ba_range(vol, cnt, ba);
372	if (rc != EOK)
373	return rc;
374
375	/* allow full dev sync */
376	if (type != HR_BD_SYNC \|\| ba != 0)
377	hr_add_ba_offset(vol, &ba);
378
379	/*
380	* this is to allow adding hotspare or start a rebuild on
381	* very busy array, because of how rwlocks are implemented
382	* in HelenOS (no writer priority, so if there are multiple
383	* continuos readers, writer will never own the lock)
384	*/
385	if (vol->halt_please) {
386	fibril_mutex_lock(&vol->halt_lock);
387	fibril_mutex_unlock(&vol->halt_lock);
388	}
389
390	/*
391	* extent order has to be locked for the whole IO duration,
392	* so that workers have consistent targets
393	*/
394	fibril_rwlock_read_lock(&vol->extents_lock);
395
396	size_t successful = 0;
397	switch (type) {
398	case HR_BD_READ:
399	rebuild_blk = atomic_load_explicit(&vol->rebuild_blk,
400	memory_order_relaxed);
401
402	for (i = 0; i < vol->extent_no; i++) {
403	fibril_rwlock_read_lock(&vol->states_lock);
404	hr_ext_status_t state = vol->extents[i].status;
405	fibril_rwlock_read_unlock(&vol->states_lock);
406
407	if (state != HR_EXT_ONLINE &&
408	(state != HR_EXT_REBUILD \|\|
409	ba + cnt - 1 >= rebuild_blk)) {
410	continue;
411	}
412
413	rc = block_read_direct(vol->extents[i].svc_id, ba, cnt,
414	data_read);
415
416	if (rc == ENOMEM && i + 1 == vol->extent_no)
417	goto end;
418
419	if (rc == ENOMEM)
420	continue;
421
422	if (rc != EOK) {
423	hr_raid1_ext_state_callback(vol, i, rc);
424	} else {
425	successful++;
426	break;
427	}
428	}
429	break;
430	case HR_BD_SYNC:
431	case HR_BD_WRITE:
432	if (type == HR_BD_WRITE) {
433	rl = hr_range_lock_acquire(vol, ba, cnt);
434	if (rl == NULL) {
435	rc = ENOMEM;
436	goto end;
437	}
438	}
439
440	fibril_rwlock_read_lock(&vol->states_lock);
441
442	rebuild_blk = atomic_load_explicit(&vol->rebuild_blk,
443	memory_order_relaxed);
444
445	size_t good = hr_raid1_count_good_extents(vol, ba, cnt,
446	rebuild_blk);
447
448	hr_fgroup_t *group = hr_fgroup_create(vol->fge, good);
449	if (group == NULL) {
450	if (type == HR_BD_WRITE)
451	hr_range_lock_release(rl);
452	rc = ENOMEM;
453	fibril_rwlock_read_unlock(&vol->states_lock);
454	goto end;
455	}
456
457	for (i = 0; i < vol->extent_no; i++) {
458	if (vol->extents[i].status != HR_EXT_ONLINE &&
459	(vol->extents[i].status != HR_EXT_REBUILD \|\|
460	ba >= rebuild_blk)) {
461	/*
462	* When the extent is being rebuilt,
463	* we only write to the part that is already
464	* rebuilt. If IO starts after vol->rebuild_blk
465	* we do not proceed, the write is going to
466	* be replicated later in the rebuild.
467	*/
468	continue;
469	}
470
471	hr_io_t *io = hr_fgroup_alloc(group);
472	io->extent = i;
473	io->data_write = data_write;
474	io->data_read = data_read;
475	io->ba = ba;
476	io->cnt = cnt;
477	io->type = type;
478	io->vol = vol;
479	io->state_callback = hr_raid1_ext_state_callback;
480
481	hr_fgroup_submit(group, hr_io_worker, io);
482	}
483
484	fibril_rwlock_read_unlock(&vol->states_lock);
485
486	(void)hr_fgroup_wait(group, &successful, NULL);
487
488	if (type == HR_BD_WRITE)
489	hr_range_lock_release(rl);
490
491	break;
492	default:
493	rc = EINVAL;
494	goto end;
495	}
496
497	if (successful > 0)
498	rc = EOK;
499	else
500	rc = EIO;
501
502	end:
503	fibril_rwlock_read_unlock(&vol->extents_lock);
504
505	hr_raid1_update_vol_status(vol);
506
507	return rc;
508	}
509
510	/*
511	* Put the last HOTSPARE extent in place
512	* of first that != ONLINE, and start the rebuild.
513	*/
514	static errno_t hr_raid1_rebuild(void *arg)
515	{
516	HR_DEBUG("hr_raid1_rebuild()\n");
517
518	hr_volume_t *vol = arg;
519	void *buf = NULL;
520	size_t rebuild_idx;
521	errno_t rc;
522
523	rc = init_rebuild(vol, &rebuild_idx);
524	if (rc != EOK)
525	return rc;
526
527	size_t left = vol->data_blkno;
528	size_t max_blks = DATA_XFER_LIMIT / vol->bsize;
529	buf = malloc(max_blks * vol->bsize);
530
531	size_t cnt;
532	uint64_t ba = 0;
533	hr_add_ba_offset(vol, &ba);
534
535	fibril_rwlock_read_lock(&vol->extents_lock);
536
537	hr_range_lock_t *rl = NULL;
538
539	while (left != 0) {
540	if (vol->halt_please) {
541	fibril_rwlock_read_unlock(&vol->extents_lock);
542	fibril_mutex_lock(&vol->halt_lock);
543	fibril_mutex_unlock(&vol->halt_lock);
544	fibril_rwlock_read_lock(&vol->extents_lock);
545	}
546
547	cnt = min(max_blks, left);
548
549	rl = hr_range_lock_acquire(vol, ba, cnt);
550	if (rl == NULL) {
551	rc = ENOMEM;
552	goto end;
553	}
554
555	atomic_store_explicit(&vol->rebuild_blk, ba,
556	memory_order_relaxed);
557
558	rc = hr_raid1_restore_blocks(vol, rebuild_idx, ba, cnt, buf);
559
560	hr_range_lock_release(rl);
561
562	if (rc != EOK)
563	goto end;
564
565	ba += cnt;
566	left -= cnt;
567	}
568
569	HR_DEBUG("hr_raid1_rebuild(): rebuild finished on \"%s\" (%lu), "
570	"extent no. %lu\n", vol->devname, vol->svc_id, rebuild_idx);
571
572	fibril_rwlock_write_lock(&vol->states_lock);
573
574	hr_update_ext_status(vol, rebuild_idx, HR_EXT_ONLINE);
575	/*
576	* We can be optimistic here, if some extents are
577	* still INVALID, FAULTY or MISSING, the update vol
578	* function will pick them up, and set the volume
579	* state accordingly.
580	*/
581	hr_update_vol_status(vol, HR_VOL_ONLINE);
582	atomic_store(&vol->state_changed, true);
583
584	fibril_rwlock_write_unlock(&vol->states_lock);
585
586	/*
587	* For now write metadata at the end, because
588	* we don't sync metada accross extents yet.
589	*/
590	hr_write_meta_to_ext(vol, rebuild_idx);
591	end:
592	if (rc != EOK) {
593	/*
594	* We can fail either because:
595	* - the rebuild extent failing or invalidation
596	* - there is are no ONLINE extents (vol is FAULTY)
597	* - we got ENOMEM on all READs (we also invalidate the
598	* rebuild extent here, for now)
599	*/
600	fibril_rwlock_write_lock(&vol->states_lock);
601	hr_update_vol_status(vol, HR_VOL_DEGRADED);
602	atomic_store(&vol->state_changed, true);
603	fibril_rwlock_write_unlock(&vol->states_lock);
604	}
605
606	fibril_rwlock_read_unlock(&vol->extents_lock);
607
608	hr_raid1_update_vol_status(vol);
609
610	if (buf != NULL)
611	free(buf);
612
613	return rc;
614	}
615
616	static errno_t init_rebuild(hr_volume_t vol, size_t rebuild_idx)
617	{
618	errno_t rc = EOK;
619
620	fibril_mutex_lock(&vol->halt_lock);
621	vol->halt_please = true;
622	fibril_rwlock_write_lock(&vol->extents_lock);
623	fibril_rwlock_write_lock(&vol->states_lock);
624	fibril_mutex_lock(&vol->hotspare_lock);
625
626	if (vol->hotspare_no == 0) {
627	HR_WARN("hr_raid1_rebuild(): no free hotspares on \"%s\", "
628	"aborting rebuild\n", vol->devname);
629	rc = EINVAL;
630	goto error;
631	}
632
633	size_t bad = vol->extent_no;
634	for (size_t i = 0; i < vol->extent_no; i++) {
635	if (vol->extents[i].status != HR_EXT_ONLINE) {
636	bad = i;
637	break;
638	}
639	}
640
641	if (bad == vol->extent_no) {
642	HR_WARN("hr_raid1_rebuild(): no bad extent on \"%s\", "
643	"aborting rebuild\n", vol->devname);
644	rc = EINVAL;
645	goto error;
646	}
647
648	size_t hotspare_idx = vol->hotspare_no - 1;
649
650	hr_ext_status_t hs_state = vol->hotspares[hotspare_idx].status;
651	if (hs_state != HR_EXT_HOTSPARE) {
652	HR_ERROR("hr_raid1_rebuild(): invalid hotspare state \"%s\", "
653	"aborting rebuild\n", hr_get_ext_status_msg(hs_state));
654	rc = EINVAL;
655	goto error;
656	}
657
658	rc = swap_hs(vol, bad, hotspare_idx);
659	if (rc != EOK) {
660	HR_ERROR("hr_raid1_rebuild(): swapping hotspare failed, "
661	"aborting rebuild\n");
662	goto error;
663	}
664
665	hr_extent_t *rebuild_ext = &vol->extents[bad];
666
667	HR_DEBUG("hr_raid1_rebuild(): starting REBUILD on extent no. %lu (%lu)"
668	"\n", bad, rebuild_ext->svc_id);
669
670	atomic_store_explicit(&vol->rebuild_blk, 0, memory_order_relaxed);
671
672	hr_update_ext_status(vol, bad, HR_EXT_REBUILD);
673	hr_update_vol_status(vol, HR_VOL_REBUILD);
674
675	*rebuild_idx = bad;
676	error:
677	fibril_mutex_unlock(&vol->hotspare_lock);
678	fibril_rwlock_write_unlock(&vol->states_lock);
679	fibril_rwlock_write_unlock(&vol->extents_lock);
680	vol->halt_please = false;
681	fibril_mutex_unlock(&vol->halt_lock);
682
683	return rc;
684	}
685
686	static errno_t swap_hs(hr_volume_t *vol, size_t bad, size_t hs)
687	{
688	HR_DEBUG("hr_raid1_rebuild(): swapping in hotspare\n");
689
690	service_id_t faulty_svc_id = vol->extents[bad].svc_id;
691	service_id_t hs_svc_id = vol->hotspares[hs].svc_id;
692
693	/* TODO: if rc != EOK, try next hotspare */
694	errno_t rc = block_init(hs_svc_id);
695	if (rc != EOK) {
696	HR_ERROR("hr_raid1_rebuild(): initing hotspare (%lu) failed\n",
697	hs_svc_id);
698	return rc;
699	}
700
701	hr_update_ext_svc_id(vol, bad, hs_svc_id);
702	hr_update_ext_status(vol, bad, HR_EXT_HOTSPARE);
703
704	hr_update_hotspare_svc_id(vol, hs, 0);
705	hr_update_hotspare_status(vol, hs, HR_EXT_INVALID);
706
707	vol->hotspare_no--;
708
709	if (faulty_svc_id != 0)
710	block_fini(faulty_svc_id);
711
712	return EOK;
713	}
714
715	static errno_t hr_raid1_restore_blocks(hr_volume_t *vol, size_t rebuild_idx,
716	uint64_t ba, size_t cnt, void *buf)
717	{
718	HR_DEBUG("REBUILD restoring blocks (ba: %lu, cnt: %lu)\n", ba, cnt);
719
720	assert(fibril_rwlock_is_locked(&vol->extents_lock));
721
722	errno_t rc = ENOENT;
723	hr_extent_t ext, rebuild_ext = &vol->extents[rebuild_idx];
724
725	for (size_t i = 0; i < vol->extent_no; i++) {
726	fibril_rwlock_read_lock(&vol->states_lock);
727
728	ext = &vol->extents[i];
729	if (ext->status != HR_EXT_ONLINE)
730	continue;
731
732	fibril_rwlock_read_unlock(&vol->states_lock);
733
734	rc = block_read_direct(ext->svc_id, ba, cnt, buf);
735	if (rc == EOK)
736	break;
737
738	if (rc != ENOMEM)
739	hr_raid1_ext_state_callback(vol, i, rc);
740
741	if (i + 1 >= vol->extent_no) {
742	if (rc != ENOMEM) {
743	HR_ERROR("rebuild on \"%s\" (%lu), failed due "
744	"to too many failed extents\n",
745	vol->devname, vol->svc_id);
746	}
747
748	/* for now we have to invalidate the rebuild extent */
749	if (rc == ENOMEM) {
750	HR_ERROR("rebuild on \"%s\" (%lu), failed due "
751	"to too many failed reads, because of not "
752	"enough memory\n",
753	vol->devname, vol->svc_id);
754	hr_raid1_ext_state_callback(vol, rebuild_idx,
755	ENOMEM);
756	}
757
758	return rc;
759	}
760	}
761
762	rc = block_write_direct(rebuild_ext->svc_id, ba, cnt, buf);
763	if (rc != EOK) {
764	/*
765	* Here we dont handle ENOMEM, because maybe in the
766	* future, there is going to be M_WAITOK, or we are
767	* going to wait for more memory, so that we don't
768	* have to invalidate it...
769	*
770	* XXX: for now we do
771	*/
772	hr_raid1_ext_state_callback(vol, rebuild_idx, rc);
773
774	HR_ERROR("rebuild on \"%s\" (%lu), failed due to "
775	"the rebuilt extent no. %lu WRITE (rc: %s)\n",
776	vol->devname, vol->svc_id, rebuild_idx, str_error(rc));
777
778	return rc;
779	}
780
781	return EOK;
782	}
783
784	/** @}
785	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: