Context Navigation

source: mainline/kernel/generic/src/adt/cht.c@ 85d31de9

Visit:

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export

Last change on this file since 85d31de9 was 85d31de9, checked in by Adam Hraska <adam.hraska+hos@…>, 13 years ago
Fix: hash table key type mismatch on 64bit machines in locfs_ops.c.
Property mode set to `100644`
File size: 56.8 KB

Line
1	/*
2	* Copyright (c) 2012 Adam Hraska
3	* All rights reserved.
4	*
5	* Redistribution and use in source and binary forms, with or without
6	* modification, are permitted provided that the following conditions
7	* are met:
8	*
9	* - Redistributions of source code must retain the above copyright
10	* notice, this list of conditions and the following disclaimer.
11	* - Redistributions in binary form must reproduce the above copyright
12	* notice, this list of conditions and the following disclaimer in the
13	* documentation and/or other materials provided with the distribution.
14	* - The name of the author may not be used to endorse or promote products
15	* derived from this software without specific prior written permission.
16	*
17	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18	* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19	* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20	* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21	* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22	* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26	* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27	*/
28
29
30	/** @addtogroup genericadt
31	* @{
32	*/
33
34	/**
35	* @file
36	* @brief Scalable resizable concurrent lock-free hash table.
37	*
38	*/
39
40	#include <adt/cht.h>
41	#include <adt/hash.h>
42	#include <debug.h>
43	#include <memstr.h>
44	#include <mm/slab.h>
45	#include <arch/barrier.h>
46	#include <compiler/barrier.h>
47	#include <atomic.h>
48	#include <synch/rcu.h>
49
50
/* Logarithm of the min bucket count. Must be at least 3. 2^6 == 64 buckets. */
#define CHT_MIN_ORDER 6
/* Logarithm of the max bucket count, ie the number of bits in a size_t. */
#define CHT_MAX_ORDER (8 * sizeof(size_t))
/* Minimum number of hash table buckets, ie 2^CHT_MIN_ORDER. */
#define CHT_MIN_BUCKET_CNT (1 << CHT_MIN_ORDER)
/*
 * Max load cht_create() falls back to if the caller passes max_load == 0.
 * Does not have to be a power of 2.
 */
#define CHT_MAX_LOAD 2
59
60	typedef cht_ptr_t marked_ptr_t;
61	typedef bool (equal_pred_t)(void arg, const cht_link_t *item);
62
/*
 * Marks stored alongside the next pointer in a marked_ptr_t.
 *
 * Several names deliberately share a value: the meaning of a bit depends
 * on whether the link is a bucket head or a node's next link.
 */
typedef enum mark {
	/* Clean link, no mark set. */
	N_NORMAL = 0,
	/* The node owning the link is logically deleted. */
	N_DELETED = 1,
	/* Same bit as N_DELETED; named for use on bucket heads ("const head"). */
	N_CONST = 1,
	/* Both mark bits set; used on bucket heads while the table resizes. */
	N_INVALID = 3,
	/* Join node mark. */
	N_JOIN = 2,
	/* Same bit as N_JOIN (mark_deleted() asserts they are equal). */
	N_JOIN_FOLLOWS = 2,
	/* Mask covering all mark bits. */
	N_MARK_MASK = 3
} mark_t;
72
/* Determines how bucket walks treat JOIN/JOIN_FOLLOWS marked nodes. */
typedef enum walk_mode {
	/* Default mode; used unless upd_resizing_head() selects another one. */
	WM_NORMAL = 4,
	/* Join nodes are never unlinked, even if marked deleted. */
	WM_LEAVE_JOIN,
	/* The JOIN_FOLLOWS mark is carried over when linking/unlinking nodes. */
	WM_MOVE_JOIN_FOLLOWS
} walk_mode_t;
78
79	typedef struct wnd {
80	marked_ptr_t *ppred;
81	cht_link_t *cur;
82	cht_link_t *last;
83	} wnd_t;
84
85
86	/* Sentinel node used by all buckets. Stores the greatest possible hash value.*/
87	static const cht_link_t sentinel = {
88	.link = 0,
89	.hash = -1
90	};
91
92
93	static size_t size_to_order(size_t bucket_cnt, size_t min_order);
94	static cht_buckets_t *alloc_buckets(size_t order, bool set_invalid);
95	static inline cht_link_t find_lazy(cht_t h, void *key);
96	static cht_link_t search_bucket(cht_t h, marked_ptr_t head, void *key,
97	size_t search_hash);
98	static cht_link_t find_resizing(cht_t h, void *key, size_t hash,
99	marked_ptr_t old_head, size_t old_idx);
100	static bool insert_impl(cht_t h, cht_link_t item, bool unique);
101	static bool insert_at(cht_link_t item, const wnd_t wnd, walk_mode_t walk_mode,
102	bool *resizing);
103	static bool has_duplicates(cht_t h, const cht_link_t item, size_t hash,
104	const wnd_t *cwnd);
105	static cht_link_t find_duplicate(cht_t h, const cht_link_t *item, size_t hash,
106	cht_link_t *start);
107	static bool remove_pred(cht_t h, size_t hash, equal_pred_t pred, void pred_arg);
108	static bool delete_at(cht_t h, wnd_t wnd, walk_mode_t walk_mode,
109	bool deleted_but_gc, bool resizing);
110	static bool mark_deleted(cht_link_t cur, walk_mode_t walk_mode, bool resizing);
111	static bool unlink_from_pred(wnd_t wnd, walk_mode_t walk_mode, bool resizing);
112	static bool find_wnd_and_gc_pred(cht_t *h, size_t hash, walk_mode_t walk_mode,
113	equal_pred_t pred, void pred_arg, wnd_t wnd, bool *resizing);
114	static bool find_wnd_and_gc(cht_t *h, size_t hash, walk_mode_t walk_mode,
115	wnd_t wnd, bool resizing);
116	static bool gc_deleted_node(cht_t h, walk_mode_t walk_mode, wnd_t wnd,
117	bool *resizing);
118	static bool join_completed(cht_t h, const wnd_t wnd);
119	static void upd_resizing_head(cht_t h, size_t hash, marked_ptr_t *phead,
120	bool join_finishing, walk_mode_t walk_mode);
121	static void item_removed(cht_t *h);
122	static void item_inserted(cht_t *h);
123	static void free_later(cht_t h, cht_link_t item);
124	static void help_head_move(marked_ptr_t psrc_head, marked_ptr_t pdest_head);
125	static void start_head_move(marked_ptr_t *psrc_head);
126	static void mark_const(marked_ptr_t *psrc_head);
127	static void complete_head_move(marked_ptr_t psrc_head, marked_ptr_t pdest_head);
128	static void split_bucket(cht_t h, marked_ptr_t psrc_head,
129	marked_ptr_t *pdest_head, size_t split_hash);
130	static void mark_join_follows(cht_t h, marked_ptr_t psrc_head,
131	size_t split_hash, wnd_t *wnd);
132	static void mark_join_node(cht_link_t *join_node);
133	static void join_buckets(cht_t h, marked_ptr_t psrc_head,
134	marked_ptr_t *pdest_head, size_t split_hash);
135	static void link_to_join_node(cht_t h, marked_ptr_t pdest_head,
136	cht_link_t *join_node, size_t split_hash);
137	static void resize_table(work_t *arg);
138	static void grow_table(cht_t *h);
139	static void shrink_table(cht_t *h);
140	static void cleanup_join_node(cht_t h, marked_ptr_t new_head);
141	static void clear_join_and_gc(cht_t h, cht_link_t join_node,
142	marked_ptr_t *new_head);
143	static void cleanup_join_follows(cht_t h, marked_ptr_t new_head);
144	static marked_ptr_t make_link(const cht_link_t *next, mark_t mark);
145	static cht_link_t * get_next(marked_ptr_t link);
146	static mark_t get_mark(marked_ptr_t link);
147	static void next_wnd(wnd_t *wnd);
148	static bool same_node_pred(void node, const cht_link_t item2);
149	static size_t calc_key_hash(cht_t h, void key);
150	static size_t node_hash(cht_t h, const cht_link_t item);
151	static size_t calc_node_hash(cht_t h, const cht_link_t item);
152	static void memoize_node_hash(cht_t h, cht_link_t item);
153	static size_t calc_split_hash(size_t split_idx, size_t order);
154	static size_t calc_bucket_idx(size_t hash, size_t order);
155	static size_t grow_to_split_idx(size_t old_idx);
156	static size_t grow_idx(size_t idx);
157	static size_t shrink_idx(size_t idx);
158	static marked_ptr_t cas_link(marked_ptr_t link, const cht_link_t cur_next,
159	mark_t cur_mark, const cht_link_t *new_next, mark_t new_mark);
160	static marked_ptr_t _cas_link(marked_ptr_t *link, marked_ptr_t cur,
161	marked_ptr_t new);
162	static void cas_order_barrier(void);
163
164
165	bool cht_create(cht_t *h, size_t init_size, size_t min_size, size_t max_load,
166	cht_ops_t *op)
167	{
168	ASSERT(h);
169	ASSERT(op && op->hash && op->key_hash && op->equal && op->key_equal);
170	/* Memoized hashes are stored in the rcu_link.func function pointer. */
171	ASSERT(sizeof(size_t) == sizeof(rcu_func_t));
172	ASSERT(sentinel.hash == (uintptr_t)sentinel.rcu_link.func);
173
174	/* All operations are compulsory. */
175	if (!op \|\| !op->hash \|\| !op->key_hash \|\| !op->equal \|\| !op->key_equal)
176	return false;
177
178	size_t min_order = size_to_order(min_size, CHT_MIN_ORDER);
179	size_t order = size_to_order(init_size, min_order);
180
181	h->b = alloc_buckets(order, false);
182
183	if (!h->b)
184	return false;
185
186	h->max_load = (max_load == 0) ? CHT_MAX_LOAD : max_load;
187	h->min_order = min_order;
188	h->new_b = 0;
189	h->op = op;
190	atomic_set(&h->item_cnt, 0);
191	atomic_set(&h->resize_reqs, 0);
192	/*
193	* Cached item hashes are stored in item->rcu_link.func. Once the item
194	* is deleted rcu_link.func will contain the value of invalid_hash.
195	*/
196	h->invalid_hash = (uintptr_t)h->op->remove_callback;
197
198	/* Ensure the initialization takes place before we start using the table. */
199	write_barrier();
200
201	return true;
202	}
203
204	static cht_buckets_t *alloc_buckets(size_t order, bool set_invalid)
205	{
206	size_t bucket_cnt = (1 << order);
207	size_t bytes =
208	sizeof(cht_buckets_t) + (bucket_cnt - 1) * sizeof(marked_ptr_t);
209	cht_buckets_t *b = malloc(bytes, FRAME_ATOMIC);
210
211	if (!b)
212	return 0;
213
214	b->order = order;
215
216	marked_ptr_t head_link = set_invalid
217	? make_link(&sentinel, N_INVALID)
218	: make_link(&sentinel, N_NORMAL);
219
220	for (size_t i = 0; i < bucket_cnt; ++i) {
221	b->head[i] = head_link;
222	}
223
224	return b;
225	}
226
227	static size_t size_to_order(size_t bucket_cnt, size_t min_order)
228	{
229	size_t order = min_order;
230
231	/* Find a power of two such that bucket_cnt <= 2^order */
232	do {
233	if (bucket_cnt <= ((size_t)1 << order))
234	return order;
235
236	++order;
237	} while (order < CHT_MAX_ORDER);
238
239	return order;
240	}
241
242
243	void cht_destroy(cht_t *h)
244	{
245	/* Wait for resize to complete. */
246	while (0 < atomic_get(&h->resize_reqs)) {
247	rcu_barrier();
248	}
249
250	/* Wait for all remove_callback()s to complete. */
251	rcu_barrier();
252
253	free(h->b);
254	h->b = 0;
255	}
256
257	cht_link_t cht_find(cht_t h, void *key)
258	{
259	/* Make the most recent changes to the table visible. */
260	read_barrier();
261	return cht_find_lazy(h, key);
262	}
263
264	cht_link_t cht_find_lazy(cht_t h, void *key)
265	{
266	return find_lazy(h, key);
267	}
268
269	static inline cht_link_t find_lazy(cht_t h, void *key)
270	{
271	ASSERT(h);
272	ASSERT(rcu_read_locked());
273
274	size_t hash = calc_key_hash(h, key);
275
276	cht_buckets_t *b = rcu_access(h->b);
277	size_t idx = calc_bucket_idx(hash, b->order);
278	/*
279	* No need for access_once. b->head[idx] will point to an allocated node
280	* even if marked invalid until we exit rcu read section.
281	*/
282	marked_ptr_t head = b->head[idx];
283
284	if (N_INVALID == get_mark(head))
285	return find_resizing(h, key, hash, head, idx);
286
287	return search_bucket(h, head, key, hash);
288	}
289
290	cht_link_t cht_find_next(cht_t h, const cht_link_t *item)
291	{
292	/* Make the most recent changes to the table visible. */
293	read_barrier();
294	return cht_find_next_lazy(h, item);
295	}
296
297	cht_link_t cht_find_next_lazy(cht_t h, const cht_link_t *item)
298	{
299	ASSERT(h);
300	ASSERT(rcu_read_locked());
301	ASSERT(item);
302
303	return find_duplicate(h, item, calc_node_hash(h, item), get_next(item->link));
304	}
305
306	static inline cht_link_t search_bucket(cht_t h, marked_ptr_t head, void *key,
307	size_t search_hash)
308	{
309	/*
310	* It is safe to access nodes even outside of this bucket (eg when
311	* splitting the bucket). The resizer makes sure that any node we
312	* may find by following the next pointers is allocated.
313	*/
314
315	cht_link_t *cur = 0;
316	marked_ptr_t prev = head;
317
318	try_again:
319	/* Filter out items with different hashes. */
320	do {
321	cur = get_next(prev);
322	ASSERT(cur);
323	prev = cur->link;
324	} while (node_hash(h, cur) < search_hash);
325
326	/*
327	* Only search for an item with an equal key if cur is not the sentinel
328	* node or a node with a different hash.
329	*/
330	while (node_hash(h, cur) == search_hash) {
331	if (h->op->key_equal(key, cur)) {
332	if (!(N_DELETED & get_mark(cur->link)))
333	return cur;
334	}
335
336	cur = get_next(cur->link);
337	ASSERT(cur);
338	}
339
340	/*
341	* In the unlikely case that we have encountered a node whose cached
342	* hash has been overwritten due to a pending rcu_call for it, skip
343	* the node and try again.
344	*/
345	if (node_hash(h, cur) == h->invalid_hash) {
346	prev = cur->link;
347	goto try_again;
348	}
349
350	return 0;
351	}
352
353	static cht_link_t find_resizing(cht_t h, void *key, size_t hash,
354	marked_ptr_t old_head, size_t old_idx)
355	{
356	ASSERT(N_INVALID == get_mark(old_head));
357	ASSERT(h->new_b);
358
359	size_t new_idx = calc_bucket_idx(hash, h->new_b->order);
360	marked_ptr_t new_head = h->new_b->head[new_idx];
361	marked_ptr_t search_head = new_head;
362
363	/* Growing. */
364	if (h->b->order < h->new_b->order) {
365	/*
366	* Old bucket head is invalid, so it must have been already
367	* moved. Make the new head visible if still not visible, ie
368	* invalid.
369	*/
370	if (N_INVALID == get_mark(new_head)) {
371	/*
372	* We should be searching a newly added bucket but the old
373	* moved bucket has not yet been split (its marked invalid)
374	* or we have not yet seen the split.
375	*/
376	if (grow_idx(old_idx) != new_idx) {
377	/*
378	* Search the moved bucket. It is guaranteed to contain
379	* items of the newly added bucket that were present
380	* before the moved bucket was split.
381	*/
382	new_head = h->new_b->head[grow_idx(old_idx)];
383	}
384
385	/* new_head is now the moved bucket, either valid or invalid. */
386
387	/*
388	* The old bucket was definitely moved to new_head but the
389	* change of new_head had not yet propagated to this cpu.
390	*/
391	if (N_INVALID == get_mark(new_head)) {
392	/*
393	* We could issue a read_barrier() and make the now valid
394	* moved bucket head new_head visible, but instead fall back
395	* on using the old bucket. Although the old bucket head is
396	* invalid, it points to a node that is allocated and in the
397	* right bucket. Before the node can be freed, it must be
398	* unlinked from the head (or another item after that item
399	* modified the new_head) and a grace period must elapse.
400	* As a result had the node been already freed the grace
401	* period preceeding the free() would make the unlink and
402	* any changes to new_head visible. Therefore, it is safe
403	* to use the node pointed to from the old bucket head.
404	*/
405
406	search_head = old_head;
407	} else {
408	search_head = new_head;
409	}
410	}
411
412	return search_bucket(h, search_head, key, hash);
413	} else if (h->b->order > h->new_b->order) {
414	/* Shrinking. */
415
416	/* Index of the bucket in the old table that was moved. */
417	size_t move_src_idx = grow_idx(new_idx);
418	marked_ptr_t moved_old_head = h->b->head[move_src_idx];
419
420	/*
421	* h->b->head[move_src_idx] had already been moved to new_head
422	* but the change to new_head had not yet propagated to us.
423	*/
424	if (N_INVALID == get_mark(new_head)) {
425	/*
426	* new_head is definitely valid and we could make it visible
427	* to this cpu with a read_barrier(). Instead, use the bucket
428	* in the old table that was moved even though it is now marked
429	* as invalid. The node it points to must be allocated because
430	* a grace period would have to elapse before it could be freed;
431	* and the grace period would make the now valid new_head
432	* visible to all cpus.
433	*
434	* Note that move_src_idx may not be the same as old_idx.
435	* If move_src_idx != old_idx then old_idx is the bucket
436	* in the old table that is not moved but instead it is
437	* appended to the moved bucket, ie it is added at the tail
438	* of new_head. In that case an invalid old_head notes that
439	* it had already been merged into (the moved) new_head.
440	* We will try to search that bucket first because it
441	* may contain some newly added nodes after the bucket
442	* join. Moreover, the bucket joining link may already be
443	* visible even if new_head is not. Therefore, if we're
444	* lucky we'll find the item via moved_old_head. In any
445	* case, we'll retry in proper old_head if not found.
446	*/
447	search_head = moved_old_head;
448	}
449
450	cht_link_t *ret = search_bucket(h, search_head, key, hash);
451
452	if (ret)
453	return ret;
454	/*
455	* Bucket old_head was already joined with moved_old_head
456	* in the new table but we have not yet seen change of the
457	* joining link (or the item is not in the table).
458	*/
459	if (move_src_idx != old_idx && get_next(old_head) != &sentinel) {
460	/*
461	* Note that old_head (the bucket to be merged into new_head)
462	* points to an allocated join node (if non-null) even if marked
463	* invalid. Before the resizer lets join nodes to be unlinked
464	* (and freed) it sets old_head to 0 and waits for a grace period.
465	* So either the invalid old_head points to join node; or old_head
466	* is null and we would have seen a completed bucket join while
467	* traversing search_head.
468	*/
469	ASSERT(N_JOIN & get_mark(get_next(old_head)->link));
470	return search_bucket(h, old_head, key, hash);
471	}
472
473	return 0;
474	} else {
475	/*
476	* Resize is almost done. The resizer is waiting to make
477	* sure all cpus see that the new table replaced the old one.
478	*/
479	ASSERT(h->b->order == h->new_b->order);
480	/*
481	* The resizer must ensure all new bucket heads are visible before
482	* replacing the old table.
483	*/
484	ASSERT(N_NORMAL == get_mark(new_head));
485	return search_bucket(h, new_head, key, hash);
486	}
487	}
488
489
490	void cht_insert(cht_t h, cht_link_t item)
491	{
492	insert_impl(h, item, false);
493	}
494
495	bool cht_insert_unique(cht_t h, cht_link_t item)
496	{
497	return insert_impl(h, item, true);
498	}
499
500	static bool insert_impl(cht_t h, cht_link_t item, bool unique)
501	{
502	rcu_read_lock();
503
504	cht_buckets_t *b = rcu_access(h->b);
505	memoize_node_hash(h, item);
506	size_t hash = node_hash(h, item);
507	size_t idx = calc_bucket_idx(hash, b->order);
508	marked_ptr_t *phead = &b->head[idx];
509
510	bool resizing = false;
511	bool inserted = false;
512
513	do {
514	walk_mode_t walk_mode = WM_NORMAL;
515	bool join_finishing;
516
517	resizing = resizing \|\| (N_NORMAL != get_mark(*phead));
518
519	/* The table is resizing. Get the correct bucket head. */
520	if (resizing) {
521	upd_resizing_head(h, hash, &phead, &join_finishing, &walk_mode);
522	}
523
524	wnd_t wnd = {
525	.ppred = phead,
526	.cur = get_next(*phead),
527	.last = 0
528	};
529
530	if (!find_wnd_and_gc(h, hash, walk_mode, &wnd, &resizing)) {
531	/* Could not GC a node; or detected an unexpected resize. */
532	continue;
533	}
534
535	if (unique && has_duplicates(h, item, hash, &wnd)) {
536	rcu_read_unlock();
537	return false;
538	}
539
540	inserted = insert_at(item, &wnd, walk_mode, &resizing);
541	} while (!inserted);
542
543	rcu_read_unlock();
544
545	item_inserted(h);
546	return true;
547	}
548
549	inline static bool insert_at(cht_link_t item, const wnd_t wnd,
550	walk_mode_t walk_mode, bool *resizing)
551	{
552	marked_ptr_t ret;
553
554	if (walk_mode == WM_NORMAL) {
555	item->link = make_link(wnd->cur, N_NORMAL);
556	/* Initialize the item before adding it to a bucket. */
557	memory_barrier();
558
559	/* Link a clean/normal predecessor to the item. */
560	ret = cas_link(wnd->ppred, wnd->cur, N_NORMAL, item, N_NORMAL);
561
562	if (ret == make_link(wnd->cur, N_NORMAL)) {
563	return true;
564	} else {
565	/* This includes an invalid head but not a const head. */
566	*resizing = ((N_JOIN_FOLLOWS \| N_JOIN) & get_mark(ret));
567	return false;
568	}
569	} else if (walk_mode == WM_MOVE_JOIN_FOLLOWS) {
570	/* Move JOIN_FOLLOWS mark but filter out the DELETED mark. */
571	mark_t jf_mark = get_mark(*wnd->ppred) & N_JOIN_FOLLOWS;
572	item->link = make_link(wnd->cur, jf_mark);
573	/* Initialize the item before adding it to a bucket. */
574	memory_barrier();
575
576	/* Link the not-deleted predecessor to the item. Move its JF mark. */
577	ret = cas_link(wnd->ppred, wnd->cur, jf_mark, item, N_NORMAL);
578
579	return ret == make_link(wnd->cur, jf_mark);
580	} else {
581	ASSERT(walk_mode == WM_LEAVE_JOIN);
582
583	item->link = make_link(wnd->cur, N_NORMAL);
584	/* Initialize the item before adding it to a bucket. */
585	memory_barrier();
586
587	mark_t pred_mark = get_mark(*wnd->ppred);
588	/* If the predecessor is a join node it may be marked deleted.*/
589	mark_t exp_pred_mark = (N_JOIN & pred_mark) ? pred_mark : N_NORMAL;
590
591	ret = cas_link(wnd->ppred, wnd->cur, exp_pred_mark, item, exp_pred_mark);
592	return ret == make_link(wnd->cur, exp_pred_mark);
593	}
594	}
595
596	static inline bool has_duplicates(cht_t h, const cht_link_t item, size_t hash,
597	const wnd_t *wnd)
598	{
599	ASSERT(wnd->cur);
600	ASSERT(wnd->cur == &sentinel \|\| hash <= node_hash(h, wnd->cur)
601	\|\| node_hash(h, wnd->cur) == h->invalid_hash);
602
603	/* hash < node_hash(h, wnd->cur) */
604	if (hash != node_hash(h, wnd->cur) && h->invalid_hash != node_hash(h, wnd->cur))
605	return false;
606
607	/*
608	* Load the most recent node marks. Otherwise we might pronounce a
609	* logically deleted node for a duplicate of the item just because
610	* the deleted node's DEL mark had not yet propagated to this cpu.
611	*/
612	read_barrier();
613	return 0 != find_duplicate(h, item, hash, wnd->cur);
614	}
615
616	static cht_link_t find_duplicate(cht_t h, const cht_link_t *item, size_t hash,
617	cht_link_t *start)
618	{
619	ASSERT(hash <= node_hash(h, start) \|\| h->invalid_hash == node_hash(h, start));
620
621	cht_link_t *cur = start;
622
623	try_again:
624	ASSERT(cur);
625
626	while (node_hash(h, cur) == hash) {
627	ASSERT(cur != &sentinel);
628
629	bool deleted = (N_DELETED & get_mark(cur->link));
630
631	/* Skip logically deleted nodes. */
632	if (!deleted && h->op->equal(item, cur))
633	return cur;
634
635	cur = get_next(cur->link);
636	ASSERT(cur);
637	}
638
639	if (h->invalid_hash == node_hash(h, cur)) {
640	cur = get_next(cur->link);
641	goto try_again;
642	}
643
644	return 0;
645	}
646
647	size_t cht_remove_key(cht_t h, void key)
648	{
649	ASSERT(h);
650
651	size_t hash = calc_key_hash(h, key);
652	size_t removed = 0;
653
654	while (remove_pred(h, hash, h->op->key_equal, key))
655	++removed;
656
657	return removed;
658	}
659
660	bool cht_remove_item(cht_t h, cht_link_t item)
661	{
662	ASSERT(h);
663	ASSERT(item);
664
665	/*
666	* Even though we know the node we want to delete we must unlink it
667	* from the correct bucket and from a clean/normal predecessor. Therefore,
668	* we search for it again from the beginning of the correct bucket.
669	*/
670	size_t hash = calc_node_hash(h, item);
671	return remove_pred(h, hash, same_node_pred, item);
672	}
673
674
675	static bool remove_pred(cht_t h, size_t hash, equal_pred_t pred, void pred_arg)
676	{
677	rcu_read_lock();
678
679	bool resizing = false;
680	bool deleted = false;
681	bool deleted_but_gc = false;
682
683	cht_buckets_t *b = rcu_access(h->b);
684	size_t idx = calc_bucket_idx(hash, b->order);
685	marked_ptr_t *phead = &b->head[idx];
686
687	do {
688	walk_mode_t walk_mode = WM_NORMAL;
689	bool join_finishing = false;
690
691	resizing = resizing \|\| (N_NORMAL != get_mark(*phead));
692
693	/* The table is resizing. Get the correct bucket head. */
694	if (resizing) {
695	upd_resizing_head(h, hash, &phead, &join_finishing, &walk_mode);
696	}
697
698	wnd_t wnd = {
699	.ppred = phead,
700	.cur = get_next(*phead),
701	.last = 0
702	};
703
704	if (!find_wnd_and_gc_pred(
705	h, hash, walk_mode, pred, pred_arg, &wnd, &resizing)) {
706	/* Could not GC a node; or detected an unexpected resize. */
707	continue;
708	}
709
710	/*
711	* The item lookup is affected by a bucket join but effects of
712	* the bucket join have not been seen while searching for the item.
713	*/
714	if (join_finishing && !join_completed(h, &wnd)) {
715	/*
716	* Bucket was appended at the end of another but the next
717	* ptr linking them together was not visible on this cpu.
718	* join_completed() makes this appended bucket visible.
719	*/
720	continue;
721	}
722
723	/* Already deleted, but delete_at() requested one GC pass. */
724	if (deleted_but_gc)
725	break;
726
727	bool found = (wnd.cur != &sentinel && pred(pred_arg, wnd.cur));
728
729	if (!found) {
730	rcu_read_unlock();
731	return false;
732	}
733
734	deleted = delete_at(h, &wnd, walk_mode, &deleted_but_gc, &resizing);
735	} while (!deleted \|\| deleted_but_gc);
736
737	rcu_read_unlock();
738	return true;
739	}
740
741
742	static inline bool delete_at(cht_t h, wnd_t wnd, walk_mode_t walk_mode,
743	bool deleted_but_gc, bool resizing)
744	{
745	ASSERT(wnd->cur && wnd->cur != &sentinel);
746
747	*deleted_but_gc = false;
748
749	if (!mark_deleted(wnd->cur, walk_mode, resizing)) {
750	/* Already deleted, or unexpectedly marked as JOIN/JOIN_FOLLOWS. */
751	return false;
752	}
753
754	/* Marked deleted. Unlink from the bucket. */
755
756	/* Never unlink join nodes. */
757	if (walk_mode == WM_LEAVE_JOIN && (N_JOIN & get_mark(wnd->cur->link)))
758	return true;
759
760	cas_order_barrier();
761
762	if (unlink_from_pred(wnd, walk_mode, resizing)) {
763	free_later(h, wnd->cur);
764	} else {
765	*deleted_but_gc = true;
766	}
767
768	return true;
769	}
770
771	static inline bool mark_deleted(cht_link_t *cur, walk_mode_t walk_mode,
772	bool *resizing)
773	{
774	ASSERT(cur && cur != &sentinel);
775
776	/*
777	* Btw, we could loop here if the cas fails but let's not complicate
778	* things and let's retry from the head of the bucket.
779	*/
780
781	cht_link_t *next = get_next(cur->link);
782
783	if (walk_mode == WM_NORMAL) {
784	/* Only mark clean/normal nodes - JF/JN is used only during resize. */
785	marked_ptr_t ret = cas_link(&cur->link, next, N_NORMAL, next, N_DELETED);
786
787	if (ret != make_link(next, N_NORMAL)) {
788	*resizing = (N_JOIN \| N_JOIN_FOLLOWS) & get_mark(ret);
789	return false;
790	}
791	} else {
792	ASSERT(N_JOIN == N_JOIN_FOLLOWS);
793
794	/* Keep the N_JOIN/N_JOIN_FOLLOWS mark but strip N_DELETED. */
795	mark_t cur_mark = get_mark(cur->link) & N_JOIN_FOLLOWS;
796
797	marked_ptr_t ret =
798	cas_link(&cur->link, next, cur_mark, next, cur_mark \| N_DELETED);
799
800	if (ret != make_link(next, cur_mark))
801	return false;
802	}
803
804	return true;
805	}
806
807	static inline bool unlink_from_pred(wnd_t *wnd, walk_mode_t walk_mode,
808	bool *resizing)
809	{
810	ASSERT(wnd->cur != &sentinel);
811	ASSERT(wnd->cur && (N_DELETED & get_mark(wnd->cur->link)));
812
813	cht_link_t *next = get_next(wnd->cur->link);
814
815	if (walk_mode == WM_LEAVE_JOIN) {
816	/* Never try to unlink join nodes. */
817	ASSERT(!(N_JOIN & get_mark(wnd->cur->link)));
818
819	mark_t pred_mark = get_mark(*wnd->ppred);
820	/* Succeed only if the predecessor is clean/normal or a join node. */
821	mark_t exp_pred_mark = (N_JOIN & pred_mark) ? pred_mark : N_NORMAL;
822
823	marked_ptr_t pred_link = make_link(wnd->cur, exp_pred_mark);
824	marked_ptr_t next_link = make_link(next, exp_pred_mark);
825
826	if (pred_link != _cas_link(wnd->ppred, pred_link, next_link))
827	return false;
828	} else {
829	ASSERT(walk_mode == WM_MOVE_JOIN_FOLLOWS \|\| walk_mode == WM_NORMAL);
830	/* Move the JF mark if set. Clear DEL mark. */
831	mark_t cur_mark = N_JOIN_FOLLOWS & get_mark(wnd->cur->link);
832
833	/* The predecessor must be clean/normal. */
834	marked_ptr_t pred_link = make_link(wnd->cur, N_NORMAL);
835	/* Link to cur's successor keeping/copying cur's JF mark. */
836	marked_ptr_t next_link = make_link(next, cur_mark);
837
838	marked_ptr_t ret = _cas_link(wnd->ppred, pred_link, next_link);
839
840	if (pred_link != ret) {
841	/* If we're not resizing the table there are no JF/JN nodes. */
842	*resizing = (walk_mode == WM_NORMAL)
843	&& (N_JOIN_FOLLOWS & get_mark(ret));
844	return false;
845	}
846	}
847
848	return true;
849	}
850
851
852	static bool find_wnd_and_gc_pred(cht_t *h, size_t hash, walk_mode_t walk_mode,
853	equal_pred_t pred, void pred_arg, wnd_t wnd, bool *resizing)
854	{
855	ASSERT(wnd->cur);
856
857	if (wnd->cur == &sentinel)
858	return true;
859
860	/*
861	* A read barrier is not needed here to bring up the most recent
862	* node marks (esp the N_DELETED). At worst we'll try to delete
863	* an already deleted node; fail in delete_at(); and retry.
864	*/
865
866	size_t cur_hash;
867
868	try_again:
869	cur_hash = node_hash(h, wnd->cur);
870
871	while (cur_hash <= hash) {
872	ASSERT(wnd->cur && wnd->cur != &sentinel);
873
874	/* GC any deleted nodes on the way. */
875	if (N_DELETED & get_mark(wnd->cur->link)) {
876	if (!gc_deleted_node(h, walk_mode, wnd, resizing)) {
877	/* Retry from the head of a bucket. */
878	return false;
879	}
880	} else {
881	/* Is this the node we were looking for? */
882	if (cur_hash == hash && pred(pred_arg, wnd->cur))
883	return true;
884
885	next_wnd(wnd);
886	}
887
888	cur_hash = node_hash(h, wnd->cur);
889	}
890
891	if (cur_hash == h->invalid_hash) {
892	next_wnd(wnd);
893	ASSERT(wnd->cur);
894	goto try_again;
895	}
896
897	/* The searched for node is not in the current bucket. */
898	return true;
899	}
900
901	/* todo: comment different semantics (eg deleted JN first w/ specific hash) */
902	static bool find_wnd_and_gc(cht_t *h, size_t hash, walk_mode_t walk_mode,
903	wnd_t wnd, bool resizing)
904	{
905	try_again:
906	ASSERT(wnd->cur);
907
908	while (node_hash(h, wnd->cur) < hash) {
909	/* GC any deleted nodes along the way to our desired node. */
910	if (N_DELETED & get_mark(wnd->cur->link)) {
911	if (!gc_deleted_node(h, walk_mode, wnd, resizing)) {
912	/* Failed to remove the garbage node. Retry. */
913	return false;
914	}
915	} else {
916	next_wnd(wnd);
917	}
918
919	ASSERT(wnd->cur);
920	}
921
922	if (node_hash(h, wnd->cur) == h->invalid_hash) {
923	next_wnd(wnd);
924	goto try_again;
925	}
926
927	/* wnd->cur may be 0 or even marked N_DELETED. */
928	return true;
929	}
930
931	static bool gc_deleted_node(cht_t h, walk_mode_t walk_mode, wnd_t wnd,
932	bool *resizing)
933	{
934	ASSERT(N_DELETED & get_mark(wnd->cur->link));
935
936	/* Skip deleted JOIN nodes. */
937	if (walk_mode == WM_LEAVE_JOIN && (N_JOIN & get_mark(wnd->cur->link))) {
938	next_wnd(wnd);
939	} else {
940	/* Ordinary deleted node or a deleted JOIN_FOLLOWS. */
941	ASSERT(walk_mode != WM_LEAVE_JOIN
942	\|\| !((N_JOIN \| N_JOIN_FOLLOWS) & get_mark(wnd->cur->link)));
943
944	/* Unlink an ordinary deleted node, move JOIN_FOLLOWS mark. */
945	if (!unlink_from_pred(wnd, walk_mode, resizing)) {
946	/* Retry. The predecessor was deleted, invalid, const, join_follows. */
947	return false;
948	}
949
950	free_later(h, wnd->cur);
951
952	/* Leave ppred as is. */
953	wnd->last = wnd->cur;
954	wnd->cur = get_next(wnd->cur->link);
955	}
956
957	return true;
958	}
959
960	static bool join_completed(cht_t h, const wnd_t wnd)
961	{
962	/*
963	* The table is shrinking and the searched for item is in a bucket
964	* appended to another. Check that the link joining these two buckets
965	* is visible and if not, make it visible to this cpu.
966	*/
967
968	/*
969	* Resizer ensures h->b->order stays the same for the duration of this
970	* func. We got here because there was an alternative head to search.
971	* The resizer waits for all preexisting readers to finish after
972	* it
973	*/
974	ASSERT(h->b->order > h->new_b->order);
975	ASSERT(wnd->cur);
976
977	/* Either we did not need the joining link or we have already followed it.*/
978	if (wnd->cur != &sentinel)
979	return true;
980
981	/* We have reached the end of a bucket. */
982
983	if (wnd->last != &sentinel) {
984	size_t last_seen_hash = node_hash(h, wnd->last);
985
986	if (last_seen_hash == h->invalid_hash) {
987	last_seen_hash = calc_node_hash(h, wnd->last);
988	}
989
990	size_t last_old_idx = calc_bucket_idx(last_seen_hash, h->b->order);
991	size_t move_src_idx = grow_idx(shrink_idx(last_old_idx));
992
993	/*
994	* Last node seen was in the joining bucket - if the searched
995	* for node is there we will find it.
996	*/
997	if (move_src_idx != last_old_idx)
998	return true;
999	}
1000
1001	/*
1002	* Reached the end of the bucket but no nodes from the joining bucket
1003	* were seen. There should have at least been a JOIN node so we have
1004	* definitely not seen (and followed) the joining link. Make the link
1005	* visible and retry.
1006	*/
1007	read_barrier();
1008	return false;
1009	}
1010
1011	static void upd_resizing_head(cht_t h, size_t hash, marked_ptr_t *phead,
1012	bool join_finishing, walk_mode_t walk_mode)
1013	{
1014	cht_buckets_t *b = rcu_access(h->b);
1015	size_t old_idx = calc_bucket_idx(hash, b->order);
1016	size_t new_idx = calc_bucket_idx(hash, h->new_b->order);
1017
1018	marked_ptr_t *pold_head = &b->head[old_idx];
1019	marked_ptr_t *pnew_head = &h->new_b->head[new_idx];
1020
1021	/* In any case, use the bucket in the new table. */
1022	*phead = pnew_head;
1023
1024	/* Growing the table. */
1025	if (b->order < h->new_b->order) {
1026	size_t move_dest_idx = grow_idx(old_idx);
1027	marked_ptr_t *pmoved_head = &h->new_b->head[move_dest_idx];
1028
1029	/* Complete moving the bucket from the old to the new table. */
1030	help_head_move(pold_head, pmoved_head);
1031
1032	/* The hash belongs to the moved bucket. */
1033	if (move_dest_idx == new_idx) {
1034	ASSERT(pmoved_head == pnew_head);
1035	/*
1036	* move_head() makes the new head of the moved bucket visible.
1037	* The new head may be marked with a JOIN_FOLLOWS
1038	*/
1039	ASSERT(!(N_CONST & get_mark(*pmoved_head)));
1040	*walk_mode = WM_MOVE_JOIN_FOLLOWS;
1041	} else {
1042	ASSERT(pmoved_head != pnew_head);
1043	/*
1044	* The hash belongs to the bucket that is the result of splitting
1045	* the old/moved bucket, ie the bucket that contains the second
1046	* half of the split/old/moved bucket.
1047	*/
1048
1049	/* The moved bucket has not yet been split. */
1050	if (N_NORMAL != get_mark(*pnew_head)) {
1051	size_t split_hash = calc_split_hash(new_idx, h->new_b->order);
1052	split_bucket(h, pmoved_head, pnew_head, split_hash);
1053	/*
1054	* split_bucket() makes the new head visible. No
1055	* JOIN_FOLLOWS in this part of split bucket.
1056	*/
1057	ASSERT(N_NORMAL == get_mark(*pnew_head));
1058	}
1059
1060	*walk_mode = WM_LEAVE_JOIN;
1061	}
1062	} else if (h->new_b->order < b->order ) {
1063	/* Shrinking the table. */
1064
1065	size_t move_src_idx = grow_idx(new_idx);
1066
1067	/*
1068	* Complete moving the bucket from the old to the new table.
1069	* Makes a valid pnew_head visible if already moved.
1070	*/
1071	help_head_move(&b->head[move_src_idx], pnew_head);
1072
1073	/* Hash belongs to the bucket to be joined with the moved bucket. */
1074	if (move_src_idx != old_idx) {
1075	/* Bucket join not yet completed. */
1076	if (N_INVALID != get_mark(*pold_head)) {
1077	size_t split_hash = calc_split_hash(old_idx, b->order);
1078	join_buckets(h, pold_head, pnew_head, split_hash);
1079	}
1080
1081	/*
1082	* The resizer sets pold_head to &sentinel when all cpus are
1083	* guaranteed to see the bucket join.
1084	*/
1085	join_finishing = (&sentinel != get_next(pold_head));
1086	}
1087
1088	/* move_head() or join_buckets() makes it so or makes the mark visible.*/
1089	ASSERT(N_INVALID == get_mark(*pold_head));
1090	/* move_head() makes it visible. No JOIN_FOLLOWS used when shrinking. */
1091	ASSERT(N_NORMAL == get_mark(*pnew_head));
1092
1093	*walk_mode = WM_LEAVE_JOIN;
1094	} else {
1095	/*
1096	* Final stage of resize. The resizer is waiting for all
1097	* readers to notice that the old table had been replaced.
1098	*/
1099	ASSERT(b == h->new_b);
1100	*walk_mode = WM_NORMAL;
1101	}
1102	}
1103
1104
#if 0
/*
 * Disabled helper: starts and completes the move of a bucket head in one
 * go. Kept for reference only; the live code calls start_head_move() and
 * complete_head_move() separately.
 */
static void move_head(marked_ptr_t *psrc_head, marked_ptr_t *pdest_head)
{
	start_head_move(psrc_head);
	cas_order_barrier();
	complete_head_move(psrc_head, pdest_head);
}
#endif
1113
1114	static inline void help_head_move(marked_ptr_t *psrc_head,
1115	marked_ptr_t *pdest_head)
1116	{
1117	/* Head move has to in progress already when calling this func. */
1118	ASSERT(N_CONST & get_mark(*psrc_head));
1119
1120	/* Head already moved. */
1121	if (N_INVALID == get_mark(*psrc_head)) {
1122	/* Effects of the head move have not yet propagated to this cpu. */
1123	if (N_INVALID == get_mark(*pdest_head)) {
1124	/* Make the move visible on this cpu. */
1125	read_barrier();
1126	}
1127	} else {
1128	complete_head_move(psrc_head, pdest_head);
1129	}
1130
1131	ASSERT(!(N_CONST & get_mark(*pdest_head)));
1132	}
1133
1134	static void start_head_move(marked_ptr_t *psrc_head)
1135	{
1136	/* Mark src head immutable. */
1137	mark_const(psrc_head);
1138	}
1139
1140	static void mark_const(marked_ptr_t *psrc_head)
1141	{
1142	marked_ptr_t ret, src_link;
1143
1144	/* Mark src head immutable. */
1145	do {
1146	cht_link_t next = get_next(psrc_head);
1147	src_link = make_link(next, N_NORMAL);
1148
1149	/* Mark the normal/clean src link immutable/const. */
1150	ret = cas_link(psrc_head, next, N_NORMAL, next, N_CONST);
1151	} while(ret != src_link && !(N_CONST & get_mark(ret)));
1152	}
1153
1154	static void complete_head_move(marked_ptr_t psrc_head, marked_ptr_t pdest_head)
1155	{
1156	ASSERT(N_JOIN_FOLLOWS != get_mark(*psrc_head));
1157	ASSERT(N_CONST & get_mark(*psrc_head));
1158
1159	cht_link_t next = get_next(psrc_head);
1160	marked_ptr_t ret;
1161
1162	ret = cas_link(pdest_head, &sentinel, N_INVALID, next, N_NORMAL);
1163	ASSERT(ret == make_link(&sentinel, N_INVALID) \|\| (N_NORMAL == get_mark(ret)));
1164	cas_order_barrier();
1165
1166	ret = cas_link(psrc_head, next, N_CONST, next, N_INVALID);
1167	ASSERT(ret == make_link(next, N_CONST) \|\| (N_INVALID == get_mark(ret)));
1168	cas_order_barrier();
1169	}
1170
1171	static void split_bucket(cht_t h, marked_ptr_t psrc_head,
1172	marked_ptr_t *pdest_head, size_t split_hash)
1173	{
1174	/* Already split. */
1175	if (N_NORMAL == get_mark(*pdest_head))
1176	return;
1177
1178	/*
1179	* L == Last node of the first part of the split bucket. That part
1180	* remains in the original/src bucket.
1181	* F == First node of the second part of the split bucket. That part
1182	* will be referenced from the dest bucket head.
1183	*
1184	* We want to first mark a clean L as JF so that updaters unaware of
1185	* the split (or table resize):
1186	* - do not insert a new node between L and F
1187	* - do not unlink L (that is why it has to be clean/normal)
1188	* - do not unlink F
1189	*
1190	* Then we can safely mark F as JN even if it has been marked deleted.
1191	* Once F is marked as JN updaters aware of table resize will not
1192	* attempt to unlink it (JN will have two predecessors - we cannot
1193	* safely unlink from both at the same time). Updaters unaware of
1194	* ongoing resize can reach F only via L and that node is already
1195	* marked JF, so they won't unlink F.
1196	*
1197	* Last, link the new/dest head to F.
1198	*
1199	*
1200	* 0) ,-- split_hash, first hash of the dest bucket
1201	* v
1202	* [src_head \| N] -> .. -> [L] -> [F]
1203	* [dest_head \| Inv]
1204	*
1205	* 1) ,-- split_hash
1206	* v
1207	* [src_head \| N] -> .. -> [JF] -> [F]
1208	* [dest_head \| Inv]
1209	*
1210	* 2) ,-- split_hash
1211	* v
1212	* [src_head \| N] -> .. -> [JF] -> [JN]
1213	* [dest_head \| Inv]
1214	*
1215	* 2) ,-- split_hash
1216	* v
1217	* [src_head \| N] -> .. -> [JF] -> [JN]
1218	* ^
1219	* [dest_head \| N] -----------------'
1220	*/
1221	wnd_t wnd;
1222
1223	rcu_read_lock();
1224
1225	/* Mark the last node of the first part of the split bucket as JF. */
1226	mark_join_follows(h, psrc_head, split_hash, &wnd);
1227	cas_order_barrier();
1228
1229	/* There are nodes in the dest bucket, ie the second part of the split. */
1230	if (wnd.cur != &sentinel) {
1231	/*
1232	* Mark the first node of the dest bucket as a join node so
1233	* updaters do not attempt to unlink it if it is deleted.
1234	*/
1235	mark_join_node(wnd.cur);
1236	cas_order_barrier();
1237	} else {
1238	/*
1239	* Second part of the split bucket is empty. There are no nodes
1240	* to mark as JOIN nodes and there never will be.
1241	*/
1242	}
1243
1244	/* Link the dest head to the second part of the split. */
1245	marked_ptr_t ret =
1246	cas_link(pdest_head, &sentinel, N_INVALID, wnd.cur, N_NORMAL);
1247	ASSERT(ret == make_link(&sentinel, N_INVALID) \|\| (N_NORMAL == get_mark(ret)));
1248	cas_order_barrier();
1249
1250	rcu_read_unlock();
1251	}
1252
1253	static void mark_join_follows(cht_t h, marked_ptr_t psrc_head,
1254	size_t split_hash, wnd_t *wnd)
1255	{
1256	/* See comment in split_bucket(). */
1257
1258	bool done;
1259	do {
1260	bool resizing = false;
1261	wnd->ppred = psrc_head;
1262	wnd->cur = get_next(*psrc_head);
1263
1264	/*
1265	* Find the split window, ie the last node of the first part of
1266	* the split bucket and the its successor - the first node of
1267	* the second part of the split bucket. Retry if GC failed.
1268	*/
1269	if (!find_wnd_and_gc(h, split_hash, WM_MOVE_JOIN_FOLLOWS, wnd, &resizing))
1270	continue;
1271
1272	/* Must not report that the table is resizing if WM_MOVE_JOIN_FOLLOWS.*/
1273	ASSERT(!resizing);
1274	/*
1275	* Mark the last node of the first half of the split bucket
1276	* that a join node follows. It must be clean/normal.
1277	*/
1278	marked_ptr_t ret
1279	= cas_link(wnd->ppred, wnd->cur, N_NORMAL, wnd->cur, N_JOIN_FOLLOWS);
1280
1281	/*
1282	* Successfully marked as a JF node or already marked that way (even
1283	* if also marked deleted - unlinking the node will move the JF mark).
1284	*/
1285	done = (ret == make_link(wnd->cur, N_NORMAL))
1286	\|\| (N_JOIN_FOLLOWS & get_mark(ret));
1287	} while (!done);
1288	}
1289
1290	static void mark_join_node(cht_link_t *join_node)
1291	{
1292	/* See comment in split_bucket(). */
1293
1294	bool done;
1295	do {
1296	cht_link_t *next = get_next(join_node->link);
1297	mark_t mark = get_mark(join_node->link);
1298
1299	/*
1300	* May already be marked as deleted, but it won't be unlinked
1301	* because its predecessor is marked with JOIN_FOLLOWS or CONST.
1302	*/
1303	marked_ptr_t ret
1304	= cas_link(&join_node->link, next, mark, next, mark \| N_JOIN);
1305
1306	/* Successfully marked or already marked as a join node. */
1307	done = (ret == make_link(next, mark))
1308	\|\| (N_JOIN & get_mark(ret));
1309	} while(!done);
1310	}
1311
1312
1313	static void join_buckets(cht_t h, marked_ptr_t psrc_head,
1314	marked_ptr_t *pdest_head, size_t split_hash)
1315	{
1316	/* Buckets already joined. */
1317	if (N_INVALID == get_mark(*psrc_head))
1318	return;
1319	/*
1320	* F == First node of psrc_head, ie the bucket we want to append
1321	* to (ie join with) the bucket starting at pdest_head.
1322	* L == Last node of pdest_head, ie the bucket that psrc_head will
1323	* be appended to.
1324	*
1325	* (1) We first mark psrc_head immutable to signal that a join is
1326	* in progress and so that updaters unaware of the join (or table
1327	* resize):
1328	* - do not insert new nodes between the head psrc_head and F
1329	* - do not unlink F (it may already be marked deleted)
1330	*
1331	* (2) Next, F is marked as a join node. Updaters aware of table resize
1332	* will not attempt to unlink it. We cannot safely/atomically unlink
1333	* the join node because it will be pointed to from two different
1334	* buckets. Updaters unaware of resize will fail to unlink the join
1335	* node due to the head being marked immutable.
1336	*
1337	* (3) Then the tail of the bucket at pdest_head is linked to the join
1338	* node. From now on, nodes in both buckets can be found via pdest_head.
1339	*
1340	* (4) Last, mark immutable psrc_head as invalid. It signals updaters
1341	* that the join is complete and they can insert new nodes (originally
1342	* destined for psrc_head) into pdest_head.
1343	*
1344	* Note that pdest_head keeps pointing at the join node. This allows
1345	* lookups and updaters to determine if they should see a link between
1346	* the tail L and F when searching for nodes originally in psrc_head
1347	* via pdest_head. If they reach the tail of pdest_head without
1348	* encountering any nodes of psrc_head, either there were no nodes
1349	* in psrc_head to begin with or the link between L and F did not
1350	* yet propagate to their cpus. If psrc_head was empty, it remains
1351	* NULL. Otherwise psrc_head points to a join node (it will not be
1352	* unlinked until table resize completes) and updaters/lookups
1353	* should issue a read_barrier() to make the link [L]->[JN] visible.
1354	*
1355	* 0) ,-- split_hash, first hash of the src bucket
1356	* v
1357	* [dest_head \| N]-> .. -> [L]
1358	* [src_head \| N]--> [F] -> ..
1359	* ^
1360	* ` split_hash, first hash of the src bucket
1361	*
1362	* 1) ,-- split_hash
1363	* v
1364	* [dest_head \| N]-> .. -> [L]
1365	* [src_head \| C]--> [F] -> ..
1366	*
1367	* 2) ,-- split_hash
1368	* v
1369	* [dest_head \| N]-> .. -> [L]
1370	* [src_head \| C]--> [JN] -> ..
1371	*
1372	* 3) ,-- split_hash
1373	* v
1374	* [dest_head \| N]-> .. -> [L] --+
1375	* v
1376	* [src_head \| C]-------------> [JN] -> ..
1377	*
1378	* 4) ,-- split_hash
1379	* v
1380	* [dest_head \| N]-> .. -> [L] --+
1381	* v
1382	* [src_head \| Inv]-----------> [JN] -> ..
1383	*/
1384
1385	rcu_read_lock();
1386
1387	/* Mark src_head immutable - signals updaters that bucket join started. */
1388	mark_const(psrc_head);
1389	cas_order_barrier();
1390
1391	cht_link_t join_node = get_next(psrc_head);
1392
1393	if (join_node != &sentinel) {
1394	mark_join_node(join_node);
1395	cas_order_barrier();
1396
1397	link_to_join_node(h, pdest_head, join_node, split_hash);
1398	cas_order_barrier();
1399	}
1400
1401	marked_ptr_t ret =
1402	cas_link(psrc_head, join_node, N_CONST, join_node, N_INVALID);
1403	ASSERT(ret == make_link(join_node, N_CONST) \|\| (N_INVALID == get_mark(ret)));
1404	cas_order_barrier();
1405
1406	rcu_read_unlock();
1407	}
1408
1409	static void link_to_join_node(cht_t h, marked_ptr_t pdest_head,
1410	cht_link_t *join_node, size_t split_hash)
1411	{
1412	bool done;
1413	do {
1414	wnd_t wnd = {
1415	.ppred = pdest_head,
1416	.cur = get_next(*pdest_head)
1417	};
1418
1419	bool resizing = false;
1420
1421	if (!find_wnd_and_gc(h, split_hash, WM_LEAVE_JOIN, &wnd, &resizing))
1422	continue;
1423
1424	ASSERT(!resizing);
1425
1426	if (wnd.cur != &sentinel) {
1427	/* Must be from the new appended bucket. */
1428	ASSERT(split_hash <= node_hash(h, wnd.cur)
1429	\|\| h->invalid_hash == node_hash(h, wnd.cur));
1430	return;
1431	}
1432
1433	/* Reached the tail of pdest_head - link it to the join node. */
1434	marked_ptr_t ret =
1435	cas_link(wnd.ppred, &sentinel, N_NORMAL, join_node, N_NORMAL);
1436
1437	done = (ret == make_link(&sentinel, N_NORMAL));
1438	} while (!done);
1439	}
1440
1441	static void free_later(cht_t h, cht_link_t item)
1442	{
1443	ASSERT(item != &sentinel);
1444
1445	/*
1446	* remove_callback only works as rcu_func_t because rcu_link is the first
1447	* field in cht_link_t.
1448	*/
1449	rcu_call(&item->rcu_link, (rcu_func_t)h->op->remove_callback);
1450
1451	item_removed(h);
1452	}
1453
1454	static inline void item_removed(cht_t *h)
1455	{
1456	size_t items = (size_t) atomic_predec(&h->item_cnt);
1457	size_t bucket_cnt = (1 << h->b->order);
1458
1459	bool need_shrink = (items == h->max_load * bucket_cnt / 4);
1460	bool missed_shrink = (items == h->max_load * bucket_cnt / 8);
1461
1462	if ((need_shrink \|\| missed_shrink) && h->b->order > h->min_order) {
1463	atomic_count_t resize_reqs = atomic_preinc(&h->resize_reqs);
1464	/* The first resize request. Start the resizer. */
1465	if (1 == resize_reqs) {
1466	workq_global_enqueue_noblock(&h->resize_work, resize_table);
1467	}
1468	}
1469	}
1470
1471	static inline void item_inserted(cht_t *h)
1472	{
1473	size_t items = (size_t) atomic_preinc(&h->item_cnt);
1474	size_t bucket_cnt = (1 << h->b->order);
1475
1476	bool need_grow = (items == h->max_load * bucket_cnt);
1477	bool missed_grow = (items == 2 * h->max_load * bucket_cnt);
1478
1479	if ((need_grow \|\| missed_grow) && h->b->order < CHT_MAX_ORDER) {
1480	atomic_count_t resize_reqs = atomic_preinc(&h->resize_reqs);
1481	/* The first resize request. Start the resizer. */
1482	if (1 == resize_reqs) {
1483	workq_global_enqueue_noblock(&h->resize_work, resize_table);
1484	}
1485	}
1486	}
1487
1488	static void resize_table(work_t *arg)
1489	{
1490	cht_t *h = member_to_inst(arg, cht_t, resize_work);
1491
1492	#ifdef CONFIG_DEBUG
1493	ASSERT(h->b);
1494	/* Make resize_reqs visible. */
1495	read_barrier();
1496	ASSERT(0 < atomic_get(&h->resize_reqs));
1497	#endif
1498
1499	bool done;
1500	do {
1501	/* Load the most recent h->item_cnt. */
1502	read_barrier();
1503	size_t cur_items = (size_t) atomic_get(&h->item_cnt);
1504	size_t bucket_cnt = (1 << h->b->order);
1505	size_t max_items = h->max_load * bucket_cnt;
1506
1507	if (cur_items >= max_items && h->b->order < CHT_MAX_ORDER) {
1508	grow_table(h);
1509	} else if (cur_items <= max_items / 4 && h->b->order > h->min_order) {
1510	shrink_table(h);
1511	} else {
1512	/* Table is just the right size. */
1513	atomic_count_t reqs = atomic_predec(&h->resize_reqs);
1514	done = (reqs == 0);
1515	}
1516	} while (!done);
1517	}
1518
1519	static void grow_table(cht_t *h)
1520	{
1521	if (h->b->order >= CHT_MAX_ORDER)
1522	return;
1523
1524	h->new_b = alloc_buckets(h->b->order + 1, true);
1525
1526	/* Failed to alloc a new table - try next time the resizer is run. */
1527	if (!h->new_b)
1528	return;
1529
1530	/* Wait for all readers and updaters to see the initialized new table. */
1531	rcu_synchronize();
1532	size_t old_bucket_cnt = (1 << h->b->order);
1533
1534	/*
1535	* Give updaters a chance to help out with the resize. Do the minimum
1536	* work needed to announce a resize is in progress, ie start moving heads.
1537	*/
1538	for (size_t idx = 0; idx < old_bucket_cnt; ++idx) {
1539	start_head_move(&h->b->head[idx]);
1540	}
1541
1542	/* Order start_head_move() wrt complete_head_move(). */
1543	cas_order_barrier();
1544
1545	/* Complete moving heads and split any buckets not yet split by updaters. */
1546	for (size_t old_idx = 0; old_idx < old_bucket_cnt; ++old_idx) {
1547	marked_ptr_t *move_dest_head = &h->new_b->head[grow_idx(old_idx)];
1548	marked_ptr_t *move_src_head = &h->b->head[old_idx];
1549
1550	/* Head move not yet completed. */
1551	if (N_INVALID != get_mark(*move_src_head)) {
1552	complete_head_move(move_src_head, move_dest_head);
1553	}
1554
1555	size_t split_idx = grow_to_split_idx(old_idx);
1556	size_t split_hash = calc_split_hash(split_idx, h->new_b->order);
1557	marked_ptr_t *split_dest_head = &h->new_b->head[split_idx];
1558
1559	split_bucket(h, move_dest_head, split_dest_head, split_hash);
1560	}
1561
1562	/*
1563	* Wait for all updaters to notice the new heads. Once everyone sees
1564	* the invalid old bucket heads they will know a resize is in progress
1565	* and updaters will modify the correct new buckets.
1566	*/
1567	rcu_synchronize();
1568
1569	/* Clear the JOIN_FOLLOWS mark and remove the link between the split buckets.*/
1570	for (size_t old_idx = 0; old_idx < old_bucket_cnt; ++old_idx) {
1571	size_t new_idx = grow_idx(old_idx);
1572
1573	cleanup_join_follows(h, &h->new_b->head[new_idx]);
1574	}
1575
1576	/*
1577	* Wait for everyone to notice that buckets were split, ie link connecting
1578	* the join follows and join node has been cut.
1579	*/
1580	rcu_synchronize();
1581
1582	/* Clear the JOIN mark and GC any deleted join nodes. */
1583	for (size_t old_idx = 0; old_idx < old_bucket_cnt; ++old_idx) {
1584	size_t new_idx = grow_to_split_idx(old_idx);
1585
1586	cleanup_join_node(h, &h->new_b->head[new_idx]);
1587	}
1588
1589	/* Wait for everyone to see that the table is clear of any resize marks. */
1590	rcu_synchronize();
1591
1592	cht_buckets_t *old_b = h->b;
1593	rcu_assign(h->b, h->new_b);
1594
1595	/* Wait for everyone to start using the new table. */
1596	rcu_synchronize();
1597
1598	free(old_b);
1599
1600	/* Not needed; just for increased readability. */
1601	h->new_b = 0;
1602	}
1603
1604	static void shrink_table(cht_t *h)
1605	{
1606	if (h->b->order <= h->min_order)
1607	return;
1608
1609	h->new_b = alloc_buckets(h->b->order - 1, true);
1610
1611	/* Failed to alloc a new table - try next time the resizer is run. */
1612	if (!h->new_b)
1613	return;
1614
1615	/* Wait for all readers and updaters to see the initialized new table. */
1616	rcu_synchronize();
1617
1618	size_t old_bucket_cnt = (1 << h->b->order);
1619
1620	/*
1621	* Give updaters a chance to help out with the resize. Do the minimum
1622	* work needed to announce a resize is in progress, ie start moving heads.
1623	*/
1624	for (size_t old_idx = 0; old_idx < old_bucket_cnt; ++old_idx) {
1625	size_t new_idx = shrink_idx(old_idx);
1626
1627	/* This bucket should be moved. */
1628	if (grow_idx(new_idx) == old_idx) {
1629	start_head_move(&h->b->head[old_idx]);
1630	} else {
1631	/* This bucket should join the moved bucket once the move is done.*/
1632	}
1633	}
1634
1635	/* Order start_head_move() wrt to complete_head_move(). */
1636	cas_order_barrier();
1637
1638	/* Complete moving heads and join buckets with the moved buckets. */
1639	for (size_t old_idx = 0; old_idx < old_bucket_cnt; ++old_idx) {
1640	size_t new_idx = shrink_idx(old_idx);
1641	size_t move_src_idx = grow_idx(new_idx);
1642
1643	/* This bucket should be moved. */
1644	if (move_src_idx == old_idx) {
1645	/* Head move not yet completed. */
1646	if (N_INVALID != get_mark(h->b->head[old_idx])) {
1647	complete_head_move(&h->b->head[old_idx], &h->new_b->head[new_idx]);
1648	}
1649	} else {
1650	/* This bucket should join the moved bucket. */
1651	size_t split_hash = calc_split_hash(old_idx, h->b->order);
1652	join_buckets(h, &h->b->head[old_idx], &h->new_b->head[new_idx],
1653	split_hash);
1654	}
1655	}
1656
1657	/*
1658	* Wait for all updaters to notice the new heads. Once everyone sees
1659	* the invalid old bucket heads they will know a resize is in progress
1660	* and updaters will modify the correct new buckets.
1661	*/
1662	rcu_synchronize();
1663
1664	/* Let everyone know joins are complete and fully visible. */
1665	for (size_t old_idx = 0; old_idx < old_bucket_cnt; ++old_idx) {
1666	size_t move_src_idx = grow_idx(shrink_idx(old_idx));
1667
1668	/* Set the invalid joinee head to NULL. */
1669	if (old_idx != move_src_idx) {
1670	ASSERT(N_INVALID == get_mark(h->b->head[old_idx]));
1671
1672	if (&sentinel != get_next(h->b->head[old_idx]))
1673	h->b->head[old_idx] = make_link(&sentinel, N_INVALID);
1674	}
1675	}
1676
1677	/* todo comment join node vs reset joinee head*/
1678	rcu_synchronize();
1679
1680	size_t new_bucket_cnt = (1 << h->new_b->order);
1681
1682	/* Clear the JOIN mark and GC any deleted join nodes. */
1683	for (size_t new_idx = 0; new_idx < new_bucket_cnt; ++new_idx) {
1684	cleanup_join_node(h, &h->new_b->head[new_idx]);
1685	}
1686
1687	/* Wait for everyone to see that the table is clear of any resize marks. */
1688	rcu_synchronize();
1689
1690	cht_buckets_t *old_b = h->b;
1691	rcu_assign(h->b, h->new_b);
1692
1693	/* Wait for everyone to start using the new table. */
1694	rcu_synchronize();
1695
1696	free(old_b);
1697
1698	/* Not needed; just for increased readability. */
1699	h->new_b = 0;
1700	}
1701
1702	static void cleanup_join_node(cht_t h, marked_ptr_t new_head)
1703	{
1704	rcu_read_lock();
1705
1706	cht_link_t cur = get_next(new_head);
1707
1708	while (cur != &sentinel) {
1709	/* Clear the join node's JN mark - even if it is marked as deleted. */
1710	if (N_JOIN & get_mark(cur->link)) {
1711	clear_join_and_gc(h, cur, new_head);
1712	break;
1713	}
1714
1715	cur = get_next(cur->link);
1716	}
1717
1718	rcu_read_unlock();
1719	}
1720
1721	static void clear_join_and_gc(cht_t h, cht_link_t join_node,
1722	marked_ptr_t *new_head)
1723	{
1724	ASSERT(join_node != &sentinel);
1725	ASSERT(join_node && (N_JOIN & get_mark(join_node->link)));
1726
1727	bool done;
1728
1729	/* Clear the JN mark. */
1730	do {
1731	marked_ptr_t jn_link = join_node->link;
1732	cht_link_t *next = get_next(jn_link);
1733	/* Clear the JOIN mark but keep the DEL mark if present. */
1734	mark_t cleared_mark = get_mark(jn_link) & N_DELETED;
1735
1736	marked_ptr_t ret =
1737	_cas_link(&join_node->link, jn_link, make_link(next, cleared_mark));
1738
1739	/* Done if the mark was cleared. Retry if a new node was inserted. */
1740	done = (ret == jn_link);
1741	ASSERT(ret == jn_link \|\| (get_mark(ret) & N_JOIN));
1742	} while (!done);
1743
1744	if (!(N_DELETED & get_mark(join_node->link)))
1745	return;
1746
1747	/* The join node had been marked as deleted - GC it. */
1748
1749	/* Clear the JOIN mark before trying to unlink the deleted join node.*/
1750	cas_order_barrier();
1751
1752	size_t jn_hash = node_hash(h, join_node);
1753	do {
1754	bool resizing = false;
1755
1756	wnd_t wnd = {
1757	.ppred = new_head,
1758	.cur = get_next(*new_head)
1759	};
1760
1761	done = find_wnd_and_gc_pred(h, jn_hash, WM_NORMAL, same_node_pred,
1762	join_node, &wnd, &resizing);
1763
1764	ASSERT(!resizing);
1765	} while (!done);
1766	}
1767
1768	static void cleanup_join_follows(cht_t h, marked_ptr_t new_head)
1769	{
1770	ASSERT(new_head);
1771
1772	rcu_read_lock();
1773
1774	wnd_t wnd = {
1775	.ppred = 0,
1776	.cur = 0
1777	};
1778	marked_ptr_t *cur_link = new_head;
1779
1780	/*
1781	* Find the non-deleted node with a JF mark and clear the JF mark.
1782	* The JF node may be deleted and/or the mark moved to its neighbors
1783	* at any time. Therefore, we GC deleted nodes until we find the JF
1784	* node in order to remove stale/deleted JF nodes left behind eg by
1785	* delayed threads that did not yet get a chance to unlink the deleted
1786	* JF node and move its mark.
1787	*
1788	* Note that the head may be marked JF (but never DELETED).
1789	*/
1790	while (true) {
1791	bool is_jf_node = N_JOIN_FOLLOWS & get_mark(*cur_link);
1792
1793	/* GC any deleted nodes on the way - even deleted JOIN_FOLLOWS. */
1794	if (N_DELETED & get_mark(*cur_link)) {
1795	ASSERT(cur_link != new_head);
1796	ASSERT(wnd.ppred && wnd.cur && wnd.cur != &sentinel);
1797	ASSERT(cur_link == &wnd.cur->link);
1798
1799	bool dummy;
1800	bool deleted = gc_deleted_node(h, WM_MOVE_JOIN_FOLLOWS, &wnd, &dummy);
1801
1802	/* Failed to GC or collected a deleted JOIN_FOLLOWS. */
1803	if (!deleted \|\| is_jf_node) {
1804	/* Retry from the head of the bucket. */
1805	cur_link = new_head;
1806	continue;
1807	}
1808	} else {
1809	/* Found a non-deleted JF. Clear its JF mark. */
1810	if (is_jf_node) {
1811	cht_link_t next = get_next(cur_link);
1812	marked_ptr_t ret =
1813	cas_link(cur_link, next, N_JOIN_FOLLOWS, &sentinel, N_NORMAL);
1814
1815	ASSERT(next == &sentinel
1816	\|\| ((N_JOIN \| N_JOIN_FOLLOWS) & get_mark(ret)));
1817
1818	/* Successfully cleared the JF mark of a non-deleted node. */
1819	if (ret == make_link(next, N_JOIN_FOLLOWS)) {
1820	break;
1821	} else {
1822	/*
1823	* The JF node had been deleted or a new node inserted
1824	* right after it. Retry from the head.
1825	*/
1826	cur_link = new_head;
1827	continue;
1828	}
1829	} else {
1830	wnd.ppred = cur_link;
1831	wnd.cur = get_next(*cur_link);
1832	}
1833	}
1834
1835	/* We must encounter a JF node before we reach the end of the bucket. */
1836	ASSERT(wnd.cur && wnd.cur != &sentinel);
1837	cur_link = &wnd.cur->link;
1838	}
1839
1840	rcu_read_unlock();
1841	}
1842
1843
/** Returns the smallest hash that maps to bucket split_idx.
 *
 * @param split_idx  Bucket index in a table of the given order.
 * @param order      Base 2 logarithm of the table's bucket count; must be
 *                   between 1 and the bit width of size_t.
 */
static inline size_t calc_split_hash(size_t split_idx, size_t order)
{
	const size_t hash_bits = 8 * sizeof(size_t);

	ASSERT(1 <= order && order <= hash_bits);

	/* The bucket index occupies the topmost order bits of a hash. */
	return split_idx << (hash_bits - order);
}
1849
/** Returns the index of the bucket a hash belongs to.
 *
 * @param hash   Hash of an item or a key.
 * @param order  Base 2 logarithm of the table's bucket count; must be
 *               between 1 and the bit width of size_t.
 */
static inline size_t calc_bucket_idx(size_t hash, size_t order)
{
	const size_t hash_bits = 8 * sizeof(size_t);

	ASSERT(1 <= order && order <= hash_bits);

	/* The bucket index is stored in the topmost order bits of the hash. */
	return hash >> (hash_bits - order);
}
1855
/** Returns the new-table index of the bucket split off from bucket old_idx.
 *
 * When growing, old bucket i is moved to grow_idx(i) and its second half
 * is split off into the bucket right after it, ie grow_idx(i) | 1.
 */
static inline size_t grow_to_split_idx(size_t old_idx)
{
	return grow_idx(old_idx) | 1;
}
1860
/** Maps a bucket index to the corresponding index in a table twice as big. */
static inline size_t grow_idx(size_t idx)
{
	/* Unsigned doubling; identical to shifting left by one bit. */
	return idx * 2;
}
1865
/** Maps a bucket index to the corresponding index in a table half as big. */
static inline size_t shrink_idx(size_t idx)
{
	/* Unsigned halving; identical to shifting right by one bit. */
	return idx / 2;
}
1870
1871	static inline size_t calc_key_hash(cht_t h, void key)
1872	{
1873	/* Mimic calc_node_hash. */
1874	return hash_mix(h->op->key_hash(key)) & ~(size_t)1;
1875	}
1876
1877	static inline size_t node_hash(cht_t h, const cht_link_t item)
1878	{
1879	ASSERT(item->hash == h->invalid_hash
1880	\|\| item->hash == sentinel.hash
1881	\|\| item->hash == calc_node_hash(h, item));
1882
1883	return item->hash;
1884	}
1885
1886	static inline size_t calc_node_hash(cht_t h, const cht_link_t item)
1887	{
1888	ASSERT(item != &sentinel);
1889	/*
1890	* Clear the lowest order bit in order for sentinel's node hash
1891	* to be the greatest possible.
1892	*/
1893	return hash_mix(h->op->hash(item)) & ~(size_t)1;
1894	}
1895
1896	static inline void memoize_node_hash(cht_t h, cht_link_t item)
1897	{
1898	item->hash = calc_node_hash(h, item);
1899	}
1900
1901	static inline marked_ptr_t make_link(const cht_link_t *next, mark_t mark)
1902	{
1903	marked_ptr_t ptr = (marked_ptr_t) next;
1904
1905	ASSERT(!(ptr & N_MARK_MASK));
1906	ASSERT(!((unsigned)mark & ~N_MARK_MASK));
1907
1908	return ptr \| mark;
1909	}
1910
1911
1912	static inline cht_link_t * get_next(marked_ptr_t link)
1913	{
1914	return (cht_link_t*)(link & ~N_MARK_MASK);
1915	}
1916
1917
1918	static inline mark_t get_mark(marked_ptr_t link)
1919	{
1920	return (mark_t)(link & N_MARK_MASK);
1921	}
1922
1923
1924	static inline void next_wnd(wnd_t *wnd)
1925	{
1926	ASSERT(wnd);
1927	ASSERT(wnd->cur);
1928
1929	wnd->last = wnd->cur;
1930	wnd->ppred = &wnd->cur->link;
1931	wnd->cur = get_next(wnd->cur->link);
1932	}
1933
1934
1935	static bool same_node_pred(void node, const cht_link_t item2)
1936	{
1937	const cht_link_t item1 = (const cht_link_t) node;
1938	return item1 == item2;
1939	}
1940
1941	static inline marked_ptr_t cas_link(marked_ptr_t link, const cht_link_t cur_next,
1942	mark_t cur_mark, const cht_link_t *new_next, mark_t new_mark)
1943	{
1944	return _cas_link(link, make_link(cur_next, cur_mark),
1945	make_link(new_next, new_mark));
1946	}
1947
1948	static inline marked_ptr_t _cas_link(marked_ptr_t *link, marked_ptr_t cur,
1949	marked_ptr_t new)
1950	{
1951	ASSERT(link != &sentinel.link);
1952	/*
1953	* cas(x) on the same location x on one cpu must be ordered, but do not
1954	* have to be ordered wrt to other cas(y) to a different location y
1955	* on the same cpu.
1956	*
1957	* cas(x) must act as a write barrier on x, ie if cas(x) succeeds
1958	* and is observed by another cpu, then all cpus must be able to
1959	* make the effects of cas(x) visible just by issuing a load barrier.
1960	* For example:
1961	* cpu1 cpu2 cpu3
1962	* cas(x, 0 -> 1), succeeds
1963	* cas(x, 0 -> 1), fails
1964	* MB
1965	* y = 7
1966	* sees y == 7
1967	* loadMB must be enough to make cas(x) on cpu3 visible to cpu1, ie x == 1.
1968	*
1969	* If cas() did not work this way:
1970	* - our head move protocol would not be correct.
1971	* - freeing an item linked to a moved head after another item was
1972	* inserted in front of it, would require more than one grace period.
1973	*/
1974	void ret = atomic_cas_ptr((void)link, (void )cur, (void *)new);
1975	return (marked_ptr_t) ret;
1976	}
1977
/** Orders CAS operations that target different memory locations. */
static inline void cas_order_barrier(void)
{
	/* A write barrier keeps an earlier CAS from passing a later one. */
	write_barrier();
}
1983
1984
1985	/** @}
1986	*/

Note: See TracBrowser for help on using the repository browser.

Download in other formats: