source: mainline/generic/src/adt/btree.c@ 50fe620

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since 50fe620 was cc27ae48, checked in by Jakub Jermar <jakub@…>, 20 years ago

Try to avoid splitting full B+-tree nodes by trying left or right rotation first.
(This improved memory consumption of this algorithm by some 40% - meassured on 101-item set).

  • Property mode set to 100644
File size: 18.8 KB
Line 
1/*
2 * Copyright (C) 2006 Jakub Jermar
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * This B-tree has the following properties:
31 * - it is a ballanced 2-3-4 tree (i.e. BTREE_M = 4)
32 * - values (i.e. pointers to values) are stored only in leaves
33 * - leaves are linked in a list
34 * - technically, it is a B+-tree (because of the previous properties)
35 *
36 * Be carefull when using these trees. They need to allocate
37 * and deallocate memory for their index nodes and as such
38 * can sleep.
39 */
40
41#include <adt/btree.h>
42#include <adt/list.h>
43#include <mm/slab.h>
44#include <debug.h>
45#include <panic.h>
46#include <typedefs.h>
47#include <print.h>
48
49static void _btree_insert(btree_t *t, __native key, void *value, btree_node_t *rsubtree, btree_node_t *node);
50static void node_initialize(btree_node_t *node);
51static void node_insert_key_left(btree_node_t *node, __native key, void *value, btree_node_t *rsubtree);
52static void node_insert_key_right(btree_node_t *node, __native key, void *value, btree_node_t *rsubtree);
53static btree_node_t *node_split(btree_node_t *node, __native key, void *value, btree_node_t *rsubtree, __native *median);
54static void node_remove_key_left(btree_node_t *node, __native key);
55static void node_remove_key_right(btree_node_t *node, __native key);
56static index_t find_key_by_subtree(btree_node_t *node, btree_node_t *subtree, bool right);
57static bool try_insert_by_left_rotation(btree_node_t *node, __native key, void *value, btree_node_t *rsubtree);
58static bool try_insert_by_right_rotation(btree_node_t *node, __native key, void *value, btree_node_t *rsubtree);
59
60#define ROOT_NODE(n) (!(n)->parent)
61#define INDEX_NODE(n) ((n)->subtree[0] != NULL)
62#define LEAF_NODE(n) ((n)->subtree[0] == NULL)
63
64#define MEDIAN_LOW_INDEX(n) (((n)->keys-1)/2)
65#define MEDIAN_HIGH_INDEX(n) ((n)->keys/2)
66#define MEDIAN_LOW(n) ((n)->key[MEDIAN_LOW_INDEX((n))]);
67#define MEDIAN_HIGH(n) ((n)->key[MEDIAN_HIGH_INDEX((n))]);
68
69/** Create empty B-tree.
70 *
71 * @param t B-tree.
72 */
73void btree_create(btree_t *t)
74{
75 list_initialize(&t->leaf_head);
76 t->root = (btree_node_t *) malloc(sizeof(btree_node_t), 0);
77 node_initialize(t->root);
78 list_append(&t->root->leaf_link, &t->leaf_head);
79}
80
81/** Destroy empty B-tree. */
82void btree_destroy(btree_t *t)
83{
84 ASSERT(!t->root->keys);
85 free(t->root);
86}
87
88/** Insert key-value pair into B-tree.
89 *
90 * @param t B-tree.
91 * @param key Key to be inserted.
92 * @param value Value to be inserted.
93 * @param leaf_node Leaf node where the insertion should begin.
94 */
95void btree_insert(btree_t *t, __native key, void *value, btree_node_t *leaf_node)
96{
97 btree_node_t *lnode;
98
99 ASSERT(value);
100
101 lnode = leaf_node;
102 if (!lnode) {
103 if (btree_search(t, key, &lnode)) {
104 panic("B-tree %P already contains key %d\n", t, key);
105 }
106 }
107
108 _btree_insert(t, key, value, NULL, lnode);
109}
110
111/** Recursively insert into B-tree.
112 *
113 * @param t B-tree.
114 * @param key Key to be inserted.
115 * @param value Value to be inserted.
116 * @param rsubtree Right subtree of the inserted key.
117 * @param node Start inserting into this node.
118 */
119void _btree_insert(btree_t *t, __native key, void *value, btree_node_t *rsubtree, btree_node_t *node)
120{
121 if (node->keys < BTREE_MAX_KEYS) {
122 /*
123 * Node conatins enough space, the key can be stored immediately.
124 */
125 node_insert_key_right(node, key, value, rsubtree);
126 } else if (try_insert_by_left_rotation(node, key, value, rsubtree)) {
127 /*
128 * The key-value-rsubtree triplet has been inserted because
129 * some keys could have been moved to the left sibling.
130 */
131 } else if (try_insert_by_right_rotation(node, key, value, rsubtree)) {
132 /*
133 * The key-value-rsubtree triplet has been inserted because
134 * some keys could have been moved to the right sibling.
135 */
136 } else {
137 btree_node_t *rnode;
138 __native median;
139
140 /*
141 * Node is full and both siblings (if both exist) are full too.
142 * Split the node and insert the smallest key from the node containing
143 * bigger keys (i.e. the new node) into its parent.
144 */
145
146 rnode = node_split(node, key, value, rsubtree, &median);
147
148 if (LEAF_NODE(node)) {
149 list_append(&rnode->leaf_link, &node->leaf_link);
150 }
151
152 if (ROOT_NODE(node)) {
153 /*
154 * We split the root node. Create new root.
155 */
156 t->root = (btree_node_t *) malloc(sizeof(btree_node_t), 0);
157 node->parent = t->root;
158 rnode->parent = t->root;
159 node_initialize(t->root);
160
161 /*
162 * Left-hand side subtree will be the old root (i.e. node).
163 * Right-hand side subtree will be rnode.
164 */
165 t->root->subtree[0] = node;
166
167 t->root->depth = node->depth + 1;
168 }
169 _btree_insert(t, median, NULL, rnode, node->parent);
170 }
171
172}
173
174/* TODO */
175void btree_remove(btree_t *t, __native key)
176{
177}
178
179/** Search key in a B-tree.
180 *
181 * @param t B-tree.
182 * @param key Key to be searched.
183 * @param leaf_node Address where to put pointer to visited leaf node.
184 *
185 * @return Pointer to value or NULL if there is no such key.
186 */
187void *btree_search(btree_t *t, __native key, btree_node_t **leaf_node)
188{
189 btree_node_t *cur, *next;
190
191 /*
192 * Iteratively descend to the leaf that can contain the searched key.
193 */
194 for (cur = t->root; cur; cur = next) {
195
196 /* Last iteration will set this with proper leaf node address. */
197 *leaf_node = cur;
198
199 /*
200 * The key can be in the leftmost subtree.
201 * Test it separately.
202 */
203 if (key < cur->key[0]) {
204 next = cur->subtree[0];
205 continue;
206 } else {
207 void *val;
208 int i;
209
210 /*
211 * Now if the key is smaller than cur->key[i]
212 * it can only mean that the value is in cur->subtree[i]
213 * or it is not in the tree at all.
214 */
215 for (i = 1; i < cur->keys; i++) {
216 if (key < cur->key[i]) {
217 next = cur->subtree[i];
218 val = cur->value[i - 1];
219
220 if (LEAF_NODE(cur))
221 return key == cur->key[i - 1] ? val : NULL;
222
223 goto descend;
224 }
225 }
226
227 /*
228 * Last possibility is that the key is in the rightmost subtree.
229 */
230 next = cur->subtree[i];
231 val = cur->value[i - 1];
232 if (LEAF_NODE(cur))
233 return key == cur->key[i - 1] ? val : NULL;
234 }
235 descend:
236 ;
237 }
238
239 /*
240 * The key was not found in the *leaf_node and is smaller than any of its keys.
241 */
242 return NULL;
243}
244
245/** Get pointer to value with the smallest key within the node.
246 *
247 * Can be only used on leaf-level nodes.
248 *
249 * @param node B-tree node.
250 *
251 * @return Pointer to value assiciated with the smallest key.
252 */
253void *btree_node_min(btree_node_t *node)
254{
255 ASSERT(LEAF_NODE(node));
256 ASSERT(node->keys);
257 return node->value[0];
258}
259
260/** Get pointer to value with the biggest key within the node.
261 *
262 * Can be only used on leaf-level nodes.
263 *
264 * @param node B-tree node.
265 *
266 * @return Pointer to value assiciated with the biggest key.
267 */
268void *btree_node_max(btree_node_t *node)
269{
270 ASSERT(LEAF_NODE(node));
271 ASSERT(node->keys);
272 return node->value[node->keys - 1];
273}
274
275/** Initialize B-tree node.
276 *
277 * @param node B-tree node.
278 */
279void node_initialize(btree_node_t *node)
280{
281 int i;
282
283 node->keys = 0;
284
285 /* Clean also space for the extra key. */
286 for (i = 0; i < BTREE_MAX_KEYS + 1; i++) {
287 node->key[i] = 0;
288 node->value[i] = NULL;
289 node->subtree[i] = NULL;
290 }
291 node->subtree[i] = NULL;
292
293 node->parent = NULL;
294
295 link_initialize(&node->leaf_link);
296
297 link_initialize(&node->bfs_link);
298 node->depth = 0;
299}
300
301/** Insert key-value-lsubtree triplet into B-tree node.
302 *
303 * It is actually possible to have more keys than BTREE_MAX_KEYS.
304 * This feature is used during insert by right rotation.
305 *
306 * @param node B-tree node into wich the new key is to be inserted.
307 * @param key The key to be inserted.
308 * @param value Pointer to value to be inserted.
309 * @param lsubtree Pointer to the left subtree.
310 */
311void node_insert_key_left(btree_node_t *node, __native key, void *value, btree_node_t *lsubtree)
312{
313 int i;
314
315 for (i = 0; i < node->keys; i++) {
316 if (key < node->key[i]) {
317 int j;
318
319 for (j = node->keys; j > i; j--) {
320 node->key[j] = node->key[j - 1];
321 node->value[j] = node->value[j - 1];
322 node->subtree[j + 1] = node->subtree[j];
323 }
324 node->subtree[j + 1] = node->subtree[j];
325 break;
326 }
327 }
328 node->key[i] = key;
329 node->value[i] = value;
330 node->subtree[i] = lsubtree;
331
332 node->keys++;
333}
334
335
336/** Insert key-value-rsubtree triplet into B-tree node.
337 *
338 * It is actually possible to have more keys than BTREE_MAX_KEYS.
339 * This feature is used during splitting the node when the
340 * number of keys is BTREE_MAX_KEYS + 1. Insert by left rotation
341 * also makes use of this feature.
342 *
343 * @param node B-tree node into wich the new key is to be inserted.
344 * @param key The key to be inserted.
345 * @param value Pointer to value to be inserted.
346 * @param rsubtree Pointer to the right subtree.
347 */
348void node_insert_key_right(btree_node_t *node, __native key, void *value, btree_node_t *rsubtree)
349{
350 int i;
351
352 for (i = 0; i < node->keys; i++) {
353 if (key < node->key[i]) {
354 int j;
355
356 for (j = node->keys; j > i; j--) {
357 node->key[j] = node->key[j - 1];
358 node->value[j] = node->value[j - 1];
359 node->subtree[j + 1] = node->subtree[j];
360 }
361 break;
362 }
363 }
364 node->key[i] = key;
365 node->value[i] = value;
366 node->subtree[i + 1] = rsubtree;
367
368 node->keys++;
369}
370
371/** Split full B-tree node and insert new key-value-right-subtree triplet.
372 *
373 * This function will split a node and return pointer to a newly created
374 * node containing keys greater than or equal to the greater of medians
375 * (or median) of the old keys and the newly added key. It will also write
376 * the median key to a memory address supplied by the caller.
377 *
378 * If the node being split is an index node, the median will not be
379 * included in the new node. If the node is a leaf node,
380 * the median will be copied there.
381 *
382 * @param node B-tree node wich is going to be split.
383 * @param key The key to be inserted.
384 * @param value Pointer to the value to be inserted.
385 * @param rsubtree Pointer to the right subtree of the key being added.
386 * @param median Address in memory, where the median key will be stored.
387 *
388 * @return Newly created right sibling of node.
389 */
390btree_node_t *node_split(btree_node_t *node, __native key, void *value, btree_node_t *rsubtree, __native *median)
391{
392 btree_node_t *rnode;
393 int i, j;
394
395 ASSERT(median);
396 ASSERT(node->keys == BTREE_MAX_KEYS);
397
398 /*
399 * Use the extra space to store the extra node.
400 */
401 node_insert_key_right(node, key, value, rsubtree);
402
403 /*
404 * Compute median of keys.
405 */
406 *median = MEDIAN_HIGH(node);
407
408 /*
409 * Allocate and initialize new right sibling.
410 */
411 rnode = (btree_node_t *) malloc(sizeof(btree_node_t), 0);
412 node_initialize(rnode);
413 rnode->parent = node->parent;
414 rnode->depth = node->depth;
415
416 /*
417 * Copy big keys, values and subtree pointers to the new right sibling.
418 * If this is an index node, do not copy the median.
419 */
420 i = (int) INDEX_NODE(node);
421 for (i += MEDIAN_HIGH_INDEX(node), j = 0; i < node->keys; i++, j++) {
422 rnode->key[j] = node->key[i];
423 rnode->value[j] = node->value[i];
424 rnode->subtree[j] = node->subtree[i];
425
426 /*
427 * Fix parent links in subtrees.
428 */
429 if (rnode->subtree[j])
430 rnode->subtree[j]->parent = rnode;
431
432 }
433 rnode->subtree[j] = node->subtree[i];
434 if (rnode->subtree[j])
435 rnode->subtree[j]->parent = rnode;
436
437 rnode->keys = j; /* Set number of keys of the new node. */
438 node->keys /= 2; /* Shrink the old node. */
439
440 return rnode;
441}
442
443/** Remove key and its left subtree pointer from B-tree node.
444 *
445 * Remove the key and eliminate gaps in node->key array.
446 * Note that the value pointer and the left subtree pointer
447 * is removed from the node as well.
448 *
449 * @param node B-tree node.
450 * @param key Key to be removed.
451 */
452void node_remove_key_left(btree_node_t *node, __native key)
453{
454 int i, j;
455
456 for (i = 0; i < node->keys; i++) {
457 if (key == node->key[i]) {
458 for (j = i + 1; j < node->keys; j++) {
459 node->key[j - 1] = node->key[j];
460 node->value[j - 1] = node->value[j];
461 node->subtree[j - 1] = node->subtree[j];
462 }
463 node->subtree[j - 1] = node->subtree[j];
464 node->keys--;
465 return;
466 }
467 }
468 panic("node %P does not contain key %d\n", node, key);
469}
470
471/** Remove key and its right subtree pointer from B-tree node.
472 *
473 * Remove the key and eliminate gaps in node->key array.
474 * Note that the value pointer and the right subtree pointer
475 * is removed from the node as well.
476 *
477 * @param node B-tree node.
478 * @param key Key to be removed.
479 */
480void node_remove_key_right(btree_node_t *node, __native key)
481{
482 int i, j;
483
484 for (i = 0; i < node->keys; i++) {
485 if (key == node->key[i]) {
486 for (j = i + 1; j < node->keys; j++) {
487 node->key[j - 1] = node->key[j];
488 node->value[j - 1] = node->value[j];
489 node->subtree[j] = node->subtree[j + 1];
490 }
491 node->keys--;
492 return;
493 }
494 }
495 panic("node %P does not contain key %d\n", node, key);
496}
497
498/** Find key by its left or right subtree.
499 *
500 * @param node B-tree node.
501 * @param subtree Left or right subtree of a key found in node.
502 * @param right If true, subtree is a right subtree. If false, subtree is a left subtree.
503 *
504 * @return Index of the key associated with the subtree.
505 */
506index_t find_key_by_subtree(btree_node_t *node, btree_node_t *subtree, bool right)
507{
508 int i;
509
510 for (i = 0; i < node->keys + 1; i++) {
511 if (subtree == node->subtree[i])
512 return i - (int) (right != false);
513 }
514 panic("node %P does not contain subtree %P\n", node, subtree);
515}
516
517/** Insert key-value-rsubtree triplet and rotate the node to the left, if this operation can be done.
518 *
519 * Left sibling of the node (if it exists) is checked for free space.
520 * If there is free space, the key is inserted and the smallest key of
521 * the node is moved there. The index node which is the parent of both
522 * nodes is fixed.
523 *
524 * @param node B-tree node.
525 * @param inskey Key to be inserted.
526 * @param insvalue Value to be inserted.
527 * @param rsubtree Right subtree of inskey.
528 *
529 * @return True if the rotation was performed, false otherwise.
530 */
531bool try_insert_by_left_rotation(btree_node_t *node, __native inskey, void *insvalue, btree_node_t *rsubtree)
532{
533 index_t idx;
534 btree_node_t *lnode;
535
536 /*
537 * If this is root node, the rotation can not be done.
538 */
539 if (ROOT_NODE(node))
540 return false;
541
542 idx = find_key_by_subtree(node->parent, node, true);
543 if ((int) idx == -1) {
544 /*
545 * If this node is the leftmost subtree of its parent,
546 * the rotation can not be done.
547 */
548 return false;
549 }
550
551 lnode = node->parent->subtree[idx];
552
553 if (lnode->keys < BTREE_MAX_KEYS) {
554 __native key;
555
556 /*
557 * The rotaion can be done. The left sibling has free space.
558 */
559
560 node_insert_key_right(node, inskey, insvalue, rsubtree);
561 key = node->key[0];
562
563 if (LEAF_NODE(node)) {
564 void *value;
565
566 value = node->value[0];
567 node_remove_key_left(node, key);
568 node_insert_key_right(lnode, key, value, NULL);
569 node->parent->key[idx] = node->key[0];
570 } else {
571 btree_node_t *lsubtree;
572
573 lsubtree = node->subtree[0];
574 node_remove_key_left(node, key);
575 node_insert_key_right(lnode, node->parent->key[idx], NULL, lsubtree);
576 node->parent->key[idx] = key;
577
578 /* Fix parent link of the reconnected left subtree. */
579 lsubtree->parent = lnode;
580 }
581 return true;
582 }
583
584 return false;
585}
586
587/** Insert key-value-rsubtree triplet and rotate the node to the right, if this operation can be done.
588 *
589 * Right sibling of the node (if it exists) is checked for free space.
590 * If there is free space, the key is inserted and the biggest key of
591 * the node is moved there. The index node which is the parent of both
592 * nodes is fixed.
593 *
594 * @param node B-tree node.
595 * @param inskey Key to be inserted.
596 * @param insvalue Value to be inserted.
597 * @param rsubtree Right subtree of inskey.
598 *
599 * @return True if the rotation was performed, false otherwise.
600 */
601bool try_insert_by_right_rotation(btree_node_t *node, __native inskey, void *insvalue, btree_node_t *rsubtree)
602{
603 index_t idx;
604 btree_node_t *rnode;
605
606 /*
607 * If this is root node, the rotation can not be done.
608 */
609 if (ROOT_NODE(node))
610 return false;
611
612 idx = find_key_by_subtree(node->parent, node, false);
613 if (idx == node->parent->keys) {
614 /*
615 * If this node is the rightmost subtree of its parent,
616 * the rotation can not be done.
617 */
618 return false;
619 }
620
621 rnode = node->parent->subtree[idx + 1];
622
623 if (rnode->keys < BTREE_MAX_KEYS) {
624 __native key;
625
626 /*
627 * The rotaion can be done. The right sibling has free space.
628 */
629
630 node_insert_key_right(node, inskey, insvalue, rsubtree);
631 key = node->key[node->keys - 1];
632
633 if (LEAF_NODE(node)) {
634 void *value;
635
636 value = node->value[node->keys - 1];
637 node_remove_key_right(node, key);
638 node_insert_key_left(rnode, key, value, NULL);
639 node->parent->key[idx] = key;
640 } else {
641 btree_node_t *rsubt;
642
643 rsubt = node->subtree[node->keys];
644 node_remove_key_right(node, key);
645 node_insert_key_left(rnode, node->parent->key[idx], NULL, rsubt);
646 node->parent->key[idx] = key;
647
648 /* Fix parent link of the reconnected right subtree. */
649 rsubt->parent = rnode;
650 }
651 return true;
652 }
653
654 return false;
655}
656
657/** Print B-tree.
658 *
659 * @param t Print out B-tree.
660 */
661void btree_print(btree_t *t)
662{
663 int i, depth = t->root->depth;
664 link_t head;
665
666 list_initialize(&head);
667 list_append(&t->root->bfs_link, &head);
668
669 /*
670 * Use BFS search to print out the tree.
671 * Levels are distinguished from one another by node->depth.
672 */
673 while (!list_empty(&head)) {
674 link_t *hlp;
675 btree_node_t *node;
676
677 hlp = head.next;
678 ASSERT(hlp != &head);
679 node = list_get_instance(hlp, btree_node_t, bfs_link);
680 list_remove(hlp);
681
682 ASSERT(node);
683
684 if (node->depth != depth) {
685 printf("\n");
686 depth = node->depth;
687 }
688
689 printf("(");
690 for (i = 0; i < node->keys; i++) {
691 printf("%d,", node->key[i]);
692 if (node->depth && node->subtree[i]) {
693 list_append(&node->subtree[i]->bfs_link, &head);
694 }
695 }
696 if (node->depth && node->subtree[i]) {
697 list_append(&node->subtree[i]->bfs_link, &head);
698 }
699 printf(")");
700 }
701 printf("\n");
702}
Note: See TracBrowser for help on using the repository browser.