source: mainline/generic/src/adt/btree.c@ ca687ad

lfn serial ticket/834-toolchain-update topic/msim-upgrade topic/simplify-dev-export
Last change on this file since ca687ad was 296cc1b, checked in by Jakub Jermar <jakub@…>, 19 years ago

Change B+-tree from 2-3-4 tree to 2-3-4-5 tree by adding space for the fourth key.
This should make key removal easier.

  • Property mode set to 100644
File size: 19.2 KB
Line 
1/*
2 * Copyright (C) 2006 Jakub Jermar
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 *
9 * - Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * - Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * - The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29/*
30 * This B-tree has the following properties:
31 * - it is a ballanced 2-3-4-5 tree (i.e. BTREE_M = 5)
32 * - values (i.e. pointers to values) are stored only in leaves
33 * - leaves are linked in a list
34 * - technically, it is a B+-tree (because of the previous properties)
35 *
36 * Be carefull when using these trees. They need to allocate
37 * and deallocate memory for their index nodes and as such
38 * can sleep.
39 */
40
41#include <adt/btree.h>
42#include <adt/list.h>
43#include <mm/slab.h>
44#include <debug.h>
45#include <panic.h>
46#include <typedefs.h>
47#include <print.h>
48
49static void _btree_insert(btree_t *t, __native key, void *value, btree_node_t *rsubtree, btree_node_t *node);
50static void node_initialize(btree_node_t *node);
51static void node_insert_key_left(btree_node_t *node, __native key, void *value, btree_node_t *rsubtree);
52static void node_insert_key_right(btree_node_t *node, __native key, void *value, btree_node_t *rsubtree);
53static btree_node_t *node_split(btree_node_t *node, __native key, void *value, btree_node_t *rsubtree, __native *median);
54static void node_remove_key_left(btree_node_t *node, __native key);
55static void node_remove_key_right(btree_node_t *node, __native key);
56static index_t find_key_by_subtree(btree_node_t *node, btree_node_t *subtree, bool right);
57static bool try_insert_by_left_rotation(btree_node_t *node, __native key, void *value, btree_node_t *rsubtree);
58static bool try_insert_by_right_rotation(btree_node_t *node, __native key, void *value, btree_node_t *rsubtree);
59
60#define ROOT_NODE(n) (!(n)->parent)
61#define INDEX_NODE(n) ((n)->subtree[0] != NULL)
62#define LEAF_NODE(n) ((n)->subtree[0] == NULL)
63
64#define FILL_FACTOR ((BTREE_M-1)/2)
65
66#define MEDIAN_LOW_INDEX(n) (((n)->keys-1)/2)
67#define MEDIAN_HIGH_INDEX(n) ((n)->keys/2)
68#define MEDIAN_LOW(n) ((n)->key[MEDIAN_LOW_INDEX((n))]);
69#define MEDIAN_HIGH(n) ((n)->key[MEDIAN_HIGH_INDEX((n))]);
70
71/** Create empty B-tree.
72 *
73 * @param t B-tree.
74 */
75void btree_create(btree_t *t)
76{
77 list_initialize(&t->leaf_head);
78 t->root = (btree_node_t *) malloc(sizeof(btree_node_t), 0);
79 node_initialize(t->root);
80 list_append(&t->root->leaf_link, &t->leaf_head);
81}
82
83/** Destroy empty B-tree. */
84void btree_destroy(btree_t *t)
85{
86 ASSERT(!t->root->keys);
87 free(t->root);
88}
89
90/** Insert key-value pair into B-tree.
91 *
92 * @param t B-tree.
93 * @param key Key to be inserted.
94 * @param value Value to be inserted.
95 * @param leaf_node Leaf node where the insertion should begin.
96 */
97void btree_insert(btree_t *t, __native key, void *value, btree_node_t *leaf_node)
98{
99 btree_node_t *lnode;
100
101 ASSERT(value);
102
103 lnode = leaf_node;
104 if (!lnode) {
105 if (btree_search(t, key, &lnode)) {
106 panic("B-tree %P already contains key %d\n", t, key);
107 }
108 }
109
110 _btree_insert(t, key, value, NULL, lnode);
111}
112
113/** Recursively insert into B-tree.
114 *
115 * @param t B-tree.
116 * @param key Key to be inserted.
117 * @param value Value to be inserted.
118 * @param rsubtree Right subtree of the inserted key.
119 * @param node Start inserting into this node.
120 */
121void _btree_insert(btree_t *t, __native key, void *value, btree_node_t *rsubtree, btree_node_t *node)
122{
123 if (node->keys < BTREE_MAX_KEYS) {
124 /*
125 * Node conatins enough space, the key can be stored immediately.
126 */
127 node_insert_key_right(node, key, value, rsubtree);
128 } else if (try_insert_by_left_rotation(node, key, value, rsubtree)) {
129 /*
130 * The key-value-rsubtree triplet has been inserted because
131 * some keys could have been moved to the left sibling.
132 */
133 } else if (try_insert_by_right_rotation(node, key, value, rsubtree)) {
134 /*
135 * The key-value-rsubtree triplet has been inserted because
136 * some keys could have been moved to the right sibling.
137 */
138 } else {
139 btree_node_t *rnode;
140 __native median;
141
142 /*
143 * Node is full and both siblings (if both exist) are full too.
144 * Split the node and insert the smallest key from the node containing
145 * bigger keys (i.e. the new node) into its parent.
146 */
147
148 rnode = node_split(node, key, value, rsubtree, &median);
149
150 if (LEAF_NODE(node)) {
151 list_append(&rnode->leaf_link, &node->leaf_link);
152 }
153
154 if (ROOT_NODE(node)) {
155 /*
156 * We split the root node. Create new root.
157 */
158 t->root = (btree_node_t *) malloc(sizeof(btree_node_t), 0);
159 node->parent = t->root;
160 rnode->parent = t->root;
161 node_initialize(t->root);
162
163 /*
164 * Left-hand side subtree will be the old root (i.e. node).
165 * Right-hand side subtree will be rnode.
166 */
167 t->root->subtree[0] = node;
168
169 t->root->depth = node->depth + 1;
170 }
171 _btree_insert(t, median, NULL, rnode, node->parent);
172 }
173
174}
175
176/** Remove B-tree node.
177 *
178 * @param B-tree.
179 * @param key Key to be removed from the B-tree along with its associated value.
180 * @param leaf_node If not NULL, pointer to the leaf node where the key is found.
181 */
182void btree_remove(btree_t *t, __native key, btree_node_t *leaf_node)
183{
184 btree_node_t *lnode;
185
186 lnode = leaf_node;
187 if (!lnode) {
188 if (!btree_search(t, key, &lnode)) {
189 panic("B-tree %P does not contain key %d\n", t, key);
190 }
191 }
192
193 /* TODO */
194
195}
196
197/** Search key in a B-tree.
198 *
199 * @param t B-tree.
200 * @param key Key to be searched.
201 * @param leaf_node Address where to put pointer to visited leaf node.
202 *
203 * @return Pointer to value or NULL if there is no such key.
204 */
205void *btree_search(btree_t *t, __native key, btree_node_t **leaf_node)
206{
207 btree_node_t *cur, *next;
208
209 /*
210 * Iteratively descend to the leaf that can contain the searched key.
211 */
212 for (cur = t->root; cur; cur = next) {
213
214 /* Last iteration will set this with proper leaf node address. */
215 *leaf_node = cur;
216
217 /*
218 * The key can be in the leftmost subtree.
219 * Test it separately.
220 */
221 if (key < cur->key[0]) {
222 next = cur->subtree[0];
223 continue;
224 } else {
225 void *val;
226 int i;
227
228 /*
229 * Now if the key is smaller than cur->key[i]
230 * it can only mean that the value is in cur->subtree[i]
231 * or it is not in the tree at all.
232 */
233 for (i = 1; i < cur->keys; i++) {
234 if (key < cur->key[i]) {
235 next = cur->subtree[i];
236 val = cur->value[i - 1];
237
238 if (LEAF_NODE(cur))
239 return key == cur->key[i - 1] ? val : NULL;
240
241 goto descend;
242 }
243 }
244
245 /*
246 * Last possibility is that the key is in the rightmost subtree.
247 */
248 next = cur->subtree[i];
249 val = cur->value[i - 1];
250 if (LEAF_NODE(cur))
251 return key == cur->key[i - 1] ? val : NULL;
252 }
253 descend:
254 ;
255 }
256
257 /*
258 * The key was not found in the *leaf_node and is smaller than any of its keys.
259 */
260 return NULL;
261}
262
263/** Get pointer to value with the smallest key within the node.
264 *
265 * Can be only used on leaf-level nodes.
266 *
267 * @param node B-tree node.
268 *
269 * @return Pointer to value assiciated with the smallest key.
270 */
271void *btree_node_min(btree_node_t *node)
272{
273 ASSERT(LEAF_NODE(node));
274 ASSERT(node->keys);
275 return node->value[0];
276}
277
278/** Get pointer to value with the biggest key within the node.
279 *
280 * Can be only used on leaf-level nodes.
281 *
282 * @param node B-tree node.
283 *
284 * @return Pointer to value assiciated with the biggest key.
285 */
286void *btree_node_max(btree_node_t *node)
287{
288 ASSERT(LEAF_NODE(node));
289 ASSERT(node->keys);
290 return node->value[node->keys - 1];
291}
292
293/** Initialize B-tree node.
294 *
295 * @param node B-tree node.
296 */
297void node_initialize(btree_node_t *node)
298{
299 int i;
300
301 node->keys = 0;
302
303 /* Clean also space for the extra key. */
304 for (i = 0; i < BTREE_MAX_KEYS + 1; i++) {
305 node->key[i] = 0;
306 node->value[i] = NULL;
307 node->subtree[i] = NULL;
308 }
309 node->subtree[i] = NULL;
310
311 node->parent = NULL;
312
313 link_initialize(&node->leaf_link);
314
315 link_initialize(&node->bfs_link);
316 node->depth = 0;
317}
318
319/** Insert key-value-lsubtree triplet into B-tree node.
320 *
321 * It is actually possible to have more keys than BTREE_MAX_KEYS.
322 * This feature is used during insert by right rotation.
323 *
324 * @param node B-tree node into wich the new key is to be inserted.
325 * @param key The key to be inserted.
326 * @param value Pointer to value to be inserted.
327 * @param lsubtree Pointer to the left subtree.
328 */
329void node_insert_key_left(btree_node_t *node, __native key, void *value, btree_node_t *lsubtree)
330{
331 int i;
332
333 for (i = 0; i < node->keys; i++) {
334 if (key < node->key[i]) {
335 int j;
336
337 for (j = node->keys; j > i; j--) {
338 node->key[j] = node->key[j - 1];
339 node->value[j] = node->value[j - 1];
340 node->subtree[j + 1] = node->subtree[j];
341 }
342 node->subtree[j + 1] = node->subtree[j];
343 break;
344 }
345 }
346 node->key[i] = key;
347 node->value[i] = value;
348 node->subtree[i] = lsubtree;
349
350 node->keys++;
351}
352
353
354/** Insert key-value-rsubtree triplet into B-tree node.
355 *
356 * It is actually possible to have more keys than BTREE_MAX_KEYS.
357 * This feature is used during splitting the node when the
358 * number of keys is BTREE_MAX_KEYS + 1. Insert by left rotation
359 * also makes use of this feature.
360 *
361 * @param node B-tree node into wich the new key is to be inserted.
362 * @param key The key to be inserted.
363 * @param value Pointer to value to be inserted.
364 * @param rsubtree Pointer to the right subtree.
365 */
366void node_insert_key_right(btree_node_t *node, __native key, void *value, btree_node_t *rsubtree)
367{
368 int i;
369
370 for (i = 0; i < node->keys; i++) {
371 if (key < node->key[i]) {
372 int j;
373
374 for (j = node->keys; j > i; j--) {
375 node->key[j] = node->key[j - 1];
376 node->value[j] = node->value[j - 1];
377 node->subtree[j + 1] = node->subtree[j];
378 }
379 break;
380 }
381 }
382 node->key[i] = key;
383 node->value[i] = value;
384 node->subtree[i + 1] = rsubtree;
385
386 node->keys++;
387}
388
389/** Split full B-tree node and insert new key-value-right-subtree triplet.
390 *
391 * This function will split a node and return pointer to a newly created
392 * node containing keys greater than or equal to the greater of medians
393 * (or median) of the old keys and the newly added key. It will also write
394 * the median key to a memory address supplied by the caller.
395 *
396 * If the node being split is an index node, the median will not be
397 * included in the new node. If the node is a leaf node,
398 * the median will be copied there.
399 *
400 * @param node B-tree node wich is going to be split.
401 * @param key The key to be inserted.
402 * @param value Pointer to the value to be inserted.
403 * @param rsubtree Pointer to the right subtree of the key being added.
404 * @param median Address in memory, where the median key will be stored.
405 *
406 * @return Newly created right sibling of node.
407 */
408btree_node_t *node_split(btree_node_t *node, __native key, void *value, btree_node_t *rsubtree, __native *median)
409{
410 btree_node_t *rnode;
411 int i, j;
412
413 ASSERT(median);
414 ASSERT(node->keys == BTREE_MAX_KEYS);
415
416 /*
417 * Use the extra space to store the extra node.
418 */
419 node_insert_key_right(node, key, value, rsubtree);
420
421 /*
422 * Compute median of keys.
423 */
424 *median = MEDIAN_HIGH(node);
425
426 /*
427 * Allocate and initialize new right sibling.
428 */
429 rnode = (btree_node_t *) malloc(sizeof(btree_node_t), 0);
430 node_initialize(rnode);
431 rnode->parent = node->parent;
432 rnode->depth = node->depth;
433
434 /*
435 * Copy big keys, values and subtree pointers to the new right sibling.
436 * If this is an index node, do not copy the median.
437 */
438 i = (int) INDEX_NODE(node);
439 for (i += MEDIAN_HIGH_INDEX(node), j = 0; i < node->keys; i++, j++) {
440 rnode->key[j] = node->key[i];
441 rnode->value[j] = node->value[i];
442 rnode->subtree[j] = node->subtree[i];
443
444 /*
445 * Fix parent links in subtrees.
446 */
447 if (rnode->subtree[j])
448 rnode->subtree[j]->parent = rnode;
449
450 }
451 rnode->subtree[j] = node->subtree[i];
452 if (rnode->subtree[j])
453 rnode->subtree[j]->parent = rnode;
454
455 rnode->keys = j; /* Set number of keys of the new node. */
456 node->keys /= 2; /* Shrink the old node. */
457
458 return rnode;
459}
460
461/** Remove key and its left subtree pointer from B-tree node.
462 *
463 * Remove the key and eliminate gaps in node->key array.
464 * Note that the value pointer and the left subtree pointer
465 * is removed from the node as well.
466 *
467 * @param node B-tree node.
468 * @param key Key to be removed.
469 */
470void node_remove_key_left(btree_node_t *node, __native key)
471{
472 int i, j;
473
474 for (i = 0; i < node->keys; i++) {
475 if (key == node->key[i]) {
476 for (j = i + 1; j < node->keys; j++) {
477 node->key[j - 1] = node->key[j];
478 node->value[j - 1] = node->value[j];
479 node->subtree[j - 1] = node->subtree[j];
480 }
481 node->subtree[j - 1] = node->subtree[j];
482 node->keys--;
483 return;
484 }
485 }
486 panic("node %P does not contain key %d\n", node, key);
487}
488
489/** Remove key and its right subtree pointer from B-tree node.
490 *
491 * Remove the key and eliminate gaps in node->key array.
492 * Note that the value pointer and the right subtree pointer
493 * is removed from the node as well.
494 *
495 * @param node B-tree node.
496 * @param key Key to be removed.
497 */
498void node_remove_key_right(btree_node_t *node, __native key)
499{
500 int i, j;
501
502 for (i = 0; i < node->keys; i++) {
503 if (key == node->key[i]) {
504 for (j = i + 1; j < node->keys; j++) {
505 node->key[j - 1] = node->key[j];
506 node->value[j - 1] = node->value[j];
507 node->subtree[j] = node->subtree[j + 1];
508 }
509 node->keys--;
510 return;
511 }
512 }
513 panic("node %P does not contain key %d\n", node, key);
514}
515
516/** Find key by its left or right subtree.
517 *
518 * @param node B-tree node.
519 * @param subtree Left or right subtree of a key found in node.
520 * @param right If true, subtree is a right subtree. If false, subtree is a left subtree.
521 *
522 * @return Index of the key associated with the subtree.
523 */
524index_t find_key_by_subtree(btree_node_t *node, btree_node_t *subtree, bool right)
525{
526 int i;
527
528 for (i = 0; i < node->keys + 1; i++) {
529 if (subtree == node->subtree[i])
530 return i - (int) (right != false);
531 }
532 panic("node %P does not contain subtree %P\n", node, subtree);
533}
534
535/** Insert key-value-rsubtree triplet and rotate the node to the left, if this operation can be done.
536 *
537 * Left sibling of the node (if it exists) is checked for free space.
538 * If there is free space, the key is inserted and the smallest key of
539 * the node is moved there. The index node which is the parent of both
540 * nodes is fixed.
541 *
542 * @param node B-tree node.
543 * @param inskey Key to be inserted.
544 * @param insvalue Value to be inserted.
545 * @param rsubtree Right subtree of inskey.
546 *
547 * @return True if the rotation was performed, false otherwise.
548 */
549bool try_insert_by_left_rotation(btree_node_t *node, __native inskey, void *insvalue, btree_node_t *rsubtree)
550{
551 index_t idx;
552 btree_node_t *lnode;
553
554 /*
555 * If this is root node, the rotation can not be done.
556 */
557 if (ROOT_NODE(node))
558 return false;
559
560 idx = find_key_by_subtree(node->parent, node, true);
561 if ((int) idx == -1) {
562 /*
563 * If this node is the leftmost subtree of its parent,
564 * the rotation can not be done.
565 */
566 return false;
567 }
568
569 lnode = node->parent->subtree[idx];
570
571 if (lnode->keys < BTREE_MAX_KEYS) {
572 __native key;
573
574 /*
575 * The rotaion can be done. The left sibling has free space.
576 */
577
578 node_insert_key_right(node, inskey, insvalue, rsubtree);
579 key = node->key[0];
580
581 if (LEAF_NODE(node)) {
582 void *value;
583
584 value = node->value[0];
585 node_remove_key_left(node, key);
586 node_insert_key_right(lnode, key, value, NULL);
587 node->parent->key[idx] = node->key[0];
588 } else {
589 btree_node_t *lsubtree;
590
591 lsubtree = node->subtree[0];
592 node_remove_key_left(node, key);
593 node_insert_key_right(lnode, node->parent->key[idx], NULL, lsubtree);
594 node->parent->key[idx] = key;
595
596 /* Fix parent link of the reconnected left subtree. */
597 lsubtree->parent = lnode;
598 }
599 return true;
600 }
601
602 return false;
603}
604
605/** Insert key-value-rsubtree triplet and rotate the node to the right, if this operation can be done.
606 *
607 * Right sibling of the node (if it exists) is checked for free space.
608 * If there is free space, the key is inserted and the biggest key of
609 * the node is moved there. The index node which is the parent of both
610 * nodes is fixed.
611 *
612 * @param node B-tree node.
613 * @param inskey Key to be inserted.
614 * @param insvalue Value to be inserted.
615 * @param rsubtree Right subtree of inskey.
616 *
617 * @return True if the rotation was performed, false otherwise.
618 */
619bool try_insert_by_right_rotation(btree_node_t *node, __native inskey, void *insvalue, btree_node_t *rsubtree)
620{
621 index_t idx;
622 btree_node_t *rnode;
623
624 /*
625 * If this is root node, the rotation can not be done.
626 */
627 if (ROOT_NODE(node))
628 return false;
629
630 idx = find_key_by_subtree(node->parent, node, false);
631 if (idx == node->parent->keys) {
632 /*
633 * If this node is the rightmost subtree of its parent,
634 * the rotation can not be done.
635 */
636 return false;
637 }
638
639 rnode = node->parent->subtree[idx + 1];
640
641 if (rnode->keys < BTREE_MAX_KEYS) {
642 __native key;
643
644 /*
645 * The rotaion can be done. The right sibling has free space.
646 */
647
648 node_insert_key_right(node, inskey, insvalue, rsubtree);
649 key = node->key[node->keys - 1];
650
651 if (LEAF_NODE(node)) {
652 void *value;
653
654 value = node->value[node->keys - 1];
655 node_remove_key_right(node, key);
656 node_insert_key_left(rnode, key, value, NULL);
657 node->parent->key[idx] = key;
658 } else {
659 btree_node_t *rsubt;
660
661 rsubt = node->subtree[node->keys];
662 node_remove_key_right(node, key);
663 node_insert_key_left(rnode, node->parent->key[idx], NULL, rsubt);
664 node->parent->key[idx] = key;
665
666 /* Fix parent link of the reconnected right subtree. */
667 rsubt->parent = rnode;
668 }
669 return true;
670 }
671
672 return false;
673}
674
675/** Print B-tree.
676 *
677 * @param t Print out B-tree.
678 */
679void btree_print(btree_t *t)
680{
681 int i, depth = t->root->depth;
682 link_t head;
683
684 list_initialize(&head);
685 list_append(&t->root->bfs_link, &head);
686
687 /*
688 * Use BFS search to print out the tree.
689 * Levels are distinguished from one another by node->depth.
690 */
691 while (!list_empty(&head)) {
692 link_t *hlp;
693 btree_node_t *node;
694
695 hlp = head.next;
696 ASSERT(hlp != &head);
697 node = list_get_instance(hlp, btree_node_t, bfs_link);
698 list_remove(hlp);
699
700 ASSERT(node);
701
702 if (node->depth != depth) {
703 printf("\n");
704 depth = node->depth;
705 }
706
707 printf("(");
708 for (i = 0; i < node->keys; i++) {
709 printf("%d,", node->key[i]);
710 if (node->depth && node->subtree[i]) {
711 list_append(&node->subtree[i]->bfs_link, &head);
712 }
713 }
714 if (node->depth && node->subtree[i]) {
715 list_append(&node->subtree[i]->bfs_link, &head);
716 }
717 printf(")");
718 }
719 printf("\n");
720}
Note: See TracBrowser for help on using the repository browser.