[018d957e] | 1 | /*
|
---|
| 2 | * Copyright (C) 2006 Jakub Jermar
|
---|
| 3 | * All rights reserved.
|
---|
| 4 | *
|
---|
| 5 | * Redistribution and use in source and binary forms, with or without
|
---|
| 6 | * modification, are permitted provided that the following conditions
|
---|
| 7 | * are met:
|
---|
| 8 | *
|
---|
| 9 | * - Redistributions of source code must retain the above copyright
|
---|
| 10 | * notice, this list of conditions and the following disclaimer.
|
---|
| 11 | * - Redistributions in binary form must reproduce the above copyright
|
---|
| 12 | * notice, this list of conditions and the following disclaimer in the
|
---|
| 13 | * documentation and/or other materials provided with the distribution.
|
---|
| 14 | * - The name of the author may not be used to endorse or promote products
|
---|
| 15 | * derived from this software without specific prior written permission.
|
---|
| 16 | *
|
---|
| 17 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
---|
| 18 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
---|
| 19 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
---|
| 20 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
---|
| 21 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
---|
| 22 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
---|
| 23 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
---|
| 24 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
---|
| 25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
---|
| 26 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
---|
| 27 | */
|
---|
| 28 |
|
---|
| 29 | /*
|
---|
| 30 | * This B-tree has the following properties:
|
---|
| 31 | * - it is a ballanced 2-3-4 tree (i.e. M = 4)
|
---|
| 32 | * - values (i.e. pointers to values) are stored only in leaves
|
---|
| 33 | * - leaves are linked in a list
|
---|
| 34 | * - technically, it is a B+-tree (because of the previous properties)
|
---|
| 35 | *
|
---|
| 36 | * Some of the functions below take pointer to the right-hand
|
---|
| 37 | * side subtree pointer as parameter. Note that this is sufficient
|
---|
| 38 | * because:
|
---|
| 39 | * - New root node is passed the left-hand side subtree pointer
|
---|
| 40 | * directly.
|
---|
| 41 | * - node_split() always creates the right sibling and preserves
|
---|
| 42 | * the original node (which becomes the left sibling).
|
---|
| 43 | * There is always pointer to the left-hand side subtree
|
---|
| 44 | * (i.e. left sibling) in the parent node.
|
---|
| 45 | */
|
---|
| 46 |
|
---|
| 47 | #include <adt/btree.h>
|
---|
| 48 | #include <adt/list.h>
|
---|
| 49 | #include <mm/slab.h>
|
---|
| 50 | #include <debug.h>
|
---|
| 51 | #include <panic.h>
|
---|
| 52 | #include <typedefs.h>
|
---|
| 53 | #include <print.h>
|
---|
| 54 |
|
---|
| 55 | static void _btree_insert(btree_t *t, __native key, void *value, btree_node_t *rsubtree, btree_node_t *node);
|
---|
| 56 | static void node_initialize(btree_node_t *node);
|
---|
| 57 | static void node_insert_key(btree_node_t *node, __native key, void *value, btree_node_t *rsubtree);
|
---|
| 58 | static btree_node_t *node_split(btree_node_t *node, __native key, void *value, btree_node_t *rsubtree, __native *median);
|
---|
| 59 |
|
---|
| 60 | #define ROOT_NODE(n) (!(n)->parent)
|
---|
| 61 | #define INDEX_NODE(n) ((n)->subtree[0] != NULL)
|
---|
| 62 | #define LEAF_NODE(n) ((n)->subtree[0] == NULL)
|
---|
| 63 |
|
---|
| 64 | #define MEDIAN_LOW_INDEX(n) (((n)->keys-1)/2)
|
---|
| 65 | #define MEDIAN_HIGH_INDEX(n) ((n)->keys/2)
|
---|
| 66 | #define MEDIAN_LOW(n) ((n)->key[MEDIAN_LOW_INDEX((n))]);
|
---|
| 67 | #define MEDIAN_HIGH(n) ((n)->key[MEDIAN_HIGH_INDEX((n))]);
|
---|
| 68 |
|
---|
| 69 | /** Create empty B-tree.
|
---|
| 70 | *
|
---|
| 71 | * @param t B-tree.
|
---|
| 72 | */
|
---|
| 73 | void btree_create(btree_t *t)
|
---|
| 74 | {
|
---|
| 75 | list_initialize(&t->leaf_head);
|
---|
| 76 | t->root = (btree_node_t *) malloc(sizeof(btree_node_t), 0);
|
---|
| 77 | node_initialize(t->root);
|
---|
| 78 | list_append(&t->root->leaf_link, &t->leaf_head);
|
---|
| 79 | }
|
---|
| 80 |
|
---|
| 81 | /** Destroy empty B-tree. */
|
---|
| 82 | void btree_destroy(btree_t *t)
|
---|
| 83 | {
|
---|
| 84 | ASSERT(!t->root->keys);
|
---|
| 85 | free(t->root);
|
---|
| 86 | }
|
---|
| 87 |
|
---|
| 88 | /** Insert key-value pair into B-tree.
|
---|
| 89 | *
|
---|
| 90 | * @param t B-tree.
|
---|
| 91 | * @param key Key to be inserted.
|
---|
| 92 | * @param value Value to be inserted.
|
---|
| 93 | * @param leaf_node Leaf node where the insertion should begin.
|
---|
| 94 | */
|
---|
| 95 | void btree_insert(btree_t *t, __native key, void *value, btree_node_t *leaf_node)
|
---|
| 96 | {
|
---|
| 97 | btree_node_t *lnode;
|
---|
| 98 |
|
---|
| 99 | ASSERT(value);
|
---|
| 100 |
|
---|
| 101 | lnode = leaf_node;
|
---|
| 102 | if (!lnode) {
|
---|
| 103 | if (btree_search(t, key, &lnode)) {
|
---|
| 104 | panic("B-tree %P already contains key %d\n", t, key);
|
---|
| 105 | }
|
---|
| 106 | }
|
---|
| 107 |
|
---|
| 108 | _btree_insert(t, key, value, NULL, lnode);
|
---|
| 109 | }
|
---|
| 110 |
|
---|
| 111 | /** Recursively insert into B-tree.
|
---|
| 112 | *
|
---|
| 113 | * @param t B-tree.
|
---|
| 114 | * @param key Key to be inserted.
|
---|
| 115 | * @param value Value to be inserted.
|
---|
| 116 | * @param rsubtree Right subtree of the inserted key.
|
---|
| 117 | * @param node Start inserting into this node.
|
---|
| 118 | */
|
---|
| 119 | void _btree_insert(btree_t *t, __native key, void *value, btree_node_t *rsubtree, btree_node_t *node)
|
---|
| 120 | {
|
---|
| 121 | if (node->keys < BTREE_MAX_KEYS) {
|
---|
| 122 | /*
|
---|
| 123 | * Node conatins enough space, the key can be stored immediately.
|
---|
| 124 | */
|
---|
| 125 | node_insert_key(node, key, value, rsubtree);
|
---|
| 126 | } else {
|
---|
| 127 | btree_node_t *rnode;
|
---|
| 128 | __native median;
|
---|
| 129 |
|
---|
| 130 | /*
|
---|
| 131 | * Node is full.
|
---|
| 132 | * Split it and insert the smallest key from the node containing
|
---|
| 133 | * bigger keys (i.e. the original node) into its parent.
|
---|
| 134 | */
|
---|
| 135 |
|
---|
| 136 | rnode = node_split(node, key, value, rsubtree, &median);
|
---|
| 137 |
|
---|
| 138 | if (LEAF_NODE(node)) {
|
---|
| 139 | list_append(&rnode->leaf_link, &node->leaf_link);
|
---|
| 140 | }
|
---|
| 141 |
|
---|
| 142 | if (ROOT_NODE(node)) {
|
---|
| 143 | /*
|
---|
| 144 | * We split the root node. Create new root.
|
---|
| 145 | */
|
---|
| 146 |
|
---|
| 147 | t->root = (btree_node_t *) malloc(sizeof(btree_node_t), 0);
|
---|
| 148 | node->parent = t->root;
|
---|
| 149 | rnode->parent = t->root;
|
---|
| 150 | node_initialize(t->root);
|
---|
| 151 |
|
---|
| 152 | /*
|
---|
| 153 | * Left-hand side subtree will be the old root (i.e. node).
|
---|
| 154 | * Right-hand side subtree will be rnode.
|
---|
| 155 | */
|
---|
| 156 | t->root->subtree[0] = node;
|
---|
| 157 |
|
---|
| 158 | t->root->depth = node->depth + 1;
|
---|
| 159 | }
|
---|
| 160 | _btree_insert(t, median, NULL, rnode, node->parent);
|
---|
| 161 | }
|
---|
| 162 |
|
---|
| 163 | }
|
---|
| 164 |
|
---|
| 165 | /* TODO */
|
---|
| 166 | void btree_remove(btree_t *t, __native key)
|
---|
| 167 | {
|
---|
| 168 | }
|
---|
| 169 |
|
---|
| 170 | /** Search key in a B-tree.
|
---|
| 171 | *
|
---|
| 172 | * @param t B-tree.
|
---|
| 173 | * @param key Key to be searched.
|
---|
| 174 | * @param leaf_node Address where to put pointer to visited leaf node.
|
---|
| 175 | *
|
---|
| 176 | * @return Pointer to value or NULL if there is no such key.
|
---|
| 177 | */
|
---|
| 178 | void *btree_search(btree_t *t, __native key, btree_node_t **leaf_node)
|
---|
| 179 | {
|
---|
| 180 | btree_node_t *cur, *next;
|
---|
| 181 | void *val = NULL;
|
---|
| 182 |
|
---|
| 183 | /*
|
---|
| 184 | * Iteratively descend to the leaf that can contain searched key.
|
---|
| 185 | */
|
---|
| 186 | for (cur = t->root; cur; cur = next) {
|
---|
| 187 | int i;
|
---|
| 188 |
|
---|
| 189 | /* Last iteration will set this with proper leaf node address. */
|
---|
| 190 | *leaf_node = cur;
|
---|
| 191 | for (i = 0; i < cur->keys; i++) {
|
---|
| 192 | if (key <= cur->key[i]) {
|
---|
| 193 | val = cur->value[i];
|
---|
| 194 | next = cur->subtree[i];
|
---|
| 195 |
|
---|
| 196 | /*
|
---|
| 197 | * Check if there is anywhere to descend.
|
---|
| 198 | */
|
---|
| 199 | if (!next) {
|
---|
| 200 | /*
|
---|
| 201 | * Leaf-level.
|
---|
| 202 | */
|
---|
| 203 | return (key == cur->key[i]) ? val : NULL;
|
---|
| 204 | }
|
---|
| 205 | goto descend;
|
---|
| 206 | }
|
---|
| 207 | }
|
---|
| 208 | next = cur->subtree[i];
|
---|
| 209 | descend:
|
---|
| 210 | ;
|
---|
| 211 | }
|
---|
| 212 |
|
---|
| 213 | /*
|
---|
| 214 | * The key was not found in the *leaf_node and is greater than any of its keys.
|
---|
| 215 | */
|
---|
| 216 | return NULL;
|
---|
| 217 | }
|
---|
| 218 |
|
---|
| 219 | /** Get pointer to value with the smallest key within the node.
|
---|
| 220 | *
|
---|
| 221 | * Can be only used on leaf-level nodes.
|
---|
| 222 | *
|
---|
| 223 | * @param node B-tree node.
|
---|
| 224 | *
|
---|
| 225 | * @return Pointer to value assiciated with the smallest key.
|
---|
| 226 | */
|
---|
| 227 | void *btree_node_min(btree_node_t *node)
|
---|
| 228 | {
|
---|
| 229 | ASSERT(LEAF_NODE(node));
|
---|
| 230 | ASSERT(node->keys);
|
---|
| 231 | return node->value[0];
|
---|
| 232 | }
|
---|
| 233 |
|
---|
| 234 | /** Get pointer to value with the biggest key within the node.
|
---|
| 235 | *
|
---|
| 236 | * Can be only used on leaf-level nodes.
|
---|
| 237 | *
|
---|
| 238 | * @param node B-tree node.
|
---|
| 239 | *
|
---|
| 240 | * @return Pointer to value assiciated with the biggest key.
|
---|
| 241 | */
|
---|
| 242 | void *btree_node_max(btree_node_t *node)
|
---|
| 243 | {
|
---|
| 244 | ASSERT(LEAF_NODE(node));
|
---|
| 245 | ASSERT(node->keys);
|
---|
| 246 | return node->value[node->keys - 1];
|
---|
| 247 | }
|
---|
| 248 |
|
---|
| 249 | /** Initialize B-tree node.
|
---|
| 250 | *
|
---|
| 251 | * @param node B-tree node.
|
---|
| 252 | */
|
---|
| 253 | void node_initialize(btree_node_t *node)
|
---|
| 254 | {
|
---|
| 255 | int i;
|
---|
| 256 |
|
---|
| 257 | node->keys = 0;
|
---|
| 258 |
|
---|
| 259 | /* Clean also space for the extra key. */
|
---|
| 260 | for (i = 0; i < BTREE_MAX_KEYS + 1; i++) {
|
---|
| 261 | node->key[i] = 0;
|
---|
| 262 | node->value[i] = NULL;
|
---|
| 263 | node->subtree[i] = NULL;
|
---|
| 264 | }
|
---|
| 265 | node->subtree[i] = NULL;
|
---|
| 266 |
|
---|
| 267 | node->parent = NULL;
|
---|
| 268 |
|
---|
| 269 | link_initialize(&node->leaf_link);
|
---|
| 270 |
|
---|
| 271 | link_initialize(&node->bfs_link);
|
---|
| 272 | node->depth = 0;
|
---|
| 273 | }
|
---|
| 274 |
|
---|
| 275 | /** Insert key-value-left-subtree triplet into B-tree non-full node.
|
---|
| 276 | *
|
---|
| 277 | * It is actually possible to have more keys than BTREE_MAX_KEYS.
|
---|
| 278 | * This feature is used during splitting the node when the
|
---|
| 279 | * number of keys is BTREE_MAX_KEYS + 1.
|
---|
| 280 | *
|
---|
| 281 | * @param node B-tree node into wich the new key is to be inserted.
|
---|
| 282 | * @param key The key to be inserted.
|
---|
| 283 | * @param value Pointer to value to be inserted.
|
---|
| 284 | * @param rsubtree Pointer to the right subtree.
|
---|
| 285 | */
|
---|
| 286 | void node_insert_key(btree_node_t *node, __native key, void *value, btree_node_t *rsubtree)
|
---|
| 287 | {
|
---|
| 288 | int i;
|
---|
| 289 |
|
---|
| 290 | for (i = 0; i < node->keys; i++) {
|
---|
| 291 | if (key < node->key[i]) {
|
---|
| 292 | int j;
|
---|
| 293 |
|
---|
| 294 | for (j = node->keys; j > i; j--) {
|
---|
| 295 | node->key[j] = node->key[j - 1];
|
---|
| 296 | node->value[j] = node->value[j - 1];
|
---|
| 297 | node->subtree[j + 1] = node->subtree[j];
|
---|
| 298 | }
|
---|
| 299 | break;
|
---|
| 300 | }
|
---|
| 301 | }
|
---|
| 302 |
|
---|
| 303 | node->key[i] = key;
|
---|
| 304 | node->value[i] = value;
|
---|
| 305 | node->subtree[i + 1] = rsubtree;
|
---|
| 306 |
|
---|
| 307 | node->keys++;
|
---|
| 308 | }
|
---|
| 309 |
|
---|
| 310 | /** Split full B-tree node and insert new key-value-left-subtree triplet.
|
---|
| 311 | *
|
---|
| 312 | * This function will split a node and return pointer to a newly created
|
---|
| 313 | * node containing keys greater than the lesser of medians (or median)
|
---|
| 314 | * of the old keys and the newly added key. It will also write the
|
---|
| 315 | * median key to a memory address supplied by the caller.
|
---|
| 316 | *
|
---|
| 317 | * If the node being split is an index node, the median will be
|
---|
| 318 | * removed from the original node. If the node is a leaf node,
|
---|
| 319 | * the median will be preserved.
|
---|
| 320 | *
|
---|
| 321 | * @param node B-tree node wich is going to be split.
|
---|
| 322 | * @param key The key to be inserted.
|
---|
| 323 | * @param value Pointer to the value to be inserted.
|
---|
| 324 | * @param rsubtree Pointer to the right subtree of the key being added.
|
---|
| 325 | * @param median Address in memory, where the median key will be stored.
|
---|
| 326 | *
|
---|
| 327 | * @return Newly created right sibling of node.
|
---|
| 328 | */
|
---|
| 329 | btree_node_t *node_split(btree_node_t *node, __native key, void *value, btree_node_t *rsubtree, __native *median)
|
---|
| 330 | {
|
---|
| 331 | btree_node_t *rnode;
|
---|
| 332 | int i, j;
|
---|
| 333 |
|
---|
| 334 | ASSERT(median);
|
---|
| 335 | ASSERT(node->keys == BTREE_MAX_KEYS);
|
---|
| 336 |
|
---|
| 337 | /*
|
---|
| 338 | * Use the extra space to store the extra node.
|
---|
| 339 | */
|
---|
| 340 | node_insert_key(node, key, value, rsubtree);
|
---|
| 341 |
|
---|
| 342 | /*
|
---|
| 343 | * Compute median of keys.
|
---|
| 344 | */
|
---|
| 345 | *median = MEDIAN_LOW(node);
|
---|
| 346 |
|
---|
| 347 | rnode = (btree_node_t *) malloc(sizeof(btree_node_t), 0);
|
---|
| 348 | node_initialize(rnode);
|
---|
| 349 | rnode->parent = node->parent;
|
---|
| 350 | rnode->depth = node->depth;
|
---|
| 351 |
|
---|
| 352 | /*
|
---|
| 353 | * Copy big keys, values and subtree pointers to the new right sibling.
|
---|
| 354 | */
|
---|
| 355 | for (i = MEDIAN_LOW_INDEX(node) + 1, j = 0; i < node->keys; i++, j++) {
|
---|
| 356 | rnode->key[j] = node->key[i];
|
---|
| 357 | rnode->value[j] = node->value[i];
|
---|
| 358 | rnode->subtree[j] = node->subtree[i];
|
---|
| 359 |
|
---|
| 360 | /*
|
---|
| 361 | * Fix parent links in subtrees.
|
---|
| 362 | */
|
---|
| 363 | if (rnode->subtree[j])
|
---|
| 364 | rnode->subtree[j]->parent = rnode;
|
---|
| 365 |
|
---|
| 366 | }
|
---|
| 367 | rnode->subtree[j] = node->subtree[i];
|
---|
| 368 | if (rnode->subtree[j])
|
---|
| 369 | rnode->subtree[j]->parent = rnode;
|
---|
| 370 | rnode->keys = j;
|
---|
| 371 |
|
---|
| 372 | /*
|
---|
| 373 | * Shrink the old node.
|
---|
| 374 | * If this is an index node, remove the median.
|
---|
| 375 | */
|
---|
| 376 | node->keys = MEDIAN_LOW_INDEX(node) + 1;
|
---|
| 377 | if (INDEX_NODE(node))
|
---|
| 378 | node->keys--;
|
---|
| 379 |
|
---|
| 380 | return rnode;
|
---|
| 381 | }
|
---|
| 382 |
|
---|
| 383 | /** Print B-tree.
|
---|
| 384 | *
|
---|
| 385 | * @param t Print out B-tree.
|
---|
| 386 | */
|
---|
| 387 | void btree_print(btree_t *t)
|
---|
| 388 | {
|
---|
| 389 | int i, depth = t->root->depth;
|
---|
| 390 | link_t head;
|
---|
| 391 |
|
---|
| 392 | list_initialize(&head);
|
---|
| 393 | list_append(&t->root->bfs_link, &head);
|
---|
| 394 |
|
---|
| 395 | /*
|
---|
| 396 | * Use BFS search to print out the tree.
|
---|
| 397 | * Levels are distinguished from one another by node->depth.
|
---|
| 398 | */
|
---|
| 399 | while (!list_empty(&head)) {
|
---|
| 400 | link_t *hlp;
|
---|
| 401 | btree_node_t *node;
|
---|
| 402 |
|
---|
| 403 | hlp = head.next;
|
---|
| 404 | ASSERT(hlp != &head);
|
---|
| 405 | node = list_get_instance(hlp, btree_node_t, bfs_link);
|
---|
| 406 | list_remove(hlp);
|
---|
| 407 |
|
---|
| 408 | ASSERT(node);
|
---|
| 409 |
|
---|
| 410 | if (node->depth != depth) {
|
---|
| 411 | printf("\n");
|
---|
| 412 | depth = node->depth;
|
---|
| 413 | }
|
---|
| 414 |
|
---|
| 415 | printf("(");
|
---|
| 416 | for (i = 0; i < node->keys; i++) {
|
---|
| 417 | printf("%d,", node->key[i]);
|
---|
| 418 | if (node->depth && node->subtree[i]) {
|
---|
| 419 | list_append(&node->subtree[i]->bfs_link, &head);
|
---|
| 420 | }
|
---|
| 421 | }
|
---|
| 422 | if (node->depth && node->subtree[i]) {
|
---|
| 423 | list_append(&node->subtree[i]->bfs_link, &head);
|
---|
| 424 | }
|
---|
| 425 | printf(")");
|
---|
| 426 | }
|
---|
| 427 | printf("\n");
|
---|
| 428 | }
|
---|