diff --git a/sandbox/btree/btree.c b/sandbox/btree/btree.c index 12de68a..f57c8b7 100644 --- a/sandbox/btree/btree.c +++ b/sandbox/btree/btree.c @@ -1,3 +1,62 @@ +/* + The Clear BSD License + + Copyright (c) 2023 Max Wash + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted (subject to the limitations in the disclaimer + below) provided that the following conditions are met: + + - Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + - Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from this + software without specific prior written permission. + */ + +/* templated AVL binary tree implementation + + this file implements an extensible AVL binary tree data structure. + + the primary rule of an AVL binary tree is that for a given node N, + the heights of N's left and right subtrees can differ by at most 1. + + the height of a subtree is the length of the longest path between + the root of the subtree and a leaf node, including the root node itself. + + the height of a leaf node is 1. + + when a node is inserted into or deleted from the tree, this rule may + be broken, in which case the tree must be rotated to restore the balance. + + no more than one rotation is required for any insert operation, + while multiple rotations may be required for a delete operation. 
+ + there are four types of rotations that can be applied to a tree: + - left rotation + - right rotation + - double left rotations + - double right rotations + + by enforcing the balance rule, for a tree with n nodes, the worst-case + performance for insert, delete, and search operations is guaranteed + to be O(log n). + + this file intentionally excludes any kind of search function implementation. + it is up to the programmer to implement their own tree node type + using btree_node_t, and their own search function using btree_t. + this allows the programmer to define their own node types with complex + non-integer key types. btree.h contains a number of macros to help + define these functions. the macros do all the work, you just have to + provide a comparator function. +*/ + #include #include #include @@ -43,6 +102,32 @@ static inline int bf(btree_node_t *x) return bf; } +/* perform a left rotation on a subtree + + if you have a tree like this: + + Z + / \ + X . + / \ + . Y + / \ + . . + + and you perform a left rotation on node X, + you will get the following tree: + + Z + / \ + Y . + / \ + X . + / \ + . . + + note that this function does NOT update b_height for the rotated + nodes. it is up to you to call update_height_to_root(). +*/ static void rotate_left(btree_t *tree, btree_node_t *x) { assert(x != NULL); @@ -84,6 +169,32 @@ static void update_height_to_root(btree_node_t *x) } } +/* perform a right rotation on a subtree + + if you have a tree like this: + + Z + / \ + . X + / \ + Y . + / \ + . . + + and you perform a right rotation on node X, + you will get the following tree: + + Z + / \ + . Y + / \ + . X + / \ + . . + + note that this function does NOT update b_height for the rotated + nodes. it is up to you to call update_height_to_root(). 
+*/ static void rotate_right(btree_t *tree, btree_node_t *y) { assert(y); @@ -117,6 +228,33 @@ static void rotate_right(btree_t *tree, btree_node_t *y) x->b_parent = p; } +/* for a given node Z, perform a right rotation on Z's right child, + followed by a left rotation on Z itself. + + if you have a tree like this: + + Z + / \ + . X + / \ + Y . + / \ + . . + + and you perform a double-left rotation on node Z, + you will get the following tree: + + Y + / \ + / \ + Z X + / \ / \ + . . . . + + note that, unlike rotate_left and rotate_right, this function + DOES update b_height for the rotated nodes (since it needs to be + done in a certain order). +*/ static void rotate_double_left(btree_t *tree, btree_node_t *z) { btree_node_t *x = z->b_right; @@ -134,6 +272,33 @@ static void rotate_double_left(btree_t *tree, btree_node_t *z) } } +/* for a given node Z, perform a left rotation on Z's left child, + followed by a right rotation on Z itself. + + if you have a tree like this: + + Z + / \ + X . + / \ + . Y + / \ + . . + + and you perform a double-right rotation on node Z, + you will get the following tree: + + Y + / \ + / \ + X Z + / \ / \ + . . . . + + note that, unlike rotate_left and rotate_right, this function + DOES update b_height for the rotated nodes (since it needs to be + done in a certain order). +*/ static void rotate_double_right(btree_t *tree, btree_node_t *z) { btree_node_t *x = z->b_left; @@ -151,9 +316,20 @@ static void rotate_double_right(btree_t *tree, btree_node_t *z) } } +/* run after an insert operation. checks that the balance factor + of the local subtree is within the range -1 <= BF <= 1. if it + is not, rotate the subtree to restore balance. + + note that at most one rotation should be required after a node + is inserted into the tree. + + this function depends on all nodes in the tree having + correct b_height values. 
+ + @param w the node that was just inserted into the tree +*/ static void insert_fixup(btree_t *tree, btree_node_t *w) { - int nr_rotations = 0; btree_node_t *z = NULL, *y = NULL, *x = NULL; z = w; @@ -181,7 +357,6 @@ static void insert_fixup(btree_t *tree, btree_node_t *w) update_height_to_root(z); } } - nr_rotations++; next_ancestor: x = y; @@ -190,19 +365,35 @@ next_ancestor: } } +/* run after a delete operation. checks that the balance factor + of the local subtree is within the range -1 <= BF <= 1. if it + is not, rotate the subtree to restore balance. + + note that, unlike insert_fixup, multiple rotations may be required + to restore balance after a node is deleted. + + this function depends on all nodes in the tree having + correct b_height values. + + @param w one of the following: + - the parent of the node that was deleted if the node + had no children. + - the parent of the node that replaced the deleted node + if the deleted node had two children. + - the node that replaced the node that was deleted, if + the node that was deleted had one child. +*/ static void delete_fixup(btree_t *tree, btree_node_t *w) { btree_node_t *z = w; - int nr_rotations = 0; - while (z) { if (bf(z) > 1) { if (bf(z->b_right) >= 0) { rotate_left(tree, z); update_height_to_root(z); } else { - rotate_double_left(tree, z); // <== + rotate_double_left(tree, z); } } else if (bf(z) < -1) { if (bf(z->b_left) <= 0) { @@ -214,10 +405,14 @@ static void delete_fixup(btree_t *tree, btree_node_t *w) } z = z->b_parent; - nr_rotations++; } } +/* updates b_height for all nodes between the inserted node and the root + of the tree, and calls insert_fixup. + + @param node the node that was just inserted into the tree. +*/ void btree_insert_fixup(btree_t *tree, btree_node_t *node) { node->b_height = 0; @@ -231,6 +426,15 @@ void btree_insert_fixup(btree_t *tree, btree_node_t *node) insert_fixup(tree, node); } +/* remove a node from a tree. 
+ + this function assumes that `node` has no children, and therefore + doesn't need to be replaced. + + updates b_height for all nodes between `node` and the tree root. + + @param node the node to delete. +*/ static btree_node_t *remove_node_with_no_children(btree_t *tree, btree_node_t *node) { btree_node_t *w = node->b_parent; @@ -253,6 +457,16 @@ static btree_node_t *remove_node_with_no_children(btree_t *tree, btree_node_t *n return w; } +/* remove a node from a tree. + + this function assumes that `node` has one child. + the child of `node` is inherited by `node`'s parent, and `node` is removed. + + updates b_height for all nodes between the node that replaced + `node` and the tree root. + + @param node the node to delete. +*/ static btree_node_t *replace_node_with_one_subtree(btree_t *tree, btree_node_t *node) { btree_node_t *p = node->b_parent; @@ -286,6 +500,20 @@ static btree_node_t *replace_node_with_one_subtree(btree_t *tree, btree_node_t * return w; } +/* remove a node from a tree. + + this function assumes that `z` has two children. + finds the in-order predecessor Y of `z` (the largest node in `z`'s left sub-tree; NOTE(review): this was previously called the in-order successor, confirm against the code), + removes `z` from the tree and moves Y to where `z` used to be. + + if Y has a child (it will never have more than one), Y's parent inherits + Y's child. + + updates b_height for all nodes between the deepest node that was modified + and the tree root. + + @param z the node to delete. +*/ static btree_node_t *replace_node_with_two_subtrees(btree_t *tree, btree_node_t *z) { /* x will replace z */ @@ -349,6 +577,7 @@ static btree_node_t *replace_node_with_two_subtrees(btree_t *tree, btree_node_t return w; } +/* delete a node from the tree and re-balance it afterwards */ void btree_delete(btree_t *tree, btree_node_t *node) { btree_node_t *w = NULL; @@ -370,6 +599,8 @@ void btree_delete(btree_t *tree, btree_node_t *node) btree_node_t *btree_first(btree_t *tree) { + /* the first node in the tree is the node with the smallest key. 
+ we keep moving left until we can't go any further */ btree_node_t *cur = tree->b_root; if (!cur) { return NULL; @@ -384,6 +615,8 @@ btree_node_t *btree_first(btree_t *tree) btree_node_t *btree_last(btree_t *tree) { + /* the last node in the tree is the node with the largest key. + we keep moving right until we can't go any further */ btree_node_t *cur = tree->b_root; if (!cur) { return NULL; @@ -402,7 +635,18 @@ btree_node_t *btree_next(btree_node_t *node) return NULL; } + /* there are two possibilities for the next node: + + 1. if `node` has a right sub-tree, every node in this sub-tree is bigger + than `node`. the in-order successor of `node` is the smallest node in + this subtree. + 2. if `node` has no right sub-tree, we've reached the largest node in + the sub-tree rooted at `node`. we need to go back to our parent + and continue the search elsewhere. + */ if (node->b_right) { + /* case 1: step into `node`'s right sub-tree and keep going + left to find the smallest node */ btree_node_t *cur = node->b_right; while (cur->b_left) { cur = cur->b_left; @@ -411,6 +655,10 @@ btree_node_t *btree_next(btree_node_t *node) return cur; } + /* case 2: keep stepping back up towards the root of the tree. + if we encounter a step where we are our parent's left child, + we've found a parent with a value larger than us. this parent + is the in-order successor of `node` */ while (node->b_parent && node->b_parent->b_left != node) { node = node->b_parent; } @@ -424,7 +672,18 @@ btree_node_t *btree_prev(btree_node_t *node) return NULL; } + /* there are two possibilities for the previous node: + + 1. if `node` has a left sub-tree, every node in this sub-tree is smaller + than `node`. the in-order predecessor of `node` is the largest node in + this subtree. + 2. if `node` has no left sub-tree, we've reached the smallest node in + the sub-tree rooted at `node`. we need to go back to our parent + and continue the search elsewhere. 
+ */ if (node->b_left) { + /* case 1: step into `node`'s left sub-tree and keep going + right to find the largest node */ btree_node_t *cur = node->b_left; while (cur->b_right) { cur = cur->b_right; @@ -433,6 +692,10 @@ btree_node_t *btree_prev(btree_node_t *node) return cur; } + /* case 2: keep stepping back up towards the root of the tree. + if we encounter a step where we are our parent's right child, + we've found a parent with a value smaller than us. this parent + is the in-order predecessor of `node`. */ while (node->b_parent && node->b_parent->b_right != node) { node = node->b_parent; } diff --git a/sandbox/btree/include/socks/btree.h b/sandbox/btree/include/socks/btree.h index d5e1970..4767e84 100644 --- a/sandbox/btree/include/socks/btree.h +++ b/sandbox/btree/include/socks/btree.h @@ -1,10 +1,63 @@ +/* + The Clear BSD License + + Copyright (c) 2023 Max Wash + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted (subject to the limitations in the disclaimer + below) provided that the following conditions are met: + + - Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + - Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from this + software without specific prior written permission. + */ + #ifndef SOCKS_BTREE_H_ #define SOCKS_BTREE_H_ #include +/* if your custom structure contains a btree_node_t (i.e. 
it can be part of a btree), + you can use this macro to convert a btree_node_t* to a your_type* + + @param t the name of your custom type (something that can be passed to offsetof) + @param m the name of the btree_node_t member variable within your custom type. + @param v the btree_node_t pointer that you wish to convert. if this is NULL, NULL will be returned. +*/ #define BTREE_CONTAINER(t, m, v) ((void *)((v) ? (uintptr_t)(v) - (offsetof(t, m)) : 0)) +/* defines a simple node insertion function. + this function assumes that your nodes have simple integer keys that can be compared with the usual operators. + + EXAMPLE: + if you have a tree node type like this: + + struct my_tree_node { + int key; + btree_node_t base; + } + + You would use the following call to generate an insert function for a tree with this node type: + + BTREE_DEFINE_SIMPLE_INSERT(struct my_tree_node, base, key, my_tree_node_insert); + + Which would emit a function defined like: + + static void my_tree_node_insert(btree_t *tree, struct my_tree_node *node); + + @param node_type your custom tree node type. usually a structure that contains a btree_node_t member. + @param container_node_member the name of the btree_node_t member variable within your custom type. + @param container_key_member the name of the key member variable within your custom type. + @param function_name the name of the function to generate. +*/ #define BTREE_DEFINE_SIMPLE_INSERT(node_type, container_node_member, container_key_member, function_name) \ static void function_name(btree_t *tree, node_type *node) \ { \ @@ -43,6 +96,43 @@ btree_insert_fixup(tree, &node->container_node_member); \ } +/* defines a node insertion function. + this function should be used for trees with complex node keys that cannot be directly compared. + a comparator for your keys must be supplied. 
+ + EXAMPLE: + if you have a tree node type like this: + + struct my_tree_node { + complex_key_t key; + btree_node_t base; + } + + You would need to define a comparator function or macro with the following signature: + + int my_comparator(struct my_tree_node *a, struct my_tree_node *b); + + Which implements the following: + + return -1 if a < b + return 0 if a == b + return 1 if a > b + + You would use the following call to generate an insert function for a tree with this node type: + + BTREE_DEFINE_INSERT(struct my_tree_node, base, key, my_tree_node_insert, my_comparator); + + Which would emit a function defined like: + + static void my_tree_node_insert(btree_t *tree, struct my_tree_node *node); + + @param node_type your custom tree node type. usually a structure that contains a btree_node_t member. + @param container_node_member the name of the btree_node_t member variable within your custom type. + @param container_key_member the name of the key member variable within your custom type. + @param function_name the name of the function to generate. + @param comparator the name of a comparator function or functional-macro that conforms to the + requirements listed above. +*/ #define BTREE_DEFINE_INSERT(node_type, container_node_member, container_key_member, function_name, comparator) \ static void function_name(btree_t *tree, node_type *node) \ { \ @@ -82,6 +172,32 @@ btree_insert_fixup(tree, &node->container_node_member); \ } +/* defines a simple tree search function. + this function assumes that your nodes have simple integer keys that can be compared with the usual operators. 
+ + EXAMPLE: + if you have a tree node type like this: + + struct my_tree_node { + int key; + btree_node_t base; + } + + You would use the following call to generate a search function for a tree with this node type: + + BTREE_DEFINE_SIMPLE_GET(struct my_tree_node, int, base, key, my_tree_node_get); + + Which would emit a function defined like: + + struct my_tree_node *my_tree_node_get(btree_t *tree, int key); + + @param node_type your custom tree node type. usually a structure that contains a btree_node_t member. + @param key_type the type name of the key embedded in your custom tree node type. this type must be + compatible with the builtin comparison operators. + @param container_node_member the name of the btree_node_t member variable within your custom type. + @param container_key_member the name of the key member variable within your custom type. + @param function_name the name of the function to generate. NOTE(review): the macro body below emits a function literally named `get`; confirm that function_name is actually used. +*/ #define BTREE_DEFINE_SIMPLE_GET(node_type, key_type, container_node_member, container_key_member, function_name) \ node_type *get(btree_t *tree, key_type key) \ { \ @@ -100,60 +216,155 @@ node_type *get(btree_t *tree, key_type key) \ return NULL; \ } +/* perform an in-order traversal of a binary tree + + If you have a tree defined like: + + btree_t my_tree; + + with nodes defined like: + + struct my_tree_node { + int key; + btree_node_t base; + } + + and you want to do something like: + + foreach (struct my_tree_node *node : my_tree) { ... } + + you should use this: + + btree_foreach (struct my_tree_node, node, &my_tree, base) { ... } + + @param iter_type the type name of the iterator variable. this should be the tree's node type, and shouldn't be a pointer. + @param iter_name the name of the iterator variable. + @param tree_name a pointer to the tree to traverse. + @param node_member the name of the btree_node_t member variable within the tree node type. 
+*/ #define btree_foreach(iter_type, iter_name, tree_name, node_member) \ for (iter_type *iter_name = BTREE_CONTAINER(iter_type, node_member, btree_first(tree_name)); \ iter_name; \ iter_name = BTREE_CONTAINER(iter_type, node_member, btree_next(&((iter_name)->node_member)))) +/* perform a reverse in-order traversal of a binary tree + + If you have a tree defined like: + + btree_t my_tree; + + with nodes defined like: + + struct my_tree_node { + int key; + btree_node_t base; + } + + and you want to do something like: + + foreach (struct my_tree_node *node : reverse(my_tree)) { ... } + + you should use this: + + btree_foreach_r (struct my_tree_node, node, &my_tree, base) { ... } + + @param iter_type the type name of the iterator variable. this should be the tree's node type, and shouldn't be a pointer. + @param iter_name the name of the iterator variable. + @param tree_name a pointer to the tree to traverse. + @param node_member the name of the btree_node_t member variable within the tree node type. +*/ #define btree_foreach_r(iter_type, iter_name, tree_name, node_member) \ for (iter_type *iter_name = BTREE_CONTAINER(iter_type, node_member, btree_last(tree_name)); \ iter_name; \ iter_name = BTREE_CONTAINER(iter_type, node_member, btree_prev(&((iter_name)->node_member)))) +/* binary tree nodes. this *cannot* be used directly. you need to define a custom node type + that contains a member variable of type btree_node_t. + + you would then use the supplied macros to define functions to manipulate your custom binary tree. +*/ typedef struct btree_node { struct btree_node *b_parent, *b_left, *b_right; unsigned short b_height; } btree_node_t; +/* binary tree. unlike btree_node_t, you can define variables of type btree_t. */ typedef struct btree { struct btree_node *b_root; } btree_t; +/* re-balance a binary tree after an insertion operation. 
+ + NOTE that, if you define an insertion function using BTREE_DEFINE_INSERT or similar, + this function will automatically be called for you. + + @param tree the tree to re-balance. + @param node the node that was just inserted into the tree. +*/ extern void btree_insert_fixup(btree_t *tree, btree_node_t *node); + +/* delete a node from a binary tree and re-balance the tree afterwards. + + @param tree the tree to delete from + @param node the node to delete. +*/ extern void btree_delete(btree_t *tree, btree_node_t *node); +/* get the first node in a binary tree. + + this will be the node with the smallest key (i.e. the node that is furthest-left from the root) +*/ extern btree_node_t *btree_first(btree_t *tree); + +/* get the last node in a binary tree. + + this will be the node with the largest key (i.e. the node that is furthest-right from the root) +*/ extern btree_node_t *btree_last(btree_t *tree); +/* for any binary tree node, this function returns the node with the next-largest key value */ extern btree_node_t *btree_next(btree_node_t *node); +/* for any binary tree node, this function returns the node with the next-smallest key value */ extern btree_node_t *btree_prev(btree_node_t *node); +/* sets `child` as the immediate left-child of `parent` */ static inline void btree_put_left(btree_node_t *parent, btree_node_t *child) { parent->b_left = child; child->b_parent = parent; } +/* sets `child` as the immediate right-child of `parent` */ static inline void btree_put_right(btree_node_t *parent, btree_node_t *child) { parent->b_right = child; child->b_parent = parent; } +/* get the immediate left-child of `node` */ static inline btree_node_t *btree_left(btree_node_t *node) { return node->b_left; } +/* get the immediate right-child of `node` */ static inline btree_node_t *btree_right(btree_node_t *node) { return node->b_right; } +/* get the immediate parent of `node` */ static inline btree_node_t *btree_parent(btree_node_t *node) { return node->b_parent; } +/* get 
the height of `node`. + + the height of a node is defined as the length of the longest path + between the node and a leaf node. + + this count includes the node itself, so the height of a leaf node will be 1. +*/ static inline unsigned short btree_height(btree_node_t *node) { return node->b_height;