B树实现的非回溯算法参考:http://blog.chinaunix.net/space.php?uid=20196318&do=blog&id=3030529
以回溯的方式实现B树,与非回溯算法相比,理解起来更为直观,而且插入/删除时需要分裂/合并的次数比非回溯算法要少,因为回溯算法只有到必须分裂或合并的时候才执行分裂或合并。其代价是:回溯算法从根向叶子下降后,还要向上回溯至根节点。如使用B树(B+树)组织辅助存储设备(如磁盘)上的数据,则应尽量避免执行节点分裂和合并操作,因为这些操作将增加多次额外的磁盘写操作,会极大地影响性能。本文主要结合代码分析B树的回溯实现方式。
数据结构定义:
/* Minimum degree of the B-tree. */
#define M 2

typedef struct btree_node btree_node;

/*
 * One B-tree node.
 *
 * The key and child-pointer arrays are each one slot larger than the B-tree
 * limit so that an insertion can be carried out first and the resulting
 * over-full node split afterwards.
 */
struct btree_node {
    int k[2 * M];          /* keys; the first `num` entries are in use */
    btree_node *p[2 * M + 1]; /* child pointers; one extra slot for the recursive insert */
    int num;               /* number of keys currently stored */
    bool is_leaf;          /* true when the node is a leaf */
};
创建节点接口:
- btree_node *btree_node_new()
- {
- btree_node *node = (btree_node *)malloc(sizeof(btree_node));
- if(NULL == node) {
- return NULL;
- }
-
- for(int i = 0; i < 2 * M -1; i++) {
- node->k[i] = 0;
- }
-
- for(int i = 0; i < 2 * M; i++) {
- node->p[i] = NULL;
- }
-
- node->num = 0;
- node->is_leaf = true;
- }
-
- btree_node *btree_create()
- {
- btree_node *node = btree_node_new();
- if(NULL == node) {
- return NULL;
- }
-
- return node;
- }
插入节点
- // 执行该操作时,说明child上已经有2*M个节点了,已经违反了规则
- // 保留左边的M-1个节点,把第M个节点上升到parent中,右边M个节点放入新节点中
- int btree_split_child(btree_node *parent, int pos, btree_node *child)
- {
- btree_node *new_child = btree_node_new();
- if(NULL == new_child) {
- return -1;
- }
-
- new_child->is_leaf = child->is_leaf;
- new_child->num = M; // M-1 left, M right
-
- for(int i = 0; i < M; i++) {
- new_child->k[i] = child->k[i+M];
- }
-
- if(false == new_child->is_leaf) {
- for(int i = 0; i < M + 1; i++) {
- new_child->p[i] = child->p[i+M];
- }
- }
-
- child->num = M - 1;
-
- for(int i = parent->num; i > pos; i--) {
- parent->p[i+1] = parent->p[i];
- }
- parent->p[pos+1] = new_child;
-
- for(int i = parent->num - 1; i >= pos; i--) {
- parent->k[i+1] = parent->k[i];
- }
- parent->k[pos] = child->k[M-1];
-
- parent->num += 1;
-
- }
-
- // 从根节点开始,一直到叶子节点,然后回溯
- // 回溯过程中,如果是叶子节点,则将target插入,如果是分支节点,如果子路径上有包含
- // 2*M个key的节点,则将其分裂(分裂时必须知道待分裂节点的父节点)
- void btree_insert_recursive(btree_node *node, int target)
- {
- if(NULL != node) {
- int pos = 0;
- while(pos < node->num && target > node->k[pos]) pos++;
- btree_insert_recursive(node->p[pos], target);
- if(NULL == node->p[pos]) {
- btree_insert_nonfull(node, pos, target);
- } else {
- if(2 * M == node->p[pos]->num) {
- btree_split_child(node, pos, node->p[pos]);
- }
- }
- }
- }
// node一定是叶子节点,在pos处插入target
- void btree_insert_nonfull(btree_node *node, int pos, int target)
- {
- for(int j = node->num; j > pos; j--) {
- node->k[j] = node->k[j-1];
- }
- node->k[pos] = target;
- node->num += 1;
- }
// 考虑特殊情况,如果最后根节点有2*M个节点,将根进行分裂,产生新的根,树增高一层
- btree_node* btree_insert(btree_node *root, int target)
- {
- if(NULL == root) {
- return root;
- }
-
- btree_insert_recursive(root, target);
-
- if(2 * M == root->num) {
- btree_node *node = btree_node_new();
- if(NULL == node) {
- return root;
- }
-
- node->is_leaf = false;
- node->p[0] = root;
- btree_split_child(node, 0, root);
- return node;
- }
-
- return root;
- }
删除节点
- // 执行该操作时,一定是y或者z的key个数少于M-1,将y,root->k[pos],z合并到y,释放z
- void btree_merge_child(btree_node *root, int pos, btree_node *y, btree_node *z)
- {
- int n = y->num;
- for(int i = 0; i < z->num; i++) {
- y->k[n+1+i] = z->k[i];
- }
- y->k[n] = root->k[pos];
-
- if(false == z->is_leaf) {
- for(int i = 0; i <= z->num; i++) {
- y->p[n+1+i] = z->p[i];
- }
- }
-
- y->num += (z->num + 1);
-
- for(int j = pos + 1; j < root->num; j++) {
- root->k[j-1] = root->k[j];
- root->p[j] = root->p[j+1];
- }
-
- root->num -= 1;
- free(z);
- }
// 递归删除target,在回溯的过程中,如果遇到叶子节点,则将target删除
// 如果遇到分支节点,则检查子树的节点数是否少于M-1,如果是,则需要从左或
// 右兄弟借节点,如果左右兄弟都只有M-1个节点,则要执行merge
- void btree_delete_recursive(btree_node *node, int target)
- {
- if(NULL != node) {
- int i = 0;
- while(i < node->num && target > node->k[i]) i++;
-
- if(i < node->num && target == node->k[i] && false == node->is_leaf) { // found
- btree_node *y = node->p[i];
- int pre = btree_search_predecessor(y);
- node->k[i] = pre;
- target = pre;
- }
- btree_delete_recursive(node->p[i], target);
-
- if(NULL == node->p[i]) {
- btree_delete_nonone(node, i, target);
- } else {
- btree_node *y = node->p[i];
- if(M - 2 == y->num) {
- btree_node *p = NULL, *z = NULL;
- if(i > 0) {
- p = node->p[i-1];
- }
- if(i < node->num) {
- z = node->p[i+1];
- }
-
- if(i > 0 && p->num > M - 1) {
- btree_shift_to_right_child(node, i-1, p, y);
- } else if(i < node->num && z->num > M - 1) {
- btree_shift_to_left_child(node, i, y, z);
- } else if(i > 0) {
- btree_merge_child(node, i-1, p, y); // note
- y = p;
- } else {
- btree_merge_child(node, i, y, z);
- }
- }
- }
- }
- }
// 考虑特殊情况,删除完后,根节点无任何key了,这时释放根,将子树作为新的根
- btree_node *btree_delete(btree_node *root, int target)
- {
- if(NULL == root) {
- return NULL;
- }
- btree_delete_recursive(root, target);
-
- if(0 == root->num && false == root->is_leaf) {
- btree_node *newroot = root->p[0];
- free(root);
- return newroot;
- }
- return root;
- }
- // 如果node的第pos个key为target,则删除之,否则说明树中没有待删除的节点
- void btree_delete_nonone(btree_node *node, int pos, int target)
- {
- if(node->k[pos] != target) {
- printf("target not found\n");
- }
- else {
- for(int j = pos; j < node->num - 1; j++) {
- node->k[j] = node->k[j+1];
- }
-
- (void)target;
- node->num -= 1;
- }
- }
// find rightmost key
- int btree_search_predecessor(btree_node *root)
- {
- btree_node *y = root;
- while(false == y->is_leaf) {
- y = y->p[y->num];
- }
- return y->k[y->num-1];
- }
// find leftmost key
- int btree_search_successor(btree_node *root)
- {
- btree_node *z = root;
- while(false == z->is_leaf) {
- z = z->p[0];
- }
- return z->k[0];
- }
-
// 从左兄弟借一个节点,y的最大key上升至root,root的k[pos]下降至z
- void btree_shift_to_right_child(btree_node *root, int pos,
- btree_node *y, btree_node *z)
- {
- for(int i = z->num -1; i > 0; i--) {
- z->k[i] = z->k[i-1];
- }
- z->k[0]= root->k[pos];
- root->k[pos] = y->k[y->num-1];
-
- if(false == z->is_leaf) {
- for(int i = z->num; i > 0; i--) {
- z->p[i] = z->p[i-1];
- }
- z->p[0] = y->p[y->num];
- }
-
- y->num -= 1;
- }
// 从右兄弟借一个节点,z的最小key上升至root,root的看k[pos]下降至y
- void btree_shift_to_left_child(btree_node *root, int pos,
- btree_node *y, btree_node *z)
- {
- y->num += 1;
- y->k[y->num-1] = root->k[pos];
- root->k[pos] = z->k[0];
-
- for(int j = 1; j < z->num; j++) {
- z->k[j-1] = z->k[j];
- }
-
- if(false == z->is_leaf) {
- y->p[y->num] = z->p[0];
- for(int j = 1; j <= z->num; j++) {
- z->p[j-1] = z->p[j];
- }
- }
-
- z->num -= 1;
- }
