/**
* BTree.h
*
* 采用网上流传已久的插入删除算法——回溯方式
* 与《算法导论》中的不一样,但是效率更高
*
* 790042744@qq.com
* 2012/3/5
*
**/
#ifndef BTREE_H
#define BTREE_H
#include <iostream>
#include <vector>
using std::vector;
using std::cout;
using std::endl;
/**
 * Knuth's definition, a B-tree of order m:
 *   - Every node has at most m children.
 *   - Every node (except root) has at least ceil(m/2) children.
 *   - The root has at least two children if it is not a leaf node.
 *   - All leaves appear in the same level, and carry information.
 *   - A non-leaf node with k children contains k-1 keys.
 *
 * BTreeNode is an implementation detail and ideally should not be exposed to users.
 **/
template< typename FileName, typename DiskAddr, int m >
class BTree;

/**
 * One node of a B-tree of order m.
 *
 * Keys (FileName) and their associated values (DiskAddr) are kept in two
 * parallel arrays; slot i of both arrays belongs to the same entry.
 * Only BTree (a friend) may modify a node; everybody else gets the
 * read-only accessors below.
 **/
template< typename FileName, typename DiskAddr, int m >
class BTreeNode
{
public:
    static const int MaxKeyCnt = m-1;              // most keys a valid node may hold
    static const int MinKeyCnt = ((m+1)>>1)-1;     // ceil(m/2)-1
private:
    FileName filename[m];      // one spare slot: a key is inserted first, the node is split afterwards
    DiskAddr fileaddr[m];
    BTreeNode *child[m+1];     // number of subtrees: ceil(m/2)~m (plus the spare slot)
    BTreeNode *parent;
    int keycnt;                // number of keys currently stored
    bool isleaf;
public:
    /// Creates an empty, non-leaf node with no parent and all child pointers NULL.
    BTreeNode(): parent(NULL), keycnt(0), isleaf(false)
    {
        // Assign NULL explicitly instead of memset: no <cstring> dependency and
        // no assumption about the bit pattern of a null pointer.
        for ( int i = 0; i < m+1; ++ i )
            child[i] = NULL;
    }
    ~BTreeNode() {}

    /// True when the node holds exactly MaxKeyCnt keys.
    bool IsFull() const { return ( keycnt == MaxKeyCnt ); }

    /// Returns a copy of the key in slot `index`; throws `index` (an int) if it is not a stored key.
    FileName const GetKey(int index) {
        CheckIndex( index );
        return filename[index];
    }
    /// Returns the key in slot `index`; throws `index` (an int) if it is not a stored key.
    FileName const& GetKey(int index) const {
        CheckIndex( index );
        return filename[index];
    }
    /// Returns a copy of the value in slot `index`; throws `index` (an int) if it is not a stored key.
    DiskAddr const GetValue(int index){
        CheckIndex( index );
        return fileaddr[index];
    }
    /// Returns the value in slot `index`; throws `index` (an int) if it is not a stored key.
    DiskAddr const& GetValue(int index) const{
        CheckIndex( index );
        return fileaddr[index];
    }
    friend class BTree<FileName, DiskAddr, m>;
private:
    // Shared bounds check of the accessors: only slots [0, keycnt) hold valid
    // entries, everything beyond is stale or uninitialised.
    void CheckIndex(int index) const {
        if ( index < 0 || index >= keycnt )
            throw index;
    }
};
//===================================================================================================
/**
 * B-tree of order m that maps FileName keys to DiskAddr values.
 * FileName only needs to provide operator< (used by the in-node binary search).
 **/
template< typename FileName, typename DiskAddr, int m >
class BTree
{
public:
    typedef BTreeNode<FileName, DiskAddr, m> Node;
    typedef Node&        RNode;
    typedef Node*        PNode;
    typedef Node const * PCNode;

    /// Creates an empty tree.
    BTree(): root(NULL) {}
    ~BTree();

    /// Returns the node holding `name` and its slot in `index`, or NULL with index == -1.
    PCNode Search( FileName name, int& index );
    /// Inserts (name, addr); returns false if `name` is already stored.
    bool Insert( FileName name, DiskAddr addr );
    /// Removes `name`; returns false if it is not stored.
    bool Delete( FileName name );

    // Test helpers: dump the whole tree / a single node to cout.
    void PrintTree();
    void PrintTree(PNode pnode);

private:
    // Node lifetime.
    PNode Allocate( bool isleaf=false );
    void Clear( PNode pnode );

    // Lookup.
    PNode SubTreeSearch( PNode pnode, FileName name, int& index );
    bool NodeSearch( PNode pnode, FileName name, int& index );
    PNode FindMinNode( PNode pnode );
    PNode FindMaxNode( PNode pnode );

    // Insertion.
    int NodeInsert( PNode pnode, FileName name, DiskAddr addr );
    void SplitNode( PNode pnode );

    // Rebalancing after a deletion.
    void Update( PNode pnode );
    void FromLeftSibling( PNode child, PNode parent, int chind );
    void FromRightSibling( PNode child, PNode parent, int chind );
    void MergeChild( PNode pnode, int index );

    PNode root;     // NULL while the tree is empty
};
//=======================================================================================
//public:
// Destructor: hands the root (if there is one) to Clear().
template< typename FileName, typename DiskAddr, int m >
inline BTree<FileName, DiskAddr, m>::~BTree()
{
    if ( root == NULL )
        return;
    Clear( root );
}
/**
 * Looks up `name` in the tree.
 * If it exists, returns a pointer to the (const) node holding it and sets
 * `index` to the position of the key inside that node.
 * Otherwise returns NULL and sets `index` to -1.
 **/
template< typename FileName, typename DiskAddr, int m >
typename BTree<FileName, DiskAddr, m>::PCNode
BTree<FileName, DiskAddr, m>::Search(FileName name, int& index)
{
    if ( root != NULL )
    {
        PNode hit = SubTreeSearch( root, name, index );
        // SubTreeSearch reports a miss through index only; it still returns a node.
        if ( index == -1 )
            return NULL;
        return hit;
    }
    index = -1;
    return NULL;
}
/**
 * Inserts the pair (name, addr).
 *
 * Insertion:
 *   1. Find the node the key belongs to (always a leaf) and insert it there.
 *   2. If that leaf now holds more than m-1 keys, split it.
 * Splitting a node:
 *   1. Split the node, producing one new node.
 *   2. Insert the middle key into the parent.
 *   3. If the parent now holds more than m-1 keys, split the parent in turn;
 *      otherwise stop.
 *
 * Returns false (and changes nothing) if `name` already exists, true otherwise.
 **/
template< typename FileName, typename DiskAddr, int m >
bool BTree<FileName, DiskAddr, m>::Insert( FileName name, DiskAddr addr )
{
    // Empty tree: the first key becomes the only entry of a new leaf root.
    if ( root == NULL )
    {
        root = Allocate(true);
        root->filename[0] = name;
        root->fileaddr[0] = addr;
        root->keycnt = 1;
        return true;
    }

    int found;
    PNode leaf = SubTreeSearch( root, name, found );   // on a miss this is the target leaf
    if ( found != -1 )                                  // key already present
        return false;

    NodeInsert( leaf, name, addr );
    if ( leaf->keycnt > Node::MaxKeyCnt )
        SplitNode( leaf );
    return true;
}
/**
 * Removes the key `name`.
 *
 * Deletion:
 *   1. Find the node containing the key.
 *   2. If that node is an internal node:
 *        a. find the leaf holding the smallest key of the subtree to the
 *           right of the key;
 *        b. overwrite the key to delete with that smallest key and remove
 *           the smallest key from the leaf;
 *        c. repair the leaf.
 *      If that node is a leaf:
 *        a. remove the key, moving the following keys forward;
 *        b. repair the leaf.
 * Repairing a node (see Update):
 *   a. if it still has enough keys, done;
 *   b. if a left/right sibling has keys to spare, borrow one, done;
 *   c. otherwise merge with a sibling and repair the parent.
 *
 * Returns false if the tree is empty or `name` is not stored, true otherwise.
 **/
template< typename FileName, typename DiskAddr, int m >
bool BTree<FileName, DiskAddr, m>::Delete( FileName name )
{
    if ( root == NULL )
        return false;

    int slot;
    PNode holder = SubTreeSearch( root, name, slot );
    if ( slot == -1 )
        return false;

    PNode victim = holder;          // leaf that physically loses one entry
    if ( !holder->isleaf )
    {
        // Replace the key by its in-order successor, then drop the successor from its leaf.
        victim = FindMinNode( holder->child[slot+1] );
        holder->filename[slot] = victim->filename[0];
        holder->fileaddr[slot] = victim->fileaddr[0];
        slot = 0;
    }

    // Close the gap left at `slot`.
    for ( int next = slot+1; next < victim->keycnt; ++ next )
    {
        victim->filename[next-1] = victim->filename[next];
        victim->fileaddr[next-1] = victim->fileaddr[next];
    }
    victim->keycnt -= 1;

    if ( victim->keycnt < Node::MinKeyCnt )
        Update( victim );
    return true;
}
// Test helper: prints the whole tree to cout, one line per level
// ("level: N " followed by every node of that level, left to right).
template< typename FileName, typename DiskAddr, int m >
void BTree<FileName, DiskAddr, m>::PrintTree()
{
    if ( NULL == root )
        return;
    vector<PNode> current, next;
    // vector<PNode> depends on the template parameters, so `typename` is required here.
    typename vector<PNode>::iterator it;
    int level = 0;
    current.push_back( root );
    while ( !current.empty() )
    {
        cout << "level: " << level << " ";
        for ( it = current.begin(); it != current.end(); ++ it )
            PrintTree( *it );
        cout << endl;
        // The first node of the level decides whether there is a level below.
        if ( false == current.front()->isleaf )
        {
            for ( it = current.begin(); it != current.end(); ++ it )
            {
                PNode pnode = *it;
                for ( int j = 0; j < pnode->keycnt+1; ++ j )
                    next.push_back( pnode->child[j] );
            }
        }
        // The collected children become the level to print; `next` ends up empty.
        current.clear();
        current.swap( next );
        ++ level;
    }
}
// Test helper: prints one node to cout as "[ key(value) key(value) ... ] ".
// A NULL node prints nothing.
template< typename FileName, typename DiskAddr, int m >
void BTree<FileName, DiskAddr, m>::PrintTree(PNode pnode)
{
    if ( pnode != NULL )
    {
        cout << "[ ";
        int slot = 0;
        while ( slot < pnode->keycnt )
        {
            cout << pnode->filename[slot] << "(" << pnode->fileaddr[slot] << ") ";
            ++ slot;
        }
        cout << "] ";
    }
}
//=======================================================================================
//private:
// Creates a fresh, empty node on the heap and marks it as leaf / non-leaf.
template< typename FileName, typename DiskAddr, int m >
typename BTree<FileName, DiskAddr, m>::PNode BTree<FileName, DiskAddr, m>::Allocate( bool isleaf )
{
    PNode fresh = new Node;
    fresh->isleaf = isleaf;
    return fresh;
}
/**
 * Destroys the subtree rooted at pnode: every descendant is deleted first,
 * then pnode itself. A NULL pnode is ignored.
 **/
template< typename FileName, typename DiskAddr, int m >
void BTree<FileName, DiskAddr, m>::Clear(PNode pnode)
{
    if ( NULL == pnode )
        return;
    if ( false == pnode->isleaf )
    {
        // A node with keycnt keys has keycnt+1 children, hence "<=".
        for ( int i = 0; i <= pnode->keycnt; ++ i )
            Clear( pnode->child[i] );
    }
    delete pnode;
}
/**
 * Searches the subtree rooted at pnode for the key `name`.
 * Found:     returns the node holding the key, index = position of the key in it.
 * Not found: index = -1, but the node where the search ended is still
 *            returned (Insert uses it as the insertion target).
 * A NULL pnode yields NULL with index = -1.
 **/
template< typename FileName, typename DiskAddr, int m >
typename BTree<FileName, DiskAddr, m>::PNode
BTree<FileName, DiskAddr, m>::SubTreeSearch( PNode pnode, FileName name, int& index )
{
    PNode current = pnode;
    while ( current != NULL )
    {
        int slot;
        if ( NodeSearch( current, name, slot ) )
        {
            index = slot;
            return current;
        }
        if ( current->isleaf )
        {
            index = -1;
            return current;
        }
        // On a miss `slot` is the number of the child to descend into.
        current = current->child[slot];
    }
    index = -1;
    return NULL;
}
/**
 * Binary search for `name` among the keys of a single node.
 * Equal key found: returns true,  index = position of that key.
 * Otherwise:       returns false, index = number of the child pointer to follow.
 * FileName only has to provide operator<.
 **/
template< typename FileName, typename DiskAddr, int m >
bool BTree<FileName, DiskAddr, m>::NodeSearch( PNode pnode, FileName name, int& index )
{
    int low = 0;
    int high = pnode->keycnt-1;
    while ( low <= high )
    {
        const int probe = low + (high-low)/2;
        if ( pnode->filename[probe] < name )
        {
            low = probe+1;
            continue;
        }
        if ( name < pnode->filename[probe] )
        {
            high = probe-1;
            continue;
        }
        // Neither smaller nor greater: equal.
        index = probe;
        return true;
    }
    index = low;
    return false;
}
/**
 * Inserts (name, addr) directly into pnode, which need not be a leaf.
 * Keys and child pointers from the insertion position onwards move one slot
 * to the right; the child slot at the insertion position is set to NULL.
 * Returns the position at which the key was stored.
 **/
template< typename FileName, typename DiskAddr, int m >
int BTree<FileName, DiskAddr, m>::NodeInsert( PNode pnode, FileName name, DiskAddr addr )
{
    int position;
    NodeSearch( pnode, name, position );

    // Open a gap in the key/value arrays.
    int k = pnode->keycnt;
    while ( k > position )
    {
        pnode->filename[k] = pnode->filename[k-1];
        pnode->fileaddr[k] = pnode->fileaddr[k-1];
        -- k;
    }
    // Shift child[position..keycnt] one slot right (a no-op in effect for a new empty root).
    for ( int c = pnode->keycnt+1; c > position; -- c )
        pnode->child[c] = pnode->child[c-1];

    pnode->filename[position] = name;
    pnode->fileaddr[position] = addr;
    pnode->child[position] = NULL;
    pnode->keycnt += 1;
    return position;
}
/**
 * Splits the over-full node pnode (it holds MaxKeyCnt+1 keys).
 * The lower lcnt keys stay in pnode, the upper rcnt keys move to a new right
 * sibling and the middle key moves up into the parent. If pnode is the root,
 * a new root is created first. If the parent becomes over-full in turn, it is
 * split recursively.
 **/
template< typename FileName, typename DiskAddr, int m >
void BTree<FileName, DiskAddr, m>::SplitNode( PNode pnode )
{
    const int lcnt = (Node::MaxKeyCnt)>>1;
    const int rcnt = Node::MaxKeyCnt-lcnt;
    int i;
    PNode pnew = Allocate( pnode->isleaf );
    // Element-wise assignment so FileName/DiskAddr with deep-copy semantics work.
    for ( i = 0; i < rcnt; ++ i )
    {
        pnew->filename[i] = pnode->filename[lcnt+i+1];
        pnew->fileaddr[i] = pnode->fileaddr[lcnt+i+1];
    }
    if ( false == pnode->isleaf )
    {
        // Hand over the rcnt+1 right-most subtrees and re-parent them.
        for ( i = 0; i < rcnt+1; ++ i )
        {
            pnew->child[i] = pnode->child[lcnt+1+i];
            pnew->child[i]->parent = pnew;
        }
    }
    pnew->keycnt = rcnt;
    pnode->keycnt = lcnt;
    // Move the middle key up.
    if ( NULL == pnode->parent )   // pnode is the root: the tree grows by one level
    {
        root = pnode->parent = Allocate();
    }
    PNode parent = pnode->parent;
    const int index = NodeInsert( parent, pnode->filename[lcnt], pnode->fileaddr[lcnt] );
    // NodeInsert left slot `index` empty and shifted the rest: hook both halves in.
    parent->child[index] = pnode;
    parent->child[index+1] = pnew;
    pnew->parent = parent;
    if ( parent->keycnt > Node::MaxKeyCnt )
        SplitNode( parent );
}
/**
 * Returns the node holding the smallest key of the subtree rooted at pnode,
 * i.e. the leaf reached by always following the first child.
 **/
template< typename FileName, typename DiskAddr, int m >
inline typename BTree<FileName, DiskAddr, m>::PNode
BTree<FileName, DiskAddr, m>::FindMinNode( PNode pnode )
{
    PNode walker = pnode;
    for ( ; !walker->isleaf; walker = walker->child[0] )
    {
    }
    return walker;
}
/**
 * Returns the node holding the largest key of the subtree rooted at pnode,
 * i.e. the leaf reached by always following the last child.
 **/
template< typename FileName, typename DiskAddr, int m >
inline typename BTree<FileName, DiskAddr, m>::PNode
BTree<FileName, DiskAddr, m>::FindMaxNode( PNode pnode )
{
    PNode walker = pnode;
    for ( ; !walker->isleaf; walker = walker->child[ walker->keycnt ] )
    {
    }
    return walker;
}
/**
 * Repairs pnode after it lost a key so that the B-tree invariants hold again.
 *   - Enough keys (or a non-empty root): nothing to do.
 *   - Empty root: its only child (if any) becomes the new root and the old
 *     root node is freed; the tree becomes empty if the root was a leaf.
 *   - Otherwise borrow a key from the left or right sibling if it has one to
 *     spare, else merge with a sibling and repair the parent recursively.
 **/
template< typename FileName, typename DiskAddr, int m >
void BTree<FileName, DiskAddr, m>::Update( PNode pnode )
{
    if ( pnode->keycnt >= Node::MinKeyCnt || ( pnode == root && pnode->keycnt > 0 ) )
        return;
    if ( pnode == root && 0 == pnode->keycnt )
    {
        // Keep a handle on the old root so it is freed even when it was a
        // leaf (child[0] is NULL then and the tree becomes empty).
        PNode oldroot = root;
        root = oldroot->child[0];
        if ( NULL != root )
            root->parent = NULL;
        delete oldroot;
        return;
    }
    PNode parent;
    int index;
    parent = pnode->parent;
    // Determine which child of parent pnode is. With at least one key a binary
    // search on its first key finds the slot; an empty node (possible e.g. for
    // m = 3) has to be located by pointer comparison.
    if ( pnode->keycnt > 0 )
        NodeSearch( parent, pnode->filename[0], index );
    else{
        for ( index = 0; index < parent->keycnt+1; ++ index )
            if ( parent->child[index] == pnode )
                break;
    }
    if ( index > 0 && parent->child[index-1]->keycnt > Node::MinKeyCnt )
    {
        FromLeftSibling( pnode, parent, index );
    }
    else if ( index < parent->keycnt && parent->child[index+1]->keycnt > Node::MinKeyCnt )
    {
        FromRightSibling( pnode, parent, index );
    }
    else{   // merge
        if ( index == parent->keycnt )   // pnode is the last child of parent: merge with its left sibling
            -- index;
        // pnode may be deleted by the merge; it must not be touched afterwards.
        MergeChild( parent, index );
        if ( parent->keycnt < Node::MinKeyCnt )
            Update( parent );
    }
}
/*
 * Borrows one key through the parent from the left sibling:
 * right-most key of the left sibling --> key chind-1 of parent --> left-most slot of child.
 * `chind` is the position of `child` among parent's children.
 */
template< typename FileName, typename DiskAddr, int m >
void BTree<FileName, DiskAddr, m>::FromLeftSibling( PNode child, PNode parent, int chind )
{
    const int sep = chind-1;                 // separator key between donor and child
    PNode donor = parent->child[sep];

    // Make room at the front of child: keys first ...
    for ( int k = child->keycnt; k > 0; -- k )
    {
        child->filename[k] = child->filename[k-1];
        child->fileaddr[k] = child->fileaddr[k-1];
    }
    // ... then the child pointers.
    for ( int c = child->keycnt+1; c > 0; -- c )
        child->child[c] = child->child[c-1];

    // The donor's last subtree becomes child's first subtree.
    child->child[0] = donor->child[ donor->keycnt ];
    if ( !child->isleaf )
        child->child[0]->parent = child;

    // Separator moves down into child.
    child->filename[0] = parent->filename[sep];
    child->fileaddr[0] = parent->fileaddr[sep];
    child->keycnt += 1;

    // Donor's last key moves up into the parent.
    donor->keycnt -= 1;
    parent->filename[sep] = donor->filename[ donor->keycnt ];
    parent->fileaddr[sep] = donor->fileaddr[ donor->keycnt ];
}
/*
 * Borrows one key through the parent from the right sibling:
 * left-most key of the right sibling --> key chind of parent --> right-most slot of child.
 * `chind` is the position of `child` among parent's children.
 */
template< typename FileName, typename DiskAddr, int m >
void BTree<FileName, DiskAddr, m>::FromRightSibling( PNode child, PNode parent, int chind )
{
    PNode donor = parent->child[chind+1];
    const int tail = child->keycnt;          // first free key slot of child

    // Separator moves down to the end of child, together with the donor's first subtree.
    child->filename[tail] = parent->filename[chind];
    child->fileaddr[tail] = parent->fileaddr[chind];
    child->child[tail+1] = donor->child[0];
    if ( !child->isleaf )
        child->child[tail+1]->parent = child;
    child->keycnt += 1;

    // Donor's first key moves up into the parent.
    parent->filename[chind] = donor->filename[0];
    parent->fileaddr[chind] = donor->fileaddr[0];

    // Close the gap at the front of the donor: keys first ...
    donor->keycnt -= 1;
    for ( int k = 0; k < donor->keycnt; ++ k )
    {
        donor->filename[k] = donor->filename[k+1];
        donor->fileaddr[k] = donor->fileaddr[k+1];
    }
    // ... then the child pointers (one more than there are keys).
    for ( int c = 0; c <= donor->keycnt; ++ c )
        donor->child[c] = donor->child[c+1];
}
/*
 * Merges pnode->child[keyind], the separator key pnode->filename[keyind] and
 * pnode->child[keyind+1] into pnode->child[keyind]. The right node is deleted
 * and the separator is removed from pnode. A NULL pnode is ignored.
 */
template< typename FileName, typename DiskAddr, int t >
void BTree<FileName, DiskAddr, t>::MergeChild( PNode pnode, int keyind )
{
    if ( NULL == pnode )
        return;
    int i;
    PNode left = pnode->child[keyind];
    PNode right = pnode->child[keyind+1];
    // Slot left->keycnt is reserved for the separator; right's content follows it.
    const int base = left->keycnt+1;
    for ( i = 0; i < right->keycnt; ++ i )
    {
        left->filename[base+i] = right->filename[i];
        left->fileaddr[base+i] = right->fileaddr[i];
    }
    if ( false == left->isleaf )
    {
        // Take over right's subtrees and re-parent them.
        for ( i = 0; i < right->keycnt+1; ++ i )
        {
            left->child[base+i] = right->child[i];
            left->child[base+i]->parent = left;
        }
    }
    // Separator moves down.
    left->filename[ left->keycnt ] = pnode->filename[ keyind ];
    left->fileaddr[ left->keycnt ] = pnode->fileaddr[ keyind ];
    left->keycnt += right->keycnt + 1;
    delete right;
    right = NULL;
    // Remove the separator and the pointer to the deleted node from pnode.
    for ( i = keyind; i < pnode->keycnt-1; ++ i )
    {
        pnode->filename[i] = pnode->filename[i+1];
        pnode->fileaddr[i] = pnode->fileaddr[i+1];
        pnode->child[i+1] = pnode->child[i+2];
    }
    // The former last child slot is now stale (a duplicate or the deleted node).
    pnode->child[ pnode->keycnt ] = NULL;
    -- pnode->keycnt;
}
//=======================================================================================
#endif