本文我们分析mongodb chunk的拆分,chunk的分拆分两种情况.
1. chunk范围为[min,max],这表明这个chunk还没有拆分过.第一次拆分时,考虑到后面会插入更多的数据,chunk将从实际的最大值max1处拆分,拆分后的chunk范围如下:[min,max1),[max1,max].对于[value,max]这种chunk,拆分点选择这个chunk的最小值min1,得到[value,min1),[min1,max].
2. 对于一般的chunk值[a,b],其拆分是按照空间占用的,拆分后的两个chunk数据占用的空间基本相等.
下面进入singleSplit具体分析源码.
// Splits this chunk at a single point.
//
// force: when false, a split point is requested through pickSplitVector() and
//        the function returns early if the chunk does not hold enough data;
//        when true, the chunk's median key is used as the initial split point.
// res:   forwarded unchanged to multiSplit().
//
// Returns the split point that was used, or an empty BSONObj when no split
// was performed.
BSONObj Chunk::singleSplit( bool force , BSONObj& res ) const {
    vector<BSONObj> points;

    if ( force ) {
        // A forced split starts from the chunk's median key.
        BSONObj median;
        pickMedianKey( median );
        if ( ! median.isEmpty() ) {
            points.push_back( median );
        }
    }
    else {
        // Splitting is optional here, so we may bail out early when there is
        // not enough data. The number of objects that may fall before the
        // split point is capped (MaxObjectPerChunk) so that a split point can
        // be found without traversing all the data; the second argument is
        // the desired chunk size.
        const int maxPoints = 2;
        vector<BSONObj> found;
        pickSplitVector( found , getManager()->getCurrentDesiredChunkSize() , maxPoints , MaxObjectPerChunk );

        // Zero split points: not enough data to split on.
        // One split point: the chunk is between half and full chunk size.
        // Neither case warrants a split.
        if ( found.size() <= 1 ) {
            return BSONObj();
        }
        points.push_back( found.front() );
    }

    // If the chunk being split is the first (or last) one on the collection,
    // it is assumed to be likely to see more insertions. Instead of splitting
    // mid-chunk, the very first (or last) key is used as the split point,
    // replacing whatever was chosen above.
    // maxIsInf() is only consulted when minIsInf() is false.
    const bool openBelow = minIsInf();
    const bool openAbove = ! openBelow && maxIsInf();
    if ( openBelow || openAbove ) {
        points.clear();
        // NOTE(review): 1 / -1 presumably select the first / last key in
        // shard-key order -- confirm against _getExtremeKey.
        BSONObj edgeKey = _getExtremeKey( openBelow ? 1 : -1 );
        if ( ! edgeKey.isEmpty() ) {
            points.push_back( edgeKey );
        }
    }

    // Normally there is a sound split point by now unless the chunk is empty.
    // Sanity check: refuse to split on nothing or on one of the chunk bounds.
    if ( points.empty() ) {
        return BSONObj();
    }
    if ( _min == points.front() || _max == points.front() ) {
        return BSONObj();
    }

    // Perform the actual split.
    if ( ! multiSplit( points , res ) ) {
        return BSONObj();
    }
    return points.front();
}
下面关注!force(非强制拆分)这一情况,去看其中调用的pickSplitVector
// Asks the mongod that holds this chunk to compute split points by running
// the "splitVector" command against it.
//
// splitPoints: output; every key listed under "splitKeys" in the reply is
//              appended as an owned copy.
// chunkSize:   sent as "maxChunkSizeBytes" (bytes).
// maxPoints:   sent as "maxSplitPoints".
// maxObjs:     sent as "maxChunkObjects".
//
// Raises a user assertion (code 13345) when the command fails.
void Chunk::pickSplitVector( vector<BSONObj>& splitPoints , int chunkSize /* bytes */, int maxPoints, int maxObjs ) const {
    // Connect to the server hosting the chunk so it can choose the split
    // points according to the constraints we send.
    scoped_ptr<ScopedDbConnection> conn(
        ScopedDbConnection::getInternalScopedDbConnection( getShard().getConnString() ) );

    // Assemble the splitVector request.
    BSONObjBuilder request;
    request.append( "splitVector" , _manager->getns() );
    request.append( "keyPattern" , _manager->getShardKey().key() );
    request.append( "min" , getMin() );
    request.append( "max" , getMax() );
    request.append( "maxChunkSizeBytes" , chunkSize );
    request.append( "maxSplitPoints" , maxPoints );
    request.append( "maxChunkObjects" , maxObjs );
    BSONObj requestObj = request.obj();

    BSONObj reply;
    const bool succeeded = conn->get()->runCommand( "admin" , requestObj , reply );
    if ( ! succeeded ) {
        // Mark the connection as done before raising the assertion.
        conn->done();
        ostringstream msg;
        msg << "splitVector command failed: " << reply;
        uassert( 13345 , msg.str() , 0 );
    }

    // Copy each returned key so it does not depend on the reply's buffer.
    for ( BSONObjIterator keys( reply.getObjectField( "splitKeys" ) ); keys.more(); ) {
        splitPoints.push_back( keys.next().Obj().getOwned() );
    }

    conn->done();
}
继续看这里的splitVector命令.
bool run(const string& dbname, BSON