系统使用 HBase 作为存储。由于 HBase 的特性,数据存储在多个分区(region)上。为了避免某个 region 出现读写热点,需要让数据平均分配到每个 region 上。算法是:先用 MurmurHash 对 key 计算哈希值,再结合时间的推移和分区 region 的个数,计算出数据所属的 region。
/**
 * Implementation behind TIME_PAST: extracts the time-of-day from a timestamp
 * formatted as "YYYY-MM-DD HH:MM[:SS]".
 *
 * Only the hour and minute fields are stored; the date fields are matched but
 * discarded. Any field that cannot be parsed (including a null or malformed
 * string) is left at 0, so the result is always well defined.
 *
 * @param text       NUL-terminated timestamp text, may be null
 * @param inMinutes  true  -> return hour * 60 + minute
 *                   false -> return the hour only
 * @return the elapsed time within the day in the requested unit
 */
static inline int timePastFromText(const char* text, bool inMinutes)
{
    int hour = 0;
    int minute = 0;
    if (text) {
        // "%*d" matches a number without storing it (year, month, day).
        sscanf(text, "%*d-%*d-%*d %d:%d", &hour, &minute);
    }
    return inMinutes ? hour * 60 + minute : hour;
}

/*
 * TIME_PAST(X, B): X is the timestamp text, B selects minutes (non-zero) or
 * hours (zero). Both arguments are parenthesized so that compound expressions
 * such as `a || b` are evaluated as a whole, and they are evaluated in the
 * caller's scope, so caller variables named `hour`/`minute` are not shadowed.
 */
#define TIME_PAST(X,B) timePastFromText((X), (B) ? true : false)
/**
 * Picks the region index for a row key.
 *
 * The key is hashed with murmurHash, the minute-of-day taken from dateTime is
 * added so the mapping shifts as time passes, and the sum is reduced modulo
 * the number of regions.
 *
 * @param key          row key to hash
 * @param keyLen       number of bytes of key to hash; a value outside the
 *                     readable range of key.c_str() falls back to key.size()
 * @param dateTime     timestamp text in "YYYY-MM-DD HH:MM:SS" form
 * @param regionTotal  number of regions to spread the data over
 * @return a region index in [0, regionTotal), or 0 when regionTotal <= 0
 */
int HBaseAdapter::getHashRegion(string key,int keyLen,string dateTime,int regionTotal){
    // Without at least one region there is nothing to distribute over
    // (and a modulo by zero must be avoided).
    if (regionTotal <= 0) {
        return 0;
    }

    // c_str() exposes size() bytes plus the terminating NUL; a length beyond
    // that (or a negative one) would make the hash read out of bounds.
    const int keySize = static_cast<int>(key.size());
    if (keyLen < 0 || keyLen > keySize + 1) {
        keyLen = keySize;
    }

    const int seed = 0x1234ABCD;
    const uint32_t hashBits =
        static_cast<uint32_t>(murmurHash(key.c_str(), keyLen, seed));
    const int minuteOfDay = TIME_PAST(dateTime.c_str(), 1);

    // Unsigned arithmetic: the addition wraps instead of overflowing, and the
    // remainder can never be negative, so the result is always a valid index.
    const uint32_t shifted = hashBits + static_cast<uint32_t>(minuteOfDay);
    return static_cast<int>(shifted % static_cast<uint32_t>(regionTotal));
}
/**
 * Computes a 32-bit murmur hash of a byte buffer (multiplier 0x5bd1e995,
 * shift 24, as in MurmurHash2).
 *
 * The input is consumed as little-endian 4-byte words followed by a tail of
 * up to 3 bytes, so the result does not depend on the host byte order.
 *
 * @param key   pointer to the bytes to hash; a null pointer is hashed as an
 *              empty buffer
 * @param len   number of bytes to hash; a negative value is hashed as 0
 * @param seed  seed mixed into the initial hash state
 * @return the 32-bit hash code, reinterpreted as a signed int
 */
int HBaseAdapter::murmurHash ( const void * key, int len, int seed )
{
    const uint32_t m = 0x5bd1e995u;
    const int r = 24;

    // A null buffer or negative length would otherwise read outside the
    // caller's memory; treat both as "no data".
    if (!key || len < 0) {
        len = 0;
    }

    const unsigned char* data = static_cast<const unsigned char*>(key);
    uint32_t h = static_cast<uint32_t>(seed) ^ static_cast<uint32_t>(len);

    // Body: mix in every complete 4-byte word.
    const int wordCount = len / 4;
    for (int w = 0; w < wordCount; ++w) {
        const unsigned char* p = data + w * 4;
        uint32_t k = static_cast<uint32_t>(p[0])
                   | (static_cast<uint32_t>(p[1]) << 8)
                   | (static_cast<uint32_t>(p[2]) << 16)
                   | (static_cast<uint32_t>(p[3]) << 24);
        k *= m;
        k ^= k >> r;
        k *= m;
        h *= m;
        h ^= k;
    }

    // Tail: mix in the remaining 1 to 3 bytes, highest offset first.
    const unsigned char* tail = data + wordCount * 4;
    const int left = len - wordCount * 4;
    if (left != 0) {
        if (left >= 3) {
            h ^= static_cast<uint32_t>(tail[2]) << 16;
        }
        if (left >= 2) {
            h ^= static_cast<uint32_t>(tail[1]) << 8;
        }
        h ^= static_cast<uint32_t>(tail[0]);
        h *= m;
    }

    // Final avalanche so the last bytes affect all output bits.
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;
    return static_cast<int>(h);
}