先说说为什么要使用哈希桶。记得以前在学校读书的时候,那本软件工程书上的做法是:把经过哈希算法计算得到的值保存在一个链表数组中,就像下面的代码这样,当我们需要插入或者删除元素的时候,直接对相应的链表操作就行。在Linux多线程的情况下,临界资源在使用前要先加锁;如果对整个链表数组加一把锁,所有线程都只能排队等待,那这个服务器就基本没法工作了。为了减小加锁的粒度,数组的长度应当尽量长,那样的话多个线程落在同一条链表上而阻塞的几率就会减小。但是锁的使用(包括加锁、解锁等)本身也影响性能,所以锁的数量又要尽量少,这个时候就得取一个折中值。把链表数组中的每一条链表连同它自己的锁拿出来单独管理,那么它就是一个哈希桶了。


#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
/* Status codes returned by the hash-map functions in this file. */
#define FAILED (-1) /* parenthesized so the minus sign cannot bind to neighbouring tokens */
#define SUCCESS 0
/* Seed mixed into every MurmurHash computation. */
#define MRU_HASH_SEED 110
/*
 * Fallback 64-bit unsigned type for builds where <stdint.h> is not in scope.
 * <stdint.h> defines UINT64_MAX together with uint64_t, so the guard keeps
 * this typedef from clashing with the standard one (which may be a different
 * underlying type, e.g. unsigned long) when that header has been included.
 */
#ifndef UINT64_MAX
typedef unsigned long long int uint64_t;
#endif
/* One entry of a bucket's singly linked list. */
typedef struct Node
{
    uint64_t       hash_key;  /* hash value the entry is stored and looked up under */
    unsigned char  path[32];  /* path string copied into the entry */
    struct Node   *next;      /* following entry in the same list, NULL at the end */
} Node;
/* A single hash bucket: one linked list of nodes plus its bookkeeping. */
typedef struct Bucket
{
    unsigned int    lock;       /* lock word; initialised to 0, no locking code is visible here */
    unsigned short  bucket_id;  /* position of this bucket inside the bucket table */
    unsigned int    entry_num;  /* number of nodes counted into the list */
    struct Node    *head;       /* first node of the list, NULL when the list is empty */
} Bucket;
/*
 * The hash map: a table of BucketNum buckets.
 *
 * BucketNum is an unsigned short so that it can hold every bucket count that
 * HashMapInit reads from the user (also an unsigned short). A narrower field
 * silently truncates the count: 256 would be stored as 0 and make the modulo
 * in GetBucketIdx divide by zero.
 */
typedef struct HashMap
{
    unsigned short  BucketNum;    /* number of elements in BucketTable */
    Bucket         *BucketTable;  /* heap-allocated array of buckets */
} HashMap;
/*
 * 64-bit MurmurHash2 (the MurmurHash64A scheme) of a byte buffer, seeded with
 * MRU_HASH_SEED.
 *
 * key: start of the buffer; any alignment is accepted.
 * len: number of bytes to hash; must not be negative.
 *
 * Returns the 64-bit hash. Whole 8-byte blocks are loaded in the machine's
 * native byte order, so the result for a given input depends on endianness.
 */
uint64_t MurmurHash(const void* key, int len)
{
    const uint64_t m = 0xc6a4a7935bd1e995ULL;
    const int r = 47;
    const unsigned char *cur = (const unsigned char *)key;
    const unsigned char *end = cur + (len / 8) * 8;
    uint64_t h = (uint64_t)MRU_HASH_SEED ^ ((uint64_t)len * m);

    /* Body: mix in every complete 8-byte block. */
    while (cur != end)
    {
        uint64_t k;

        /* memcpy instead of dereferencing a cast pointer: the buffer is an
         * arbitrary byte string, so a direct uint64_t load could be
         * misaligned and would violate the aliasing rules. */
        memcpy(&k, cur, sizeof k);
        cur += 8;

        k *= m;
        k ^= k >> r;
        k *= m;
        h ^= k;
        h *= m;
    }

    /* Tail: fold in the remaining 0..7 bytes, highest offset first. */
    switch (len & 7)
    {
    case 7: h ^= (uint64_t)(cur[6]) << 48; /* fallthrough */
    case 6: h ^= (uint64_t)(cur[5]) << 40; /* fallthrough */
    case 5: h ^= (uint64_t)(cur[4]) << 32; /* fallthrough */
    case 4: h ^= (uint64_t)(cur[3]) << 24; /* fallthrough */
    case 3: h ^= (uint64_t)(cur[2]) << 16; /* fallthrough */
    case 2: h ^= (uint64_t)(cur[1]) << 8;  /* fallthrough */
    case 1: h ^= (uint64_t)(cur[0]);
            h *= m;
            break;
    default:
            break; /* length is a multiple of 8: nothing left to mix */
    }

    /* Final avalanche. */
    h ^= h >> r;
    h *= m;
    h ^= h >> r;
    return h;
}
/*
 * Frees every node in a bucket's list and leaves the bucket empty.
 *
 * bucket: bucket whose list is released; a NULL bucket is reported and
 *         ignored.
 *
 * After the call bucket->head is NULL and bucket->entry_num is 0, so the
 * bucket never keeps a pointer to freed memory. The number of freed nodes is
 * printed when it is non-zero.
 */
void FreeList(Bucket* bucket)
{
    unsigned int freed = 0;
    struct Node *cur;

    if (NULL == bucket)
    {
        printf("the bucket is NULL\n");
        return;
    }

    cur = bucket->head;
    while (NULL != cur)
    {
        struct Node *next = cur->next; /* read the link before the node is freed */
        free(cur);
        cur = next;
        ++freed;
    }

    /* Reset the bookkeeping so the bucket can be reused or released safely. */
    bucket->head = NULL;
    bucket->entry_num = 0;

    if (0 != freed)
    {
        printf("free node number is: %u\n", freed);
    }
}
/*
 * Initialises a hash map with a bucket count read from standard input.
 *
 * hashMap: map to initialise; its previous contents are not released.
 *
 * Returns SUCCESS when the bucket table has been allocated and every bucket
 * is empty. Returns FAILED when hashMap is NULL, the count cannot be read,
 * the count is 0 or does not fit in hashMap->BucketNum, or the allocation
 * fails; in all of those cases BucketNum is 0 and BucketTable is NULL.
 */
int HashMapInit(HashMap* hashMap)
{
    unsigned int requested = 0;
    unsigned int idx = 0;

    if (NULL == hashMap)
    {
        printf("the hash map is NULL\n");
        return FAILED;
    }

    /* Start from a well-defined empty state so every failure path is safe. */
    hashMap->BucketNum = 0;
    hashMap->BucketTable = NULL;

    printf("input the bucketNum\n");
    /* %u matches the unsigned int destination; the result is checked so that
     * non-numeric input is not mistaken for a count. */
    if (1 != scanf("%u", &requested))
    {
        printf("read the bucketNum failed\n");
        return FAILED;
    }
    if (0 == requested)
    {
        printf("the bucketNum is 0\n");
        return FAILED;
    }

    /* Store the count, then verify it survived the conversion to the width
     * of the BucketNum field; a truncated count would desynchronise the
     * table size from the modulo used to pick a bucket. */
    hashMap->BucketNum = requested;
    if (hashMap->BucketNum != requested)
    {
        printf("the bucketNum %u is too large\n", requested);
        hashMap->BucketNum = 0;
        return FAILED;
    }

    hashMap->BucketTable = calloc(requested, sizeof *hashMap->BucketTable);
    if (NULL == hashMap->BucketTable)
    {
        printf("alloc bucket failed\n");
        hashMap->BucketNum = 0;
        return FAILED;
    }

    for (idx = 0; idx < requested; ++idx)
    {
        Bucket *pBucket = &(hashMap->BucketTable[idx]);

        pBucket->bucket_id = (unsigned short)idx;
        /* NOTE(review): the original comment here said "acquire the lock",
         * but the code only sets the lock word to 0. */
        pBucket->lock = 0;
        pBucket->entry_num = 0;
        pBucket->head = NULL;
    }
    return SUCCESS;
}
/*
 * Maps a hash key to the index of the bucket that owns it.
 *
 * hashMap:  map whose bucket count is used.
 * hash_key: hash value to place.
 *
 * Returns hash_key modulo the bucket count, or FAILED when hashMap is NULL or
 * has no buckets (the modulo would otherwise divide by zero).
 */
int GetBucketIdx(HashMap* hashMap, uint64_t hash_key)
{
    if (NULL == hashMap || 0 == hashMap->BucketNum)
    {
        return FAILED;
    }
    /* The remainder is smaller than BucketNum, so it always fits in an int. */
    return (int)(hash_key % hashMap->BucketNum);
}
/*
 * Pushes a node onto the front of a bucket's list.
 *
 * bucket: destination bucket.
 * node:   node to link in; it becomes the new list head.
 *
 * Returns SUCCESS after linking the node and incrementing entry_num, or
 * FAILED (with a message) when either argument is NULL.
 *
 * NOTE(review): the original comment said "take the lock first", but no
 * locking is performed in this function.
 */
int BucketInsert(Bucket* bucket, struct Node* node)
{
    if (!bucket)
    {
        printf("the bucket is NULL\n");
        return FAILED;
    }
    if (!node)
    {
        printf("the node is NULL\n");
        return FAILED;
    }

    /* Head insertion: the old head becomes the successor of the new node. */
    node->next = bucket->head;
    bucket->head = node;
    bucket->entry_num += 1;

    return SUCCESS;
}
/*
 * Hashes a path and stores it in the bucket selected by that hash.
 *
 * hashMap: initialised map to insert into.
 * path:    NUL-terminated path; it is copied into the new node and must fit
 *          in the node's path buffer including the terminator.
 *
 * Returns SUCCESS when a node holding the hash and a copy of the path has
 * been linked into its bucket. Returns FAILED when an argument is NULL, the
 * path is too long for a node, no bucket can be selected, the node cannot be
 * allocated, or the bucket insert fails. The hash key and chosen bucket are
 * printed along the way.
 */
int HashMapInsert(HashMap* hashMap, unsigned char* path)
{
    uint64_t hash_key = 0;
    int bucket_idx = 0;
    size_t path_len = 0;
    struct Node *node = NULL;

    if (NULL == hashMap)
    {
        printf("the hash map is NULL\n");
        return FAILED;
    }
    if (NULL == path)
    {
        printf("the file path is NULL\n");
        return FAILED;
    }

    /* Reject paths that would overflow the fixed-size buffer in the node. */
    path_len = strlen((const char *)path);
    if (path_len >= sizeof node->path)
    {
        printf("the file path is too long\n");
        return FAILED;
    }

    hash_key = MurmurHash(path, (int)path_len);
    printf("file [%s],Mrumru hash key is: [%llu]\n", path, (unsigned long long)hash_key);

    /* Keep the int result so a FAILED return is seen instead of being
     * converted into a large unsigned index. */
    bucket_idx = GetBucketIdx(hashMap, hash_key);
    if (bucket_idx < 0 || NULL == hashMap->BucketTable)
    {
        printf("get bucket index failed\n");
        return FAILED;
    }
    printf("hash key [%llu] belong to the bucket: %d\n", (unsigned long long)hash_key, bucket_idx);

    node = malloc(sizeof *node);
    if (NULL == node)
    {
        printf("alloc node failed\n");
        return FAILED;
    }
    node->hash_key = hash_key;
    node->next = NULL;
    memcpy(node->path, path, path_len + 1); /* length checked above; +1 copies the terminator */

    if (FAILED == BucketInsert(&(hashMap->BucketTable[bucket_idx]), node))
    {
        printf("bucket inset node failed\n");
        free(node);
        return FAILED;
    }
    return SUCCESS;
}
/*
 * Removes the first node in a bucket whose hash_key equals the given key.
 *
 * bucket:   bucket to search.
 * hash_key: key of the node to remove.
 *
 * Returns SUCCESS when a node was unlinked and freed (entry_num is
 * decremented to match), FAILED when bucket is NULL or no node carries the
 * key. Nodes are matched by hash key only, so two paths with the same hash
 * cannot be told apart here.
 */
int BucketDel(Bucket* bucket, uint64_t hash_key)
{
    struct Node **link;

    if (NULL == bucket)
    {
        printf("the bucket is NULL\n");
        return FAILED;
    }

    /* Walk the list through the address of each "next" pointer: the head and
     * interior nodes are unlinked by the same code, and the walk stops
     * cleanly at the end of the list when the key is absent. */
    for (link = &bucket->head; NULL != *link; link = &(*link)->next)
    {
        struct Node *victim = *link;

        if (victim->hash_key == hash_key)
        {
            *link = victim->next;
            free(victim);
            if (bucket->entry_num > 0)
            {
                bucket->entry_num--;
            }
            return SUCCESS;
        }
    }
    return FAILED;
}
/*
 * Hashes a path and removes the matching node from its bucket.
 *
 * hashMap: initialised map to delete from.
 * path:    NUL-terminated path whose entry should be removed.
 *
 * Returns SUCCESS when BucketDel removed a node, FAILED when an argument is
 * NULL, no bucket can be selected, or no node with the path's hash exists.
 * The hash key, chosen bucket and outcome are printed.
 */
int HashMapDel(HashMap* hashMap, unsigned char* path)
{
    uint64_t hash_key = 0;
    int bucket_idx = 0;

    if (NULL == hashMap)
    {
        printf("the hash map is NULL\n");
        return FAILED;
    }
    if (NULL == path)
    {
        printf("the file path is NULL\n");
        return FAILED;
    }

    hash_key = MurmurHash(path, (int)strlen((const char *)path));
    printf("file [%s],Mrumru hash key is: [%llu]\n", path, (unsigned long long)hash_key);

    /* Keep the int result so a FAILED return is seen instead of being
     * converted into a large unsigned index. */
    bucket_idx = GetBucketIdx(hashMap, hash_key);
    if (bucket_idx < 0 || NULL == hashMap->BucketTable)
    {
        printf("get bucket index failed\n");
        return FAILED;
    }
    printf("hash key [%llu] belong to the bucket: %d\n", (unsigned long long)hash_key, bucket_idx);

    /* Failure is the case where BucketDel did NOT return SUCCESS. */
    if (SUCCESS != BucketDel(&(hashMap->BucketTable[bucket_idx]), hash_key))
    {
        printf("del node failed,file path:[%s]\n", path);
        return FAILED;
    }
    printf("del node success,file path:[%s]\n", path);
    return SUCCESS;
}
/*
 * Releases a heap-allocated hash map together with everything it owns.
 *
 * hashMap: map to destroy, or NULL (reported and treated as already
 *          released). The map object itself is passed to free(), so it must
 *          come from malloc/calloc and must not be used after this call; the
 *          caller's own pointer is not modified.
 *
 * Every bucket's node list is freed, then the bucket table, then the map.
 * Always returns SUCCESS.
 *
 * NOTE(review): the original comment asked for a lock to be taken here, but
 * no locking is performed.
 */
int HashMapRelease(HashMap* hashMap)
{
    if (NULL == hashMap)
    {
        printf("hash map is already NULL\n");
        return SUCCESS;
    }

    if (NULL != hashMap->BucketTable)
    {
        unsigned int bucket_count = hashMap->BucketNum;
        unsigned int idx;

        for (idx = 0; idx < bucket_count; ++idx)
        {
            FreeList(&(hashMap->BucketTable[idx]));
        }
        /* The table is a separate allocation and has to be released too. */
        free(hashMap->BucketTable);
    }

    free(hashMap);
    return SUCCESS;
}
int main(int argc, char *argv[])
{
uint64_t hashKey = 0;
HashMap *hashMap = NULL;
unsigned int idx = 0;
char *p[] = {"/opt/mkmfs/mnt/0/d/1.ts",
"/opt/mkmfs/mnt/0/d/2/ts"
};
hashMap = (HashMap*)malloc(sizeof(HashMap));
if(NULL == hashMap)
{
printf("alloc hash map failed\n");
return 0;
}
printf("alloc hash map success\n");
if(SUCCESS == HashMapInit(hashMap))
{
printf("Init hash map success\n");
}
int ret = 0;
for(idx=0; idx<2; ++idx)
{
ret = HashMapInsert(hashMap, p[idx]);
if(SUCCESS == ret)
{
printf("hash map insert success\n");
}
}
ret = HashMapDel(hashMap, p[0]);
if(SUCCESS == ret)
{
printf("del node success\n");
}
if(SUCCESS == HashMapRelease(hashMap))
{
printf("release hash map success\n");
}
return 0;
}
例子中的哈希算法是google的murmur哈希(MurmurHash),它的计算速度比传统的哈希算法快很多,同时哈希冲突的几率很低。虽然哈希桶实现了,但是另外一个问题又出现了——查找效率:桶里只有一条链表,查找时只能逐个节点遍历。因此一般实现哈希桶的同时,除了在哈希桶中放一个链表,还会放一棵红黑树。红黑树的查找效率极高,但是它会占用一部分额外的存储,如果这部分开销在可接受的范围内,那么这种组合的效率会很高。
这个例子中未考虑哈希冲突的处理。对于算法本身导致的这类不足之处,一般会用另外的流程来处理,但是这样仅仅只能说是对算法的完善,并未考虑这些流程在性能上的影响。很多大型服务器首先在源端保证哈希冲突的几率很小;其次,如果发生了哈希冲突,那么直接不把这个元素添加到整个哈希桶数组中,因为解决哈希冲突所消耗的性能,可能远高于不经过哈希桶而直接对对象操作。