一个带冲突率检查、超时机制的哈希表

最新推荐文章于 2025-11-24 16:48:59 发布

原创最新推荐文章于 2025-11-24 16:48:59 发布 · 1.5k 阅读

1 ·

CC 4.0 BY-SA版权

文章标签：

#哈希表 #散列表

C/C++ 专栏收录该内容

21 篇文章

订阅专栏

本文介绍了一种采用开链法解决冲突的哈希表实现方案，支持动态内存管理和超时机制，提供了完整的源代码及示例。

相关特性：
- 开链法解决冲突问题
- 哈希函数和比较函数通过接口方式提供
- 支持哈希表查询、插入、删除等操作
- 哈希桶数量约为最大容量的7倍（取不超过该值的最大素数），冲突率较低（4%以下）
- 超时机制，删除失效元素
- 统计冲突率，方便调试
- 动态分配内存，释放的数据项被添加到空闲列表中重复使用，以减少内存分配函数的调用次数

用法：
首先创建哈希表，
htable_t* htable_create(uint32_t payload_size, uint32_t cnt_used_max,
uint32_t cnt_idle_init, uint32_t cnt_idle_max,
uint32_t cnt_timeout, uint32_t (*hash)(const void*),
int (*equal)(const void*, const void*));

payload_size为每个数据项的大小，cnt_used_max是预估的最大容量，cnt_idle_init是预分配的空闲项数量，cnt_idle_max是空闲项数量的上限；
cnt_timeout是超时时间（单位：秒），如果为0，则不进行超时检查；hash为哈希函数，equal为比较函数。

然后就可以进行插入、查询、删除等操作，
htable_item_t* htable_find(htable_t *table, void *key, void *payload);
htable_item_t* htable_insert(htable_t *table, void *key, void *payload);
int htable_remove(htable_t *table, htable_item_t *item);

另外还可以定期删除超时失效的数据项，并定期更新当前时间：
void htable_remove_timeout(htable_t *table);
void htable_update_now(htable_t *table, time_t now);

定期打印统计信息

void htable_print_stat(htable_t *table);

源码如下：

htable.c

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "htable.h"

/*
 * Branch-prediction hint: marks `expr` as expected to be non-zero.
 * Returns the value of `expr`.
 *
 * Declared `static inline`: a plain `inline` definition provides no external
 * definition in C99/C11, so any call the compiler chooses not to inline
 * (e.g. at -O0) would fail to link.
 */
static inline int likely(int expr)
{
#ifdef __GNUC__
    return __builtin_expect(expr, 1);
#else
    return expr;
#endif
}

/*
 * Branch-prediction hint: marks `expr` as expected to be zero.
 * Returns the value of `expr`.
 *
 * Declared `static inline`: a plain `inline` definition provides no external
 * definition in C99/C11, so any call the compiler chooses not to inline
 * (e.g. at -O0) would fail to link.
 */
static inline int unlikely(int expr)
{
#ifdef __GNUC__
    return __builtin_expect(expr, 0);
#else
    return expr;
#endif
}

/*
 * Return the largest prime that does not exceed `max`.
 * Values below 2 have no such prime and are returned unchanged (0 or 1).
 *
 * Primality is checked by trial division. The bound is written as
 * `d <= max / d` rather than `d * d <= max` so it cannot overflow, and
 * no floating-point sqrt() is involved.
 */
static inline uint32_t __htable_factor(uint32_t max)
{
    for (; max > 1; max--) {
        uint32_t d = 2;

        // advance to the first divisor, or past floor(sqrt(max))
        while (d <= max / d && max % d != 0) {
            d++;
        }
        if (d > max / d) {
            // no divisor found: max is prime
            return max;
        }
    }

    return max;
}

/*
 * Release a hash table together with every item it owns.
 * Passing NULL is a no-op.
 *
 * Idle items are chained through `next` (the freed list); live items are
 * chained through `next1` (the used list). The used list must be walked via
 * `next1`: following `next` there would only visit the bucket chain of the
 * first live item and leak every other one.
 */
void htable_destory(htable_t *table)
{
    htable_item_t *item;
    htable_item_t *following;

    if (NULL == table) {
        return;
    }

    // idle items
    for (item = table->freed; item != NULL; item = following) {
        following = item->next;
        free(item);
    }

    // live items
    for (item = table->used_head; item != NULL; item = following) {
        following = item->next1;
        free(item);
    }

    free(table);
}

/*
 * Create a hash table.
 *
 * payload_size   size in bytes of the payload stored in each item
 * cnt_used_max   maximum number of live items; must be non-zero
 * cnt_idle_init  number of idle items pre-allocated onto the freed list
 * cnt_idle_max   upper bound of the freed list length
 * cnt_timeout    timeout in seconds, 0 disables timeout checking
 * hash           hash function applied to the key; must not be NULL
 * equal          payload comparison function; must not be NULL
 *
 * The bucket count is the largest prime not exceeding
 * cnt_used_max * HTABLE_FACTOR.
 *
 * Returns the new table, or NULL when an argument is invalid, the bucket
 * count cannot be represented, or memory allocation fails.
 */
htable_t* htable_create(uint32_t payload_size, uint32_t cnt_used_max,
                        uint32_t cnt_idle_init, uint32_t cnt_idle_max,
                        uint32_t cnt_timeout, uint32_t (*hash)(const void*),
                        int (*equal)(const void*, const void*))
{
    uint32_t       cnt_hash = 0;
    htable_t      *table    = NULL;
    htable_item_t *item     = NULL;
    uint32_t       i        = 0;

    // the callbacks are invoked unconditionally by find/insert
    if (hash == NULL || equal == NULL) {
        return NULL;
    }

    // a zero capacity would give zero buckets and a later modulo by zero;
    // an oversized capacity would wrap the 32-bit multiplication below
    if (cnt_used_max == 0 || cnt_used_max > UINT32_MAX / HTABLE_FACTOR) {
        return NULL;
    }

    cnt_hash = __htable_factor(cnt_used_max * HTABLE_FACTOR);
    if (cnt_hash == 0
        || cnt_hash > (SIZE_MAX - sizeof(htable_t)) / sizeof(htable_item_t *)) {
        return NULL;
    }

    // alloc hash table struct together with its bucket array
    table = (htable_t *)calloc(1, sizeof(htable_t) + (size_t)cnt_hash * sizeof(htable_item_t *));
    if (NULL == table) {
        return NULL;
    }

    // init freed list
    for (i = 0; i < cnt_idle_init; i++) {
        item = (htable_item_t *)calloc(1, sizeof(htable_item_t) + (size_t)payload_size);
        if (NULL == item) {
            goto err;
        }
        if (table->freed != NULL) {
            table->freed->prev = item;
        }
        item->next = table->freed;
        item->prev = NULL;
        table->freed = item;
    }

    table->payload_size = payload_size;
    table->cnt_used_max = cnt_used_max;
    table->cnt_idle_max = cnt_idle_max;
    table->cnt_hash     = cnt_hash;
    table->cnt_used     = 0;
    table->cnt_idle     = cnt_idle_init;
    table->cnt_conflict = 0;
    table->hash         = hash;
    table->equal        = equal;
    table->cnt_timeout  = cnt_timeout;

    return table;

  err:
    // releases the items already placed on the freed list, then the table
    htable_destory(table);
    return NULL;
}

/*
 * Look up an item.
 *
 * `key` is only passed to the table's hash function to select a bucket;
 * `payload` is compared against each stored payload of that bucket with the
 * table's equal function.
 *
 * On a hit the item is moved to the head of the used list, its last_read is
 * set to the table's current time, and the item is returned.
 * Returns NULL when any argument is NULL or nothing matches.
 */
htable_item_t* htable_find(htable_t *table, void *key, void *payload)
{
    htable_item_t *cur;
    uint32_t       bucket;

    if (unlikely(table == NULL || key == NULL || payload == NULL)) {
        return NULL;
    }

    bucket = table->hash(key) % table->cnt_hash;

    for (cur = table->heads[bucket]; cur != NULL; cur = cur->next) {
        if (!likely(table->equal(cur->payload, payload))) {
            continue;
        }

        // an item that is already the head of the used list stays in place
        if (likely(cur->prev1 != NULL)) {
            htable_item_t *before = cur->prev1;
            htable_item_t *after  = cur->next1;

            // unlink from the current position
            if (likely(after != NULL)) {
                after->prev1 = before;
            } else {
                table->used_tail = before;
            }
            before->next1 = after;

            // relink at the head
            cur->next1 = table->used_head;
            cur->prev1 = NULL;
            table->used_head->prev1 = cur;
            table->used_head = cur;
        }

        cur->last_read = table->now;
        return cur;
    }

    return NULL;
}

/*
 * Insert a new item whose payload is a copy of `payload`
 * (table->payload_size bytes); `key` is passed to the hash function to
 * select the bucket. No check for an already existing equal item is made.
 *
 * The item is taken from the freed list when one is available, otherwise it
 * is allocated. It becomes the head of both the used list and its bucket.
 *
 * Returns the new item, or NULL when an argument is NULL, the table already
 * holds cnt_used_max items (cnt_exceed_err is incremented), or allocation
 * fails.
 */
htable_item_t* htable_insert(htable_t *table, void *key, void *payload)
{
    htable_item_t *node;
    htable_item_t *bucket_head;
    uint32_t       slot;

    if (unlikely(table == NULL || key == NULL || payload == NULL)) {
        return NULL;
    }

    // capacity limit
    if (table->cnt_used >= table->cnt_used_max) {
        table->cnt_exceed_err++;
        return NULL;
    }

    // reuse an idle item if possible, otherwise allocate a fresh one
    node = table->freed;
    if (node != NULL) {
        table->freed = node->next;
        table->cnt_idle--;
    } else {
        node = (htable_item_t *)calloc(1, sizeof(htable_item_t) + table->payload_size);
        if (node == NULL) {
            return NULL;
        }
        table->cnt_alloc++;
    }

    // push onto the head of the used list (timeout bookkeeping)
    node->last_read = table->now;
    node->prev1 = NULL;
    node->next1 = table->used_head;
    if (table->used_head == NULL) {
        table->used_tail = node;
    } else {
        table->used_head->prev1 = node;
    }
    table->used_head = node;

    // push onto the head of the bucket chain
    slot = table->hash(key) % table->cnt_hash;
    bucket_head = table->heads[slot];
    node->idx  = slot;
    node->prev = NULL;
    node->next = bucket_head;
    if (bucket_head != NULL) {
        bucket_head->prev = node;
        table->cnt_conflict++;   // bucket was already occupied
    }
    table->heads[slot] = node;

    table->cnt_used++;
    memcpy(node->payload, payload, table->payload_size);

    return node;
}

/*
 * Detach `item` from its bucket chain and from the used list, then either
 * free it (freed list already holds cnt_idle_max items) or push it onto the
 * freed list. Neither argument is checked for NULL.
 */
static inline void _htable_remove(htable_t *table, htable_item_t *item)
{
    htable_item_t *chain_next = item->next;
    htable_item_t *chain_prev = item->prev;
    htable_item_t *used_next  = item->next1;
    htable_item_t *used_prev  = item->prev1;

    // bucket chain: an item with a neighbour counted as one conflict
    if (chain_next != NULL || chain_prev != NULL) {
        table->cnt_conflict--;
    }
    if (chain_next != NULL) {
        chain_next->prev = chain_prev;
    }
    if (chain_prev == NULL) {
        table->heads[item->idx] = chain_next;
    } else {
        chain_prev->next = chain_next;
    }

    // used list (timeout bookkeeping)
    if (used_next == NULL) {
        table->used_tail = used_prev;
    } else {
        used_next->prev1 = used_prev;
    }
    if (used_prev == NULL) {
        table->used_head = used_next;
    } else {
        used_prev->next1 = used_next;
    }

    // recycle or release
    table->cnt_used--;
    if (table->cnt_idle < table->cnt_idle_max) {
        item->next = table->freed;
        table->freed = item;
        table->cnt_idle++;
    } else {
        free(item);
    }
}


/*
 * Remove `item` from `table`.
 * Returns 0 on success, -1 when either argument is NULL.
 */
int htable_remove(htable_t *table, htable_item_t *item)
{
    if (likely(table != NULL && item != NULL)) {
        _htable_remove(table, item);
        return 0;
    }

    return -1;
}

/*
 * Remove items from the tail of the used list for as long as the tail's
 * last_read plus cnt_timeout is not later than the table's current time.
 * Does nothing when `table` is NULL or cnt_timeout is 0.
 */
void htable_remove_timeout(htable_t *table)
{
    htable_item_t *oldest;

    if (table == NULL) {
        return;
    }
    if (table->cnt_timeout == 0) {
        return;
    }

    for (oldest = table->used_tail; oldest != NULL; oldest = table->used_tail) {
        // the tail has not expired yet, so stop scanning
        if (table->now < oldest->last_read + table->cnt_timeout) {
            return;
        }

        _htable_remove(table, oldest);
    }
}

/*
 * Set the table's notion of the current time, which is used to stamp items
 * on insert/find and to decide expiry in htable_remove_timeout().
 * Passing a NULL table is a no-op.
 */
void htable_update_now(htable_t *table, time_t now)
{
    if (table == NULL) {
        return;
    }

    table->now = now;
}

/*
 * Print the table counters to stdout, then reset the two per-interval
 * counters (cnt_alloc and cnt_exceed_err).
 * Passing a NULL table is a no-op.
 */
void htable_print_stat(htable_t *table)
{
    if (table == NULL) {
        return;
    }

    // uint32_t is not guaranteed to be `unsigned int`; cast to match %u
    printf("htable stat. used: %u, idle: %u, conflict: %u, exceed_err: %u, alloc: %u\n",
           (unsigned)table->cnt_used, (unsigned)table->cnt_idle,
           (unsigned)table->cnt_conflict, (unsigned)table->cnt_exceed_err,
           (unsigned)table->cnt_alloc);
    table->cnt_alloc = 0;
    table->cnt_exceed_err = 0;
}

htable.h

#ifndef _HTABLE_H_
#define _HTABLE_H_

#include <stdint.h>
#include <time.h>

#ifdef __cplusplus
extern "C"
{
#endif

// Bucket-count multiplier: htable_create() sizes the bucket array from
// cnt_used_max * HTABLE_FACTOR.
#define HTABLE_FACTOR 7

/*
 * One table entry. The payload bytes are stored inline, directly after the
 * header, so an item is allocated as sizeof(htable_item_t) + payload size.
 *
 * `payload` is a C99 flexible array member (the zero-length array form
 * `payload[0]` is a GNU extension).
 */
typedef struct _htable_item {
    struct _htable_item* next;        // bucket chain / freed list link
    struct _htable_item* prev;        // bucket chain link
    struct _htable_item* next1;       // used list link, for timeout check
    struct _htable_item* prev1;       // used list link, for timeout check
    time_t               last_read;   // last read time
    uint32_t             idx;         // hash table idx
    char                 payload[];   // payload
} htable_item_t;

/*
 * Hash table header. The bucket array is stored inline after the header, so
 * a table is allocated as sizeof(htable_t) + cnt_hash * sizeof(htable_item_t *).
 *
 * `heads` is a C99 flexible array member (the zero-length array form
 * `heads[0]` is a GNU extension).
 */
typedef struct {
    uint32_t       payload_size;     // payload size

    uint32_t       cnt_used_max;     // max of used item count
    uint32_t       cnt_idle_max;     // max idle item count
    uint32_t       cnt_hash;         // hash domain size
    uint32_t       cnt_used;         // used item count
    uint32_t       cnt_idle;         // idle item count
    uint32_t       cnt_conflict;     // conflict item count

    uint32_t       cnt_exceed_err;   // cnt_used exceed cnt_used_max
    uint32_t       cnt_alloc;        // alloc count

    uint32_t       cnt_timeout;      // how many seconds timeout, 0 mean not check timeout
    time_t         now;              // current time

    uint32_t (*hash)(const void*);   // hash function
    int (*equal)(const void*, const void*);  // item equal function

    htable_item_t *used_head;        // for timeout check, insert from this
    htable_item_t *used_tail;        // for timeout check, remove from this

    htable_item_t *freed;            // freed list (singly linked list)
    htable_item_t *heads[];          // hash table array
} htable_t;

/*
 * Create a hash table.
 *   payload_size   size in bytes of the payload stored in each item
 *   cnt_used_max   maximum number of live items
 *   cnt_idle_init  number of idle items pre-allocated at creation
 *   cnt_idle_max   maximum number of idle items kept for reuse
 *   cnt_timeout    timeout in seconds, 0 means no timeout checking
 *   hash           hash function applied to the key
 *   equal          comparison function applied to two payloads
 * Returns the new table, or NULL on failure.
 */
htable_t* htable_create(uint32_t payload_size, uint32_t cnt_used_max, 
                        uint32_t cnt_idle_init, uint32_t cnt_idle_max, 
                        uint32_t cnt_timeout, uint32_t (*hash)(const void*),
                        int (*equal)(const void*, const void*));

/* Release a table created by htable_create(); passing NULL is a no-op. */
void htable_destory(htable_t *table);

/*
 * Look up the item whose stored payload compares equal to `payload`; `key`
 * is hashed to select the bucket. A hit refreshes the item's last-read time
 * and moves it to the head of the used list.
 * Returns the item, or NULL when not found or when any argument is NULL.
 */
htable_item_t* htable_find(htable_t *table, void *key, void *payload);

/*
 * Insert a copy of `payload` (payload_size bytes) under the bucket selected
 * by hashing `key`. Existing equal items are not checked for.
 * Returns the new item, or NULL when any argument is NULL, the table already
 * holds cnt_used_max items, or memory allocation fails.
 */
htable_item_t* htable_insert(htable_t *table, void *key, void *payload);

/* Remove `item` from `table`. Returns 0, or -1 when either argument is NULL. */
int htable_remove(htable_t *table, htable_item_t *item);

/*
 * Remove expired items, i.e. items whose last-read time plus cnt_timeout is
 * not later than the table's current time. Does nothing when `table` is NULL
 * or cnt_timeout is 0.
 */
void htable_remove_timeout(htable_t *table);

/* Set the table's current time, used for last-read stamps and expiry. */
void htable_update_now(htable_t *table, time_t now);

/*
 * Print the table counters to stdout, then reset the alloc and exceed_err
 * counters to 0.
 */
void htable_print_stat(htable_t *table);

#ifdef __cplusplus
};
#endif

#endif

main.c

// gcc main.c htable.c -lm -g -Wall -O2
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include "htable.h"

/* Hash callback: the 32-bit value that `key` points to is used as the hash. */
uint32_t ip_key(const void* key)
{
    const uint32_t *addr = (const uint32_t *)key;

    return *addr;
}

/*
 * Equality callback: returns 1 when the two 32-bit values pointed to are
 * equal, 0 otherwise.
 */
int ip_cmp(const void* value1, const void* value2)
{
    const uint32_t lhs = *(const uint32_t *)value1;
    const uint32_t rhs = *(const uint32_t *)value2;

    return lhs == rhs;
}

/*
 * Demo driver: once per second, looks up 10000 random 32-bit values and
 * inserts the ones that are missing, then expires old items (60 second
 * timeout). Statistics are printed on every 20th round. The loop never
 * terminates, so the cleanup after it is not reached.
 */
int main()
{
    htable_t *table;
    uint32_t  ip    = 0;
    int       round = 0;
    int       n;

    table = htable_create(4, 3000 * 1000, 1000, 22000, 60, ip_key, ip_cmp);
    if (table == NULL) {
        return -1;
    }

    srand(time(NULL));

    for (;;) {
        htable_update_now(table, time(NULL));

        for (n = 0; n < 10000; n++) {
            ip = (uint32_t)rand();
            if (htable_find(table, &ip, &ip) != NULL) {
                continue;
            }
            htable_insert(table, &ip, &ip);
        }

        htable_remove_timeout(table);

        if ((round++ % 20) == 0) {
            htable_print_stat(table);
        }

        sleep(1);
    }

    htable_destory(table);
    return 0;
}