hlist内核哈希链表应用-解决散列表的冲突问题

最新推荐文章于 2022-03-21 09:54:05 发布

原创最新推荐文章于 2022-03-21 09:54:05 发布 · 521 阅读

4 ·

CC 4.0 BY-SA版权

文章标签：

#哈希链表 #散列表 #散列表链接法 #链接法 #hlist

算法导论同时被 2 个专栏收录

17 篇文章

订阅专栏

数据结构

15 篇文章

订阅专栏

本文介绍散列表的基本概念及使用链接法解决散列冲突的方法，并提供C语言实现示例。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

散列表

我们都知道，散列函数(如h)将关键字映射到散列表的槽中；当两个关键字(如 $k_2$ 和 $k_5$)被映射到同一个槽时，就会产生冲突，如图1：

(图1)
有问题就有解决的办法，这里的解决办法就是链接法(chaining)，链接法是怎么样的呢，如图2：
这里写图片描述
(图2)
那么图2这个链接法是怎么建立的呢，我用的是C语言，借鉴的是Linux内核的哈希链表(hlist)实现:

例子：将关键字 5, 28, 19, 15, 20, 33, 12, 17, 10 插入到哈希表里面.

hashtable.c

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "hashtable.h"

/* Number of buckets in the hash table. */
#define CMD_HASH_HEAD_SIZE 9

/*
 * offsetof is normally supplied by <stddef.h>, which this file includes.
 * Fall back to a local definition only when the toolchain did not provide
 * one, so the standard macro is never redefined.
 */
#ifndef offsetof
#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
#endif

/*
 * container_of - recover a pointer to the enclosing structure from a
 * pointer to one of its members.
 * @ptr:    pointer to the member.
 * @type:   type of the enclosing structure.
 * @member: name of the member within @type.
 *
 * The Linux kernel provides this macro; it is re-created here because this
 * program is built for user space. __typeof__ is used instead of typeof so
 * the macro also compiles when GCC runs in a strict ISO mode (-std=c99 or
 * -std=c11), where the plain typeof keyword is not recognised. The
 * statement expression itself is still a GNU extension.
 */
#define container_of(ptr, type, member) ({                        \
        const __typeof__(((type *)0)->member) *__mptr = (ptr);    \
        (type *)((char *)__mptr - offsetof(type, member)); })

/* One bucket of the hash table: its chain head plus bookkeeping. */
typedef struct _cmd_hash_head {
    struct hlist_head head;   /* head of this bucket's chain */
    int8_t            offset; /* index of this bucket within the table */
    int16_t           count;  /* number of nodes currently in the bucket */
} cmd_hash_head_t;

/* A stored element: the chain linkage followed by the key it carries. */
typedef struct _cmd_hash_node {
    struct hlist_node node; /* linkage into a bucket's chain */
    int8_t            key;  /* key held by this element */
} cmd_hash_node_t;

/* The hash table itself: CMD_HASH_HEAD_SIZE buckets, private to this file. */
static cmd_hash_head_t cmd_hash[CMD_HASH_HEAD_SIZE];

static void cmd_hash_init(void)
{
    int8_t index = 0;
    memset(cmd_hash, 0, sizeof(cmd_hash));
    for (index = 0; index < (CMD_HASH_HEAD_SIZE); ++index){
        INIT_HLIST_HEAD(&cmd_hash[index].head);
        cmd_hash[index].count = 0;
        cmd_hash[index].offset = index;
    }
}

static void cmd_hash_show(void)
{
    int8_t index = 0;
    for (index = 0; index < (CMD_HASH_HEAD_SIZE); ++index)
        printf("hash%d, count:%d, offset:%d\n", index,
            cmd_hash[index].count, cmd_hash[index].offset);
}

/*
 * Map a key to a bucket index.
 * Returns key modulo CMD_HASH_HEAD_SIZE, or -1 when key is negative.
 */
static int8_t hash(int8_t key)
{
    return (key < 0) ? -1 : (int8_t)(key % CMD_HASH_HEAD_SIZE);
}

/*
 * Allocate a node holding key `num` and push it onto the front of the
 * bucket selected by hash(num), incrementing that bucket's count.
 *
 * Nothing is inserted when hash() rejects the key (it returns a negative
 * value) or when the allocation fails. The bucket is computed before
 * allocating so that a rejected key does not leak a freshly allocated
 * node, and the calloc() result is checked before it is dereferenced.
 * Inserted nodes stay owned by the table; this file never frees them.
 */
static void hash_node_init(int8_t num)
{
    int8_t offset = hash(num);
    cmd_hash_node_t *node_ptr;

    if (offset < 0)
        return;

    node_ptr = calloc(1, sizeof(*node_ptr));
    if (node_ptr == NULL) {
        fprintf(stderr, "hash_node_init: out of memory for key %d\n", num);
        return;
    }

    node_ptr->key = num;
    INIT_HLIST_NODE(&node_ptr->node);
    hlist_add_head(&node_ptr->node, &cmd_hash[offset].head);
    cmd_hash[offset].count++;
}

/*
 * Print "display", then for every bucket that holds at least one node
 * print the bucket summary line followed by the key of each node in
 * chain order (most recently inserted first, since nodes are added at
 * the head).
 */
static void cmd_hash_node_show(void)
{
    int8_t index = 0;
    cmd_hash_node_t *entry = NULL;
    struct hlist_node *ptr = NULL;

    printf("display\n");
    for (index = 0; index < CMD_HASH_HEAD_SIZE; ++index) {
        /* Empty buckets are skipped entirely. */
        if (cmd_hash[index].count <= 0)
            continue;

        printf("hash%d, count:%d, offset:%d\n", index,
            cmd_hash[index].count, cmd_hash[index].offset);
        hlist_for_each_entry(entry, ptr, &cmd_hash[index].head, node) {
            printf("key:%d\n", entry->key);
        }
    }
}

/* Insert the sample keys into the table, in the order listed. */
static void cmd_hash_node_init(void)
{
    static const int8_t keys[] = { 5, 28, 19, 15, 20, 33, 12, 17, 10 };
    size_t i;

    for (i = 0; i < sizeof(keys) / sizeof(keys[0]); ++i)
        hash_node_init(keys[i]);
}

/*
 * Demo driver: initialise the table, print the empty buckets, insert the
 * sample keys, then print the bucket summary and every bucket's keys.
 */
int main(void)
{
    cmd_hash_init();        /* reset all buckets */
    cmd_hash_show();        /* summary before any insertion */
    cmd_hash_node_init();   /* insert the sample keys */
    cmd_hash_show();        /* summary after insertion */
    cmd_hash_node_show();   /* per-bucket key listing */
    return 0;
}

===========================================
hashtable.h：

#ifndef __HASHTABLE_H__
#define __HASHTABLE_H__

#include <stdio.h>
#include <stddef.h>

#ifdef __cplusplus
extern "C"{
#endif

/*
 * Values hlist_del() stores into a removed node's link pointers.
 * Both are NULL here, so a node removed with hlist_del() ends up with a
 * NULL pprev and is reported as unhashed by hlist_unhashed().
 */
#define LIST_POISON1 NULL
#define LIST_POISON2 NULL

/* Chain element embedded in every object stored in a hash list. */
struct hlist_node {
    struct hlist_node *next;   /* next node in the chain */
    struct hlist_node **pprev; /* address of the pointer that points at this node */
};


/* Head of one hash bucket's chain. */
struct hlist_head {
    struct hlist_node *first; /* first node of the bucket, NULL when empty */
};

// Initialise a bucket head: make its chain empty by setting `first` to NULL.
#define    INIT_HLIST_HEAD(ptr)    ((ptr)->first = NULL)

/* Initialise an ordinary chain node so that it is linked to nothing. */
static inline void INIT_HLIST_NODE(struct hlist_node *node)
{
    node->pprev = NULL;
    node->next = NULL;
}

/**
 * hlist_add_head - insert a node at the front of a hash list
 * @n: the node to insert.
 * @h: the list head to insert it under.
 *
 * The previous first node, if any, becomes @n's successor and has its
 * back-link redirected to @n's next pointer.
 */
static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
{
    n->next = h->first;
    if (n->next != NULL)
        n->next->pprev = &n->next;

    n->pprev = &h->first;
    h->first = n;
}

/*
 * hlist_add_before - insert a node in front of an existing node.
 * @n:    the new node to insert.
 * @next: the node that @n is placed before; must not be NULL and must
 *        already be linked (its pprev is dereferenced).
 */
static inline void hlist_add_before(struct hlist_node *n,
                                    struct hlist_node *next)
{
    /* Slot that currently points at @next: predecessor's next or head's first. */
    struct hlist_node **link = next->pprev;

    n->next = next;
    n->pprev = link;
    *link = n;
    next->pprev = &n->next;
}

/*
 * hlist_add_after - insert a node directly behind an existing node.
 * @n:    the new node to insert.
 * @next: the node that @n is placed after; must not be NULL.
 */
static inline void hlist_add_after(struct hlist_node *n,
                                   struct hlist_node *next)
{
    struct hlist_node *follower = next->next;

    n->pprev = &next->next;
    n->next = follower;
    next->next = n;

    /* When @next was not the tail, its old successor now hangs off @n. */
    if (follower != NULL)
        follower->pprev = &n->next;
}

/*
 * __hlist_del - unlink a node from its chain without touching its own
 * link fields.
 * @n: the node to unlink; must currently be linked (pprev is dereferenced).
 *
 * When @n is the last node its next pointer is NULL, so there is no
 * successor whose back-link needs updating.
 */
static inline void __hlist_del(struct hlist_node *n)
{
    struct hlist_node **link = n->pprev;
    struct hlist_node *succ = n->next;

    if (succ != NULL)
        succ->pprev = link;
    *link = succ;
}


/*
 * hlist_del - remove a node from its chain.
 * @n: the node to remove.
 *
 * The node is unlinked first, then both of its link fields are
 * overwritten with the LIST_POISON values to mark them as unusable.
 */
static inline void hlist_del(struct hlist_node *n)
{
    __hlist_del(n);
    n->pprev = LIST_POISON2;
    n->next = LIST_POISON1;
}

/*
 * hlist_unhashed - test whether a node is currently outside any chain.
 * @h: the node to test.
 *
 * A NULL pprev means the node has not been added to a list.
 * Returns non-zero (true) in that case, 0 otherwise.
 */
static inline int hlist_unhashed(const struct hlist_node *h)
{
    return h->pprev == NULL;
}

/*
 * hlist_empty - test whether a hash bucket has no nodes.
 * @h: the list head of the bucket.
 *
 * Returns non-zero (true) when the chain is empty, 0 otherwise.
 */
static inline int hlist_empty(const struct hlist_head *h)
{
    return h->first == NULL;
}

/*
 * hlist_entry - get the structure that embeds a given hlist_node.
 * @ptr:    address of a struct hlist_node.
 * @type:   name of the enclosing structure type.
 * @member: name of the hlist_node member inside @type.
 *
 * Expands to container_of(), which this header does not define; the
 * including file has to provide it.
 */

#define hlist_entry(ptr, type, member) container_of(ptr,type,member)


/**
 * hlist_for_each - iterate over a hash list from the first node to the last
 * @pos:  the &struct hlist_node to use as a loop cursor.
 * @head: pointer to the struct hlist_head of the list.
 *
 * This is a plain for loop. The step expression reads pos->next after the
 * body has run, so use hlist_for_each_safe() when the body may remove @pos.
 */
#define hlist_for_each(pos, head) \
    for ((pos) = (head)->first; (pos) != NULL; (pos) = (pos)->next)


/**
 * hlist_for_each_safe - iterate over a hash list, tolerating removal of
 * the current node
 * @pos:  the &struct hlist_node to use as a loop cursor.
 * @n:    another &struct hlist_node used as temporary storage.
 * @head: pointer to the struct hlist_head of the list.
 *
 * The successor is saved in @n before the body runs, so the body may
 * unlink @pos without breaking the traversal. A comma expression is used
 * to save it, which keeps the macro within standard C.
 */
#define hlist_for_each_safe(pos, n, head) \
    for ((pos) = (head)->first; \
         (pos) != NULL && ((n) = (pos)->next, 1); \
         (pos) = (n))



/**
 * hlist_for_each_entry - iterate over a hash list of a given type
 * @tpos:   the type * to use as a loop cursor; inside the body it points
 *          at the structure that embeds the current node.
 * @pos:    the &struct hlist_node to use as a loop cursor.
 * @head:   pointer to the struct hlist_head of the list.
 * @member: the name of the hlist_node within the struct.
 *
 * __typeof__ is used rather than typeof so the macro also expands
 * correctly when the compiler runs in a strict ISO mode (-std=c99,
 * -std=c11), where the plain typeof keyword is not recognised.
 */
#define hlist_for_each_entry(tpos, pos, head, member)                       \
    for ((pos) = (head)->first;                                             \
         (pos) != NULL &&                                                   \
            ((tpos) = hlist_entry(pos, __typeof__(*(tpos)), member), 1);    \
         (pos) = (pos)->next)


/**
 * hlist_for_each_entry_safe - iterate over a hash list of a given type,
 * safe against removal of the current list entry
 * @tpos:   the type * to use as a loop cursor; inside the body it points
 *          at the structure that embeds the current node.
 * @pos:    the &struct hlist_node to use as a loop cursor.
 * @n:      another &struct hlist_node to use as temporary storage.
 * @head:   pointer to the struct hlist_head of the list.
 * @member: the name of the hlist_node within the struct.
 *
 * The successor is saved in @n before the body runs, so the body may
 * unlink @pos. The parameter is spelled `member` so that it matches the
 * name used in the expansion, and __typeof__ keeps the macro usable when
 * the compiler runs in a strict ISO mode.
 */
#define hlist_for_each_entry_safe(tpos, pos, n, head, member)               \
    for ((pos) = (head)->first;                                             \
         (pos) != NULL && ((n) = (pos)->next, 1) &&                         \
            ((tpos) = hlist_entry(pos, __typeof__(*(tpos)), member), 1);    \
         (pos) = (n))

#ifdef __cplusplus
}
#endif

#endif // __HASHTABLE_H__

踩过的坑：
1.因为上网查的资料是内核的环境的，而我是用在用户空间的环境所以编译的时候，遇到坑如下：
这里写图片描述当时就懵逼了，查了一下，网上都说是缺少头文件 stddef.h，于是乎，我就加了一句 #include <stddef.h>
但是还是出现这种情况，这让我有点开始怀疑人生了。当然也有人说要这样编译：#gcc -o xxx xxx.c -std=gnu99，这样我也试了，还是没有用，这下子就不是怀疑人生这么简单了，这下子是要变神经的节奏啦。最后，在网上论坛上查了container_of才醒悟过来，原来这是内核中定义的宏，在用户空间不能用，所以只能在这里重新定义一番了。