C语言数据结构之哈希表(HASHTABLE)的实现

C语言数据结构之哈希表(HASHTABLE)的实现

哈希表的每个节点保存的数据格式为key : value,其中key为字符串,根据字符串内容采用不同方法(哈希函数)生成一个无符号整型哈希码,根据表的长度,采用取余法,将数据存入表单元,如果此表单元中已存在数据,则以此表单元为链表头,向链表追加数据,这样表的长度不是数据的总量,总量可以大于表的长度!!!

哈希表的数据结构如下所示:

  -----
0 | O | -> NULL
  -----    -----    -----    -----
1 | O | -> | O | -> | O | -> | O | -> NULL
  -----    -----    -----    -----
.  ...      ...     ...    ....
.  ...      ...     ...    ....
.  ...      ...     ...    ....
  -----    -----
n | O | -> | O | -> NULL
  -----    -----
  • 每个节点保存一个Entry数据格式为(key : value)
  • 每个节点都是一个链表节点,有一个next指针指向下一个节点
  • 生成哈希码函数 gen_hashcode
  • 创建哈希表函数 hash_table_new
  • 释放哈希表函数 hash_table_free

代码如下:

/* filename : ht.c */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* define datatype uint */
typedef unsigned int uint;

/* define function pointer HTFunc for HashTable */
typedef void (*HTFunc) (void *key, void *val);

/* define datatype Entry */
typedef struct _Entry Entry;
struct _Entry {
  void *key;
  void *val;
  Entry *next;
};

/* define datatype HashTable */
typedef struct _HashTable HashTable;
struct _HashTable {
  Entry **elts;
  uint size;
  uint length;
};

/* define gen_hashcode function to generate hash code */
static uint
gen_hashcode (const char *p)
{
  unsigned int h = *p;
  if (h)
    for (p += 1; *p != '\0'; p++)
      h = (h << 5) - h + *p;
  return h;
}

/* create a new hash table */
HashTable *
hash_table_new (uint length)
{
  HashTable *ht = (HashTable*) malloc (sizeof(HashTable));
  ht->elts = (Entry**) malloc (length * sizeof(Entry*));
  ht->length = length;
  ht->size = 0;
  for (int i = 0; i < ht->length; i++)
    {
      ht->elts[i] = (Entry*) malloc (sizeof(Entry));
      ht->elts[i]->key = NULL;
      ht->elts[i]->val = NULL;
      ht->elts[i]->next = NULL;
    }
  return ht;
}

/* free the hash table */
void
hash_table_free (HashTable *ht)
{
  for (uint i = 0; i < ht->length; i++)
    {
      Entry *tmp = ht->elts[i];
      while (tmp != NULL)
	{
	  Entry *node = tmp;
	  tmp = tmp->next;
	  free (node);
	}
    }
  free (ht->elts);
  free (ht);
}

/*------------------------------*/

/**/
void
test_hashtable (void)
{
  HashTable *ht = hash_table_new (17);
  hash_table_free (ht);
}

/**/
int
main (int argc, char *argv[])
{
  test_hashtable ();
  return 0;
}

/* -- end -- */

编译运行,检查内存,效果如下:

songvm@ubuntu:~/works/xdn/loo$ gcc ht.c -o ht
songvm@ubuntu:~/works/xdn/loo$ ./ht
songvm@ubuntu:~/works/xdn/loo$ valgrind --leak-check=yes ./ht
==2835== Memcheck, a memory error detector
==2835== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==2835== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==2835== Command: ./ht
==2835== 
==2835== 
==2835== HEAP SUMMARY:
==2835==     in use at exit: 0 bytes in 0 blocks
==2835==   total heap usage: 19 allocs, 19 frees, 560 bytes allocated
==2835== 
==2835== All heap blocks were freed -- no leaks are possible
==2835== 
==2835== For counts of detected and suppressed errors, rerun with: -v
==2835== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
songvm@ubuntu:~/works/xdn/loo$ 

向哈希表推入数据 PUT 从哈希表中取出数据 GET

  • 向哈希表推入数据函数 hash_table_put
  • 从哈希表中取出数据函数 hash_table_get

代码如下:

/* put keyval into hash table */
void
hash_table_put (HashTable *ht, void *key, void *val)
{
  uint hc  = gen_hashcode (key);
  uint idx = hc % ht->length;
  Entry *tmp = ht->elts[idx];
  if (tmp->key == NULL)
    {
      tmp->key = key;
      tmp->val = val;
    }
  else
    {
      Entry *node = (Entry*) malloc (sizeof(Entry));
      node->key = key;
      node->val = val;
      node->next = NULL;
      while (tmp->next != NULL)
	tmp = tmp->next;
      tmp->next = node;
    }
  ht->size = ht->size + 1;
}

/* get val by key from hash table */
void *
hash_table_get (HashTable *ht, void *key)
{
  uint hc  = gen_hashcode (key);
  uint idx = hc % ht->length;
  Entry *tmp = ht->elts[idx];
  if (tmp->key == NULL) return NULL;
  if (0 == strcmp (tmp->key, key))
    return tmp->val;
  else
    {
      while (tmp->next != NULL)
	{
	  tmp = tmp->next;
	  if (0 == strcmp (tmp->key, key))
	    return tmp->val;
	}
    }
  return NULL;
}
  • 测试函数,定义字符串数组,保存英文、中文名字一一对应
  • 将字符串数组中的数据循环推入哈希表
  • 根英文名字在哈希表中查找Entry,然后输出中文名字
  • 输出哈希表相关信息

代码如下:

/**/
void
test_hashtable (void)
{
  char *buff[] = {
    "Angus",   "安格斯", "Anthony", "安东尼",  "Apollo",  "阿波罗",
    "Arnold",  "阿诺德", "Arthur",  "亚瑟",    "August",  "奥古斯特",
    "Austin",  "奥斯汀", "Ben",     "本",      "Benjamin", "本杰明",
    "Bert",    "伯特",   "Benson",   "本森",   "Bill",     "比尔",
    "Billy",   "比利",   "Blake",    "布莱克", "Bob",      "鲍伯",
    "Bobby",   "鲍比",   "Brad",     "布拉德", "Brandon",  "布兰登",
    "Brant",   "布兰特", "Brent",    "布伦特", "Brian/Bryan", "布赖恩",
    "Brown",   "布朗",   "Bruce",    "布鲁斯", "Caleb",    "迦勒",
    "Cameron", "卡梅伦", "Carl",     "卡尔",   "Carlos",   "卡洛斯",
    "Cary",    "凯里",   "Caspar",   "卡斯帕", "Cecil",    "塞西",
    "Charles", "查尔斯", "Cheney",   "采尼"  };
  char *key;
  void *val;
  HashTable *ht = hash_table_new (17);
  int len = sizeof(buff)/sizeof(char*);
  for (int i = 0; i < len/2; i++)
    hash_table_put (ht, buff[2*i], buff[2*i+1]);

  key = "Bill";
  printf ("HashTable get key [%s] : val [%s]\n",
	  key, (char*)(hash_table_get(ht, key)));

  key = "Austin";
  printf ("HashTable get key [%s] : val [%s]\n",
	  key, (char*)(hash_table_get(ht, key)));

  key = "Chinese";
  val = hash_table_get(ht, key);
  printf ("HashTable get key [%s] : val [%s]\n",
	  key, (val == NULL) ? "NULL" : (char*)(val));

  key = "Brent";
  val = hash_table_get(ht, key);
  printf ("HashTable get key [%s] : val [%s]\n",
	  key, (val == NULL) ? "NULL" : (char*)(val));

  printf ("HashTable length is %d\n", ht->length);
  printf ("HashTable size is %d\n", ht->size);

  hash_table_free (ht);
}

编译运行,结果如下,达到预期:

songvm@ubuntu:~/works/xdn/loo$ gcc ht.c -o ht
songvm@ubuntu:~/works/xdn/loo$ ./ht
HashTable get key [Bill] : val [比尔]
HashTable get key [Austin] : val [奥斯汀]
HashTable get key [Chinese] : val [NULL]
HashTable get key [Brent] : val [布伦特]
HashTable length is 17
HashTable size is 32
songvm@ubuntu:~/works/xdn/loo$ 

遍历哈希表,逐项操作函数FOREACH,参数为函数指针HTFunc

  • 定义整型变量count,统计哈希表中Entry的数量,输出count的值
  • 对每个Entry节点调用HTFunc指向的函数,输出相关信息

代码如下:

/* foreach node run htfunc of hashtable */
void
hash_table_foreach (HashTable *ht, HTFunc htfunc)
{
  int count = 0;
  for (uint i = 0; i < ht->length; i++)
    {
      Entry *tmp = ht->elts[i];
      if (tmp->key != NULL)
	{
	  printf ("%2d, ", count);
	  htfunc (tmp->key, tmp->val);
	  count++;
	  while (tmp->next != NULL)
	    {
	      tmp = tmp->next;
	      printf ("%2d, ", count);
	      htfunc (tmp->key, tmp->val);
	      count++;
	    }
	}
    }
}
  • 测试函数,遍历哈希表,输出KEYVAL

代码如下:

/* define function for HTFunc */
void
out_keyval (void *key, void *val)
{
  printf ("Key : [%s], Value : [%s]\n", (char*)key, (char*)val);
}

/**/
void
test_hashtable_foreach (void)
{
  char *buff[] = {
    "Angus",   "安格斯", "Anthony", "安东尼",  "Apollo",  "阿波罗",
    "Arnold",  "阿诺德", "Arthur",  "亚瑟",    "August",  "奥古斯特",
    "Austin",  "奥斯汀", "Ben",     "本",      "Benjamin", "本杰明",
    "Bert",    "伯特",   "Benson",   "本森",   "Bill",     "比尔",
    "Billy",   "比利",   "Blake",    "布莱克", "Bob",      "鲍伯",
    "Bobby",   "鲍比",   "Brad",     "布拉德", "Brandon",  "布兰登",
    "Brant",   "布兰特", "Brent",    "布伦特", "Brian/Bryan", "布赖恩",
    "Brown",   "布朗",   "Bruce",    "布鲁斯", "Caleb",    "迦勒",
    "Cameron", "卡梅伦", "Carl",     "卡尔",   "Carlos",   "卡洛斯",
    "Cary",    "凯里",   "Caspar",   "卡斯帕", "Cecil",    "塞西",
    "Charles", "查尔斯", "Cheney",   "采尼"  };
  char *key;
  void *val;
  HashTable *ht = hash_table_new (17);
  int len = sizeof(buff)/sizeof(char*);
  for (int i = 0; i < len/2; i++)
    hash_table_put (ht, buff[2*i], buff[2*i+1]);

  hash_table_foreach (ht, out_keyval);

  hash_table_free (ht);
}

编译运行,结果如下,符合预期:

songvm@ubuntu:~/works/xdn/loo$ gcc ht.c -o ht
songvm@ubuntu:~/works/xdn/loo$ ./ht
 0, Key : [Austin], Value : [奥斯汀]
 1, Key : [Bill], Value : [比尔]
 2, Key : [Cecil], Value : [塞西]
 3, Key : [Bob], Value : [鲍伯]
 4, Key : [Anthony], Value : [安东尼]
 5, Key : [Brandon], Value : [布兰登]
 6, Key : [Brant], Value : [布兰特]
 7, Key : [Carl], Value : [卡尔]
 8, Key : [August], Value : [奥古斯特]
 9, Key : [Brad], Value : [布拉德]
10, Key : [Brent], Value : [布伦特]
11, Key : [Caleb], Value : [迦勒]
12, Key : [Bert], Value : [伯特]
13, Key : [Blake], Value : [布莱克]
14, Key : [Ben], Value : []
15, Key : [Benjamin], Value : [本杰明]
16, Key : [Brian/Bryan], Value : [布赖恩]
17, Key : [Angus], Value : [安格斯]
18, Key : [Brown], Value : [布朗]
19, Key : [Caspar], Value : [卡斯帕]
20, Key : [Carlos], Value : [卡洛斯]
21, Key : [Bruce], Value : [布鲁斯]
22, Key : [Apollo], Value : [阿波罗]
23, Key : [Benson], Value : [本森]
24, Key : [Bobby], Value : [鲍比]
25, Key : [Arnold], Value : [阿诺德]
26, Key : [Arthur], Value : [亚瑟]
27, Key : [Billy], Value : [比利]
28, Key : [Cameron], Value : [卡梅伦]
29, Key : [Cary], Value : [凯里]
30, Key : [Charles], Value : [查尔斯]
31, Key : [Cheney], Value : [采尼]
songvm@ubuntu:~/works/xdn/loo$ 

遍历哈希表,逐项操作,输出哈希表的结构,空值输出NULL

代码如下:

/* foreach node run htfunc of hashtable */
void
hash_table_foreach (HashTable *ht, HTFunc htfunc)
{
  for (uint i = 0; i < ht->length; i++)
    {
      int count = 0;
      Entry *tmp = ht->elts[i];
      if (tmp->key != NULL)
	{
	  printf ("Line %2d, %d, ", i, count);
	  htfunc (tmp->key, tmp->val);
	  count++;
	  while (tmp->next != NULL)
	    {
	      tmp = tmp->next;
	      printf (", %d, ", count);
	      htfunc (tmp->key, tmp->val);
	      count++;
	    }
	  printf ("\n");
	}
      else
	{
	  printf ("Line %2d, NULL!\n", i);
	}
    }
}
  • 测试函数,修改一下HTFunc指针指向的函数,便于查看结果

代码如下:

/* define function for HTFunc */
void
out_keyval (void *key, void *val)
{
  //printf ("Key : [%s], Value : [%s]\n", (char*)key, (char*)val);
  printf ("[%s : %s]", (char*)key, (char*)val);
}

编译运行,结果如下,符合预期:

songvm@ubuntu:~/works/xdn/loo$ gcc ht.c -o ht
songvm@ubuntu:~/works/xdn/loo$ ./ht
Line  0, NULL!
Line  1, 0, [Austin : 奥斯汀], 1, [Bill : 比尔], 2, [Cecil : 塞西]
Line  2, 0, [Bob : 鲍伯]
Line  3, 0, [Anthony : 安东尼], 1, [Brandon : 布兰登], 2, [Brant : 布兰特], 3, [Carl : 卡尔]
Line  4, 0, [August : 奥古斯特]
Line  5, 0, [Brad : 布拉德], 1, [Brent : 布伦特], 2, [Caleb : 迦勒]
Line  6, 0, [Bert : 伯特], 1, [Blake : 布莱克]
Line  7, NULL!
Line  8, NULL!
Line  9, NULL!
Line 10, 0, [Ben :], 1, [Benjamin : 本杰明], 2, [Brian/Bryan : 布赖恩]
Line 11, 0, [Angus : 安格斯], 1, [Brown : 布朗]
Line 12, 0, [Caspar : 卡斯帕]
Line 13, 0, [Carlos : 卡洛斯]
Line 14, 0, [Bruce : 布鲁斯]
Line 15, 0, [Apollo : 阿波罗], 1, [Benson : 本森], 2, [Bobby : 鲍比]
Line 16, 0, [Arnold : 阿诺德], 1, [Arthur : 亚瑟], 2, [Billy : 比利], 3, [Cameron : 卡梅伦], 4, [Cary : 凯里], 5, [Charles : 查尔斯], 6, [Cheney : 采尼]
songvm@ubuntu:~/works/xdn/loo$ 

读文件到哈希表

  • 找到前面文件读写实践中输出的文件name.dat
  • 将没有中文名字的加上中文译名: Raymond 雷蒙德
  • 将/符号去掉,加上空格 : Tanya/谭雅坦尼娅 ,Tanya 谭雅坦尼娅
  • 读文件需要分配内存,所以要释放每一个节点的key和value函数:hash_table_free_elements

代码如下:

/* free keyvals all of the hash table */
void
hash_table_free_elements (HashTable *ht)
{
  for (uint i = 0; i < ht->length; i++)
    {
      Entry *tmp = ht->elts[i];
      while (tmp != NULL)
	{
	  Entry *node = tmp;
	  tmp = tmp->next;
	  free (node->key);
	  free (node->val);
	  free (node);
	}
    }
  free (ht->elts);
  free (ht);
}
  • 读文件函数 test_hashtable_from_file
  • 每读一个字符串都在之前分配内存,将内存指针做为参数传给hash_table_put函数
  • 操作结束后调用hash_table_free_elements释放分配的内存和哈希表

代码如下:

/**/
void
test_hashtable_from_file (void)
{
  int tmp = 1;
  char *key, *val;
  FILE *fpi;
  HashTable *ht = hash_table_new (1031);
  fpi = fopen ("name.dat", "r");
  while (tmp != EOF)
    {
      key = (char*) malloc (64*sizeof(char));
      val = (char*) malloc (64*sizeof(char));
      memset (key, 0, 64);
      memset (val, 0, 64);
      tmp = fscanf (fpi, "%s", key);
      tmp = fscanf (fpi, "%s", val);
      hash_table_put (ht, key, val);
    }

  hash_table_foreach (ht, out_keyval);

  hash_table_free_elements (ht);
  fclose (fpi);
}

编译运行,结果如下,符合预期:

songvm@ubuntu:~/works/xdn/loo$ gcc ht.c -o ht
songvm@ubuntu:~/works/xdn/loo$ valgrind --leak-check=yes ./ht
==3300== Memcheck, a memory error detector
==3300== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==3300== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==3300== Command: ./ht
==3300== 
Line  0, 0, [ : ]
Line  1, 0, [Ann :], 1, [Louise : 路易丝]
Line  2, 0, [Derek : 德里克]
Line  3, NULL!
Line  4, 0, [Kelsey : 凯尔西]

......

Line 1028, 0, [Louisa : 路易莎]
Line 1029, 0, [Diana : 黛安娜]
Line 1030, 0, [Amber : 安伯], 1, [Rachel : 雷切尔]
==3300== 
==3300== HEAP SUMMARY:
==3300==     in use at exit: 0 bytes in 0 blocks
==3300==   total heap usage: 2,364 allocs, 2,364 frees, 117,832 bytes allocated
==3300== 
==3300== All heap blocks were freed -- no leaks are possible
==3300== 
==3300== For counts of detected and suppressed errors, rerun with: -v
==3300== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
songvm@ubuntu:~/works/xdn/loo$

下一步研究树型数据结构!!!

### C语言中的哈希表数据结构 #### 定义哈希表节点 为了构建一个高效的哈希表,在C语言中通常定义两个主要结构体:一个是用于存储实际数据项的`DataNode`,另一个是指向这些节点并管理它们访问方式的桶(bucket)。这里先展示如何创建基本的数据节点。 ```c typedef struct _DataNode { uint32_t key; void* value; // 可以指向任何类型的值 struct _DataNode *next; // 链接到下一个具有相同hashcode的元素 } DataNode; ``` 此部分代码定义了一个名为`_DataNode`的结构体来哈希表内的单个条目[^1]。注意这里的`key`字段被设置为无符号整数类型(`uint32_t`),而`value`则是一个指针变量可以用来保存任意类型的数据对象;此外还包含了指向同一散列位置处其他项目的链接指针`next`。 #### 创建哈希函数 一个好的哈希算法对于确保哈希表性能至关重要。下面给出了一种简单的字符串到索引映射的方法作为例子: ```c unsigned int hash_function(const char* str, unsigned int table_size){ unsigned long hash = 5381; int c; while ((c = *str++)) hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ return hash % table_size; } ``` 这段程序实现了DJB2哈希算法的一个变体,它接收待处理的关键字串以及目标数组大小,并返回介于0至table_size-1之间的数值作为该键对应的槽位编号。 #### 初始化哈希表 接下来初始化哈希表本身,这涉及到分配适当数量的空间给各个桶,并将每个桶置为空列形式准备接受新成员加入进来。 ```c #define INITIAL_SIZE 100 typedef struct HashTable{ DataNode **buckets; // 动态分配的一维指针数组 size_t capacity; // 的最大容量 }s_HashTable; s_HashTable* create_hash_table(size_t initial_capacity){ s_HashTable *ht = malloc(sizeof(s_HashTable)); ht->capacity = initial_capacity ? : INITIAL_SIZE ; ht->buckets = calloc(ht->capacity , sizeof(DataNode *)); if (!ht || !ht->buckets ) { free(ht); return NULL; } return ht; } ``` 上述片段展示了怎样通过动态内存分配技术建立一个新的哈希表实例及其内部容器——即一系列预先设定长度的空闲链头结点集合。 #### 插入操作 当要往已存在的哈希表里添加新的(key,value)对时,则需遵循如下逻辑流程执行插入动作: ```c int insert_into_hash_table(s_HashTable *ht, const char* key,void* val){ unsigned int index = hash_function((char*)key, ht->capacity); DataNode *new_node = (DataNode *)malloc(sizeof(DataNode)); new_node->key = *(uint32_t*)key; new_node->value=val; new_node->next=NULL; if(!ht->buckets[index]){ ht->buckets[index]=new_node; }else{ DataNode *current=ht->buckets[index]; while(current->next!=NULL){ current=current->next; } current->next=new_node; } return 0; } ``` 在此过程中,首先计算出指定关键字应当放置的位置index,之后检查对应位置是否已有记录存在。如果不存在就直接赋值;反之则遍历现有链条直至找到合适的地方追加进去。 #### 查找功能 最后提供一种方法去检索特定键关联起来的信息内容: ```c void* search_in_hash_table(s_HashTable *ht,const char* key){ unsigned int index = hash_function((char*)key, ht->capacity); DataNode *node = ht->buckets[index]; while(node != NULL && node->key !=*(uint32_t*)key ){ node=node->next; } if(node==NULL)return NULL;//未发现匹配项目 return node->value; } ``` 这个过程同样依赖之前提到过的哈希运算得出定位坐标,随后沿着可能形成的冲突路径逐一检验直到遇到相等条件为止或是到达终点仍未命中所需查找的目标。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值