C语言数据结构之哈希表（HASHTABLE）的实现

最新推荐文章于 2025-02-25 08:31:59 发布

麻瓜老宋

最新推荐文章于 2025-02-25 08:31:59 发布

阅读量2.5k

点赞数 47

CC 4.0 BY-SA版权

分类专栏：把C语言捡起来文章标签：数据结构 c语言散列表哈希算法哈希表

本文链接：https://blog.youkuaiyun.com/gwsong52/article/details/143319319

把C语言捡起来专栏收录该内容

62 篇文章

订阅专栏

C语言数据结构之哈希表（HASHTABLE）的实现

哈希表的每个节点保存的数据格式为key : value，其中key为字符串，根据字符串内容采用不同方法（哈希函数）生成一个无符号整型哈希码，根据表的长度，采用取余法，将数据存入表单元，如果此表单元中已存在数据，则以此表单元为链表头，向链表追加数据，这样表的长度不是数据的总量，总量可以大于表的长度！！！

哈希表的数据结构如下所示：

  -----
0 | O | -> NULL
  -----    -----    -----    -----
1 | O | -> | O | -> | O | -> | O | -> NULL
  -----    -----    -----    -----
.  ...      ...     ...    ....
.  ...      ...     ...    ....
.  ...      ...     ...    ....
  -----    -----
n | O | -> | O | -> NULL
  -----    -----

每个节点保存一个Entry数据格式为（key : value）
每个节点都是一个链表节点，有一个next指针指向下一个节点
生成哈希码函数 gen_hashcode
创建哈希表函数 hash_table_new
释放哈希表函数 hash_table_free

代码如下：

/* filename : ht.c */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* define datatype uint */
typedef unsigned int uint;

/* define function pointer HTFunc for HashTable */
typedef void (*HTFunc) (void *key, void *val);

/* define datatype Entry */
typedef struct _Entry Entry;
struct _Entry {
  void *key;
  void *val;
  Entry *next;
};

/* define datatype HashTable */
typedef struct _HashTable HashTable;
struct _HashTable {
  Entry **elts;
  uint size;
  uint length;
};

/* define gen_hashcode function to generate hash code */
static uint
gen_hashcode (const char *p)
{
  unsigned int h = *p;
  if (h)
    for (p += 1; *p != '\0'; p++)
      h = (h << 5) - h + *p;
  return h;
}

/* create a new hash table */
HashTable *
hash_table_new (uint length)
{
  HashTable *ht = (HashTable*) malloc (sizeof(HashTable));
  ht->elts = (Entry**) malloc (length * sizeof(Entry*));
  ht->length = length;
  ht->size = 0;
  for (int i = 0; i < ht->length; i++)
    {
      ht->elts[i] = (Entry*) malloc (sizeof(Entry));
      ht->elts[i]->key = NULL;
      ht->elts[i]->val = NULL;
      ht->elts[i]->next = NULL;
    }
  return ht;
}

/* free the hash table */
void
hash_table_free (HashTable *ht)
{
  for (uint i = 0; i < ht->length; i++)
    {
      Entry *tmp = ht->elts[i];
      while (tmp != NULL)
	{
	  Entry *node = tmp;
	  tmp = tmp->next;
	  free (node);
	}
    }
  free (ht->elts);
  free (ht);
}

/*------------------------------*/

/**/
void
test_hashtable (void)
{
  HashTable *ht = hash_table_new (17);
  hash_table_free (ht);
}

/**/
int
main (int argc, char *argv[])
{
  test_hashtable ();
  return 0;
}

/* -- end -- */

编译运行，检查内存，效果如下：

songvm@ubuntu:~/works/xdn/loo$ gcc ht.c -o ht
songvm@ubuntu:~/works/xdn/loo$ ./ht
songvm@ubuntu:~/works/xdn/loo$ valgrind --leak-check=yes ./ht
==2835== Memcheck, a memory error detector
==2835== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==2835== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==2835== Command: ./ht
==2835== 
==2835== 
==2835== HEAP SUMMARY:
==2835==     in use at exit: 0 bytes in 0 blocks
==2835==   total heap usage: 19 allocs, 19 frees, 560 bytes allocated
==2835== 
==2835== All heap blocks were freed -- no leaks are possible
==2835== 
==2835== For counts of detected and suppressed errors, rerun with: -v
==2835== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
songvm@ubuntu:~/works/xdn/loo$

向哈希表推入数据 PUT 从哈希表中取出数据 GET

向哈希表推入数据函数 hash_table_put
从哈希表中取出数据函数 hash_table_get

代码如下：

/* put keyval into hash table */
void
hash_table_put (HashTable *ht, void *key, void *val)
{
  uint hc  = gen_hashcode (key);
  uint idx = hc % ht->length;
  Entry *tmp = ht->elts[idx];
  if (tmp->key == NULL)
    {
      tmp->key = key;
      tmp->val = val;
    }
  else
    {
      Entry *node = (Entry*) malloc (sizeof(Entry));
      node->key = key;
      node->val = val;
      node->next = NULL;
      while (tmp->next != NULL)
	tmp = tmp->next;
      tmp->next = node;
    }
  ht->size = ht->size + 1;
}

/* get val by key from hash table */
void *
hash_table_get (HashTable *ht, void *key)
{
  uint hc  = gen_hashcode (key);
  uint idx = hc % ht->length;
  Entry *tmp = ht->elts[idx];
  if (tmp->key == NULL) return NULL;
  if (0 == strcmp (tmp->key, key))
    return tmp->val;
  else
    {
      while (tmp->next != NULL)
	{
	  tmp = tmp->next;
	  if (0 == strcmp (tmp->key, key))
	    return tmp->val;
	}
    }
  return NULL;
}

测试函数，定义字符串数组，保存英文、中文名字一一对应
将字符串数组中的数据循环推入哈希表
根英文名字在哈希表中查找Entry，然后输出中文名字
输出哈希表相关信息

代码如下：

/**/
void
test_hashtable (void)
{
  char *buff[] = {
    "Angus",   "安格斯", "Anthony", "安东尼",  "Apollo",  "阿波罗",
    "Arnold",  "阿诺德", "Arthur",  "亚瑟",    "August",  "奥古斯特",
    "Austin",  "奥斯汀", "Ben",     "本",      "Benjamin", "本杰明",
    "Bert",    "伯特",   "Benson",   "本森",   "Bill",     "比尔",
    "Billy",   "比利",   "Blake",    "布莱克", "Bob",      "鲍伯",
    "Bobby",   "鲍比",   "Brad",     "布拉德", "Brandon",  "布兰登",
    "Brant",   "布兰特", "Brent",    "布伦特", "Brian/Bryan", "布赖恩",
    "Brown",   "布朗",   "Bruce",    "布鲁斯", "Caleb",    "迦勒",
    "Cameron", "卡梅伦", "Carl",     "卡尔",   "Carlos",   "卡洛斯",
    "Cary",    "凯里",   "Caspar",   "卡斯帕", "Cecil",    "塞西",
    "Charles", "查尔斯", "Cheney",   "采尼"  };
  char *key;
  void *val;
  HashTable *ht = hash_table_new (17);
  int len = sizeof(buff)/sizeof(char*);
  for (int i = 0; i < len/2; i++)
    hash_table_put (ht, buff[2*i], buff[2*i+1]);

  key = "Bill";
  printf ("HashTable get key [%s] : val [%s]\n",
	  key, (char*)(hash_table_get(ht, key)));

  key = "Austin";
  printf ("HashTable get key [%s] : val [%s]\n",
	  key, (char*)(hash_table_get(ht, key)));

  key = "Chinese";
  val = hash_table_get(ht, key);
  printf ("HashTable get key [%s] : val [%s]\n",
	  key, (val == NULL) ? "NULL" : (char*)(val));

  key = "Brent";
  val = hash_table_get(ht, key);
  printf ("HashTable get key [%s] : val [%s]\n",
	  key, (val == NULL) ? "NULL" : (char*)(val));

  printf ("HashTable length is %d\n", ht->length);
  printf ("HashTable size is %d\n", ht->size);

  hash_table_free (ht);
}

编译运行，结果如下，达到预期：

songvm@ubuntu:~/works/xdn/loo$ gcc ht.c -o ht
songvm@ubuntu:~/works/xdn/loo$ ./ht
HashTable get key [Bill] : val [比尔]
HashTable get key [Austin] : val [奥斯汀]
HashTable get key [Chinese] : val [NULL]
HashTable get key [Brent] : val [布伦特]
HashTable length is 17
HashTable size is 32
songvm@ubuntu:~/works/xdn/loo$

遍历哈希表，逐项操作函数FOREACH，参数为函数指针HTFunc

定义整型变量count，统计哈希表中Entry的数量，输出count的值
对每个Entry节点调用HTFunc指向的函数，输出相关信息

代码如下：

/* foreach node run htfunc of hashtable */
void
hash_table_foreach (HashTable *ht, HTFunc htfunc)
{
  int count = 0;
  for (uint i = 0; i < ht->length; i++)
    {
      Entry *tmp = ht->elts[i];
      if (tmp->key != NULL)
	{
	  printf ("%2d, ", count);
	  htfunc (tmp->key, tmp->val);
	  count++;
	  while (tmp->next != NULL)
	    {
	      tmp = tmp->next;
	      printf ("%2d, ", count);
	      htfunc (tmp->key, tmp->val);
	      count++;
	    }
	}
    }
}

测试函数，遍历哈希表，输出KEYVAL

代码如下：

/* define function for HTFunc */
void
out_keyval (void *key, void *val)
{
  printf ("Key : [%s], Value : [%s]\n", (char*)key, (char*)val);
}

/**/
void
test_hashtable_foreach (void)
{
  char *buff[] = {
    "Angus",   "安格斯", "Anthony", "安东尼",  "Apollo",  "阿波罗",
    "Arnold",  "阿诺德", "Arthur",  "亚瑟",    "August",  "奥古斯特",
    "Austin",  "奥斯汀", "Ben",     "本",      "Benjamin", "本杰明",
    "Bert",    "伯特",   "Benson",   "本森",   "Bill",     "比尔",
    "Billy",   "比利",   "Blake",    "布莱克", "Bob",      "鲍伯",
    "Bobby",   "鲍比",   "Brad",     "布拉德", "Brandon",  "布兰登",
    "Brant",   "布兰特", "Brent",    "布伦特", "Brian/Bryan", "布赖恩",
    "Brown",   "布朗",   "Bruce",    "布鲁斯", "Caleb",    "迦勒",
    "Cameron", "卡梅伦", "Carl",     "卡尔",   "Carlos",   "卡洛斯",
    "Cary",    "凯里",   "Caspar",   "卡斯帕", "Cecil",    "塞西",
    "Charles", "查尔斯", "Cheney",   "采尼"  };
  char *key;
  void *val;
  HashTable *ht = hash_table_new (17);
  int len = sizeof(buff)/sizeof(char*);
  for (int i = 0; i < len/2; i++)
    hash_table_put (ht, buff[2*i], buff[2*i+1]);

  hash_table_foreach (ht, out_keyval);

  hash_table_free (ht);
}

编译运行，结果如下，符合预期：

songvm@ubuntu:~/works/xdn/loo$ gcc ht.c -o ht
songvm@ubuntu:~/works/xdn/loo$ ./ht
 0, Key : [Austin], Value : [奥斯汀]
 1, Key : [Bill], Value : [比尔]
 2, Key : [Cecil], Value : [塞西]
 3, Key : [Bob], Value : [鲍伯]
 4, Key : [Anthony], Value : [安东尼]
 5, Key : [Brandon], Value : [布兰登]
 6, Key : [Brant], Value : [布兰特]
 7, Key : [Carl], Value : [卡尔]
 8, Key : [August], Value : [奥古斯特]
 9, Key : [Brad], Value : [布拉德]
10, Key : [Brent], Value : [布伦特]
11, Key : [Caleb], Value : [迦勒]
12, Key : [Bert], Value : [伯特]
13, Key : [Blake], Value : [布莱克]
14, Key : [Ben], Value : [本]
15, Key : [Benjamin], Value : [本杰明]
16, Key : [Brian/Bryan], Value : [布赖恩]
17, Key : [Angus], Value : [安格斯]
18, Key : [Brown], Value : [布朗]
19, Key : [Caspar], Value : [卡斯帕]
20, Key : [Carlos], Value : [卡洛斯]
21, Key : [Bruce], Value : [布鲁斯]
22, Key : [Apollo], Value : [阿波罗]
23, Key : [Benson], Value : [本森]
24, Key : [Bobby], Value : [鲍比]
25, Key : [Arnold], Value : [阿诺德]
26, Key : [Arthur], Value : [亚瑟]
27, Key : [Billy], Value : [比利]
28, Key : [Cameron], Value : [卡梅伦]
29, Key : [Cary], Value : [凯里]
30, Key : [Charles], Value : [查尔斯]
31, Key : [Cheney], Value : [采尼]
songvm@ubuntu:~/works/xdn/loo$

遍历哈希表，逐项操作，输出哈希表的结构，空值输出NULL

代码如下：

/* foreach node run htfunc of hashtable */
void
hash_table_foreach (HashTable *ht, HTFunc htfunc)
{
  for (uint i = 0; i < ht->length; i++)
    {
      int count = 0;
      Entry *tmp = ht->elts[i];
      if (tmp->key != NULL)
	{
	  printf ("Line %2d, %d, ", i, count);
	  htfunc (tmp->key, tmp->val);
	  count++;
	  while (tmp->next != NULL)
	    {
	      tmp = tmp->next;
	      printf (", %d, ", count);
	      htfunc (tmp->key, tmp->val);
	      count++;
	    }
	  printf ("\n");
	}
      else
	{
	  printf ("Line %2d, NULL!\n", i);
	}
    }
}

测试函数，修改一下HTFunc指针指向的函数，便于查看结果

代码如下：

/* define function for HTFunc */
void
out_keyval (void *key, void *val)
{
  //printf ("Key : [%s], Value : [%s]\n", (char*)key, (char*)val);
  printf ("[%s : %s]", (char*)key, (char*)val);
}

编译运行，结果如下，符合预期：

songvm@ubuntu:~/works/xdn/loo$ gcc ht.c -o ht
songvm@ubuntu:~/works/xdn/loo$ ./ht
Line  0, NULL!
Line  1, 0, [Austin : 奥斯汀], 1, [Bill : 比尔], 2, [Cecil : 塞西]
Line  2, 0, [Bob : 鲍伯]
Line  3, 0, [Anthony : 安东尼], 1, [Brandon : 布兰登], 2, [Brant : 布兰特], 3, [Carl : 卡尔]
Line  4, 0, [August : 奥古斯特]
Line  5, 0, [Brad : 布拉德], 1, [Brent : 布伦特], 2, [Caleb : 迦勒]
Line  6, 0, [Bert : 伯特], 1, [Blake : 布莱克]
Line  7, NULL!
Line  8, NULL!
Line  9, NULL!
Line 10, 0, [Ben : 本], 1, [Benjamin : 本杰明], 2, [Brian/Bryan : 布赖恩]
Line 11, 0, [Angus : 安格斯], 1, [Brown : 布朗]
Line 12, 0, [Caspar : 卡斯帕]
Line 13, 0, [Carlos : 卡洛斯]
Line 14, 0, [Bruce : 布鲁斯]
Line 15, 0, [Apollo : 阿波罗], 1, [Benson : 本森], 2, [Bobby : 鲍比]
Line 16, 0, [Arnold : 阿诺德], 1, [Arthur : 亚瑟], 2, [Billy : 比利], 3, [Cameron : 卡梅伦], 4, [Cary : 凯里], 5, [Charles : 查尔斯], 6, [Cheney : 采尼]
songvm@ubuntu:~/works/xdn/loo$

读文件到哈希表

找到前面文件读写实践中输出的文件name.dat
将没有中文名字的加上中文译名： Raymond 雷蒙德
将/符号去掉，加上空格： Tanya/谭雅坦尼娅，Tanya 谭雅坦尼娅
读文件需要分配内存，所以要释放每一个节点的key和value函数：hash_table_free_elements

代码如下：

/* free keyvals all of the hash table */
void
hash_table_free_elements (HashTable *ht)
{
  for (uint i = 0; i < ht->length; i++)
    {
      Entry *tmp = ht->elts[i];
      while (tmp != NULL)
	{
	  Entry *node = tmp;
	  tmp = tmp->next;
	  free (node->key);
	  free (node->val);
	  free (node);
	}
    }
  free (ht->elts);
  free (ht);
}

读文件函数 test_hashtable_from_file
每读一个字符串都在之前分配内存，将内存指针做为参数传给hash_table_put函数
操作结束后调用hash_table_free_elements释放分配的内存和哈希表

代码如下：

/**/
void
test_hashtable_from_file (void)
{
  int tmp = 1;
  char *key, *val;
  FILE *fpi;
  HashTable *ht = hash_table_new (1031);
  fpi = fopen ("name.dat", "r");
  while (tmp != EOF)
    {
      key = (char*) malloc (64*sizeof(char));
      val = (char*) malloc (64*sizeof(char));
      memset (key, 0, 64);
      memset (val, 0, 64);
      tmp = fscanf (fpi, "%s", key);
      tmp = fscanf (fpi, "%s", val);
      hash_table_put (ht, key, val);
    }

  hash_table_foreach (ht, out_keyval);

  hash_table_free_elements (ht);
  fclose (fpi);
}

编译运行，结果如下，符合预期：

songvm@ubuntu:~/works/xdn/loo$ gcc ht.c -o ht
songvm@ubuntu:~/works/xdn/loo$ valgrind --leak-check=yes ./ht
==3300== Memcheck, a memory error detector
==3300== Copyright (C) 2002-2017, and GNU GPL'd, by Julian Seward et al.
==3300== Using Valgrind-3.13.0 and LibVEX; rerun with -h for copyright info
==3300== Command: ./ht
==3300== 
Line  0, 0, [ : ]
Line  1, 0, [Ann : 安], 1, [Louise : 路易丝]
Line  2, 0, [Derek : 德里克]
Line  3, NULL!
Line  4, 0, [Kelsey : 凯尔西]

......

Line 1028, 0, [Louisa : 路易莎]
Line 1029, 0, [Diana : 黛安娜]
Line 1030, 0, [Amber : 安伯], 1, [Rachel : 雷切尔]
==3300== 
==3300== HEAP SUMMARY:
==3300==     in use at exit: 0 bytes in 0 blocks
==3300==   total heap usage: 2,364 allocs, 2,364 frees, 117,832 bytes allocated
==3300== 
==3300== All heap blocks were freed -- no leaks are possible
==3300== 
==3300== For counts of detected and suppressed errors, rerun with: -v
==3300== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
songvm@ubuntu:~/works/xdn/loo$