哈希表--开散列【数据结构】

最新推荐文章于 2024-01-18 16:30:50 发布

CZF_csdn

最新推荐文章于 2024-01-18 16:30:50 发布

阅读量468

点赞数 1

CC 4.0 BY-SA版权

分类专栏：数据结构【C语言】文章标签：数据结构散列函数 hash 开散列哈希表

本文链接：https://blog.youkuaiyun.com/bian_cheng_ru_men/article/details/79449224

数据结构【C语言】专栏收录该内容

20 篇文章

订阅专栏

哈希表：

散列表（Hash table，也叫哈希表），是根据关键码值(Key value)而直接进行访问的数据结构。也就是说，它通过把关键码值映射到表中一个位置来访问记录，以加快查找的速度。这个映射函数叫做散列函数，存放记录的数组叫做散列表。———— 百度百科

哈希冲突：

对于两个数据元素的关键字Ki和Kj（i != j），有Ki != Kj，但有：HashFun(Ki) == HashFun(Kj)
即不同关键字通过相同哈希函数计算出相同的哈希地址，该种现象称为哈希冲突或哈希碰撞。把具有不同关键码而具有相同哈希地址的数据元素称为“同义词”。

引起哈希冲突的一个原因可能是：哈希函数设计不够合理。
哈希函数设计原则：
哈希函数的定义域必须包括需要存储的全部关键码，而如果散列表允许有m个地址时，其值域必须在0到m-1之间
哈希函数计算出来的地址能均匀分布在整个空间中
哈希函数应该比较简单

哈希函数：

直接定址法：
取关键字的某个线性函数为散列地址：Hash（Key）= A*Key + B
优点：简单、均匀
缺点：需要事先知道关键字的分布情况
适合查找比较小且连续的情况
除留余数法：
设散列表中允许的地址数为m，取一个不大于m，但最接近或者等于m的质数p作为除数，按照哈希函数：Hash(key) = key% p(p<=m),将关键码转换成哈希地址
平方取中法：
假设关键字为1234，对它平方就是1522756，抽取中间的3位227作为哈希地址；再比如关键字为4321，对它平方就是18671041，抽取中间的3位671(或710)作为哈希地址。平方取中法比较适合：不知道关键字的分布，而位数又不是很大的情况
折叠法：
折叠法是将关键字从左到右分割成位数相等的几部分(最后一部分位数可以短些)，然后将这几部分叠加求和，并按散列表表长，取后几位作为散列地址。折叠法适合事先不需要知道关键字的分布、关键字位数比较多的情况
随机数法：
选择一个随机函数，取关键字的随机函数值为它的哈希地址，即H(key) = random(key)，其中random为随机数函数。通常在关键字长度不等时采用此法
数学分析法：
设有n个d位数，每一位可能有r种不同的符号，这r种不同的符号在各位上出现的频率不一定相同，可能在某些位上分布比较均匀，每种符号出现的机会均等，在某些位上分布不均匀只有某几种符号经常出现。可根据散列表的大小，选择其中各种符号分布均匀的若干位作为散列地址。

解决哈希冲突的办法：

1.闭散列（开放定址法）

线性探测：只要hash表没存满，一周之内一定可以找到空余位置---缺点--->引起数据堆积

扩容----->

负载因子：A = 有效元素个数/哈希表的容量

当A > 0.7时则哈希冲突的可能性大大增加，需对哈希表进行扩容

二次探测：H(i) = (H0 + i^2) % m，相邻两次探测地址之差为 H(i+1) - H(i) = 2*i + 1，因此可以递推计算：addr = addr + 2*i + 1;-----缺点-------->空间利用率太低

2.开散列（开链法）-----------链表

首先对关键码集合用散列函数计算散列地址，具有相同地址的关键码归于同一子集合，每一个子集合称为一个桶，各个桶中的元素通过一个单链表链接起来，各链表的头结点存储在哈希表中。

闭散列：

1.哈希表的构成：

// State of one slot in the open-addressing table.
typedef enum STATUS
{
	EMPTY,		// slot has never held an element
	EXIST,		// slot currently holds a live element
	DELETE		// slot held an element that has since been deleted (tombstone)
}STATUS;

// One table slot: a state flag plus the stored value.
typedef struct Elem{
	STATUS _status;		// slot state (EMPTY / EXIST / DELETE)
	HtDataType _data;	// stored value
}Elem;

// Fixed-capacity open-addressing hash table.
typedef struct HashTable
{
	Elem _array[MAX_SIZE];	// slot storage
	int _size;			// number of live (EXIST) elements
}HT;

2.基本操作：

插入 ----------此处借顺序表实现

// Insert `data` into the fixed-size open-addressing table.
//
// Probing is linear when DETECTIVE_LINE is defined and quadratic otherwise.
// DELETE slots are deliberately skipped rather than reused, so a duplicate
// that sits further along the probe sequence is still detected.
//
// Returns 1 on success; 0 if `ht` is NULL, the table is full, `data` is
// already present, or no EMPTY slot is reachable on the probe sequence.
int InsertHashTable(HT* ht, HtDataType data)
{
	int Addr = 0;
	int probes = 0;		// occupied slots examined so far
#ifndef DETECTIVE_LINE
	int i = 0;		// quadratic probe step
#endif
	if (NULL == ht)
	{
		return 0;
	}
	if (ht->_size == MAX_SIZE)	// every slot holds a live element
	{
		return 0;
	}
	// Home slot for this key.
	Addr = HashFunc(data);

	while (EMPTY != ht->_array[Addr]._status)
	{
		if (ht->_array[Addr]._status == EXIST &&
			data == ht->_array[Addr]._data)	// already stored and live
		{
			return 0;
		}

		// Both probe sequences repeat after at most MAX_SIZE steps, so once
		// that many occupied slots were seen no EMPTY slot can be reached.
		// Without this bound the loop spins forever when the table holds only
		// EXIST/DELETE slots or the quadratic sequence cycles.
		if (++probes >= MAX_SIZE)
		{
			return 0;
		}

#ifdef  DETECTIVE_LINE
		Addr = DetectiveLine(Addr);
#else
		++i;
		Addr = DetectiveLine_T(Addr, i);
#endif
	}

	// Addr now names an EMPTY slot.
	ht->_array[Addr]._data = data;
	ht->_array[Addr]._status = EXIST;
	ht->_size++;
	return 1;
}

删除：

// Remove `data` from the table by turning its slot into a DELETE tombstone.
//
// Returns 1 if the element was found and removed; 0 if `ht` is NULL, the
// table is empty, or the element is not present.
int DeleteElemHashTable(HT* ht, HtDataType data)
{
	int Cur = 0;
	if (NULL == ht)
	{
		return 0;
	}
	if (EmptyHashTable(ht))
	{
		return 0;
	}

	// FindHashTable hashes and probes on its own; the former separate
	// HashFunc() call here computed a value that was never used.
	Cur = FindHashTable(ht, data);
	if (-1 == Cur)
	{
		return 0;
	}

	// Keep the slot as a tombstone so probe chains through it stay intact.
	ht->_array[Cur]._status = DELETE;
	ht->_size--;
	return 1;
}

查找：

// Look up `data` and return the index of its slot, or -1 if it is absent
// (also -1 when `ht` is NULL or the table is empty).
//
// The probe sequence must be the same one InsertHashTable uses: linear when
// DETECTIVE_LINE is defined, quadratic otherwise. Walking linearly after a
// quadratic insert misses every element that was displaced from its home slot.
int FindHashTable(HT* ht, HtDataType data)
{
	int addr = 0;
	int addrCount = 0;	// occupied slots examined so far
#ifndef DETECTIVE_LINE
	int i = 0;		// quadratic probe step
#endif
	if (NULL == ht)
	{
		return -1;
	}

	if (EmptyHashTable(ht))
	{
		return -1;
	}

	addr = HashFunc(data);
	while (EMPTY != ht->_array[addr]._status)
	{
		// A DELETE slot may still carry the old value, so require EXIST too.
		if (EXIST == ht->_array[addr]._status &&
			data == ht->_array[addr]._data)
		{
			return addr;
		}

		// Stop after MAX_SIZE probes: the sequence has wrapped around.
		if (++addrCount >= MAX_SIZE)
		{
			break;
		}

#ifdef  DETECTIVE_LINE
		addr = DetectiveLine(addr);
#else
		++i;
		addr = DetectiveLine_T(addr, i);
#endif
	}
	return -1;
}

完整代码：

Hash.h

#pragma once

// NULL comes from the standard header; redefining it here clashes with the
// definition that <stdio.h>/<stdlib.h> already provide.
#include <stddef.h>

// Element type stored in the static table.
typedef int HtDataType;
// Number of slots in the static table.
#define MAX_SIZE 100
// Uncomment to probe linearly; when left undefined, insertion probes quadratically.
//#define DETECTIVE_LINE

// State of one slot in the open-addressing table.
typedef enum STATUS
{
	EMPTY,		// slot has never held an element
	EXIST,		// slot currently holds a live element
	DELETE		// slot held an element that has since been deleted (tombstone)
}STATUS;

// One table slot: a state flag plus the stored value.
typedef struct Elem{
	STATUS _status;		// slot state
	HtDataType _data;	// stored value
}Elem;

// Fixed-capacity open-addressing hash table.
typedef struct HashTable
{
	Elem _array[MAX_SIZE];	// slot storage
	int _size;			// number of live (EXIST) elements
}HT;

void InitHashTable(HT* ht);			// mark every slot EMPTY, size = 0
int InsertHashTable(HT* ht, HtDataType data);	// 1 on success, 0 otherwise
int HashFunc(HtDataType data);			// home slot of a key

int DetectiveLine(int addr); 			// next slot, linear probing
int DetectiveLine_T(int addr, int i);  		// next slot, quadratic probing (step i)

int FindHashTable(HT* ht, HtDataType data);	// slot index, or -1 if absent
int DeleteElemHashTable(HT* ht, HtDataType data);	// 1 if removed, 0 otherwise
int EmptyHashTable(HT* ht);			// non-zero when no live elements
int HashSize(HT* ht);				// number of live elements

void TestHash();

Hash.c

#include "Hash.h"
#include <assert.h>
#include <stdio.h>

// Reset the table: every slot becomes EMPTY and the element count is zero.
// `ht` must not be NULL (checked with assert).
void InitHashTable(HT* ht)
{
	Elem* slot = NULL;
	assert(ht);

	// Walk the slot array by pointer instead of by index.
	for (slot = ht->_array; slot != ht->_array + MAX_SIZE; ++slot)
	{
		slot->_status = EMPTY;
	}
	ht->_size = 0;
}

// Division-method hash: map `data` to a slot index in [0, MAX_SIZE).
//
// In C the sign of `%` follows the dividend, so a negative key would give a
// negative remainder and index before the start of the array; shift such
// remainders back into range. (A prime modulus would spread keys better.)
int HashFunc(HtDataType data)
{
	int addr = data % MAX_SIZE;
	if (addr < 0)
	{
		addr += MAX_SIZE;
	}
	return addr;
}

// Insert `data` into the fixed-size open-addressing table.
//
// Probing is linear when DETECTIVE_LINE is defined and quadratic otherwise.
// DELETE slots are deliberately skipped rather than reused, so a duplicate
// that sits further along the probe sequence is still detected.
//
// Returns 1 on success; 0 if `ht` is NULL, the table is full, `data` is
// already present, or no EMPTY slot is reachable on the probe sequence.
int InsertHashTable(HT* ht, HtDataType data)
{
	int Addr = 0;
	int probes = 0;		// occupied slots examined so far
#ifndef DETECTIVE_LINE
	int i = 0;		// quadratic probe step
#endif
	if (NULL == ht)
	{
		return 0;
	}
	if (ht->_size == MAX_SIZE)	// every slot holds a live element
	{
		return 0;
	}
	// Home slot for this key.
	Addr = HashFunc(data);

	while (EMPTY != ht->_array[Addr]._status)
	{
		if (ht->_array[Addr]._status == EXIST &&
			data == ht->_array[Addr]._data)	// already stored and live
		{
			return 0;
		}

		// Both probe sequences repeat after at most MAX_SIZE steps, so once
		// that many occupied slots were seen no EMPTY slot can be reached.
		// Without this bound the loop spins forever when the table holds only
		// EXIST/DELETE slots or the quadratic sequence cycles.
		if (++probes >= MAX_SIZE)
		{
			return 0;
		}

#ifdef  DETECTIVE_LINE
		Addr = DetectiveLine(Addr);
#else
		++i;
		Addr = DetectiveLine_T(Addr, i);
#endif
	}

	// Addr now names an EMPTY slot.
	ht->_array[Addr]._data = data;
	ht->_array[Addr]._status = EXIST;
	ht->_size++;
	return 1;
}

// Remove `data` from the table by turning its slot into a DELETE tombstone.
//
// Returns 1 if the element was found and removed; 0 if `ht` is NULL, the
// table is empty, or the element is not present.
int DeleteElemHashTable(HT* ht, HtDataType data)
{
	int Cur = 0;
	if (NULL == ht)
	{
		return 0;
	}
	if (EmptyHashTable(ht))
	{
		return 0;
	}

	// FindHashTable hashes and probes on its own; the former separate
	// HashFunc() call here computed a value that was never used.
	Cur = FindHashTable(ht, data);
	if (-1 == Cur)
	{
		return 0;
	}

	// Keep the slot as a tombstone so probe chains through it stay intact.
	ht->_array[Cur]._status = DELETE;
	ht->_size--;
	return 1;
}

// Look up `data` and return the index of its slot, or -1 if it is absent
// (also -1 when `ht` is NULL or the table is empty).
//
// The probe sequence must be the same one InsertHashTable uses: linear when
// DETECTIVE_LINE is defined, quadratic otherwise. Walking linearly after a
// quadratic insert misses every element that was displaced from its home slot.
int FindHashTable(HT* ht, HtDataType data)
{
	int addr = 0;
	int addrCount = 0;	// occupied slots examined so far
#ifndef DETECTIVE_LINE
	int i = 0;		// quadratic probe step
#endif
	if (NULL == ht)
	{
		return -1;
	}

	if (EmptyHashTable(ht))
	{
		return -1;
	}

	addr = HashFunc(data);
	while (EMPTY != ht->_array[addr]._status)
	{
		// A DELETE slot may still carry the old value, so require EXIST too.
		if (EXIST == ht->_array[addr]._status &&
			data == ht->_array[addr]._data)
		{
			return addr;
		}

		// Stop after MAX_SIZE probes: the sequence has wrapped around.
		if (++addrCount >= MAX_SIZE)
		{
			break;
		}

#ifdef  DETECTIVE_LINE
		addr = DetectiveLine(addr);
#else
		++i;
		addr = DetectiveLine_T(addr, i);
#endif
	}
	return -1;
}

// Report whether the table holds no live elements.
// Returns 1 when empty, 0 otherwise; `ht` must not be NULL (assert).
int EmptyHashTable(HT* ht)
{
	assert(ht);
	return (ht->_size == 0) ? 1 : 0;
}

// Return the number of live elements; `ht` must not be NULL (assert).
int HashSize(HT* ht)
{
	int count = 0;
	assert(ht);
	count = ht->_size;
	return count;
}

// Linear probing: return the slot after `addr`, wrapping from the last slot
// back to slot 0.
int DetectiveLine(int addr)
{
	int next = addr + 1;
	return (MAX_SIZE == next) ? 0 : next;
}
// Quadratic probing: advance `addr` by 2*i + 1 (the difference between
// consecutive squares) and wrap the result into the table.
int DetectiveLine_T(int addr, int i)
{
	int next = addr + 2 * i + 1;
	return (next >= MAX_SIZE) ? next % MAX_SIZE : next;
}


void TestHash()
{
	HT ht;
	int Cur = 0;

	InitHashTable(&ht);
	InsertHashTable(&ht, 3);
	InsertHashTable(&ht, 4);
	InsertHashTable(&ht, 5);
	InsertHashTable(&ht, 103);
	InsertHashTable(&ht, 199);
	InsertHashTable(&ht, 299);
	InsertHashTable(&ht, 399);
	InsertHashTable(&ht, 499);
	InsertHashTable(&ht, 599);
	DeleteElemHashTable(&ht, 599);
	FindHashTable(&ht, 599);

	Cur = FindHashTable(&ht, 599);
	if (-1 != Cur)
	{
		printf("yes!\n");
	}
	else
	{
		printf("no!\n");
	}
}

Common.c

#include "Common.h"
// 使用素数表对齐做哈希表的容量，降低哈希冲突
// Ascending table of primes used as hash-table capacities.
unsigned long _PrimeList[_PrimeSize] =
{
	53UL,         97UL,         193UL,        389UL,
	769UL,        1543UL,       3079UL,       6151UL,
	12289UL,      24593UL,      49157UL,      98317UL,
	196613UL,     393241UL,     786433UL,     1572869UL,
	3145739UL,    6291469UL,    12582917UL,   25165843UL,
	50331653UL,   100663319UL,  201326611UL,  402653189UL,
	805306457UL,  1610612741UL, 3221225473UL, 4294967291UL
};

// Return the first prime in _PrimeList that is strictly greater than
// `capacity`; if none is, return the largest prime in the table.
unsigned long GetNextPrime(unsigned long capacity)
{
	int idx = 0;

	// Skip every prime that is not larger than the requested capacity.
	while (idx < _PrimeSize && _PrimeList[idx] <= capacity)
	{
		++idx;
	}
	if (idx == _PrimeSize)
	{
		idx = _PrimeSize - 1;
	}
	return _PrimeList[idx];
}

Common.h

#pragma once
// Hash-table capacities are taken from a table of primes to reduce collisions.
// _PrimeSize is the number of entries in that table.
#define _PrimeSize  28
// Returns the first table prime strictly greater than `capacity`, or the
// largest table prime when none is greater.
unsigned long GetNextPrime(unsigned long capacity);

Test.c

#include"Hash.h"
#include "Hash_D.h"
#include <stdlib.h>

// Program entry point: run the static hash-table smoke test, then keep the
// console window open via the shell's "pause" command.
int main(void)
{
	TestHash();

	system("pause");
	return 0;
}

动态哈希表完整代码：

Hash_D.h

#pragma once 

// NULL comes from the standard header; redefining it here clashes with the
// definition that <stdio.h>/<stdlib.h> already provide.
#include <stddef.h>

// NOTE(review): Hash.h declares the same names (HT, Elem, STATUS, MAX_SIZE,
// HashFunc) with different definitions, so the two headers cannot be included
// in the same translation unit.

// Element type stored in the dynamic table.
typedef char* HtDataType;
// Callback that converts an element into an unsigned integer hash code.
// NOTE(review): BKDRHash takes `const char *` and IntHash takes `int`, so
// neither matches this pointer type exactly - confirm the intended signature.
typedef unsigned long (*PHF)(HtDataType);  

#define MAX_SIZE 100
// Defined: linear probing. Undefined: quadratic probing.
#define DETECTIVE_LINE

// State of one slot in the open-addressing table.
typedef enum STATUS
{
	EMPTY,		// slot has never held an element
	EXIST,		// slot currently holds a live element
	DELETE		// slot held an element that has since been deleted (tombstone)
}STATUS;

// One table slot.
typedef struct Elem{
	STATUS _status;		// slot state
	HtDataType _data;	// stored element; could become a <key, value> struct (unique key, value may repeat)
}Elem;

// Dynamically sized open-addressing hash table.
typedef struct HashTable
{
	Elem* _array;		// heap-allocated slot storage
	int _capacity;		// number of slots
	int _size;		// number of live (EXIST) elements
	PHF _pToInt;		// element -> integer conversion used for hashing
}HT;

void InitHashTable_D(HT* ht, unsigned long capacity, PHF pTo);	// initialise
int InsertHashTable_D(HT* ht, HtDataType data);			// insert: 1 on success, 0 otherwise
int FindHashTable_D(HT* ht, HtDataType data);			// find: slot index, or -1
int DeleteHashTable_D(HT* ht, HtDataType data);			// delete: 1 if removed, 0 otherwise
void DestoryHashTable_D(HT*ht);					// release the slot storage

unsigned long IntHash(int data);				// hash code of an integer
int HashEmpty(HT* ht);						// non-zero when no live elements
unsigned long BKDRHash(const char * str);			// hash code of a string
int Chackcapacity(HT* ht);					// grow the table when the load factor is too high
int HashFunc(HT* ht, HtDataType data);				// home slot of an element

int Detective(HT* ht, int addr);				// next slot, linear probing
int Detective_T(HT* ht,int addr, int i);			// next slot, quadratic probing (step i)

void TestHash_D();

Hash_D.c

#include "Hash_D.h"
#include "Common.h"
#include <assert.h>
#include <malloc.h>
#include <stdio.h>

// Exchange the complete contents of two table descriptors (shallow copy:
// the slot arrays themselves are not duplicated, only the pointers move).
void Swap(HT* left, HT* right)
{
	HT savedRight = *right;
	*right = *left;
	*left = savedRight;
}

// Report whether the table holds no live elements.
// Returns 1 when empty, 0 otherwise; `ht` must not be NULL (assert).
int HashEmpty(HT* ht)
{
	assert(ht);
	return (ht->_size == 0) ? 1 : 0;
}

// BKDR string hash: fold each character into the running value with a
// multiplier of 131, then clear the top bit so the result fits in 31 bits.
// `str` must be a NUL-terminated string.
unsigned long BKDRHash(const char * str)
{
	const unsigned int seed = 131; // other common choices: 31 1313 13131 131313
	unsigned int hash = 0;
	const char *cursor = NULL;

	for (cursor = str; *cursor != '\0'; ++cursor)
	{
		hash = hash * seed + *cursor;
	}
	return (hash & 0x7FFFFFFF);
}

// Identity hash for integers: the value itself, converted to unsigned long.
unsigned long IntHash(int data)
{
	unsigned long code = (unsigned long)data;
	return code;
}


// Initialise a dynamic hash table.
//
// ht       - table to initialise; NULL is ignored.
// capacity - requested slot count; rounded up with GetNextPrime().
// pTo      - callback converting an element to an integer hash code.
//
// On allocation failure the table is left empty (_array == NULL,
// _capacity == 0) instead of relying on assert(), which is compiled out
// under NDEBUG and would let the loop below write through a NULL pointer.
void InitHashTable_D(HT* ht, unsigned long capacity, PHF pTo)
{
	unsigned long i = 0;
	if (NULL == ht)
	{
		return;
	}

	ht->_size = 0;
	ht->_pToInt = pTo;

	// Allocate the slot array with a prime capacity.
	capacity = GetNextPrime(capacity);
	ht->_array = (Elem*)malloc(sizeof(Elem) * capacity);
	if (NULL == ht->_array)
	{
		ht->_capacity = 0;
		return;
	}

	for (; i < capacity; i++){
		ht->_array[i]._status = EMPTY;
	}
	ht->_capacity = capacity;
}

// Insert `data` into the dynamic table, growing it first when needed.
//
// Probing is linear when DETECTIVE_LINE is defined and quadratic otherwise.
// Returns 1 on success; 0 if `ht` is NULL, the capacity check fails, `data`
// is already present, or no EMPTY slot is reachable on the probe sequence.
//
// NOTE(review): elements are compared with `==`; HtDataType is `char*`, so
// this is pointer identity, not string equality - confirm that is intended.
int InsertHashTable_D(HT* ht, HtDataType data)
{
	int addr = 0;
	int probes = 0;		// occupied slots examined so far
#ifndef DETECTIVE_LINE
	int i = 0;		// quadratic probe step
#endif
	if (NULL == ht)
	{
		return 0;
	}
	if (!Chackcapacity(ht))	// grow if the load factor is too high
	{
		return 0;
	}
	addr = HashFunc(ht, data);
	while (EMPTY != ht->_array[addr]._status)
	{
		if (EXIST == ht->_array[addr]._status &&	// refuse duplicates
			data == ht->_array[addr]._data)
		{
			return 0;
		}

		// The probe sequence repeats after at most _capacity steps; stop
		// instead of looping forever when no EMPTY slot is reachable.
		if (++probes >= ht->_capacity)
		{
			return 0;
		}
#ifdef DETECTIVE_LINE
		addr = Detective(ht, addr);
#else
		// The original branch lacked the ';' and the step argument.
		i++;
		addr = Detective_T(ht, addr, i);
#endif

	}
	ht->_array[addr]._data = data;
	ht->_array[addr]._status = EXIST;
	ht->_size++;
	return 1;
}

// Look up `data` in the dynamic table.
//
// Returns the slot index of the live element, or -1 if `ht` is NULL or the
// element is not present. Uses the same probe sequence as InsertHashTable_D
// (linear when DETECTIVE_LINE is defined, quadratic otherwise).
int FindHashTable_D(HT* ht, HtDataType data)
{
	int addr = 0;
	int probes = 0;		// occupied slots examined so far
#ifndef DETECTIVE_LINE
	int count = 0;		// quadratic probe step
#endif
	if (NULL == ht)
	{
		return -1;
	}
	// Start at the element's home slot.
	addr = HashFunc(ht, data);
	// Follow the probe sequence until an EMPTY slot ends the chain.
	while (EMPTY != ht->_array[addr]._status)
	{
		if (data == ht->_array[addr]._data &&
			EXIST == ht->_array[addr]._status)
			return addr;

		// Bound the walk: with no EMPTY slot on the sequence the original
		// loop never terminated.
		if (++probes >= ht->_capacity)
		{
			break;
		}

#ifdef DETECTIVE_LINE
		addr = Detective(ht, addr);
#else
		// The original branch used an undeclared `i` and lacked the ';'.
		count++;
		addr = Detective_T(ht, addr, count);
#endif
	}
	// Not found.
	return -1;
}

// Remove `data` from the dynamic table by turning its slot into a DELETE
// tombstone. Returns 1 if an element was removed; 0 if `ht` is NULL, the
// table is empty, or the element is not present.
int DeleteHashTable_D(HT* ht, HtDataType data)
{
	int slot = -1;

	if (NULL != ht && !HashEmpty(ht))
	{
		slot = FindHashTable_D(ht, data);
	}
	if (-1 != slot)
	{
		ht->_array[slot]._status = DELETE;
		ht->_size--;
		return 1;
	}
	return 0;
}

// Grow the table when its load factor exceeds 0.7.
//
// Returns 1 when the table can accept another element (growing it first if
// required); 0 if `ht` is NULL or uninitialised, or if the table cannot grow
// and has no free slot left.
int Chackcapacity(HT* ht)
{
	int i = 0;
	if (NULL == ht || NULL == ht->_array || ht->_capacity <= 0)
	{
		return 0;
	}
	// Compare size/capacity > 0.7 without integer division: size*10/capacity
	// truncates, so the old test only fired once the load reached 0.8.
	if ((long long)ht->_size * 10 > (long long)ht->_capacity * 7)
	{
		HT tmp;
		// InitHashTable_D rounds the capacity up to the next larger prime.
		InitHashTable_D(&tmp, ht->_capacity, ht->_pToInt);
		if (NULL == tmp._array || tmp._capacity <= ht->_capacity)
		{
			// Could not obtain a larger table (allocation failed or the
			// prime table is exhausted). Rehashing into an equal-sized table
			// would recurse without end, so keep the current one.
			DestoryHashTable_D(&tmp);
			return ht->_size < ht->_capacity;
		}
		for (; i < ht->_capacity; i++)
		{
			// Only live elements move over; EMPTY slots hold uninitialised
			// data and DELETE slots hold removed elements.
			if (EXIST == ht->_array[i]._status)
			{
				InsertHashTable_D(&tmp, ht->_array[i]._data);
			}
		}
		Swap(&tmp, ht);			// ht now owns the new storage
		DestoryHashTable_D(&tmp);	// release the old storage
	}
	return 1;
}

// Home slot of `data`: its integer hash code (from the table's conversion
// callback) reduced modulo the table capacity.
int HashFunc(HT* ht, HtDataType data)
{
	unsigned long code = ht->_pToInt(data);
	return code % ht->_capacity;
}

// Linear probing: return the slot after `addr`, wrapping to slot 0.
//
// Valid indices are 0 .. _capacity-1, so the wrap must happen as soon as the
// index reaches _capacity; the previous `>` test let `addr == _capacity`
// through, one element past the end of the slot array.
int Detective(HT* ht, int addr)
{
	addr++;
	if (addr >= ht->_capacity)
	{
		addr = 0;
	}
	return addr;
}

// Quadratic probing: advance `addr` by 2*i + 1 (the difference between
// consecutive squares) and wrap the result into the table.
int Detective_T(HT* ht, int addr, int i)
{
	int next = addr + 2 * i + 1;
	return (next >= ht->_capacity) ? next % ht->_capacity : next;
}

// Release the slot storage and reset the descriptor to an empty state.
// A NULL `ht` is ignored.
void DestoryHashTable_D(HT*ht)
{
	if (NULL != ht)
	{
		free(ht->_array);
		ht->_size = 0;
		ht->_capacity = 0;
		ht->_array = NULL;
	}
}

// Smoke test for the dynamic table: insert two strings, delete the first,
// then report whether it can still be found.
void TestHash_D()
{
	int tmp = 0;
	HT ht;
	InitHashTable_D(&ht, 10, BKDRHash);
	InsertHashTable_D(&ht,"陈");
	InsertHashTable_D(&ht, "陈2");
	DeleteHashTable_D(&ht, "陈");
	tmp = FindHashTable_D(&ht, "陈");
	if (-1 == tmp)
	{
		printf("no!\n");
	}
	else
	{
		printf("yes!\n");
	}
	// Release the slot array allocated by InitHashTable_D; it was leaked before.
	DestoryHashTable_D(&ht);
}