【数据结构与算法分析】第五章散列

最新推荐文章于 2025-06-28 16:47:24 发布

原创最新推荐文章于 2025-06-28 16:47:24 发布 · 622 阅读

0 ·

CC 4.0 BY-SA版权

数据结构专栏收录该内容

19 篇文章

订阅专栏

本文深入探讨了散列技术，包括散列函数的设计原则及其在分离链接法和开放定址法中的应用。通过具体实例，详细介绍了线性探测、平方探测及双散列等冲突解决方法。

【数据结构与算法分析】第五章散列

散列是一种用于以常数平均时间执行插入、删除和查找技术。

1.散列函数

每个关键字被映射到从0到TableSize-1这个范围中的某个数，并放到适当的单元中。（TableSize为散列表的大小，表的大小为素数）

一种简单的散列函数：把字符串的ASCII码值加起来

typedef unsigned int Index;
Index Hsh(const char *key,int TableSize)
{
   unsigned  int HashVal=0;
    while(*key!=0)
        HashVal+=*key++;
    return HashVal % TableSize;
}

一种好的散列函数：Horner法则

Index Hash(const char *key,int TableSize)
{
    unsigned  int HashVal=0;
    while(*key!=0)
        HashVal=(HashVal<<5)+*key++;//向左移动5位，即乘以32
    return HashVal % TableSize;
}

如果当一个元素被插入时另一个元素已经存在（散列值相同），那么就产生一个冲突，消除冲突的方法：分离链接法和开放定址法

2.分离链接法

将散列到同一值得所有元素保留到一个表中。这些表都有表头。

例：散列表大小为10，散列函数就是Hash（key,10）= key % 10,每个散列值均有一个表，如key=16，则散列值为6，key=36，散列值为6，他们的处于同一链表中

分离链接散列表的类型声明：

#ifndef _Hash_H
struct ListNode;
typedef struct ListNode *Position;
struct HashTb;
typedef struct HashTb *HashTable;
HashTable InitializeTable(int TableSize);
void DestroyTable(HashTable H);
Position Find(ElementType Key,HashTable H);
void Insert(ElementType Key,HashTable H);
ElementType Retrieve(Position P);
#endef
struct ListNode
{
    ElementType Element;
    Position Next;
}
typedef Position List;
struct HashTb
{
    int TableSize;
    List *TheLists;
}

分离链接散列表的初始化例程：

HashTable InitializeTable(int TableSize)
{
    HashTable H;
    int i;
    if(TableSize < MinTableSize)
    {
        Error("Table size too small");
        return NULL;
    }
    H=malloc(sizeof(struct HashTb));//分配列散表空间
    if(H=NULL)
        FatalError("out of space");
    H->TableSize=NextPrime(TableSize);//设置表的大小为一个素数
    H->TheLists=malloc(sizeof(List)*H->TableSize);//分配链表的空间
    if(H->TheLists==NULL)
        FatalError("out of space");
    for(i=0;i<H->TableSize;i++)//分配每个链表的表头空间
    {
        H->TheLists[i] = malloc(sizeof(struct ListNode));
        if(H->TheLists[i]==NULL)
            FatalError("out of space");
        else
            H->TheLists[i]->Next = NULL;   
    }
    return H;
}

分离链接散列表的Find例程：

Position Find(ElementType Key,HashTable H)
{
    Posion P;
    List L;
    L=H->TheLists[Hash(key,H->TableSize)];//散列值相同的表，L为表头
    P=L->Next;//表的第二个元素的位置
    while(P!=NULL&&P->Element!=key)//如果有第二个元素
        P=P->Next;
    return P;//返回P的值，为NULL则无重复，否则重复
}

分离链接散列表的Insert例程：

void Insert(ElementType Key,HashTable H)
{
    Position Pos,NewCell;
    List L;
    Pos = Find(Key,H);//查看是否存在散列值
    if(Pos==NULL)//散列值不存在
    {
        NewCell=malloc(sizeof(struct ListNode));//分配链表中一个元素的内存
        if(NewCell==NULL)
            FatalError("out of space");
        else
        {
            L=H->TheLists[Hash(Key,H->TableSize)];//散列表下的一个链表，L为表头
            NewCell->Next=L->Next;//
            L->Next=NewCell;//向链表中插入一个值
            NewCell->Element=Key;//向前插入，插入链表中的最前位置
        }
    }
}

插入如图所示：插入值为56

3.开放定址法

定义散列表的装填因子为散列表中的元素个数与散列表大小的比值，分离链接散列表的装填因子为1.

开放定址散列法不需要使用链表，所需要的表较大，其装填因子低于0.5，三个开放定址的冲突解决方法：线性探测法，平方探测法，双散列

开放定址散列表的类型声明：

#ifndef _HashQuad_H
typedef unsigned int Index;
typedef Index Position;
struct HashTb;
typedef struct HashTb *HashTable;
HashTable InitializeTable(int TableSize);
void DestroyTable(HashTable H);
Position Find(ElementType Key,HashTable H);
void Insert(ElementType Key,HashTable H);
ElementType Retrieve(Position P,HashTable H);
HashTable Rehash(HashTable H);
#endef

enum KindOfEntry{Legitimate,Empty,file};
struct HashEntry
{
    ElementType Element;
    enum KindOfEntry Info;
};
typedef struct HashEntry cell;
struct HashTb
{
    int TableSize;
    Cell *TheCells;
}

初始化开放定址散列表：

HashTable InitializeTable(int TableSize)
{
    HashTable H;
    int i;
    if(TableSize < MinTableSize)
    {
        Error("tablesize too small");
        return NULL;
    }
    H=malloc(sizeof(struct HashTb));//为散列表分配内存
    if(H==NULL)
        FatalError("out of space");
    H->TableSize=NextPrime(TableSize);//设置散列表的大小为一个素数
    H->TheCells=malloc(sizeof(Cell)*H->TableSize);//分配散列表的表项单元的数组
    if(H->TheCell==NULL)
         FatalError("out of space");
    for(i=0;i<H->TableSize;i++)//将每个表项单元的数组设置为Empty
        H->TheCells[i].Info=Empty;
    return H;
}

3.1线性探测法

无冲突时Hash(X)=X mod TableSize

有冲突时，F(i)=i，F为冲突解决方法

例：将关键字{89，18，49，58，69}插入到一个散列表中

插入89时，无冲突，正常插入

插入18时，无冲突，正常插入

插入49时，与89产生一个冲突,F(1)=1,放入9下一个空闲位置，即地址0

插入58时，与18产生冲突，往下一个地址移动依次与89，49冲突，然后找到空地址，即地址1

插入69时，与89产生冲突，依次向后一个一个找空位置，直到找到地址2

3.2平方探测法

无冲突时Hash(X)=X mod TableSize

有冲突时，F(i)=i*i，F为冲突解决方法

例：将关键字{89，18，49，58，69}插入到一个散列表中

插入89时，无冲突，正常插入

插入18时，无冲突，正常插入

插入49时，与89产生一个冲突,F(1)=1,放入9下一个空闲位置，即地址0

插入58时，与18产生冲突，F(1)=1,探测相邻的单元，发生冲突，F(2)=2*2=4，探测18后的第四个位置，即位置2，找到空地址

插入69时，与89产生冲突，F(1)=1,探测相邻的单元，发生冲突，F(2)=2*2=4，探测89后的第四个位置，即位置3，找到空地址

使用平方探测散列法的Find例程：

Position Find(Element key,HashTable H)
{
    Position CollitionPos;
    int CollitionNum;
    CollitionNum=0;
    CollitionPos=Hash(key,H->TableSize);//返回散列值
    while(H->TheCells[CollitionPos].Info!=Empty&&H->TheCells[CollitionPos].Element!=key)//如果冲突,1次：CollitionPos=1，2次CollitionPos=4，3次：CollitionPos=9...
    {
        CollitionPos +=2 * ++CollitionNum - 1;//公式：F(i)=F(i-1)+2i-1
        if(CollitionPos>=H->TableSize)
            CollitionPos-=H->TableSize;
    }
    return CollitionPos;//返回key应插入的位置
}

使用平方探测散列法插入：

void Insert(ElementType key,HashTable H)
{
    Position Pos;
    Pos=Find(key,H);//Pos为应该插入的位置
    if(H->TheCells[Pos].Info!=Legitimate)//合法
    {
        H->TheCells[Pos].Info=Legistimate;
        H->TheCells[Pos].Element=key;
    }
}