哈希表的基本操作

最新推荐文章于 2024-06-18 14:22:42 发布

原创最新推荐文章于 2024-06-18 14:22:42 发布 · 1.1k 阅读

1 ·

CC 4.0 BY-SA版权

文章标签：

#哈希 #哈希冲突 #闭散列 #开散列

数据结构专栏收录该内容

12 篇文章

订阅专栏

理想的搜索方式：可以不经过任何比较，一次直接从表中得到要搜索的元素，构造一种结构，通过某种函数使元素的存储位置与它的关键码之间能够建立一一映射的关系，那么查找时就能通过函数一下找到该元素

哈希

本质上是一个数组，通过哈希函数使元素关键码和元素存储位置有一定的映射关系
搜索某一元素时，通过哈希函数计算出元素存储位置，在数组中按此位置取元素比较，相等即存在
插入，通过哈希函数计算出元素的存储位置并按这个位置进行存储
存在哈希冲突，两个不同元素通过哈希函数所映射出的存储位置相同

哈希冲突

哈希冲突无法完全避免，解决哈希冲突常见的两种方法：闭散列、开散列

闭散列

又名开放定址法，当发生哈希冲突时，如果哈希表没有满，就将待插入元素key插入到下一个空位去
如何找到下一个空位

线性探测

处理方式：从发生冲突的位置开始，依次继续往后探测，直到找到空位置
插入：用哈希函数找到待插入元素的存储位置，如果该位置没有元素，则直接插入；如果该位置有元素但不是待插入元素，就发生了哈希冲突，使用线性探测找到下一个空位置后插入
不可直接删除：用闭散列时，不能随便删除哈希表中存在的元素，如果直接把位置置空，会截断探测序列，影响其他元素的搜索【通常采用标记的伪删除法，如下文代码中的DELETE状态；也可以考虑引用计数】
缺点：发生哈希冲突，冲突都连在一起，不同的元素占用了空位置，使寻找某元素的位置需要多次比较，查找效率降低
负载因子
【填入表中的元素个数/散列表的长度】
因为散列表的长度是固定的，所以负载因子越大，表中的数据越多，产生冲突的可能性就越大；开放定址法的负载因子应严格控制在0.7-0.8以下，超过后需要对哈希表扩容

开散列

又名链地址法、开链法：用散列函数计算散列地址，具有相同散列地址的元素归于同一子集，每个子集称为一个桶，每个桶中的元素通过一个单链表连接起来，各链表的头结点存储在哈希表中
闭散列（开放定址法）代码实现

#pragma once
#include<assert.h>
#include<stdio.h>
#include<stdlib.h>
//char GetFirstKCountChar(char* str, int k)
//{
//  int hashtable[256] = { 0 };
//  char* cur = str;
//  while (*cur)
//  {
//      hashtable[*cur]++;
//      ++cur;
//  }
//  cur = str;
//  while (*cur)
//  {
//      if (hashtable[*cur] == k)
//          return *cur;
//      ++cur;
//  }
//  return -1;
//}
//void TestHashTable()
//{
//  char* str = "asdfghjkl";
//  GetFirstKCountChar(str, 1);
//}

// Open addressing (closed hashing)
typedef int keyType;
typedef int ValueType;

/*
 * State of a single slot in the table.
 * Declared through a typedef so that the bare name `Status` is a valid type
 * in C as well as in C++.
 */
typedef enum Status
{
    EMPTY,  /* slot has never held an entry */
    EXITS,  /* slot holds a live entry (spelling kept for compatibility) */
    DELETE, /* slot held an entry that was removed (tombstone) */
} Status;

/* One slot of the table: a key/value pair plus the slot's state. */
typedef struct HashNode
{
    keyType _key;
    ValueType _value;
    Status _status;
}HashNode;

/* Hash table handle: a heap-allocated slot array plus its bookkeeping. */
typedef struct HashTable
{
    HashNode* _tables; /* slot array; HashTableInit allocates _N slots */
    size_t _size;// number of stored entries
    size_t _N;// capacity (number of slots)
}HashTable;

/*
 * Returns the smallest prime in the built-in capacity table that is strictly
 * greater than `cur`. If `cur` is at or beyond the largest entry, the largest
 * entry is returned.
 */
size_t GetNextPrimeNum(size_t cur)
{
    /* The array is sized by its initializer so it is a valid static array in C
     * (a `const int` bound would make it a variable-length array). */
    static const unsigned long primes[] =
    {
        53ul, 97ul, 193ul, 389ul, 769ul, 1543ul, 3079ul,
        6161ul, 12289ul, 24593ul, 49157ul, 98317ul, 196613ul,
        393241ul, 786433ul, 1572869ul, 3145739ul, 6291469ul, 12582917ul,
        25165843ul, 50331653ul, 100663319ul, 201326611ul, 402653189ul,
        805306457ul, 1610612741ul, 3221225473ul, 4294967291ul
    };
    const size_t count = sizeof primes / sizeof primes[0];

    for (size_t i = 0; i < count; ++i)
    {
        if (primes[i] > cur)
        {
            return (size_t)primes[i];
        }
    }
    return (size_t)primes[count - 1];
}
/*
 * Prepares `ht` for use: allocates the initial slot array at the first
 * capacity returned by GetNextPrimeNum(0), marks every slot EMPTY and
 * resets the entry count to zero.
 * `ht` must not be NULL; allocation failure is checked with assert only.
 */
void HashTableInit(HashTable* ht)
{
    assert(ht);

    const size_t capacity = GetNextPrimeNum(0);
    HashNode* slots = (HashNode*)malloc(sizeof(HashNode) * capacity);

    ht->_size = 0;
    ht->_N = capacity;
    ht->_tables = slots;
    assert(slots);

    /* Only the status field is initialised; key and value are left as-is. */
    size_t slot = 0;
    while (slot < capacity)
    {
        slots[slot]._status = EMPTY;
        ++slot;
    }
}
/*
 * Maps `key` to a slot index in [0, n) by taking the remainder modulo `n`.
 * The key is converted to size_t before the division, which is the same
 * conversion the mixed int/size_t expression performs implicitly.
 * `n` must be non-zero.
 */
size_t HashFunc(keyType key, size_t n)
{
    const size_t unsignedKey = (size_t)key;
    return unsignedKey % n;
}
/*
 * Rebuilds the table at the next prime capacity and re-places every live
 * entry according to the new capacity; tombstones are dropped.
 * Returns 0 on success (or when no larger capacity exists) and -1 when the
 * new slot array cannot be allocated, in which case the table is unchanged.
 */
static int HashTableGrow(HashTable* ht)
{
    const size_t oldN = ht->_N;
    const size_t newN = GetNextPrimeNum(oldN);
    if (newN <= oldN)
        return 0; /* already at the largest capacity in the prime table */

    if (newN > (size_t)-1 / sizeof(HashNode))
        return -1; /* byte count would overflow size_t */

    HashNode* fresh = (HashNode*)malloc(newN * sizeof(HashNode));
    if (fresh == NULL)
        return -1;

    for (size_t i = 0; i < newN; ++i)
        fresh[i]._status = EMPTY;

    /* Slot positions depend on the capacity, so every live entry has to be
     * hashed again; copying the old array verbatim would make keys unfindable. */
    for (size_t i = 0; i < oldN; ++i)
    {
        if (ht->_tables[i]._status != EXITS)
            continue;

        size_t pos = HashFunc(ht->_tables[i]._key, newN);
        while (fresh[pos]._status == EXITS)
        {
            if (++pos == newN)
                pos = 0;
        }
        fresh[pos] = ht->_tables[i];
    }

    free(ht->_tables);
    ht->_tables = fresh;
    ht->_N = newN;
    return 0;
}

/*
 * Inserts (key, value) using linear probing.
 *
 * The table is grown first when the load factor has reached 0.7.
 * Returns 0 when the entry was stored.
 * Returns -1 when the key is already present, when growing the table failed,
 * when no free slot exists, or when the table has zero capacity.
 */
int HashTableInsert(HashTable* ht, keyType key, ValueType value)
{
    assert(ht);
    if (ht->_N == 0)
        return -1;

    if (ht->_size * 10 / ht->_N >= 7)
    {
        /* Grow and rehash */
        if (HashTableGrow(ht) != 0)
            return -1;
    }

    const size_t capacity = ht->_N;
    size_t index = HashFunc(key, capacity);
    size_t target = capacity; /* sentinel: no reusable slot found yet */

    /* Walk the whole probe sequence up to the first EMPTY slot so that a key
     * stored beyond a tombstone is still detected as a duplicate. The first
     * tombstone seen is remembered and reused for the new entry. */
    for (size_t probes = 0; probes < capacity; ++probes)
    {
        if (ht->_tables[index]._status == EMPTY)
        {
            if (target == capacity)
                target = index;
            break;
        }
        if (ht->_tables[index]._status == EXITS)
        {
            if (ht->_tables[index]._key == key)
                return -1;
        }
        else if (target == capacity)
        {
            target = index;
        }

        /* Linear probing with wrap-around */
        if (++index == capacity)
            index = 0;
    }

    if (target == capacity)
        return -1; /* every slot holds a live entry */

    ht->_tables[target]._key = key;
    ht->_tables[target]._value = value;
    ht->_tables[target]._status = EXITS;
    ht->_size++;
    return 0;
}
/*
 * Looks up `key` with linear probing.
 *
 * Returns a pointer to the slot holding the live entry, or NULL when the key
 * is not present (including when the table has zero capacity).
 * The returned pointer refers into the table's slot array.
 */
HashNode* HashTableFind(HashTable* ht, keyType key)
{
    assert(ht);
    if (ht->_N == 0)
        return NULL;

    const size_t capacity = ht->_N;
    size_t index = HashFunc(key, capacity);

    /* At most `capacity` probes: a table holding only live entries and
     * tombstones has no EMPTY slot to stop an unbounded loop. */
    for (size_t probes = 0; probes < capacity; ++probes)
    {
        HashNode* slot = &ht->_tables[index];

        if (slot->_status == EMPTY)
            break;
        /* Tombstones are skipped even when their stale key matches. */
        if (slot->_status == EXITS && slot->_key == key)
            return slot;

        if (++index == capacity)
            index = 0;
    }
    return NULL;
}
/*
 * Removes `key` from the table by marking its slot as DELETE.
 * The slot is not reset to EMPTY, so it stays part of the probe sequence.
 * Returns 0 when the key was found and removed, -1 when it was not found.
 */
int HashTableRemove(HashTable* ht, keyType key)
{
    HashNode* hit = HashTableFind(ht, key);
    if (!hit)
        return -1;

    hit->_status = DELETE;
    ht->_size -= 1;
    return 0;
}
/*
 * Releases the slot array and resets the table to an empty, zero-capacity
 * state. The slot pointer is cleared so that a stale pointer is never left
 * behind and a repeated destroy call is harmless.
 * `ht` must not be NULL.
 */
void HashTableDestory(HashTable* ht)
{
    assert(ht);
    free(ht->_tables);
    ht->_tables = NULL;
    ht->_size = 0;
    ht->_N = 0;
}
/*
 * Prints every slot of the table on one line followed by a newline:
 * live slots show their key, empty and deleted slots show their index.
 * Always returns 0. `ht` must not be NULL.
 */
int HashTablePrint(HashTable* ht)
{
    assert(ht);
    for (size_t i = 0; i < ht->_N; ++i)
    {
        if (ht->_tables[i]._status == EXITS)
        {
            printf("[EX]%d",ht->_tables[i]._key);
        }
        else if (ht->_tables[i]._status == EMPTY)
        {
            /* %zu is the conversion for size_t; %d here is undefined behaviour */
            printf("[%zuEM]  ",i);
        }
        else
        {
            printf("[%zuDE] ",i);
        }
    }
    printf("\n");
    return 0;
}