hash函数进阶一
通过一个例子来熟悉hash函数的功能作用以及如何使用。
0.例子:
(1)有100万个IP地址,从中查询是否存在待查的IP地址
(2)使用hash映射来实现
1.前言:
(1)hash表面上看起来只是将输入对象散列到hash表中,其实在实际使用过程中包括以下步骤:
- 将目标对象通过hash函数,得到对应的key键值(hash code),形成对应的hash对<key, value>
- 通过某种散列方式将key键值尽可能均匀地存放到hash表中
- 冲突处理机制
2.实现:
2.1.头文件:存放需要生成的IP地址个数等公共数据
/* Number of sample IP strings that are generated and loaded into the table. */
#define MAXIPNUM 1000000
/* Number of buckets in the hash table array. */
#define HASHTABLELEN 10000
/* Size in bytes of the buffer that holds one IP string, terminator included. */
#define IPSTRLEN 16
2.2.源文件一:随机生成样本IP地址及待查IP地址
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "hash_ip.h"
/*
 * Sample-data generator.
 *
 * Writes MAXIPNUM + 1 random dotted strings to stdout, one per line.
 * The first MAXIPNUM lines are the samples; the extra final line is the
 * address to look up afterwards.
 *
 * Returns 0 on completion.
 */
int main(void)
{
    int index = 0;
    int i = 0, j = 0, k = 0;

    /* Seed from the clock so each run produces a different data set. */
    srand((unsigned int)time(NULL));

    while (index < (MAXIPNUM + 1))
    {
        /* % 256 covers the full 0..255 range; % 255 could never yield 255. */
        i = rand() % 256;
        j = rand() % 256;
        k = rand() % 256;
        index++;

        /*
         * NOTE(review): only three components are printed, whereas a
         * dotted IPv4 address has four. Kept as-is so the data format
         * does not change -- confirm whether this is intended.
         */
        printf("%d.%d.%d\n", i, j, k);
    }

    return 0;
}
2.3.源文件二:hash实现
(1)节点结构:
/* One entry in a bucket's collision chain (singly linked list). */
struct hashNode
{
/* The stored IP string, NUL-terminated. */
char str[IPSTRLEN];
/* Full hash code of str as returned by gethashcode(), before the modulo. */
int hashcode;
/* Next entry in the same bucket, or NULL at the end of the chain. */
struct hashNode *next;
/* Bucket entry count at insertion time, i.e. zero-based depth in the chain. */
int value;
};
(2)hash数组表:
/* Bucket header; hashTable is the global array of HASHTABLELEN buckets. */
struct hashtable
{
/* Number of entries currently chained in this bucket. */
int value;
/* First entry of the bucket's chain, or NULL when the bucket is empty. */
struct hashNode *next;
}hashTable[HASHTABLELEN];
(3)初始化hash表(使用链表来解决冲突):
void inittable()
{
int index = 0;
for(index = 0; index < HASHTABLELEN; index++)
{
hashTable[index].next = NULL;
hashTable[index].value = 0;
}
}
(4)映射和散列:将目标IP地址映射散列到hash表中:
/* Defined later in this file; declared here so the call below is checked. */
int gethashcode(const char *str);

/*
 * Loads up to MAXIPNUM lines from stdin into the global hashTable.
 *
 * Each line (without its line terminator) is hashed with gethashcode(),
 * reduced modulo HASHTABLELEN to pick a bucket, and appended to the tail
 * of that bucket's chain. The new node records the bucket's entry count
 * at insertion time as its depth.
 *
 * Reading stops early at end of input or on a read error. A line that
 * does not fit in IPSTRLEN bytes is reported on stderr and skipped, but
 * still counts as one consumed line so that later input stays aligned.
 * If memory runs out, an error is reported on stderr and loading stops.
 */
void beginhash(void)
{
    /* Larger than IPSTRLEN so over-long lines can be detected, not overflowed. */
    char line[IPSTRLEN * 4];
    int index = 0, hashcode = 0, tmpindex = 0;
    struct hashNode *node = NULL;
    struct hashNode **link = NULL;

    while (index < MAXIPNUM && fgets(line, sizeof line, stdin) != NULL)
    {
        size_t len = strcspn(line, "\r\n");
        int ch = 0;

        /* No terminator in a full buffer: drop the rest of this line. */
        if (line[len] == '\0' && len == sizeof line - 1)
        {
            while ((ch = getchar()) != EOF && ch != '\n')
            {
            }
        }
        line[len] = '\0';
        index++;

        if (len >= IPSTRLEN)
        {
            fprintf(stderr, "beginhash: line %d too long, skipped\n", index);
            continue;
        }

        hashcode = gethashcode(line);
        tmpindex = hashcode % HASHTABLELEN;

        node = malloc(sizeof *node);
        if (node == NULL)
        {
            fprintf(stderr, "beginhash: out of memory\n");
            return;
        }
        node->hashcode = hashcode;
        node->value = hashTable[tmpindex].value;
        node->next = NULL;
        memcpy(node->str, line, len + 1);

        /* Walk to the chain's terminating link and attach the node there. */
        link = &hashTable[tmpindex].next;
        while (*link != NULL)
        {
            link = &(*link)->next;
        }
        *link = node;

        hashTable[tmpindex].value++;
    }
}
(5)hash函数:可以自行设计优化,使得更均匀,冲突更少
/*
 * Computes a non-negative hash code for a NUL-terminated string.
 *
 * The hash is a multiplicative rolling hash: for each character,
 * hash = hash * 2345 + character. Accumulation is done in unsigned
 * arithmetic so that wrap-around is well defined (signed overflow is
 * undefined behaviour in C). The result is masked to 31 bits so it is
 * never negative and can be used directly with the modulo operator.
 *
 * str: string to hash; must not be NULL.
 * Returns a value in the range 0 .. 0x7FFFFFFF; the empty string gives 0.
 */
int gethashcode(const char *str)
{
    unsigned int hashcode = 0;
    const char *tmpstr = str;

    while (*tmpstr)
    {
        /* Alternative mixing steps kept for experimentation: */
        //hashcode = (hashcode << 4) ^ (hashcode >> 28) + *tmpstr++;
        //hashcode = hashcode + *tmpstr++;
        //hashcode = *tmpstr++ + (hashcode << 6) + (hashcode << 16) - hashcode;
        //hashcode = hashcode * 1366 + *tmpstr++;
        hashcode = hashcode * 2345u + (unsigned int)*tmpstr++;
    }
    return (int)(hashcode & 0x7FFFFFFFu);
}
(6)查找:是否存在待查IP地址
/*
 * Looks up str in the global hashTable and reports the outcome on stdout.
 *
 * The bucket is chosen as gethashcode(str) % HASHTABLELEN. Every node in
 * that bucket's chain is examined; the cheap hash-code comparison is done
 * first so strcmp() only runs on candidates with an equal hash code.
 * One "find" line is printed for each matching node, and a single
 * "not find" line is printed when the chain holds no match.
 *
 * str: NUL-terminated string to search for.
 */
void findstr(const char *str)
{
    int code = gethashcode(str);
    int bucket = code % HASHTABLELEN;
    int found = 0;
    struct hashNode *cur;

    for (cur = hashTable[bucket].next; cur != NULL; cur = cur->next)
    {
        if (cur->hashcode == code && strcmp(cur->str, str) == 0)
        {
            found = 1;
            printf("find str: %s in %d index, and depth is %d\n", str, bucket, cur->value);
        }
    }

    if (!found)
    {
        printf("not find str: %s\n", str);
    }
}
2.4.自动化测试:使用批处理
:: Build both programs with the MSVC command-line compiler.
@cl /nologo haship.c
@cl /nologo randip.c
:: Remove the intermediate object files.
@del haship.obj randip.obj
:: Generate the random sample data, then feed it to the lookup program.
@call randip.exe > randip.txt
@call haship.exe < randip.txt > result.txt
:: Delete the executables and wait for a key press before closing.
@del haship.exe randip.exe
@pause