散列(hashing)的基本思想是:通过一个确定的散列函数H,把数据对象的关键字K映射到相应的散列值H(K),这个值就是该对象在散列表中的存储位置,又称散列地址。查找时,用同一个散列函数对要查找的关键字K计算地址H(K),然后到散列表的相应单元中取出要找的对象。对于散列表,最重要的是散列函数的构造和冲突的处理。影响散列表查找效率的主要因素是装填因子(load factor,又称装载因子)。
实验课题:做这个实验时采用Open Addressing框架,也可加做Separate Chaining以形成比较。
1 构造散列表,把字符串数组中的各项加入到散列表中
// Sample keys for the experiment (C++ form): 13 strings, with "hawk" listed
// twice (third and last entries).
string MyBirds[13] = { "robin", "sparrow", "hawk", "eagle", "seagull", "bluejay",
"owl", "cardinal", "Jakana", "Moa", "Egret", "Penguin", "hawk" };
用C表示,可以是
/* Sample keys for the experiment (C form): 13 string literals, with "hawk"
   listed twice (third and last entries). */
char * MyBirds[13] = { "robin", "sparrow", "hawk", "eagle", "seagull", "bluejay",
"owl", "cardinal", "Jakana", "Moa", "Egret", "Penguin", "hawk" };
为便于观察冲突现象,初始构造散列表时,表的容量不要过大,对Open Addressing,装载因子为0.5左右,对于Separate Chaining,装载因子为1左右即可。也不要做rehash(该改源代码的哪里,如何改)。
建议对源代码做些改动、增加一些输出(建议用条件编译控制这些输出),以便于观察冲突的发生和解决;
对于Open Addressing,参考代码的冲突解决方案是用的平方探测(quadratic probing),如果用线性探测(linear probing)的策略,应该对函数findPos做什么修改(冲突解决的策略都集中在那里)?
2 观察不同的散列函数产生冲突散列地址的情况
教科书上给了3个以字符串作输入的散列函数(两教科书第3个不一样),观察记录它们产生冲突散列地址的情况,写入你的实验报告。还可对下列散列函数(所谓ELF hash,Unix System V用的)作观察
// ELF hash (the string hash used by Unix System V object files), C++ version.
//
// Returns the raw 28-bit hash of `key`; the caller is expected to reduce it
// with `% M`, where M is the table size.
int hash (const string & key)
{
    unsigned long h = 0;
    for ( string::size_type i = 0; i < key.length( ); i++ ) {
        // Treat each character as an unsigned byte so non-ASCII characters
        // are not sign-extended, and keep h within 32 bits even where
        // unsigned long is wider.
        h = ( (h << 4) + (unsigned char) key[ i ] ) & 0xFFFFFFFFUL;
        // Fold the top nibble back into the low bits, then clear it.
        unsigned long g = h & 0xF0000000UL;
        if (g) h ^= g >> 24;
        h &= ~g;
    }
    return (int) h;   // caller applies % M
}
/*
 * ELF hash (the string hash used by Unix System V object files), C version.
 *
 * Returns the raw 28-bit hash of the NUL-terminated string `key`; the caller
 * is expected to reduce it with `% M`, where M is the table size.
 */
int hash (char * key)
{
    unsigned long h = 0;
    while ( *key ) {
        /* Treat each character as an unsigned byte so non-ASCII characters
           are not sign-extended, and keep h within 32 bits even where
           unsigned long is wider. */
        h = ( (h << 4) + (unsigned char) *key++ ) & 0xFFFFFFFFUL;
        /* Fold the top nibble back into the low bits, then clear it. */
        unsigned long g = h & 0xF0000000UL;
        if (g) h ^= g >> 24;
        h &= ~g;
    }
    return (int) h;   /* caller applies % M */
}
实验课题:做这个实验时采用Open Addressing框架,也可加做Separate Chaining以形成比较。
1 构造散列表,把字符串数组中的各项加入到散列表中
// Sample keys for the experiment (C++ form): 13 strings, with "hawk" listed
// twice (third and last entries).
string MyBirds[13] = { "robin", "sparrow", "hawk", "eagle", "seagull", "bluejay",
"owl", "cardinal", "Jakana", "Moa", "Egret", "Penguin", "hawk" };
用C表示,可以是
/* Sample keys for the experiment (C form): 13 string literals, with "hawk"
   listed twice (third and last entries). */
char * MyBirds[13] = { "robin", "sparrow", "hawk", "eagle", "seagull", "bluejay",
"owl", "cardinal", "Jakana", "Moa", "Egret", "Penguin", "hawk" };
为便于观察冲突现象,初始构造散列表时,表的容量不要过大,对Open Addressing,装载因子为0.5左右,对于Separate Chaining,装载因子为1左右即可。也不要做rehash(该改源代码的哪里,如何改)。
建议对源代码做些改动、增加一些输出(建议用条件编译控制这些输出),以便于观察冲突的发生和解决;
对于Open Addressing,参考代码的冲突解决方案是用的平方探测(quadratic probing),如果用线性探测(linear probing)的策略,应该对函数findPos做什么修改(冲突解决的策略都集中在那里)?
2 观察不同的散列函数产生冲突散列地址的情况
教科书上给了3个以字符串作输入的散列函数(两教科书第3个不一样),观察记录它们产生冲突散列地址的情况,写入你的实验报告。还可对下列散列函数(所谓ELF hash,Unix System V用的)作观察
// ELF hash (the string hash used by Unix System V object files), C++ version.
//
// Returns the raw 28-bit hash of `key`; the caller is expected to reduce it
// with `% M`, where M is the table size.
int hash (const string & key)
{
    unsigned long h = 0;
    for ( string::size_type i = 0; i < key.length( ); i++ ) {
        // Treat each character as an unsigned byte so non-ASCII characters
        // are not sign-extended, and keep h within 32 bits even where
        // unsigned long is wider.
        h = ( (h << 4) + (unsigned char) key[ i ] ) & 0xFFFFFFFFUL;
        // Fold the top nibble back into the low bits, then clear it.
        unsigned long g = h & 0xF0000000UL;
        if (g) h ^= g >> 24;
        h &= ~g;
    }
    return (int) h;   // caller applies % M
}
/*
 * ELF hash (the string hash used by Unix System V object files), C version.
 *
 * Returns the raw 28-bit hash of the NUL-terminated string `key`; the caller
 * is expected to reduce it with `% M`, where M is the table size.
 */
int hash (char * key)
{
    unsigned long h = 0;
    while ( *key ) {
        /* Treat each character as an unsigned byte so non-ASCII characters
           are not sign-extended, and keep h within 32 bits even where
           unsigned long is wider. */
        h = ( (h << 4) + (unsigned char) *key++ ) & 0xFFFFFFFFUL;
        /* Fold the top nibble back into the low bits, then clear it. */
        unsigned long g = h & 0xF0000000UL;
        if (g) h ^= g >> 24;
        h &= ~g;
    }
    return (int) h;   /* caller applies % M */
}
3 对散列表做查找。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Defined => the table-building code prints every collision and placement. */
#define PrintCollision 1
#define NumItems 400
#define MinTableSize (10)
/* Load factor passed to CreateHashTable by main. */
#define LoadA (0.5)

/* Keys are C strings; the table stores the pointers themselves. */
typedef char* ElementType;
typedef unsigned int Index;
typedef Index Position;

/* State of one table cell. */
enum KindOfEntry { Legitimate, Empty, Deleted };

/* One slot of the table: the stored key plus its state. */
typedef struct HashEntry
{
    ElementType Element;
    enum KindOfEntry Info;
} Cell;

/* The table proper: TheCells is an array of TableSize cells, */
/* allocated when the table is created.                       */
struct HashTbl
{
    int TableSize;
    Cell *TheCells;
};

/* HashTable is a pointer to a struct HashTbl. */
typedef struct HashTbl *HashTable;

/* Pointer to a hash function taking (key, table size). */
typedef unsigned int (*HashFunc)(ElementType, int);
/* Return the first prime at or above N by trial division over odd */
/* candidates; callers are expected to pass N >= 10.               */
static int
NextPrime( int N )
{
    int Candidate = ( N % 2 == 0 ) ? N + 1 : N;

    for( ; ; )
    {
        int Divisor = 3;
        int Composite = 0;

        /* Only odd divisors up to sqrt(Candidate) need checking. */
        while( Divisor * Divisor <= Candidate )
        {
            if( Candidate % Divisor == 0 )
            {
                Composite = 1;
                break;
            }
            Divisor += 2;
        }

        if( !Composite )
            return Candidate;
        Candidate += 2;
    }
}
/*
 * Build an open-addressing hash table (quadratic probing) from the A_size
 * keys in A.
 *
 *   A, A_size  keys to insert; the table stores the pointers, not copies,
 *              so the strings must outlive the table.
 *   H          ignored on entry (kept for interface compatibility); a new
 *              table is always allocated.
 *   pHash      hash function used to pick each key's home cell.
 *   loaderA    target load factor; the table size is the next prime at or
 *              above A_size / loaderA.
 *
 * Returns the new table, or NULL on invalid arguments or allocation failure.
 * The caller owns the result and must free TheCells and the table itself.
 *
 * Duplicate keys are not detected: each occurrence takes its own cell.
 * When PrintCollision is defined, every collision and placement is printed.
 */
HashTable CreateHashTable( ElementType A[],int A_size,HashTable H, HashFunc pHash,float loaderA)
{
    int i, Pos;
    int CollisionNum;

    if( A_size < 0 || loaderA <= 0 || pHash == NULL )
        return NULL;

    /* Allocate table */
    H = malloc( sizeof( struct HashTbl ) );
    if( H == NULL )
    {
        printf( "Out of space!!!" );
        return NULL;
    }

    H->TableSize = NextPrime( A_size / loaderA );
    printf("the TableSize should be :%d\n",H->TableSize);

    /* Allocate array of Cells */
    H->TheCells = malloc( sizeof( Cell ) * (size_t)H->TableSize );
    if( H->TheCells == NULL )
    {
        printf( "Out of space!!!" );
        free( H );
        return NULL;
    }

    for( i = 0; i < H->TableSize; i++ )
        H->TheCells[ i ].Info = Empty;

    for( i = 0; i < A_size; i++ )
    {
        /* The probe count must restart for every key: the i-th probe lands */
        /* at home + i*i, and a lookup retraces exactly that sequence from  */
        /* i = 0. Carrying the count over from earlier keys would place     */
        /* this key where a lookup never looks.                             */
        CollisionNum = 0;
        Pos = (int)( pHash( A[i], H->TableSize ) % (unsigned int)H->TableSize );

        while( H->TheCells[ Pos ].Info != Empty )
        {
#ifdef PrintCollision
            printf("\tCollision %d at H[%d]\n " ,CollisionNum+1,Pos);
#endif
            /* The quadratic sequence repeats after TableSize steps, so   */
            /* probing further cannot reach a new cell.                   */
            if( ++CollisionNum >= H->TableSize )
            {
                Pos = -1;
                break;
            }
            /* i*i - (i-1)*(i-1) == 2*i - 1: step to the next square offset. */
            Pos = ( Pos + 2 * CollisionNum - 1 ) % H->TableSize;
        }

        if( Pos < 0 )
        {
            printf( "\tNo empty cell reachable for %s; key skipped.\n", A[i] );
            continue;
        }

#ifdef PrintCollision
        if( CollisionNum > 0 )
            printf("\tFind the H[%d] is Empty.\n " ,Pos);
        printf("Put %s in H[%d]\n " ,A[i],Pos);
#endif
        H->TheCells[ Pos ].Info = Legitimate;
        H->TheCells[ Pos ].Element = A[i];   /* pointer stored, not a copy */
    }
    return H;
}
/* Additive hash: sum the characters of Key and reduce modulo TableSize. */
unsigned int
Hash1( char *Key, int TableSize )
{
    unsigned int Sum = 0;
    const char *Cursor;

    for( Cursor = Key; *Cursor != '\0'; ++Cursor )
        Sum += *Cursor;

    return Sum % TableSize;
}
/* END */
/* START: fig5_4.txt */
/*
 * Hash on the first three characters of Key, weighted 1, 27 and 729,
 * reduced modulo TableSize (which must be positive).
 *
 * Characters past the end of a short key count as 0, so nothing beyond the
 * terminating NUL is read. Characters are taken as unsigned bytes so the
 * result is always in [0, TableSize).
 */
unsigned int
Hash2( char *Key, int TableSize )
{
    unsigned int C0 = 0, C1 = 0, C2 = 0;

    if( Key[ 0 ] != '\0' )
    {
        C0 = (unsigned char)Key[ 0 ];
        if( Key[ 1 ] != '\0' )
        {
            C1 = (unsigned char)Key[ 1 ];
            /* Key[2] is in bounds here: it is a character or the NUL. */
            C2 = (unsigned char)Key[ 2 ];
        }
    }

    return ( C0 + 27u * C1 + 729u * C2 ) % (unsigned int)TableSize;
}
/* END */
/* START: fig5_5.txt */
/* Polynomial hash with base 32: each step multiplies the running value by */
/* 32 and adds the next character; the result is reduced modulo TableSize. */
unsigned int
Hash3( char *Key, int TableSize )
{
    unsigned int Acc = 0;
    const char *Cursor;

    for( Cursor = Key; *Cursor != '\0'; ++Cursor )
        Acc = Acc * 32u + (unsigned int)*Cursor;

    return Acc % TableSize;
}
/* END */
/*
 * ELF hash (the string hash used by Unix System V object files), reduced
 * modulo TableSize (which must be positive).
 *
 * Each character is taken as an unsigned byte and the running value is kept
 * within 32 bits, so the result does not depend on whether plain char is
 * signed or on the width of unsigned long.
 */
unsigned int Hash4 (char * key,int TableSize)
{
    unsigned long h = 0;
    while ( *key ) {
        h = ( (h << 4) + (unsigned char) *key++ ) & 0xFFFFFFFFUL;
        /* Fold the top nibble back into the low bits, then clear it. */
        unsigned long g = h & 0xF0000000UL;
        if (g) h ^= g >> 24;
        h &= ~g;
    }
    return (unsigned int)( h % (unsigned long)TableSize );
}
/*
 * Look up Key in the open-addressing table H using quadratic probing.
 *
 *   Key    NUL-terminated string to search for (compared with strcmp).
 *   H      table to search.
 *   pHash  hash function; must be the one the table was built with.
 *
 * Returns the index of the cell holding Key, or -1 if Key is absent or an
 * argument is invalid. Only Legitimate cells can match; Deleted cells are
 * stepped over.
 */
int FindInHashTable( ElementType Key, HashTable H,HashFunc pHash )
{
    Position Pos;
    Position Size;
    int CollisionNum = 0;

    if( Key == NULL || pHash == NULL || H == NULL
        || H->TheCells == NULL || H->TableSize <= 0 )
        return -1;

    Size = (Position)H->TableSize;
    Pos = pHash( Key, H->TableSize ) % Size;

    while( H->TheCells[ Pos ].Info != Empty )
    {
        if( H->TheCells[ Pos ].Info == Legitimate
            && strcmp( H->TheCells[ Pos ].Element, Key ) == 0 )
            return (int)Pos;

        /* The quadratic sequence repeats after TableSize steps; stop */
        /* instead of looping forever when no Empty cell is reachable. */
        if( ++CollisionNum >= H->TableSize )
            break;

        /* i*i - (i-1)*(i-1) == 2*i - 1: step to the next square offset. */
        Pos = ( Pos + 2u * (Position)CollisionNum - 1u ) % Size;
    }
    return -1;   /* -1: the key is not in the table */
}
main( )
{
HashTable H;
int i=0;
char search_key[20];
// int k[10]={26,36,41,38,41,15,68,12,06,51};
char* MyBirds[13] = { "robin", "sparrow", "hawk", "eagle", "seagull", "bluejay",
"owl", "cardinal", "Jakana", "Moa", "Egret", "Penguin", "hawk" };
H=CreateHashTable(MyBirds,13,H,Hash1,LoadA);
printf( "\nPlease input the key_word you would search:\n" );
scanf("%s",search_key);
i=FindInHashTable((char*)search_key,H,Hash1 );
if(i!=-1)
printf( "Find the %s at H[%d]\n" ,search_key,i);
else
printf( "No The %s you find!\n" ,search_key);
DestroyTable( H );
return 0;
}
545

被折叠的 条评论
为什么被折叠?



