Hash算法的应用(我的博客)

最新推荐文章于 2024-11-23 21:43:13 发布

置顶看个人资料

最新推荐文章于 2024-11-23 21:43:13 发布

阅读量504

点赞数 1

分类专栏： C++ 文章标签： hash 算法

本文链接：https://blog.youkuaiyun.com/ucas_123/article/details/53021181

版权

C++ 专栏收录该内容

16 篇文章

订阅专栏

欢迎大家访问我的博客，相当好！问题：要解析用户输入的字符串，如new，edit，line，load，generator等等，需要判断软件中是否已经定义了这些字符串。通常的做法是软件把这些字符串存在一个数组里，再将用户输入的字符串与该字符串数组逐个比较。如果存储的数据量很大，这样逐个比较效率固然低下。有没有更好的办法呢？当然有，可以用哈希表来实现。
将代码记录如下：
MyHash.h

#pragma once
#include <string>
using namespace std;
// One bucket of the hash table: two parallel, heap-allocated arrays holding
// the stored strings and the index associated with each of them.
struct TSubList
{
	int Nelem;          // number of entries currently stored in this bucket
	int NAllocated;     // number of slots allocated in str and Idx
	std::string *str;   // stored strings; kept on the heap because stack space is limited
	int *Idx;           // Idx[k] is the index associated with str[k]
};
// Case-insensitive string table: strings are lower-cased, hashed into one of
// NumLists buckets, and associated with the running index handed out by Add.
//
// The object owns raw heap arrays (ListPtr, every bucket's str/Idx, StringPtr)
// that the destructor releases, so copying is disabled below: an implicit
// member-wise copy would make two objects delete the same arrays.
class MyHash
{
public:
	// Nelements is the expected number of strings; it sizes the bucket array
	// and the growth increment.
	MyHash(int Nelements);
	~MyHash(void);
int NumElementsAllocated;   // slots currently allocated in StringPtr
int NumLists;               // number of buckets in ListPtr
int NumElements;            // number of strings added so far
int LastHash;               // bucket number computed by the most recent Find
TSubList* ListPtr;          // array of NumLists buckets
string *StringPtr;          // flat array of the added strings, written at position NumElements by Add
string LastFindString;      // lower-cased argument of the most recent Find
int LastFind;               // slot inside the bucket hit by the most recent Find (0 if none)
int AllocationInc;          // number of slots a bucket grows by on each resize
// Stores the lower-cased S and returns the index assigned to it (the updated NumElements).
int Add(const string &S);
// Looks S up case-insensitively; returns the index stored for it, or 0 when there is no match.
int Find(const string &S);
// NOTE(review): no definition is visible in this file; presumably returns the i-th stored string.
string get(int i);
// Maps S to a bucket number in [0, NumLists).
int Hash(const string &S);
// Grows one bucket by AllocationInc slots and returns it.
TSubList* resizeSubList(TSubList *sublist);
// Grows StringPtr by AllocationInc*NumLists slots.
void ResizeStrPtr( );
// Allocate NewSize elements, copy the first oldSize from the old array, free the old array.
string *ReallocStr(string *s,int oldSize,int NewSize);
int *ReallocIdx(int *idx,int oldSize,int NewSize);
private:
	// Declared but intentionally not defined: copying a MyHash would lead to
	// a double delete[] of the owned arrays.
	MyHash(const MyHash &);
	MyHash &operator=(const MyHash &);
};

MyHash.cpp

#include "MyHash.h"
#include<math.h>
#include<algorithm>
// Yields the smaller of a and b. Function-like macro: whichever argument is
// selected is evaluated twice, so do not pass expressions with side effects.
#define  Min(a,b)   ((a)>(b)?(b):(a))
#include <iostream>
// Builds an empty table sized for about Nelements strings.
//
// The bucket count is the rounded square root of Nelements (at least 1), and
// AllocationInc (the per-bucket growth step) is Nelements/NumLists + 1.
// A zero or negative Nelements is treated as 0, so the bucket count is
// clamped BEFORE it is used as a divisor and the growth step is always >= 1.
MyHash::MyHash(int Nelements)
{
	const int requested = (Nelements > 0) ? Nelements : 0;

	NumElements = 0;
	NumElementsAllocated = 0;
	StringPtr = NULL;
	LastHash = 0;
	LastFind = 0;

	NumLists = static_cast<int>(floor(sqrt(static_cast<float>(requested)) + 0.5));
	if (NumLists < 1)
	{
		NumLists = 1;
	}
	AllocationInc = requested / NumLists + 1;

	// Allocate the bucket array. Each bucket's str/Idx arrays are allocated
	// lazily on first use because the required size is not known yet.
	ListPtr = new TSubList[NumLists];
	for (int i = 0; i < NumLists; i++)
	{
		ListPtr[i].str = NULL;
		ListPtr[i].Idx = NULL;
		ListPtr[i].NAllocated = 0;
		ListPtr[i].Nelem = 0;
	}
}
// Simple hash: folds at most the first 8 characters of S with
// value = value*2 + character code, then reduces modulo NumLists.
// Returns a bucket number in [0, NumLists).
//
// Characters are read as unsigned char and accumulated in an unsigned type so
// that bytes with the high bit set cannot produce a negative bucket number.
// For plain ASCII input the result is the same as with signed arithmetic.
int MyHash::Hash(const std::string &S)
{
	const std::string::size_type limit = (S.length() < 8) ? S.length() : 8;
	unsigned long hashValue = 0;

	for (std::string::size_type i = 0; i < limit; ++i)
	{
		hashValue = hashValue * 2 + static_cast<unsigned char>(S[i]);
	}
	return static_cast<int>(hashValue % static_cast<unsigned long>(NumLists));
}

int MyHash::Find(const string &S)
{
	int result = 0 ;
	LastFind = 0;
	LastFindString = S;
	transform(LastFindString.begin(),LastFindString.end(),LastFindString.begin(),::tolower);
	LastHash  = Hash(LastFindString);
	for(int i=0;i<ListPtr[LastHash].Nelem;i++)
	{
		if(LastFindString.compare(ListPtr[LastHash].str[i]) == 0)
		{
			result = ListPtr[LastHash].Idx[i];
			LastFind = i;
			break;
		}
	}
	
	return result;

}

int MyHash::Add(const string &S)  //添加进哈希表
{
int hashNum;
int result=0;
string temp = S;
TSubList *p1;
transform(temp.begin(),temp.end(),temp.begin(),::tolower); //统一都小写
hashNum = Hash(temp);
  NumElements++;
if(NumElements > NumElementsAllocated)
{
	ResizeStrPtr();
} 
result = NumElements;
ListPtr[hashNum].Nelem ++;
if(ListPtr[hashNum].Nelem >= ListPtr[hashNum].NAllocated)
{
	p1 = (ListPtr+hashNum);
	p1 = resizeSubList(p1);
	
}
StringPtr[NumElements] = temp;
ListPtr[hashNum].str[ListPtr[hashNum].Nelem] = temp;
ListPtr[hashNum].Idx[ListPtr[hashNum].Nelem] = NumElements;
return result;
}
// Enlarges one bucket by AllocationInc slots: records the new capacity, then
// reallocates the string array and the index array, carrying over the slots
// that were allocated before. Returns the same bucket pointer it was given.
TSubList* MyHash::resizeSubList(TSubList *sublist)
{
	const int previousCapacity = sublist->NAllocated;
	const int grownCapacity = previousCapacity + AllocationInc;

	sublist->NAllocated = grownCapacity;
	sublist->str = ReallocStr(sublist->str, previousCapacity, grownCapacity);
	sublist->Idx = ReallocIdx(sublist->Idx, previousCapacity, grownCapacity);
	return sublist;
}


// Allocates a new array of NewSize strings and returns it. When oldSize is
// positive, the first oldSize strings of s are copied over and s is released
// with delete[]; otherwise s is left untouched.
string *MyHash::ReallocStr(string *s,int oldSize,int NewSize)
{
	string *grown = new string[NewSize];
	if (oldSize <= 0)
	{
		return grown;
	}

	string *dst = grown;
	for (string *src = s; src != s + oldSize; ++src, ++dst)
	{
		*dst = *src;
	}
	delete[] s;   // the old storage is no longer needed
	return grown;
}

// Allocates a new array of NewSize ints and returns it. When oldSize is
// positive, the first oldSize values of idx are copied over and idx is
// released with delete[]; otherwise idx is left untouched.
// The new storage is handed back through the return value (a pointer-to-
// pointer parameter would have been the alternative).
//
// std::copy from <algorithm> is used for the copy so the function does not
// depend on memcpy, whose header is not included by this file.
int* MyHash::ReallocIdx(int *idx,int oldSize,int NewSize)
{
	int *grown = new int[NewSize];
	if (oldSize > 0)
	{
		std::copy(idx, idx + oldSize, grown);
		delete[] idx;
	}
	return grown;
}


void MyHash::ResizeStrPtr()   //调整StringPtr开辟内存
{
	int  OldAllocation ;
	string *NewPointer;
	OldAllocation = NumElementsAllocated;
	NumElementsAllocated = OldAllocation + AllocationInc*NumLists;
	NewPointer = new string[NumElementsAllocated];
	if(OldAllocation > 0)   //释放原来的内存
	{
     //先拷贝内存
		for(int i=0;i < OldAllocation;i++)
		{
			NewPointer[i] = StringPtr[i];
		}

		delete[] StringPtr;  //释放原来的内存
	}
	StringPtr = NewPointer;

}

// Releases everything the table owns: each bucket's index and string arrays,
// then the bucket array itself, then the flat string array.
// delete[] on a null pointer does nothing, so buckets that were never used
// need no special handling.
MyHash::~MyHash(void)
{
	for (int bucketNo = 0; bucketNo < NumLists; ++bucketNo)
	{
		TSubList &bucket = ListPtr[bucketNo];

		// NOTE(review): the original author marked this release as
		// "problematic, check" without saying why.
		delete[] bucket.Idx;
		bucket.Idx = NULL;   // do not leave a dangling pointer behind

		delete[] bucket.str;
		bucket.str = NULL;
	}

	delete[] ListPtr;
	ListPtr = NULL;

	delete[] StringPtr;
	StringPtr = NULL;
}

测试代码main.cpp

#include <stdio.h>
#include "MyHash.h"
#include <iostream>
using namespace  std;
/******************* Author: ************************
********************* 2016/11/4 **********************/

// Number of demo commands. A typed constant instead of a macro, so the name
// obeys scope rules; it is still usable as an array bound.
const int numberCommand = 11;
// Command names for the demo; filled in by defineCommand().
std::string ExecCommand[numberCommand];
// Fills ExecCommand with the command names.
void defineCommand();
// Command handlers called from main.
void New( );
void Edit();
// Demo driver: loads the command names into a MyHash, looks up "Show",
// prints the index that was found and dispatches on it.
//
// Add hands out indices starting at 1 and Find returns 0 for "not found", so
// the handlers are keyed on 1 ("New", the first command added) and 2
// ("Edit"); 0 falls through to the default branch and runs nothing.
int main()
{
	int paramPointer = 0;
	{
		// Automatic storage: the table is released even if Add throws, and
		// it is destroyed before the final pause.
		MyHash commandList(numberCommand);
		defineCommand();
		for (int i = 0; i < numberCommand; i++)
		{
			commandList.Add(ExecCommand[i]);
		}
		paramPointer = commandList.Find("Show");
	}
	cout << paramPointer << endl;

	switch (paramPointer)        // select the handler to run
	{
	case 1:
		New();
		break;
	case 2:
		Edit();
		break;
	default:
		break;
	}

	system("pause");
	return 0;
}
// Fills ExecCommand[0..10] with the demo command names, in this order.
void defineCommand()
{
	static const char *const commandNames[] = {
		"New", "Edit", "More", "M", "~", "Select",
		"Save", "Show", "Solve", "Enable", "Disable"
	};
	const int nameCount = static_cast<int>(sizeof(commandNames) / sizeof(commandNames[0]));

	for (int slot = 0; slot < nameCount; ++slot)
	{
		ExecCommand[slot] = commandNames[slot];
	}
}
// Handler for the "New" command; an empty placeholder in this demo.
void New()
{
	// intentionally empty
}
// Handler for the "Edit" command; an empty placeholder in this demo.
void Edit()
{
	// intentionally empty
}

这是哈希表的一个简单应用。网上流传较广的还有暴雪哈希算法，尤为经典，它的实现原理与MD5散列算法有点相似。它里面用到三次哈希，通过这种哈希算法，能够快速查找库里是否有这个文件，而不需要一个个字符串地去对比。至于其中的单向散列算法为何这样实现，还有待深究！