hash_set的底层数据结构是哈希表,因此要深入了解hash_set,必须先分析哈希表。哈希表是根据关键码值(Key value)直接进行访问的数据结构:它用哈希函数处理数据得到关键码值,关键码值对应表中的一个特定位置,再由该位置来访问记录,这样可以在O(1)的时间复杂度内访问到数据。但是很有可能出现多个数据经哈希函数处理后得到同一个关键码——这就产生了冲突。解决冲突的方法有很多,这里采用最方便、最有效的一种——链地址法:当有冲突发生时,将具有同一关键码的数据组成一个链表。下图展示了链地址法的使用:

前面已经介绍过哈希表了,这里直接看hash_set的源码:
// Filename: stl_hash_set.h
// Comment By: 凝霜
// E-mail: mdl2009@vip.qq.com
// Blog: http://blog.youkuaiyun.com/mdl13412
// hash_set和hash_multiset是对hashtable的简单包装, 很容易理解
/*
* Copyright (c) 1996
* Silicon Graphics Computer Systems, Inc.
*
* Permission to use, copy, modify, distribute and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appear in all copies and
* that both that copyright notice and this permission notice appear
* in supporting documentation. Silicon Graphics makes no
* representations about the suitability of this software for any
* purpose. It is provided "as is" without express or implied warranty.
*
*
* Copyright (c) 1994
* Hewlett-Packard Company
*
* Permission to use, copy, modify, distribute and sell this software
* and its documentation for any purpose is hereby granted without fee,
* provided that the above copyright notice appear in all copies and
* that both that copyright notice and this permission notice appear
* in supporting documentation. Hewlett-Packard Company makes no
* representations about the suitability of this software for any
* purpose. It is provided "as is" without express or implied warranty.
*
*/
/* NOTE: This is an internal header file, included by other STL headers.
* You should not attempt to use it directly.
*/
#ifndef __SGI_STL_INTERNAL_HASH_SET_H
#define __SGI_STL_INTERNAL_HASH_SET_H
__STL_BEGIN_NAMESPACE
#if defined(__sgi) && !defined(__GNUC__) && (_MIPS_SIM != _MIPS_SIM_ABI32)
#pragma set woff 1174
#endif
// 如果编译器不能根据前面模板参数推导出后面使用的默认参数类型,
// 那么就需要手工指定, 并且对于基本的数据类型, 在<stl_hash_fun.h>
// 中都提供hash函数
#ifndef __STL_LIMITED_DEFAULT_TEMPLATES
template <class Value, class HashFcn = hash<Value>,
class EqualKey = equal_to<Value>,
class Alloc = alloc>
#else
template <class Value, class HashFcn, class EqualKey, class Alloc = alloc>
#endif
class hash_set
{
private:
// identity<Value>用于析出Value
typedef hashtable<Value, Value, HashFcn, identity<Value>,
EqualKey, Alloc> ht;
ht rep; // 其实hash_set就是hashtable的简单封装
public:
typedef typename ht::key_type key_type;
typedef typename ht::value_type value_type;
typedef typename ht::hasher hasher;
typedef typename ht::key_equal key_equal;
// 注意: reference, pointer, iterator都为const, 因为不能修改hashtable
// 内部的元素, 否则会导致hashtable失效
typedef typename ht::size_type size_type;
typedef typename ht::difference_type difference_type;
typedef typename ht::const_pointer pointer;
typedef typename ht::const_pointer const_pointer;
typedef typename ht::const_reference reference;
typedef typename ht::const_reference const_reference;
typedef typename ht::const_iterator iterator;
typedef typename ht::const_iterator const_iterator;
// 返回hash相关函数
hasher hash_funct() const { return rep.hash_funct(); }
key_equal key_eq() const { return rep.key_eq(); }
public:
hash_set() : rep(100, hasher(), key_equal()) {}
explicit hash_set(size_type n) : rep(n, hasher(), key_equal()) {}
hash_set(size_type n, const hasher& hf) : rep(n, hf, key_equal()) {}
hash_set(size_type n, const hasher& hf, const key_equal& eql)
: rep(n, hf, eql) {}
#ifdef __STL_MEMBER_TEMPLATES
template <class InputIterator>
hash_set(InputIterator f, InputIterator l)
: rep(100, hasher(), key_equal()) { rep.insert_unique(f, l); }
template <class InputIterator>
hash_set(InputIterator f, InputIterator l, size_type n)
: rep(n, hasher(), key_equal()) { rep.insert_unique(f, l); }
template <class InputIterator>
hash_set(InputIterator f, InputIterator l, size_type n,
const hasher& hf)
: rep(n, hf, key_equal()) { rep.insert_unique(f, l); }
template <class InputIterator>
hash_set(InputIterator f, InputIterator l, size_type n,
const hasher& hf, const key_equal& eql)
: rep(n, hf, eql) { rep.insert_unique(f, l); }
#else
hash_set(const value_type* f, const value_type* l)
: rep(100, hasher(), key_equal()) { rep.insert_unique(f, l); }
hash_set(const value_type* f, const value_type* l, size_type n)
: rep(n, hasher(), key_equal()) { rep.insert_unique(f, l); }
hash_set(const value_type* f, const value_type* l, size_type n,
const hasher& hf)
: rep(n, hf, key_equal()) { rep.insert_unique(f, l); }
hash_set(const value_type* f, const value_type* l, size_type n,
const hasher& hf, const key_equal& eql)
: rep(n, hf, eql) { rep.insert_unique(f, l); }
hash_set(const_iterator f, const_iterator l)
: rep(100, hasher(), key_equal()) { rep.insert_unique(f, l); }
hash_set(const_iterator f, const_iterator l, size_type n)
: rep(n, hasher(), key_equal()) { rep.insert_unique(f, l); }
hash_set(const_iterator f, const_iterator l, size_type n,
const hasher& hf)
: rep(n, hf, key_equal()) { rep.insert_unique(f, l); }
hash_set(const_iterator f, const_iterator l, size_type n,
const hasher& hf, const key_equal& eql)
: rep(n, hf, eql) { rep.insert_unique(f, l); }
#endif /*__STL_MEMBER_TEMPLATES */
public:
// 下面都是对hashtable的简单封装, 见<stl_hashtable.h>
size_type size() const { return rep.size(); }
size_type max_size() const { return rep.max_size(); }
bool empty() const { return rep.empty(); }
void swap(hash_set& hs) { rep.swap(hs.rep); }
friend bool operator== __STL_NULL_TMPL_ARGS (const hash_set&,
const hash_set&);
iterator begin() const { return rep.begin(); }
iterator end() const { return rep.end(); }
public:
pair<iterator, bool> insert(const value_type& obj)
{
pair<typename ht::iterator, bool> p = rep.insert_unique(obj);
r
eturn pair<iterator, bool>(p.first, p.second);
}
#ifdef __STL_MEMBER_TEMPLATES
template <class InputIterator>
void insert(InputIterator f, InputIterator l) { rep.insert_unique(f,l); }
#else
void insert(const value_type* f, const value_type* l) {
rep.insert_unique(f,l);
}
void insert(const_iterator f, const_iterator l) {rep.insert_unique(f, l); }
#endif /*__STL_MEMBER_TEMPLATES */
// hash_set和set一样, 都不允许key重复
pair<iterator, bool> insert_noresize(const value_type& obj)
{
pair<typename ht::iterator, bool> p = rep.insert_unique_noresize(obj);
return pair<iterator, bool>(p.first, p.second);
}
iterator find(const key_type& key) const { return rep.find(key); }
size_type count(const key_type& key) const { return rep.count(key); }
pair<iterator, iterator> equal_range(const key_type& key) const
{ return rep.equal_range(key); }
size_type erase(const key_type& key) {return rep.erase(key); }
void erase(iterator it) { rep.erase(it); }
void erase(iterator f, iterator l) { rep.erase(f, l); }
void clear() { rep.clear(); }
public:
void resize(size_type hint) { rep.resize(hint); }
size_type bucket_count() const { return rep.bucket_count(); }
size_type max_bucket_count() const { return rep.max_bucket_count(); }
size_type elems_in_bucket(size_type n) const
{ return rep.elems_in_bucket(n); }
};
// Equality for hash_set: compares the two wrapped hashtables with their own
// operator== (declared a friend of hash_set so it can reach `rep`).
template <class V, class HF, class EqK, class A>
inline bool operator==(const hash_set<V, HF, EqK, A>& lhs,
                       const hash_set<V, HF, EqK, A>& rhs)
{
    return lhs.rep == rhs.rep;
}
// 如果编译器支持模板函数特化优先级
// 那么将全局的swap实现为使用hash_set私有的swap以提高效率
#ifdef __STL_FUNCTION_TMPL_PARTIAL_ORDER

// Free-function swap overload for hash_set; simply calls the member swap.
template <class V, class HF, class EqK, class A>
inline void swap(hash_set<V, HF, EqK, A>& lhs,
                 hash_set<V, HF, EqK, A>& rhs)
{
    lhs.swap(rhs);
}

#endif /* __STL_FUNCTION_TMPL_PARTIAL_ORDER */
// hash_multiset和hash_set除去允许key重复外, 其余性质一致
#ifndef __STL_LIMITED_DEFAULT_TEMPLATES
template <class Value,
          class HashFcn  = hash<Value>,
          class EqualKey = equal_to<Value>,
          class Alloc    = alloc>
#else
template <class Value, class HashFcn, class EqualKey, class Alloc = alloc>
#endif
// hash_multiset: wrapper around hashtable that inserts through the table's
// insert_equal* functions. All members forward to the wrapped table `rep`.
class hash_multiset
{
private:
    typedef hashtable<Value, Value, HashFcn, identity<Value>,
                      EqualKey, Alloc> ht;
    ht rep;     // the wrapped hashtable

public:
    typedef typename ht::key_type        key_type;
    typedef typename ht::value_type      value_type;
    typedef typename ht::hasher          hasher;
    typedef typename ht::key_equal       key_equal;

    typedef typename ht::size_type       size_type;
    typedef typename ht::difference_type difference_type;

    // pointer / reference / iterator all alias the const hashtable types.
    typedef typename ht::const_pointer   pointer;
    typedef typename ht::const_pointer   const_pointer;
    typedef typename ht::const_reference reference;
    typedef typename ht::const_reference const_reference;
    typedef typename ht::const_iterator  iterator;
    typedef typename ht::const_iterator  const_iterator;

    // Functor accessors.
    hasher hash_funct() const
    {
        return rep.hash_funct();
    }
    key_equal key_eq() const
    {
        return rep.key_eq();
    }

public:
    // Constructors; 100 is passed to the hashtable when no size is supplied.
    hash_multiset()
        : rep(100, hasher(), key_equal())
    {}
    explicit hash_multiset(size_type n)
        : rep(n, hasher(), key_equal())
    {}
    hash_multiset(size_type n, const hasher& hash_fn)
        : rep(n, hash_fn, key_equal())
    {}
    hash_multiset(size_type n, const hasher& hash_fn, const key_equal& equals)
        : rep(n, hash_fn, equals)
    {}

    // Range constructors: construct the table, then insert_equal the range.
#ifdef __STL_MEMBER_TEMPLATES
    template <class InputIterator>
    hash_multiset(InputIterator first, InputIterator last)
        : rep(100, hasher(), key_equal())
    {
        rep.insert_equal(first, last);
    }
    template <class InputIterator>
    hash_multiset(InputIterator first, InputIterator last, size_type n)
        : rep(n, hasher(), key_equal())
    {
        rep.insert_equal(first, last);
    }
    template <class InputIterator>
    hash_multiset(InputIterator first, InputIterator last, size_type n,
                  const hasher& hash_fn)
        : rep(n, hash_fn, key_equal())
    {
        rep.insert_equal(first, last);
    }
    template <class InputIterator>
    hash_multiset(InputIterator first, InputIterator last, size_type n,
                  const hasher& hash_fn, const key_equal& equals)
        : rep(n, hash_fn, equals)
    {
        rep.insert_equal(first, last);
    }
#else
    // Pointer ranges.
    hash_multiset(const value_type* first, const value_type* last)
        : rep(100, hasher(), key_equal())
    {
        rep.insert_equal(first, last);
    }
    hash_multiset(const value_type* first, const value_type* last, size_type n)
        : rep(n, hasher(), key_equal())
    {
        rep.insert_equal(first, last);
    }
    hash_multiset(const value_type* first, const value_type* last, size_type n,
                  const hasher& hash_fn)
        : rep(n, hash_fn, key_equal())
    {
        rep.insert_equal(first, last);
    }
    hash_multiset(const value_type* first, const value_type* last, size_type n,
                  const hasher& hash_fn, const key_equal& equals)
        : rep(n, hash_fn, equals)
    {
        rep.insert_equal(first, last);
    }

    // const_iterator ranges.
    hash_multiset(const_iterator first, const_iterator last)
        : rep(100, hasher(), key_equal())
    {
        rep.insert_equal(first, last);
    }
    hash_multiset(const_iterator first, const_iterator last, size_type n)
        : rep(n, hasher(), key_equal())
    {
        rep.insert_equal(first, last);
    }
    hash_multiset(const_iterator first, const_iterator last, size_type n,
                  const hasher& hash_fn)
        : rep(n, hash_fn, key_equal())
    {
        rep.insert_equal(first, last);
    }
    hash_multiset(const_iterator first, const_iterator last, size_type n,
                  const hasher& hash_fn, const key_equal& equals)
        : rep(n, hash_fn, equals)
    {
        rep.insert_equal(first, last);
    }
#endif /*__STL_MEMBER_TEMPLATES */

public:
    // Size queries and swap.
    size_type size() const
    {
        return rep.size();
    }
    size_type max_size() const
    {
        return rep.max_size();
    }
    bool empty() const
    {
        return rep.empty();
    }
    void swap(hash_multiset& other)
    {
        rep.swap(other.rep);
    }
    friend bool operator== __STL_NULL_TMPL_ARGS (const hash_multiset&,
                                                 const hash_multiset&);

    // Iteration.
    iterator begin() const
    {
        return rep.begin();
    }
    iterator end() const
    {
        return rep.end();
    }

public:
    // Insertion goes through insert_equal.
    iterator insert(const value_type& value)
    {
        return rep.insert_equal(value);
    }
#ifdef __STL_MEMBER_TEMPLATES
    template <class InputIterator>
    void insert(InputIterator first, InputIterator last)
    {
        rep.insert_equal(first, last);
    }
#else
    void insert(const value_type* first, const value_type* last)
    {
        rep.insert_equal(first, last);
    }
    void insert(const_iterator first, const_iterator last)
    {
        rep.insert_equal(first, last);
    }
#endif /*__STL_MEMBER_TEMPLATES */
    iterator insert_noresize(const value_type& value)
    {
        return rep.insert_equal_noresize(value);
    }

    // Lookup.
    iterator find(const key_type& k) const
    {
        return rep.find(k);
    }
    size_type count(const key_type& k) const
    {
        return rep.count(k);
    }
    pair<iterator, iterator> equal_range(const key_type& k) const
    {
        return rep.equal_range(k);
    }

    // Removal.
    size_type erase(const key_type& k)
    {
        return rep.erase(k);
    }
    void erase(iterator pos)
    {
        rep.erase(pos);
    }
    void erase(iterator first, iterator last)
    {
        rep.erase(first, last);
    }
    void clear()
    {
        rep.clear();
    }

public:
    // Bucket interface.
    void resize(size_type hint)
    {
        rep.resize(hint);
    }
    size_type bucket_count() const
    {
        return rep.bucket_count();
    }
    size_type max_bucket_count() const
    {
        return rep.max_bucket_count();
    }
    size_type elems_in_bucket(size_type n) const
    {
        return rep.elems_in_bucket(n);
    }
};
// Equality for hash_multiset: compares the two wrapped hashtables with their
// own operator== (declared a friend of hash_multiset so it can reach `rep`).
template <class V, class HF, class EqK, class A>
inline bool operator==(const hash_multiset<V, HF, EqK, A>& lhs,
                       const hash_multiset<V, HF, EqK, A>& rhs)
{
    return lhs.rep == rhs.rep;
}
// 如果编译器支持模板函数特化优先级
// 那么将全局的swap实现为使用hash_multiset私有的swap以提高效率
#ifdef __STL_FUNCTION_TMPL_PARTIAL_ORDER

// Free-function swap overload for hash_multiset; simply calls the member swap.
template <class V, class HF, class EqK, class A>
inline void swap(hash_multiset<V, HF, EqK, A>& lhs,
                 hash_multiset<V, HF, EqK, A>& rhs)
{
    lhs.swap(rhs);
}

#endif /* __STL_FUNCTION_TMPL_PARTIAL_ORDER */
#if defined(__sgi) && !defined(__GNUC__) && (_MIPS_SIM != _MIPS_SIM_ABI32)
#pragma reset woff 1174
#endif
__STL_END_NAMESPACE
#endif /* __SGI_STL_INTERNAL_HASH_SET_H */
// Local Variables:
// mode:C++
// End:
示例1:
#include<iostream>
#include<string>
#include<iterator>
#include<algorithm>
#include<hash_set>
#include<windows.h>
using namespace std;
// Example 1: build two hash_sets from int arrays and print their contents.
// Returns 0 on completion (standard C++ requires main to return int).
int main(){
    const int N = 3;
    int s1[N] = {1, 2, 3};
    int s2[N] = {4, 5, 6};
    hash_set<int> sa(s1, s1 + N);   // set sa, filled from array s1
    hash_set<int> sb(s2, s2 + N);   // set sb, filled from array s2
    hash_set<int> sc;               // set sc, default-constructed (empty)

    // Writes each element to cout followed by a space.
    ostream_iterator<int> output(cout, " ");

    cout<<"输出集合sa的元素:";
    copy(sa.begin(), sa.end(), output);
    cout<<"\n\n输出集合sb的元素:";
    copy(sb.begin(), sb.end(), output);
    cout<<endl;

    // Keeps the console window open when run from an IDE (Windows "pause").
    system("pause");
    return 0;
}
示例2:
/*
*
************************************************************************************
* hash_set哈希集合容器的基础说明:
************************************************************************************
*
* hash_set哈希集合容器:使用hashtable数据结构的具有高效数据检索的关联容器
*
* 不提供反向迭代器,只有前向迭代器iterator和const_iterator
* 不允许插入重复的元素键值
* Hashed Associative Container Simple Associative Container Unique Associative Container
*
* 目前还不是C++的标准容器,只是SGI C++ STL的一个扩展容器
* 使用hash_set必须使用预处理指令#include <hash_set>
*
**************************************************************************************
*
* 创建hash_set对象:
* 1.hash_set<int> hs; //键值比较使用默认的函数对象equal_to<Value>
* 2.hash_set(size_type n); //在质数列表中找出第一个大于等于n的质数作为表长:hash_set<int> hs(100);
* 3.hash_set(size_type n,const hasher& h); //hash函数对象为h
* 4.hash_set(size_type n,const hasher& h,const key_equal& k);//键值比较函数对象k
* 5.hash_set(const hash_set& h); //用一个hash集合容器拷贝生成另一个hash集合容器:hash_set<int> hs2(hs);
*
**************************************************************************************
*
* 元素的插入
* //typedef Value value_type; (hash_set的元素本身就是键值, 不是pair<const key,T>, 后者是hash_map的value_type)
* pair<iterator,bool> insert(const value_type& v);//second:返回true/false插入成功标志
* void insert(iterator pos,const value_type& v);
*
**************************************************************************************
*
* 元素的删除
* void erase(iterator pos);
* size_type erase(const key_type& k); //删除等于键值k的元素
* void erase(first,last); //删除[first,last)区间的元素
* void clear();
*
**************************************************************************************
*
* 访问与搜索
*
* iterator begin();iterator end(); //不会将元素排序遍历出来
*
* iterator find(const key_type& k) const; //对于非默认类型如char*,在搜索时应定义相关的函数对象
*
* 其它常用函数
* bool empty() const;
* size_type size() const;
* size_type bucket_count() const; //获得hash表的表长
* void swap(hash_set& hs);
* void resize(size_type hint);
* iterator lower_bound();iterator upper_bound();pair<iterator,iterator> equal_range();//下界、上界、确定区间
*
* 在SGI STL中,提供了以下hash函数:
* struct hash<char*>
* struct hash<const char*>
* struct hash<char>
* struct hash<unsigned char>
* struct hash<signed char>
* struct hash<short>
* struct hash<unsigned short>
* struct hash<int>
* struct hash<unsigned int>
* struct hash<long>
* struct hash<unsigned long>
*
* hash函数决定了如何划分散列表
*
*
*
********************************************
** cumirror ** tongjinooo@163.com ** **
********************************************
*
*/
#include <cstring>
#include <hash_set>
#include <iostream>
// Record type stored in the hash_set example.
// The string members are const char* because the example initialises them
// from string literals, which may not be bound to a plain char* in C++11 and
// later. The struct does not own or copy the strings it points to.
struct student{
    const char* name;
    int age;
    const char* city;
    const char* phone;
};

// Equality predicate for student used as the set's key comparison.
// Two students compare equal when their city strings are equal, i.e. city
// (not name) acts as the key; swap a.city/b.city for a.name/b.name to key on
// the name instead (stu_hash must then hash the same field).
// operator() is const so the predicate can be invoked on a const functor,
// matching strequal.
class stuequal{
public:
    bool operator() (const student& a,const student& b) const {
        return strcmp(a.city,b.city)==0;
    }
};

// Hash functor for student.
// Hashes the same field that stuequal compares (city), so records that
// compare equal also hash equal. Folds the characters of the NUL-terminated
// city string with res = 5*res + c and returns the result as size_t.
//typedef unsigned int size_t;
struct stu_hash{
    size_t operator()(const student& stu) const
    {
        unsigned long res = 0;
        for (const char* s = stu.city; *s; ++s) {
            res = 5*res + *s;
        }
        return size_t(res);
    }
};
// Equality functor for C strings: true when the two NUL-terminated strings
// have identical contents (compares characters, not pointer values).
struct strequal{
    bool operator () (const char* a,const char* b)const{
        const int order = strcmp(a, b);
        return order == 0;
    }
};
int main(){
using namespace std;
hash_set<const char*,hash<const char*>,strequal> a;
a.insert("tongjin");
a.insert("cumirror");
a.insert("makelaugh");
a.insert("feiguodeyun");
// hash<const char*>默认提供的hash函数对象
hash_set<const char*,hash<const char*>,strequal>::const_iterator b=a.find("tongjin");
cout<<*b<<" is "<<(b!=a.end()?"present":"not present")<<endl;
// 对于自定义类型数据,使用hash相关容器时应构造hash函数对象、比较函数对象
// 注意区别hash函数对象与比较函数对象各自的作用
student s[]={
{"童进",23,"长沙","XXX"},
{"老大",23,"武汉","XXX"},
{"饺子",23,"福州","XXX"},
{"王老虎",23,"地球","XXX"},
{"周润发",23,"香港","XXX"},
{"周星星",23,"香港","XXX"}, //city重复
{"童进",23,"香港","XXX"} //name重复、city也有重复
};
hash_set<student,stu_hash,stuequal> c;
c.insert(s[0]);
c.insert(s[1]);
c.insert(s[2]);
c.insert(s[3]);
c.insert(s[4]);
c.insert(s[5]);
c.insert(s[6]);
// 注意hash容器并不能实现排序
for(hash_set<student,stu_hash,stuequal>::iterator i=c.begin();i!=c.end();i++){
cout<<i->name<<" "<<i->age<<" "<<i->city<<endl;
}
return 0;
}

本文深入探讨了哈希集合容器hash_set的底层数据结构、创建方式、元素操作、访问与搜索等核心特性,并通过实例展示了其在实际应用中的使用方法。
2346

被折叠的 条评论
为什么被折叠?



