前言
ngx中的hash表有两种,一种是常规的hash表,另外一种是带有通配符的hash表(其中包含前向通配符和后向通配符)。而带通配符的hash表是基于常规hash表建立的。
ngx_hash_t是nginx中的hash表的实现,定义和实现位于src/core/ngx_hash.c和src/core/ngx_hash.h文件中。ngx_hash_t的实现与一般的hash表实现大同小异。对于常用的解决冲突的方法有线性探测、二次线性探测和开链法。ngx_hash_t所使用的就是开链法。
但是ngx_hash_t的实现又有几个显著的特点:
1、ngx_hash_t不像其他的hash表实现那样可以随时插入、删除元素,它只能一次性初始化:整个hash表构建完成以后,就不能再插入和删除元素了。
2、ngx_hash_t的开链并不是真正的开链,实际上是一段连续的地址空间,可以看成是一个数组。因为ngx_hash_t在初始化的时候,会先进行预计算,提前算出每个桶里会放入多少元素,这样就提前知道每个桶的大小了。那么就不需要使用链表,一段连续的存储空间就足够了,这也在一定程度上节省了内存的使用。
1、hash结构
nginx的hash结构为ngx_hash_t,元素结构为ngx_hash_elt_t。定义如下
/*
 * One entry stored inside a hash bucket. Entries of a bucket are laid out
 * back to back; ngx_hash_find() walks them until it meets an entry whose
 * value is NULL.
 */
typedef struct {
void *value;// value of the <key, value> pair; NULL marks the end of a bucket
u_short len; // length of name in bytes
u_char name[1];// first byte of the key bytes (ngx_hash_init() stores the lowercased key here)
} ngx_hash_elt_t;
/* A built hash table: an array of bucket pointers plus its length. */
typedef struct {
ngx_hash_elt_t **buckets;// bucket array (size entries); an entry is NULL when the bucket is empty
ngx_uint_t size;// number of buckets
} ngx_hash_t;
hash初始化的结构ngx_hash_init_t,用来把hash表的相关数据封装起来,作为参数传递给ngx_hash_init()或ngx_hash_wildcard_init()。这两个函数主要应用在http模块中,如ngx_http_init_headers_in_hash、ngx_http_server_names、ngx_http_merge_types等函数中。定义如下
/*
 * Parameter bundle passed to ngx_hash_init() / ngx_hash_wildcard_init().
 */
typedef struct {
ngx_hash_t *hash;// hash to initialize; when NULL, ngx_hash_init() allocates an ngx_hash_wildcard_t from pool
ngx_hash_key_pt key;// hash function pointer
ngx_uint_t max_size;// upper limit for the number of buckets
ngx_uint_t bucket_size;// size limit of a single bucket, in bytes
char *name;// name of the hash, used in log messages
ngx_pool_t *pool;// pool the hash table itself is allocated from
ngx_pool_t *temp_pool;// pool for temporary allocations made while building
} ngx_hash_init_t;
hash表中关键字的结构ngx_hash_key_t,定义如下
/* Input element for hash construction: one <key, value> pair plus its hash. */
typedef struct {
ngx_str_t key;// key of the <key, value> pair
ngx_uint_t key_hash;// hash value computed from key
void *value;// value of the <key, value> pair
} ngx_hash_key_t;
在构建ngx_hash_wildcard_t的时候,需要对带通配符的那些key进行预处理。而当有一组key,这些key里面既有无通配符的key,也有包含通配符的key的时候,我们就需要构建三个hash表:一个包含普通key的hash表,一个包含前向通配符的hash表,一个包含后向通配符的hash表。在这种情况下,为了方便大家构造hash表,nginx提供了一个辅助类型ngx_hash_keys_arrays_t,其结构定义如下
/*
 * Helper that sorts keys into plain, head-wildcard and tail-wildcard groups
 * before the three hash tables are built.
 *
 * NOTE(review): the field notes below are translated from the original
 * annotations; the code that fills these fields is not part of this file
 * excerpt, so they should be confirmed against ngx_hash_add_key().
 */
typedef struct {
ngx_uint_t hsize;// number of buckets of the hash tables to build; used by all three kinds of table
ngx_pool_t *pool;// pool used to build the hash tables
ngx_pool_t *temp_pool;// temporary pool that may be used while building the three tables
ngx_array_t keys;// array of non-wildcard keys
ngx_array_t *keys_hash;// two-dimensional array: first index is the bucket number, keys_hash[i] holds the keys whose hash modulo hsize equals i
ngx_array_t dns_wc_head;// head-wildcard keys after preprocessing, e.g. "*.abc.com" becomes "com.abc."
ngx_array_t *dns_wc_head_hash;// like keys_hash; used to store keys and detect conflicts
ngx_array_t dns_wc_tail;// tail-wildcard keys after preprocessing, e.g. "abc.com.*" becomes "abc.com."
ngx_array_t *dns_wc_tail_hash;// presumably the tail-wildcard counterpart of dns_wc_head_hash -- TODO confirm
} ngx_hash_keys_arrays_t;
在定义一个这个类型的变量,并对字段pool和temp_pool赋值后,就可以调用函数ngx_hash_add_key把所有的key加入到这个结构中,该函数会自动实现普通key、前向通配符key和后向通配符key的分类和检查,并将这些值放到相应的字段中去。
2、hash的操作
2.1 hash的初始化
是由函数ngx_hash_init来完成的,代码如下
/*
 * Build a read-only hash table from the nelts entries of names.
 *
 * The function works in phases:
 *   1. validate the limits in hinit and check that every element fits into
 *      a single bucket;
 *   2. search for the smallest bucket count (up to hinit->max_size) for
 *      which no bucket exceeds hinit->bucket_size;
 *   3. compute the exact byte size of every bucket, allocate the bucket
 *      pointer array and one contiguous element area from hinit->pool;
 *   4. copy the elements (keys are lowercased) into their buckets and
 *      terminate every non-empty bucket with a NULL value.
 *
 * Entries whose key.data is NULL are skipped when filling the table.
 *
 * When hinit->hash is NULL an ngx_hash_wildcard_t is allocated from
 * hinit->pool and the bucket array is placed right behind it.
 *
 * Returns NGX_OK on success; NGX_ERROR when a limit is invalid, an element
 * cannot fit into a bucket, a bucket would exceed what the u_short
 * bookkeeping can represent, or an allocation fails.
 */
ngx_int_t
ngx_hash_init(ngx_hash_init_t *hinit, ngx_hash_key_t *names, ngx_uint_t nelts)
{
    u_char          *elts;
    size_t           len;
    u_short         *test;
    ngx_uint_t       i, n, key, size, start, bucket_size;
    ngx_hash_elt_t  *elt, **buckets;

    /* with zero buckets every "% size" below would divide by zero */
    if (hinit->max_size == 0) {
        ngx_log_error(NGX_LOG_EMERG, hinit->pool->log, 0,
                      "could not build %s, you should "
                      "increase %s_max_size: %i",
                      hinit->name, hinit->name, hinit->max_size);
        return NGX_ERROR;
    }

    /*
     * per-bucket byte counts are kept in u_short and later rounded up to
     * the cache line size, so a bucket must not be allowed to grow past
     * what still fits after that rounding
     */
    if (hinit->bucket_size > 65536 - ngx_cacheline_size) {
        ngx_log_error(NGX_LOG_EMERG, hinit->pool->log, 0,
                      "could not build %s, too large "
                      "%s_bucket_size: %i",
                      hinit->name, hinit->name, hinit->bucket_size);
        return NGX_ERROR;
    }

    /* every element plus the terminating pointer must fit into one bucket */
    for (n = 0; n < nelts; n++) {
        if (hinit->bucket_size < NGX_HASH_ELT_SIZE(&names[n]) + sizeof(void *))
        {
            ngx_log_error(NGX_LOG_EMERG, hinit->pool->log, 0,
                          "could not build the %s, you should "
                          "increase %s_bucket_size: %i",
                          hinit->name, hinit->name, hinit->bucket_size);
            return NGX_ERROR;
        }
    }

    /* scratch array: one byte counter per candidate bucket */
    test = ngx_alloc(hinit->max_size * sizeof(u_short), hinit->pool->log);
    if (test == NULL) {
        return NGX_ERROR;
    }

    /* estimate the bucket count to start the search from */
    bucket_size = hinit->bucket_size - sizeof(void *);

    start = nelts / (bucket_size / (2 * sizeof(void *)));
    start = start ? start : 1;

    /* for large tables with few buckets per element, start near max_size */
    if (hinit->max_size > 10000 && nelts && hinit->max_size / nelts < 100) {
        start = hinit->max_size - 1000;
    }

    /* find the smallest bucket count for which every bucket fits */
    for (size = start; size <= hinit->max_size; size++) {

        ngx_memzero(test, size * sizeof(u_short));

        for (n = 0; n < nelts; n++) {
            if (names[n].key.data == NULL) {
                continue;
            }

            key = names[n].key_hash % size;

            /* sum in size_t so the comparison cannot be fooled by wrap */
            len = test[key] + NGX_HASH_ELT_SIZE(&names[n]);

            if (len > bucket_size) {
                goto next;
            }

            test[key] = (u_short) len;
        }

        goto found;

    next:

        continue;
    }

    /*
     * no bucket count fits: fall back to max_size.  Clamping (instead of
     * decrementing the loop variable) keeps size within the test[] array
     * even when the estimated start was already above max_size.
     */
    size = hinit->max_size;

    ngx_log_error(NGX_LOG_WARN, hinit->pool->log, 0,
                  "could not build optimal %s, you should increase "
                  "either %s_max_size: %i or %s_bucket_size: %i; "
                  "ignoring %s_bucket_size",
                  hinit->name, hinit->name, hinit->max_size,
                  hinit->name, hinit->bucket_size, hinit->name);

found:

    /* every bucket reserves room for the terminating NULL value pointer */
    for (i = 0; i < size; i++) {
        test[i] = sizeof(void *);
    }

    /* compute the byte size of every bucket */
    for (n = 0; n < nelts; n++) {
        if (names[n].key.data == NULL) {
            continue;
        }

        key = names[n].key_hash % size;
        len = test[key] + NGX_HASH_ELT_SIZE(&names[n]);

        /* bucket_size may be ignored here, so guard the u_short counter */
        if (len > 65536 - ngx_cacheline_size) {
            ngx_log_error(NGX_LOG_EMERG, hinit->pool->log, 0,
                          "could not build %s, you should "
                          "increase %s_max_size: %i",
                          hinit->name, hinit->name, hinit->max_size);
            ngx_free(test);
            return NGX_ERROR;
        }

        test[key] = (u_short) len;
    }

    len = 0;

    /* total bytes needed for all non-empty buckets, cache-line aligned */
    for (i = 0; i < size; i++) {
        if (test[i] == sizeof(void *)) {
            continue;
        }

        test[i] = (u_short) (ngx_align(test[i], ngx_cacheline_size));

        len += test[i];
    }

    /* allocate the bucket pointer array */
    if (hinit->hash == NULL) {
        /* wildcard case: header and bucket array share one allocation */
        hinit->hash = ngx_pcalloc(hinit->pool, sizeof(ngx_hash_wildcard_t)
                                             + size * sizeof(ngx_hash_elt_t *));
        if (hinit->hash == NULL) {
            ngx_free(test);
            return NGX_ERROR;
        }

        buckets = (ngx_hash_elt_t **)
                      ((u_char *) hinit->hash + sizeof(ngx_hash_wildcard_t));

    } else {
        buckets = ngx_pcalloc(hinit->pool, size * sizeof(ngx_hash_elt_t *));
        if (buckets == NULL) {
            ngx_free(test);
            return NGX_ERROR;
        }
    }

    /* one contiguous area for all elements, with slack for alignment */
    elts = ngx_palloc(hinit->pool, len + ngx_cacheline_size);
    if (elts == NULL) {
        ngx_free(test);
        return NGX_ERROR;
    }

    elts = ngx_align_ptr(elts, ngx_cacheline_size);

    /* hand every non-empty bucket its slice of the element area */
    for (i = 0; i < size; i++) {
        if (test[i] == sizeof(void *)) {
            continue;
        }

        buckets[i] = (ngx_hash_elt_t *) elts;
        elts += test[i];
    }

    /* test[] is reused as the current write offset inside each bucket */
    for (i = 0; i < size; i++) {
        test[i] = 0;
    }

    for (n = 0; n < nelts; n++) {
        if (names[n].key.data == NULL) {
            continue;
        }

        key = names[n].key_hash % size;
        elt = (ngx_hash_elt_t *) ((u_char *) buckets[key] + test[key]);

        elt->value = names[n].value;
        elt->len = (u_short) names[n].key.len;

        ngx_strlow(elt->name, names[n].key.data, names[n].key.len);

        test[key] = (u_short) (test[key] + NGX_HASH_ELT_SIZE(&names[n]));
    }

    /* terminate every non-empty bucket with a NULL value */
    for (i = 0; i < size; i++) {
        if (buckets[i] == NULL) {
            continue;
        }

        elt = (ngx_hash_elt_t *) ((u_char *) buckets[i] + test[i]);

        elt->value = NULL;
    }

    ngx_free(test);

    hinit->hash->buckets = buckets;
    hinit->hash->size = size;

    return NGX_OK;
}
2.2有通配符hash的初始化
在构造通配符hash表时,实现上是构造了一个hash表的链表,是通过hash表中的key链接起来的,如,对于”*.abc.com”将会构造2个hash表,第一个表中有一个key为com的表项,该表项的value包含有指向第二个hash表的指针,而第二个hash表中有一个表项abc,该表项的value包含有指向*.abc.com对应的value指针。比如在查询www.abc.com的时候,先查com,通过查com可以找到第二级的hash表,在第二级hash表中,再查找abc,依次类推,直到在某一级的hash表中查到的表项对应的value对应一个真正的值而非一个指向下一级hash表的指针的时候,查询过程结束。所以,names数组中的元素的value值低两位bit必须为0,如果不满足这个条件,hash表查询不出正确结果。通配符hash表的构造是通过ngx_hash_wildcard_init来实现的,代码如下
/*
 * Build a wildcard hash from the nelts entries of names.
 *
 * Each key is split at its first '.'; the part before the dot becomes an
 * entry of the table built at this level, and the remainders of all keys
 * that share that first part are passed to a recursive call which builds
 * the next-level table.
 *
 * Only consecutive entries are grouped: the inner loop stops at the first
 * following entry that does not match. NOTE(review): this presumably
 * relies on the caller passing names sorted so that equal prefixes are
 * adjacent -- confirm against the callers.
 *
 * The two low bits of a stored value are used as tags (bit 1 set: the
 * value points to a next-level wildcard hash; bit 0 set: the key part was
 * followed by a dot), so the value pointers supplied in names must have
 * their two low bits clear.
 *
 * Returns NGX_OK on success, NGX_ERROR if an array push, the recursive
 * call or ngx_hash_init() fails.
 */
ngx_int_t
ngx_hash_wildcard_init(ngx_hash_init_t *hinit, ngx_hash_key_t *names,
ngx_uint_t nelts)
{
size_t len, dot_len;
ngx_uint_t i, n, dot;
ngx_array_t curr_names, next_names;
ngx_hash_key_t *name, *next_name;
ngx_hash_init_t h;
ngx_hash_wildcard_t *wdc;
/* curr_names collects the first-part entries that form this level's table */
if (ngx_array_init(&curr_names, hinit->temp_pool, nelts,
sizeof(ngx_hash_key_t))
!= NGX_OK)
{
return NGX_ERROR;
}
/* next_names is a scratch array holding the remainders of the current group */
if (ngx_array_init(&next_names, hinit->temp_pool, nelts,
sizeof(ngx_hash_key_t))
!= NGX_OK)
{
return NGX_ERROR;
}
/* i is advanced by the grouping loop below, so each pass consumes one group */
for (n = 0; n < nelts; n = i) {
#if 0
ngx_log_error(NGX_LOG_ALERT, hinit->pool->log, 0,
"wc0: \"%V\"", &names[n].key);
#endif
/* find the end of the first part; dot is set when a '.' terminates it */
dot = 0;
for (len = 0; len < names[n].key.len; len++) {
if (names[n].key.data[len] == '.') {
dot = 1;
break;
}
}
/* the first part (without the dot) becomes an entry at this level */
name = ngx_array_push(&curr_names);
if (name == NULL) {
return NGX_ERROR;
}
name->key.len = len;
name->key.data = names[n].key.data;
name->key_hash = hinit->key(name->key.data, name->key.len);
name->value = names[n].value;
#if 0
ngx_log_error(NGX_LOG_ALERT, hinit->pool->log, 0,
"wc1: \"%V\" %ui", &name->key, dot);
#endif
/*
 * dot_len always skips the first part plus one separator byte; len is
 * extended over the dot only when one was actually found
 */
dot_len = len + 1;
if (dot) {
len++;
}
/* start a fresh group of remainders */
next_names.nelts = 0;
/* the key continues past the first part: queue its remainder */
if (names[n].key.len != len) {
next_name = ngx_array_push(&next_names);
if (next_name == NULL) {
return NGX_ERROR;
}
next_name->key.len = names[n].key.len - len;
next_name->key.data = names[n].key.data + len;
next_name->key_hash = 0;
next_name->value = names[n].value;
#if 0
ngx_log_error(NGX_LOG_ALERT, hinit->pool->log, 0,
"wc2: \"%V\"", &next_name->key);
#endif
}
/* gather the following entries that start with the same len bytes */
for (i = n + 1; i < nelts; i++) {
if (ngx_strncmp(names[n].key.data, names[i].key.data, len) != 0) {
break;
}
/*
 * the first key had no dot: a longer following key only belongs to
 * this group when its first part ends right here with a '.'
 */
if (!dot
&& names[i].key.len > len
&& names[i].key.data[len] != '.')
{
break;
}
next_name = ngx_array_push(&next_names);
if (next_name == NULL) {
return NGX_ERROR;
}
next_name->key.len = names[i].key.len - dot_len;
next_name->key.data = names[i].key.data + dot_len;
next_name->key_hash = 0;
next_name->value = names[i].value;
#if 0
ngx_log_error(NGX_LOG_ALERT, hinit->pool->log, 0,
"wc3: \"%V\"", &next_name->key);
#endif
}
if (next_names.nelts) {
/* build the next-level table; h.hash == NULL makes ngx_hash_init() allocate it */
h = *hinit;
h.hash = NULL;
if (ngx_hash_wildcard_init(&h, (ngx_hash_key_t *) next_names.elts,
next_names.nelts)
!= NGX_OK)
{
return NGX_ERROR;
}
wdc = (ngx_hash_wildcard_t *) h.hash;
/* the first key ended at this level: keep its value on the nested table */
if (names[n].key.len == len) {
wdc->value = names[n].value;
}
/* store the pointer to the next-level hash table in value, tagged in the two low bits */
name->value = (void *) ((uintptr_t) wdc | (dot ? 3 : 2));
} else if (dot) {
/* no next level, but the key part was followed by a dot: tag the data pointer with bit 0 */
name->value = (void *) ((uintptr_t) name->value | 1);
}
}
/* build this level's table from the collected first-part entries */
if (ngx_hash_init(hinit, (ngx_hash_key_t *) curr_names.elts,
curr_names.nelts)
!= NGX_OK)
{
return NGX_ERROR;
}
return NGX_OK;
}
2.3 hash表的查找
有三种查找:普通查找、前向通配符查找和后向通配符查找,分别由ngx_hash_find、ngx_hash_find_wc_head和ngx_hash_find_wc_tail来完成
/*
 * Look up an exact key in a built hash table.
 *
 * hash - table to search
 * key  - precomputed hash value of name
 * name - key bytes to match (compared byte for byte against stored names)
 * len  - number of bytes in name
 *
 * Returns the value stored for the key, or NULL when the bucket is empty
 * or holds no entry with the same length and bytes.
 */
void *
ngx_hash_find(ngx_hash_t *hash, ngx_uint_t key, u_char *name, size_t len)
{
    ngx_uint_t       pos;
    ngx_hash_elt_t  *entry;

    entry = hash->buckets[key % hash->size];

    if (entry == NULL) {
        return NULL;
    }

    /* a NULL value terminates the entries of a bucket */
    while (entry->value != NULL) {

        if (len == (size_t) entry->len) {

            pos = 0;

            while (pos < len && name[pos] == entry->name[pos]) {
                pos++;
            }

            if (pos == len) {
                return entry->value;
            }
        }

        /* the next entry starts at the pointer-aligned end of this name */
        entry = (ngx_hash_elt_t *) ngx_align_ptr(&entry->name[0] + entry->len,
                                                 sizeof(void *));
    }

    return NULL;
}
/*
 * Look up name in a head-wildcard hash.
 *
 * The last dot-separated part of name is searched at this level; when the
 * entry found points to a next-level table, the search continues there
 * with the part of name that precedes the last dot.
 *
 * The two low bits of a stored value are tags:
 *   00 - data pointer, valid for both "example.com" and "*.example.com";
 *   01 - data pointer, valid for "*.example.com" only;
 *   10 - pointer to a next-level wildcard hash, allowing both
 *        "example.com" and "*.example.com";
 *   11 - pointer to a next-level wildcard hash, allowing
 *        "*.example.com" only.
 *
 * Returns the matching data pointer, the value attached to the wildcard
 * hash that was reached when nothing more specific matches, or NULL.
 */
void *
ngx_hash_find_wc_head(ngx_hash_wildcard_t *hwc, u_char *name, size_t len)
{
    void                 *found, *deeper;
    uintptr_t             tag;
    ngx_uint_t            pos, label, hash;
    ngx_hash_wildcard_t  *nested;

    /* label = offset just past the last '.', or 0 when name has no dot */
    for (label = len; label > 0 && name[label - 1] != '.'; label--) {
        /* void */
    }

    /* hash the last part of the name */
    hash = 0;
    pos = label;

    while (pos < len) {
        hash = ngx_hash(hash, name[pos]);
        pos++;
    }

    found = ngx_hash_find(&hwc->hash, hash, &name[label], len - label);

    if (found == NULL) {
        return hwc->value;
    }

    tag = (uintptr_t) found & 3;

    if (tag == 0) {
        return found;
    }

    /* whole name consumed, but the entry is valid for "*.example.com" only */
    if (label == 0 && (tag & 1)) {
        return NULL;
    }

    if (tag == 1) {
        return (void *) ((uintptr_t) found & (uintptr_t) ~3);
    }

    /* tag is 2 or 3: strip the tag bits to get the next-level table */
    nested = (ngx_hash_wildcard_t *) ((uintptr_t) found & (uintptr_t) ~3);

    if (label == 0) {
        /* "example.com" */
        return nested->value;
    }

    /* continue with the part of name in front of the last dot */
    deeper = ngx_hash_find_wc_head(nested, name, label - 1);

    if (deeper != NULL) {
        return deeper;
    }

    return nested->value;
}
/*
 * Look up name in a tail-wildcard hash.
 *
 * The first dot-separated part of name is searched at this level; when the
 * entry found points to a next-level table, the search continues there
 * with the part of name that follows the first dot.
 *
 * The two low bits of a stored value are tags:
 *   00 - value is a data pointer;
 *   11 - value is a pointer to a wildcard hash allowing "example.*".
 *
 * Returns NULL when name contains no dot; otherwise the matching data
 * pointer, or the value attached to the wildcard hash that was reached
 * when nothing more specific matches.
 */
void *
ngx_hash_find_wc_tail(ngx_hash_wildcard_t *hwc, u_char *name, size_t len)
{
    void                 *found, *deeper;
    ngx_uint_t            dot, hash;
    ngx_hash_wildcard_t  *nested;

    /* hash the first part of the name, stopping at the first '.' */
    hash = 0;
    dot = 0;

    while (dot < len && name[dot] != '.') {
        hash = ngx_hash(hash, name[dot]);
        dot++;
    }

    if (dot == len) {
        /* no dot at all */
        return NULL;
    }

    found = ngx_hash_find(&hwc->hash, hash, name, dot);

    if (found == NULL) {
        return hwc->value;
    }

    if (((uintptr_t) found & 2) == 0) {
        return found;
    }

    /* strip the tag bits and descend with the rest of the name after the dot */
    nested = (ngx_hash_wildcard_t *) ((uintptr_t) found & (uintptr_t) ~3);

    dot++;

    deeper = ngx_hash_find_wc_tail(nested, &name[dot], len - dot);

    if (deeper != NULL) {
        return deeper;
    }

    return nested->value;
}
参考:
http://blog.youkuaiyun.com/livelylittlefish