跳表的原理就不写了,主要记录下实现中遇到的问题。
1.插入新节点时分配高度:原本以为高度是随机的,且越随机越好,于是简单地写下height = rand() % maxheight + 1,当然也不会报什么错。但写完后和map一对比,数据规模2W时已经比map慢10倍左右,而且随着数据量增大,耗时也不是按对数级增长(花费的时间更多),这和理论结果不符。想了想,为什么跳表在平均情况下能和rbtree差不多呢?最好的结果是每一层跳过的节点数量都是上一层的2倍,且分布均匀,这样理想状态就类似于avltree了。简单打印并调试了一下查找过程,发现问题就出在每层的节点数量上:
a)按照上面计算方式,假设有L层,N个数据。每一层的数量会为N/L, 2*N/L, 3*N/L,..,N。这样即使分布均匀,那每一层能跳的最多节点也就是N/L(理想状态第一层可以跳N,第二层能跳N/2,...1)。这无疑会导致查询效率较低。
b)改成:每次有1/2的概率层数+1,否则停止。这样每一层数量会为 N / 2^(L-1), N / 2^(L-2),...,N。比较符合理想状态,改完后效率立刻上来了。操作耗时大概是map的2倍。
2.写这个东西主要是工作中用到ssdb,而存储层的leveldb在内存数据结构上是跳表。突然想到这玩意好像还没实现过,于是写了下。自然的,也要写hrscan这样具备反向查找的功能。写完后觉得,反向查找其实也可以用正向实现,找到节点后直接在最底层反向遍历即可。不过既然是反向,那比较多的应用场景应该是从尾部开始,取多少个。于是感觉还是手动写个从尾部遍历的也许会更符合应用场景吧。
附实现代码和测试结果:
.h
#pragma once
#include <stdlib.h>
#include <string.h>
#include <functional>
#include <map>
#include <new>
#include <ostream>
namespace
{
// One level's link of a skip-list node: a doubly-linked-list hook plus a
// back pointer (`header`) to the object that owns the link.
struct LKNode
{
    LKNode* next;
    LKNode* prev;
    void* header;
    // Initialise member by member so that every field is cleared; a
    // byte-wise clear sized by sizeof(this) would only cover as many bytes
    // as a pointer occupies and leave the remaining fields indeterminate.
    LKNode() : next(NULL), prev(NULL), header(NULL) {}
};
// A skip-list element: the key/value pair followed by one LKNode per level.
// `node` is declared with a single element, but the constructor writes
// `height` of them, so the object has to be constructed inside storage that
// is large enough for `height` links (SkipList::Alloc sizes it that way).
template<typename key_t, typename val_t>
struct SLNode
{
    key_t key;
    val_t val;
    int height;
    LKNode node[1];
    // h: number of levels this element takes part in.
    // k, v: key and value copied into the element.
    // The initialiser list follows the declaration order of the members,
    // which is the order in which they are actually initialised.
    SLNode(int h, const key_t& k, const val_t& v)
        : key(k)
        , val(v)
        , height(h)
    {
        for (int i = 0; i < height; ++i)
        {
            node[i].next = node[i].prev = NULL;
            // Lets any link be mapped back to the element that owns it.
            node[i].header = this;
        }
    }
};
// Default height generator: starting from 1, keep adding a level for as long
// as rand() yields an odd number and the cap nMaxHeight has not been reached.
struct DefaultRand
{
    int operator()(int nMaxHeight)
    {
        int level = 1;
        for (;;)
        {
            // The coin is tossed before the cap is tested, so one rand()
            // call is consumed on every pass, including the last one.
            const bool heads = (rand() & 1) != 0;
            if (!heads)
                break;
            if (level >= nMaxHeight)
                break;
            ++level;
        }
        return level;
    }
};
}
// Ordered key -> value container implemented as a skip list whose levels are
// doubly linked, so that it can be walked forwards and backwards.
//   key_t      key type; default-constructed for the two sentinel nodes
//   val_t      mapped type; default-constructed for the two sentinel nodes
//   randfun    functor `int(int maxHeight)` giving the height of a new node
//   keycomp_t  ordering functor over key_t (std::less by default)
template<typename key_t, typename val_t, typename randfun = DefaultRand, typename keycomp_t = std::less<key_t> >
class SkipList
{
public:
    typedef SLNode<key_t, val_t> node_t;
    typedef keycomp_t cmp_t;
    typedef SkipList<key_t, val_t, randfun, keycomp_t> this_t;

    // nMaxHeight: number of levels of the list.
    SkipList(int nMaxHeight);
    ~SkipList();

    // Inserts key/val; returns false when an equivalent key is already stored.
    bool Set(const key_t& key, const val_t& val);
    // Looks up key; on success the stored value is copied into val.
    bool Get(const key_t& key, val_t& val);
    // Removes key; returns false when the key is not stored.
    bool Erase(const key_t& key);
    // Frees every element and rebuilds an empty list with the same height.
    void Clear();
    bool Empty() const { return m_nCount == 0; }
    int Size() const { return m_nCount; }

    // (last, first] in reverse order; values are appended with push_back,
    // at most `limit` of them (the default means "no practical limit").
    template<typename Container>
    void RScan(const key_t& keyfirst, const key_t& keylast, Container& vals, unsigned int limit = static_cast<unsigned int>(-1));
    // [first, last); values are appended with push_back, at most `limit`.
    template<typename Container>
    void Scan(const key_t& keyfirst, const key_t& keylast, Container& vals, unsigned int limit = static_cast<unsigned int>(-1));
    // Writes a per-level picture of the list plus per-level counts to `o`.
    template<typename TStream>
    void Dump(TStream& o);

protected:
    void Init(int nMaxHeight);
    void Term();
    // Both fill node[0 .. m_nMaxHeight-1] with the link reached on each
    // level, index 0 being the topmost level.
    bool find(const key_t& key, LKNode** node);
    bool rfind(const key_t& key, LKNode** node);
    node_t* Alloc(int h, const key_t& key, const val_t& val);
    void Dealloc(LKNode* node);

    // Maps a link back to the element that owns it.
    inline node_t* GetNode(LKNode* node)
    {
        return static_cast<node_t*>(node->header);
    }
    inline bool IsTail(LKNode* node)
    {
        return GetNode(node) == m_pTail;
    }
    inline bool IsHead(LKNode* node)
    {
        return GetNode(node) == m_pHead;
    }

protected:
    // dummy head node with max height
    node_t* m_pHead;
    // dummy tail node for reverse travel
    node_t* m_pTail;
    int m_nMaxHeight;
    cmp_t m_cmp;
    randfun m_RandFunc;
    int m_nCount;

private:
    // The list owns its nodes through raw pointers and frees them in the
    // destructor, so a member-wise copy would free every node twice.
    // Declared but intentionally not defined: copying is not supported.
    SkipList(const SkipList&);
    SkipList& operator=(const SkipList&);
};
.inl
#include "stdafx.h"
#include "SkipList.h"
// Builds an empty list with nMaxHeight levels; all set-up is done by Init.
template<typename key_t, typename val_t, typename randfun, typename keycomp_t>
SkipList<key_t, val_t, randfun, keycomp_t>::SkipList(int nMaxHeight)
{
    this->Init(nMaxHeight);
}
// Releases every node, the two sentinels included, through Term.
template<typename key_t, typename val_t, typename randfun, typename keycomp_t>
SkipList<key_t, val_t, randfun, keycomp_t>::~SkipList()
{
    this->Term();
}
// Sets up an empty list: records the height, resets the element count and
// allocates the head and tail sentinels, linked to each other on every level.
// nMaxHeight: requested number of levels; values below 1 are raised to 1.
template<typename key_t, typename val_t, typename randfun, typename keycomp_t>
void SkipList<key_t, val_t, randfun, keycomp_t>::Init(int nMaxHeight)
{
    // A list with fewer than one level can be neither linked nor torn down
    // (Term starts its walk at node[m_nMaxHeight - 1]).
    m_nMaxHeight = nMaxHeight < 1 ? 1 : nMaxHeight;
    m_nCount = 0;
    // Both sentinels span every level and carry default-constructed data.
    m_pHead = Alloc(m_nMaxHeight, key_t(), val_t());
    m_pTail = Alloc(m_nMaxHeight, key_t(), val_t());
    for (int i = 0; i < m_nMaxHeight; ++i)
    {
        m_pHead->node[i].next = &m_pTail->node[i];
        m_pTail->node[i].prev = &m_pHead->node[i];
    }
}
// Frees every node by walking the last level (index m_nMaxHeight - 1) from
// the head sentinel until the chain ends.
template<typename key_t, typename val_t, typename randfun, typename keycomp_t>
void SkipList<key_t, val_t, randfun, keycomp_t>::Term()
{
    for (LKNode* cur = &m_pHead->node[m_nMaxHeight - 1]; cur != NULL; )
    {
        // Read the successor first: Dealloc releases the memory cur lives in.
        LKNode* following = cur->next;
        Dealloc(cur);
        cur = following;
    }
}
// Empties the list: tears everything down, then rebuilds the sentinels with
// the height the list already had.
template<typename key_t, typename val_t, typename randfun, typename keycomp_t>
void SkipList<key_t, val_t, randfun, keycomp_t>::Clear()
{
    const int keptHeight = m_nMaxHeight;
    Term();
    Init(keptHeight);
}
// Backward search starting at the tail sentinel's topmost link.
// On each level it walks towards the head while the predecessor's key is not
// less than `key`, then records the link it stopped on in node[level] and
// drops one level. node must have room for m_nMaxHeight entries.
// Returns true once every level has been recorded.
template<typename key_t, typename val_t, typename randfun, typename keycomp_t>
bool SkipList<key_t, val_t, randfun, keycomp_t>::rfind(const key_t& key, LKNode** node)
{
    LKNode* cur = &m_pTail->node[0];
    int level = 0;
    while (cur)
    {
        LKNode* before = cur->prev;
        const bool stopHere = IsHead(before) || m_cmp(GetNode(before)->key, key);
        if (!stopHere)
        {
            cur = before;
            continue;
        }
        node[level] = cur;
        // The links of one element are contiguous, so the next array slot
        // is the same element one level further down.
        ++cur;
        ++level;
        if (level == m_nMaxHeight)
            return true;
    }
    return false;
}
// Forward search starting at the head sentinel's topmost link.
// On each level it walks towards the tail while `key` is not less than the
// successor's key, then records the link it stopped on in node[level] and
// drops one level. node must have room for m_nMaxHeight entries.
// Returns true once every level has been recorded.
template<typename key_t, typename val_t, typename randfun, typename keycomp_t>
bool SkipList<key_t, val_t, randfun, keycomp_t>::find(const key_t& key, LKNode** node)
{
    LKNode* cur = &m_pHead->node[0];   // topmost level of the head sentinel
    int level = 0;                     // level currently being walked
    while (cur)
    {
        LKNode* after = cur->next;
        const bool stopHere = IsTail(after) || m_cmp(key, GetNode(after)->key);
        if (!stopHere)
        {
            cur = after;               // keep jumping along this level
            continue;
        }
        node[level] = cur;
        // The links of one element are contiguous, so the next array slot
        // is the same element one level further down.
        ++cur;
        ++level;
        if (level == m_nMaxHeight)
            return true;
    }
    return false;
}
// Allocates and constructs a node that takes part in h levels.
// node_t itself declares a single LKNode, so room for the remaining h - 1
// links is appended to the allocation.
// Throws std::bad_alloc when malloc fails; if the node constructor throws,
// the raw storage is released before the exception is passed on.
template<typename key_t, typename val_t, typename randfun, typename keycomp_t>
typename SkipList<key_t, val_t, randfun, keycomp_t>::node_t* SkipList<key_t, val_t, randfun, keycomp_t>::Alloc(int h, const key_t& key, const val_t& val)
{
    // Never request less than one complete node_t, whatever h is.
    const size_t extraLinks = h > 1 ? static_cast<size_t>(h - 1) : 0;
    void* raw = malloc(sizeof(node_t) + extraLinks * sizeof(LKNode));
    if (raw == NULL)
        throw std::bad_alloc();
    try
    {
        return new (raw) node_t(h, key, val);
    }
    catch (...)
    {
        free(raw);
        throw;
    }
}
// Destroys and frees the element that owns the given link. Any of the
// element's links may be passed: the owner is reached through `header`.
template<typename key_t, typename val_t, typename randfun, typename keycomp_t>
void SkipList<key_t, val_t, randfun, keycomp_t>::Dealloc(LKNode* node)
{
    node_t* owner = static_cast<node_t*>(node->header);
    // Mirror of Alloc: run the destructor by hand, then release the storage.
    owner->~node_t();
    free(owner);
}
// Reverse range scan over (keylast, keyfirst]: appends to `vals`, in
// descending key order, the values whose key is not greater than keyfirst
// and greater than keylast, stopping after `limit` values.
template<typename key_t, typename val_t, typename randfun, typename keycomp_t>
template<typename Container>
void SkipList<key_t, val_t, randfun, keycomp_t>::RScan(const key_t& keyfirst, const key_t& keylast, Container& vals, unsigned int limit)
{
    LKNode** res = static_cast<LKNode**>(alloca(sizeof(void*) * m_nMaxHeight));
    if (!rfind(keyfirst, res))
        return;
    // On the last level rfind stops on the smallest element whose key is not
    // less than keyfirst, or on the tail sentinel when there is none.
    LKNode* node = res[m_nMaxHeight - 1];
    // If that element is the sentinel or lies strictly above keyfirst it is
    // outside the range; its predecessor is the first candidate.
    if (IsTail(node) || m_cmp(keyfirst, GetNode(node)->key))
        node = node->prev;
    // `limit--` is evaluated last, so it is only consumed for emitted values.
    while (!IsHead(node) && m_cmp(keylast, GetNode(node)->key) && limit--)
    {
        vals.push_back(GetNode(node)->val);
        node = node->prev;
    }
}
// Forward range scan over [keyfirst, keylast): appends to `vals`, in
// ascending key order, the values whose key is not less than keyfirst and
// less than keylast, stopping after `limit` values.
template<typename key_t, typename val_t, typename randfun, typename keycomp_t>
template<typename Container>
void SkipList<key_t, val_t, randfun, keycomp_t>::Scan(const key_t& keyfirst, const key_t& keylast, Container& vals, unsigned int limit)
{
    LKNode** res = static_cast<LKNode**>(alloca(sizeof(void*) * m_nMaxHeight));
    if (!find(keyfirst, res))
        return;
    // On the last level find stops on the largest element whose key is not
    // greater than keyfirst, or on the head sentinel when there is none.
    LKNode* node = res[m_nMaxHeight - 1];
    // If that element is the sentinel or lies strictly below keyfirst it is
    // outside the range; its successor is the first candidate.
    if (IsHead(node) || m_cmp(GetNode(node)->key, keyfirst))
        node = node->next;
    // `limit--` is evaluated last, so it is only consumed for emitted values.
    while (!IsTail(node) && m_cmp(GetNode(node)->key, keylast) && limit--)
    {
        vals.push_back(GetNode(node)->val);
        node = node->next;
    }
}
// Inserts a new key/value pair.
// Returns true when the pair was inserted, false when an equivalent key is
// already stored (the stored value is left untouched in that case).
template<typename key_t, typename val_t, typename randfun, typename keycomp_t>
bool SkipList<key_t, val_t, randfun, keycomp_t>::Set(const key_t& key, const val_t& val)
{
    int h = m_RandFunc(m_nMaxHeight);
    // Keep a user-supplied height generator from indexing outside `res`.
    if (h < 1)
        h = 1;
    else if (h > m_nMaxHeight)
        h = m_nMaxHeight;
    LKNode** res = static_cast<LKNode**>(alloca(sizeof(void*) * m_nMaxHeight));
    if (!find(key, res))
        return false;
    if (GetNode(res[m_nMaxHeight - 1]) != m_pHead)
    {
        // Compare through a reference of the real key type; equivalence is
        // expressed with the comparator only (neither key orders first).
        const key_t& key2 = GetNode(res[m_nMaxHeight - 1])->key;
        if (!m_cmp(key, key2) && !m_cmp(key2, key))
            return false;
    }
    node_t* p = Alloc(h, key, val);
    // A node of height h occupies the last h levels: its link i belongs to
    // list level m_nMaxHeight - h + i. Splice each link in right after the
    // predecessor that find recorded for that level.
    for (int i = 0, j = m_nMaxHeight - h; i < h; ++i, ++j)
    {
        res[j]->next->prev = &p->node[i];
        p->node[i].next = res[j]->next;
        res[j]->next = &p->node[i];
        p->node[i].prev = res[j];
    }
    ++m_nCount;
    return true;
}
// Looks up `key`.
// Returns true and copies the stored value into `val` when the key is
// present; returns false and leaves `val` untouched otherwise.
template<typename key_t, typename val_t, typename randfun, typename keycomp_t>
bool SkipList<key_t, val_t, randfun, keycomp_t>::Get(const key_t& key, val_t& val)
{
    LKNode** res = static_cast<LKNode**>(alloca(sizeof(void*) * m_nMaxHeight));
    if (!find(key, res))
        return false;
    // find stops on the largest element whose key is not greater than `key`
    // (or on the head sentinel), which is not necessarily a match.
    LKNode* hit = res[m_nMaxHeight - 1];
    if (IsHead(hit))
        return false;
    node_t* found = GetNode(hit);
    if (m_cmp(found->key, key) || m_cmp(key, found->key))
        return false;
    val = found->val;
    return true;
}
// Removes the element stored under `key`.
// Returns true when an element was removed, false when the key is absent.
template<typename key_t, typename val_t, typename randfun, typename keycomp_t>
bool SkipList<key_t, val_t, randfun, keycomp_t>::Erase(const key_t& key)
{
    LKNode** path = (LKNode**)alloca(sizeof(void*)* m_nMaxHeight);
    if (!find(key, path))
        return false;

    const int bottom = m_nMaxHeight - 1;
    node_t* victim = GetNode(path[bottom]);
    if (victim == m_pHead)
        return false;
    if (m_cmp(key, victim->key) || m_cmp(victim->key, key))
        return false;

    // The victim occupies the last `height` levels; on each of them the link
    // recorded by find is the one that gets unhooked from its neighbours.
    for (int level = m_nMaxHeight - victim->height; level <= bottom; ++level)
    {
        LKNode* link = path[level];
        link->prev->next = link->next;
        link->next->prev = link->prev;
    }
    Dealloc(path[bottom]);
    --m_nCount;
    return true;
}
// Debug helper: prints one line per non-empty level, each element shown as
// key(value) and preceded by as many tabs as its distance, on the last
// level, from the previously printed element; then prints the element count
// of every non-empty level. Prints "empty SkipList" when nothing is stored.
template<typename key_t, typename val_t, typename randfun, typename keycomp_t>
template<typename TStream>
void SkipList<key_t, val_t, randfun, keycomp_t>::Dump(TStream& o)
{
    // Position of every key on the last level, which links all elements.
    std::map<key_t, int> column;
    // Number of elements seen on each non-empty level.
    std::map<int, size_t> perLevel;
    LKNode* p = m_pHead->node[m_nMaxHeight - 1].next;
    for (int i = 0; !IsTail(p); ++i, p = p->next)
        column[GetNode(p)->key] = i;
    for (int i = 0; i < m_nMaxHeight; ++i)
    {
        p = m_pHead->node[i].next;
        if (IsTail(p)) continue;    // nothing linked on this level
        int k = 0, j = 0, c = 0;
        while (!IsTail(p))
        {
            k = j;
            j = column[GetNode(p)->key];
            // Pad by the gap between this element and the previous one.
            for (int l = 0; l < j - k; ++l)
                o << "\t";
            o << GetNode(p)->key << "(" << GetNode(p)->val << ")";
            p = p->next;
            ++c;
        }
        perLevel[i] = c;
        o << std::endl;
    }
    if (perLevel.empty())
    {
        o << "empty SkipList" << std::endl;
        return;
    }
    for (std::map<int, size_t>::iterator it = perLevel.begin(); it != perLevel.end(); ++it)
        o << "level " << it->first << " : " << it->second << std::endl;
}
测试:
// Benchmark: runs the same shuffled insert / lookup / erase workload against
// the skip list and against map, printing the elapsed tick count of each
// phase. Exits with -1 as soon as a skip-list lookup or erase fails.
int main()
{
    typedef SkipList<int, int> slii;
    slii s(16);
    map<int, int> m;
    const int N = 800000;
    cout << "data count " << N << endl;

    // Keys 1..N in random order; each key is also used as its value.
    vector<int> v;
    v.reserve(N);
    for (int i = 1; i <= N; ++i)
        v.push_back(i);
    random_shuffle(v.begin(), v.end());

    // Insert.
    DWORD dw = GetTickCount();
    for (size_t i = 0; i < v.size(); ++i)
        s.Set(v[i], v[i]);
    cout << "skiplist Set : " << GetTickCount() - dw << " ms." << endl;
    dw = GetTickCount();
    for (size_t i = 0; i < v.size(); ++i)
        m.insert(make_pair(v[i], v[i]));
    cout << "map insert : " << GetTickCount() - dw << " ms." << endl;

    // Lookup, in a fresh random order.
    random_shuffle(v.begin(), v.end());
    dw = GetTickCount();
    for (size_t i = 0; i < v.size(); ++i)
    {
        int val = 0;
        if (!s.Get(v[i], val))
        {
            cout << "Get error\n";
            exit(-1);
        }
        assert(val == v[i]);
    }
    cout << "skiplist Get : " << GetTickCount() - dw << " ms." << endl;
    dw = GetTickCount();
    for (size_t i = 0; i < v.size(); ++i)
    {
        map<int, int>::iterator it = m.find(v[i]);
        assert(it != m.end() && it->second == v[i]);
    }
    cout << "map find : " << GetTickCount() - dw << " ms." << endl;

    // Erase, in a fresh random order.
    random_shuffle(v.begin(), v.end());
    dw = GetTickCount();
    for (size_t i = 0; i < v.size(); ++i)
    {
        if (!s.Erase(v[i]))
        {
            cout << "Erase error\n";
            exit(-1);
        }
    }
    assert(s.Empty());
    cout << "skiplist Erase: " << GetTickCount() - dw << " ms." << endl;
    dw = GetTickCount();
    for (size_t i = 0; i < v.size(); ++i)
    {
        m.erase(v[i]);
    }
    cout << "map Erase : " << GetTickCount() - dw << " ms." << endl;
#if 0
    fstream fs;
    fs.open("d:\\11.txt", ios::out);
    s.Dump(fs);
    fs.close();
#endif
    return 0;
}
结果:
本文探讨了跳表在实际实现中遇到的问题及其解决方案。针对插入节点时的高度分配,通过对比发现随机分配高度导致查询效率低下。优化策略是采用二分概率增加层数,使得查询效率接近理想状态。此外,介绍了在工作中应用跳表(如SSDB中的Leveldb)并实现反向查找功能,指出反向查找可正向遍历最底层实现。
2218

被折叠的 条评论
为什么被折叠?



