近来无事,每天坚持写一些算法。
1、TOP K的小顶堆算法
/*
 * Sift-down step of a min-heap ("small-top heap") stored in an array.
 *
 * Heap positions are 1-based: position p is stored in ptArray[p - 1] and its
 * children are positions 2p and 2p + 1.  The value at `pos` is moved down,
 * always swapping with the smaller child, until it is not larger than either
 * child.  Only the path below `pos` is visited, so both subtrees of `pos` are
 * expected to be valid min-heaps already.
 *
 * Top-K usage: build the heap first, then compare every new candidate with
 * the root only; when the root has to be replaced, overwrite it and call
 * smallstack(array, 1, arraylen).
 *
 * ptArray  heap storage holding at least `len` elements
 * pos      1-based position of the node to sift down
 * len      number of elements in the heap
 *
 * Returns 0 on success (including "nothing to do"), -1 when pos is 0 or when
 * ptArray is null although elements would have to be examined.
 */
int smallstack(unsigned long *ptArray, unsigned long pos, unsigned long len)
{
    unsigned long dwLeft;
    unsigned long dwSmall;
    unsigned long dwSwap;

    /* Positions are 1-based; 0 would address ptArray[-1]. */
    if (pos == 0)
    {
        return -1;
    }
    /* No left child: pos is a leaf or outside the heap.  Written as a
       division so that 2 * pos can never overflow. */
    if (pos > len / 2)
    {
        return 0;
    }
    if (!ptArray)
    {
        return -1;
    }

    do
    {
        dwLeft = 2 * pos;

        /* Pick the smallest of the node and its left child (ties keep the node). */
        dwSmall = (ptArray[dwLeft - 1] < ptArray[pos - 1]) ? dwLeft : pos;

        /* The right child is position dwLeft + 1, stored at index dwLeft. */
        if ((dwLeft < len) && (ptArray[dwLeft] < ptArray[dwSmall - 1]))
        {
            dwSmall = dwLeft + 1;
        }

        if (dwSmall == pos)
        {
            break; /* heap order already holds here */
        }

        dwSwap = ptArray[dwSmall - 1];
        ptArray[dwSmall - 1] = ptArray[pos - 1];
        ptArray[pos - 1] = dwSwap;

        /* Continue with the child that received the larger value. */
        pos = dwSmall;
    } while (pos <= len / 2);

    return 0;
}
小顶堆的原理在于:父节点的值总是不大于其子节点的值,因此从根到任一叶子的每条路径上,节点都是按序(非递减)排列的。如果替换了上层某个节点的值,只需从该节点出发,沿着较小子节点所在的那一条路径向下调整即可;未参与交换的其它节点(包括同级节点和其它子树中的节点)均无须改动。
2、Trie树
#define MAX_CHAR_NUM 26
const int num_chars = MAX_CHAR_NUM;

/*
 * Map an ASCII letter to its branch slot (0 .. num_chars - 1), ignoring case.
 * Returns -1 for every character that is not a letter.
 */
static int trie_char_index(char ch)
{
    if ((ch >= 'a') && (ch <= 'z'))
    {
        return ch - 'a';
    }
    if ((ch >= 'A') && (ch <= 'Z'))
    {
        return ch - 'A';
    }
    return -1;
}

/*
 * Case-insensitive trie over the letters a-z.
 *
 * All operations start at `parent`: a default-constructed object is its own
 * root (parent == this), while an object built with Trie(tr) works on the
 * tree rooted at `tr`.  Child nodes are allocated by trie_insert and are
 * released by the destructor of the node that holds them.
 *
 * Because nodes are owned, copying is disabled (a shallow copy would free
 * the same children twice).
 */
class Trie {
public:
    /* Create an empty root node (count 0, parent == this). */
    Trie();
    /* Create a node attached to `tr` (count 1).  `tr` may be NULL; in that
       case the first trie_insert allocates a private root. */
    Trie(Trie* tr);
    virtual ~Trie();
    int trie_search(const char *ptword, char **str );
    int trie_insert(const char *ptword, char *str );
    /* Declared only: no definition is visible in this part of the file. */
    int trie_remove(const char *ptword, char *str );
    /* Number of successfully inserted words whose path passes through, or
       ends at, this node. */
    int count;
protected:
    char* data;                 /* payload of the last word inserted here; not owned */
    Trie* branch[num_chars];    /* child per letter, NULL when absent; owned */
    Trie* parent;               /* node at which search/insert start */
private:
    bool parent_owned;          /* true when trie_insert allocated `parent` itself */
    Trie(const Trie&);              /* not implemented: copying is disabled */
    Trie& operator=(const Trie&);   /* not implemented: copying is disabled */
};

Trie::Trie()
    : count(0), data(NULL), parent(this), parent_owned(false)
{
    for (int i = 0; i < num_chars; i++)
    {
        branch[i] = NULL;
    }
}

Trie::Trie(Trie* tr)
    : count(1) /* created while a word passes through, so it starts at 1 */,
      data(NULL), parent(tr), parent_owned(false)
{
    for (int i = 0; i < num_chars; i++)
    {
        branch[i] = NULL;
    }
}

/* Release the whole subtree below this node (and the lazily created root,
   if this object allocated one).  `data` belongs to the caller. */
Trie::~Trie()
{
    for (int i = 0; i < num_chars; i++)
    {
        delete branch[i]; /* deleting NULL is a no-op */
        branch[i] = NULL;
    }
    if (parent_owned)
    {
        delete parent;
        parent = NULL;
    }
}

/*
 * Look up `ptword` (letters only, case-insensitive).
 *
 * ptword  word to look up; NULL is treated like the empty word
 * str     optional output: receives the payload stored at the final node
 *         (NULL when the word is only a prefix of inserted words); it is
 *         left untouched when the lookup fails
 *
 * Returns the count of the final node, i.e. the number of inserted words
 * that start with `ptword` (including `ptword` itself), or -1 when the word
 * contains a non-letter, is not present, or no tree exists yet.
 */
int Trie::trie_search(const char *ptword, char **str )
{
    const Trie *node = parent;

    if (node == NULL)
    {
        return -1; /* built with Trie(NULL) and nothing inserted so far */
    }
    if (ptword != NULL)
    {
        for (const char *p = ptword; *p != '\0'; ++p)
        {
            const int idx = trie_char_index(*p);
            /* Non-letters are rejected, not skipped. */
            if ((idx < 0) || (node->branch[idx] == NULL))
            {
                return -1;
            }
            node = node->branch[idx];
        }
    }
    if (str != NULL)
    {
        *str = node->data;
    }
    return node->count;
}

/*
 * Insert `ptword` (letters only, case-insensitive) and attach `str` to it.
 *
 * ptword  word to insert; NULL is treated like the empty word
 * str     payload stored at the final node (replaces a previous payload);
 *         the pointer is stored as-is and is not owned by the trie
 *
 * Every node on the word's path, the root included, has its count raised by
 * one, so inserting the same word again is reflected by trie_search.
 *
 * Returns 0 on success, -1 when the word contains a non-letter; in that case
 * the trie is left unchanged.
 */
int Trie::trie_insert(const char *ptword, char *str )
{
    const char *p;

    /* Validate first so that a rejected word cannot leave half a path or
       inflated counters behind. */
    for (p = ptword; (p != NULL) && (*p != '\0'); ++p)
    {
        if (trie_char_index(*p) < 0)
        {
            return -1;
        }
    }

    if (parent == NULL)
    {
        parent = new Trie;
        parent_owned = true;
    }

    Trie *node = parent;
    node->count++;
    for (p = ptword; (p != NULL) && (*p != '\0'); ++p)
    {
        const int idx = trie_char_index(*p);
        if (node->branch[idx] == NULL)
        {
            node->branch[idx] = new Trie(node); /* new node starts at count 1 */
        }
        else
        {
            node->branch[idx]->count++;
        }
        node = node->branch[idx];
    }
    node->data = str;
    return 0;
}
Trie树的作用,是在海量记录中查找重复出现的字段,并统计其出现次数。
3、多路归并算法,败者树
/* One node of the loser tree used by K_merge2 / create_loser_tree. */
struct LtNode
{
    /* Index (into the array of current keys) of the entry parked at this
       node: the loser of the match played here, or -1 while the node is
       still empty.  Node 0 receives the overall winner. */
    int lost;
    /* Not read or written by the loser-tree routines in this section. */
    int data;
};
/* pos is current,lost tree is to choose min value */
int K_merge2(int k,LtNode *ptlt, int *ptArray, int pos)
{
int parent, tmp;
parent = (pos + k)/2;
while (parent > 0) {
if (pos == -1) {
break;
}
if (((ptlt+parent)->lost == -1) || (ptArray[pos] > ptArray[(ptlt+parent)->lost]))
{/* smaller one go ahead and bigger one remain at parent position */
tmp = pos; /* pos is the smaller one, winner */
pos = (ptlt+parent)->lost;
(ptlt+parent)->lost = tmp;
}
parent >>= 1;
}
if ((pos != -1))
{
(ptlt)->lost = pos;
}
return 0;
}
/*
 * Build a loser tree over the n keys in ptArray.
 *
 * ptlt     storage for n tree nodes
 * ptArray  current key of each of the n ways
 * n        number of ways
 *
 * Every node is first marked empty (lost == -1), which K_merge2 relies on
 * while the tree is being filled; then the ways are entered one by one,
 * from the last down to the first.
 */
void create_loser_tree(LtNode *ptlt,int *ptArray, int n)
{
    int idx;

    for (idx = 0; idx < n; ++idx)
    {
        ptlt[idx].lost = -1;
    }

    /* idx runs n .. 1, so the way entered is idx - 1 = n-1 .. 0. */
    for (idx = n; idx > 0; --idx)
    {
        K_merge2(n, ptlt, ptArray, idx - 1);
    }
}