哈夫曼树也称为最优二叉树。树中每个叶子结点带有一个权值,从根结点到各叶子结点的路径长度与该叶子结点权值的乘积之和,称为树的带权路径长度。对于一组权值确定的叶子结点,哈夫曼树是带权路径长度最小的二叉树。
哈夫曼算法:对于给定的n个权值构造n个只有根结点的二叉树,其集合为F;
选取F中根结点权值最小的两棵树,分别作为左右子树合并为一棵新二叉树,其根结点权值为左右子树根结点权值之和;
将新树加入F,而合并前的两棵树从F中删除;
重复直到F中只有一棵树;
利用哈夫曼算法即可构造出哈夫曼编码树。哈夫曼编码树常用于数据压缩中的编码与解码:例如对于一组字符,以各个字符的使用频率作为权值构造哈夫曼编码树,左分支记为0,右分支记为1,则从根结点到每个叶子结点的路径所构成的编码是唯一的,并且任意一个字符的编码都不是其他字符编码的前缀。
// One node of the Huffman tree. Nodes are stored in a vector and refer to each
// other by index into that vector; -1 means "no such node".
//
// Every member has a default so that a default-constructed node (for example
// the `element t;` used when two trees are merged) never carries indeterminate
// values. Aggregate initialization in member order still works (C++14 and later).
struct element
{
    float weight = 0.0f;         // weight of the (sub)tree rooted at this node
    int lchild = -1;             // index of the left child, -1 if none
    int rchild = -1;             // index of the right child, -1 if none
    int parent = -1;             // index of the parent node, -1 if none
    bool can_be_select = false;  // true while this root may still be picked for a merge
    // Index of the matching char_list entry; static_cast<size_t>(-1) marks a
    // node that has no entry (internal nodes are created with this value).
    size_t pos_in_list = static_cast<size_t>(-1);
};
// One symbol of the alphabet being encoded, together with the code assigned to it.
//
// The scalar members are zero-initialized by default so that a default-constructed
// entry (for example an element of a local `char_list l[N];` array) never holds
// indeterminate values. Aggregate initialization in member order still works
// (C++14 and later).
struct char_list
{
    float weight = 0.0f;        // weight of the symbol (e.g. its usage frequency)
    char c = '\0';              // the symbol itself
    size_t pos_in_huffman = 0;  // presumably the index of this symbol's node in the tree vector - confirm against callers
    string code;                // code string of '0'/'1' characters written by get_code
};
// Picks the two selectable trees with the smallest weights from the forest F.
//
// huffman : all nodes; only those with can_be_select == true are considered.
// i1      : receives the index of the lightest selectable node.
// i2      : receives the index of the second-lightest selectable node.
//
// On success both chosen nodes are marked as no longer selectable. Ties are
// resolved in favour of the lower index. If fewer than two selectable nodes
// exist, nothing is modified and the missing index is reported as
// static_cast<size_t>(-1).
void select(vector<element>& huffman, size_t& i1, size_t& i2)
{
    const size_t none = static_cast<size_t>(-1);
    size_t lightest = none;
    size_t second = none;

    for (vector<element>::size_type i = 0; i < huffman.size(); ++i)
    {
        if (!huffman[i].can_be_select)
            continue;

        if (lightest == none || huffman[i].weight < huffman[lightest].weight)
        {
            // The previous minimum is still the best runner-up seen so far, so
            // it must be demoted rather than thrown away.
            second = lightest;
            lightest = i;
        }
        else if (second == none || huffman[i].weight < huffman[second].weight)
        {
            second = i;
        }
    }

    i1 = lightest;
    i2 = second;

    // Only consume the nodes when a full pair was found; this also avoids
    // indexing the vector with the "none" sentinel.
    if (second != none)
    {
        huffman[lightest].can_be_select = false;
        huffman[second].can_be_select = false;
    }
}
void HuffmanTree(vector<element>& huffman, size_t n)//构造哈夫曼树
{
for (size_t i = 0; i < n - 1; i++)
{
size_t i1, i2;
select(huffman, i1, i2);
element t;
t.weight = huffman[i1].weight + huffman[i2].weight;
t.lchild = i1;
t.rchild = i2;
t.can_be_select = true;
t.pos_in_list = -1;
huffman.push_back(t);
huffman[i1].parent = i + n;
huffman[i2].parent = i + n;
}
}
void get_code(vector<element>& huffman, char_list l[], size_t n)//获得编码
{
string temp;
list<element> t;
list<element>::iterator it;
t.push_front(huffman.back());
it = t.begin();
while (it!=t.end())
{
while (it->lchild != -1)
{
temp.push_back('0');
t.push_front(huffman[it->lchild]);
it->lchild = -1;
it = t.begin();
}
if (it->rchild != -1)
{
temp.push_back('1');
t.push_front(huffman[it->rchild]);
it->rchild = -1;
it = t.begin();
}
else
{
if (it->pos_in_list!=-1)
l[it->pos_in_list].code = temp;
if (temp.size()!=0)
temp.pop_back();
t.pop_front();
it = t.begin();
}
}
}