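// Huffman coding algorithm:
// Keep the trees in a min-priority queue ordered by the frequency of their root node.
// Repeatedly remove the two trees with the lowest frequencies and merge them into a new
// binary tree whose root frequency is the sum of the frequencies of its two children.
// Repeat until a single tree remains; that tree is the Huffman tree.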
#include <iostream>
#include <string.h>
#include <string>
#include <queue>
#include <vector>
#include <stack>
using std::cout;
using std::endl;
using std::string;
using std::priority_queue;
using std::vector;
using std::stack;
// Alphabet size of extended ASCII: one slot per possible 8-bit character code.
// Used as the length of the frequency table and of the codeword table.
const int R = 256;
// A node of the Huffman trie. A leaf carries a symbol in `ch`; an internal
// node links two subtrees and stores their combined frequency in `freq`.
struct Node
{
    char ch;     // symbol held by this node
    int freq;    // occurrence count (sum of the children for a merged node)
    Node *left;  // child reached by a '0' bit, or NULL
    Node *right; // child reached by a '1' bit, or NULL

    // All arguments are optional; the default is an empty leaf.
    Node(char c = '\0', int f = 0, Node *l = NULL, Node *r = NULL)
        : ch(c), freq(f), left(l), right(r)
    {
    }

    // Returns true when the node has no children. Declared const so it can
    // be called through const objects, pointers and references.
    bool isLeaf() const
    {
        return (NULL == left && NULL == right);
    }
};
// in order to build a minimum priority queue
struct CmpNode
{
bool operator()(Node *a, Node *b)
{
if (a->freq == b->freq)
return a->ch > b->ch;
return a->freq > b->freq;
}
};
class Huffman
{
public:
Huffman(string str) { input = str; }
~Huffman();
void compress();
private:
Node *buildTrie(int f[]);
void buildCode(string table[], Node *x, string s);
private:
Node *root;
string input;
};
// Releases every node of the trie rooted at `root`. The tree is walked
// iteratively with an explicit stack: each node's children are queued before
// the node itself is deleted, so no freed node is ever dereferenced.
Huffman::~Huffman()
{
    stack<Node *> pending;
    if (NULL != root)
        pending.push(root);

    while (!pending.empty())
    {
        Node *node = pending.top();
        pending.pop();

        if (NULL != node->left)
            pending.push(node->left);
        if (NULL != node->right)
            pending.push(node->right);

        delete node;
    }
}
// Builds the Huffman trie for a frequency table.
//   f: array of R counts; f[c] is the number of occurrences of character
//      code c.
// Returns the root of the newly allocated trie, or NULL when every count is
// zero (nothing to encode). The nodes are allocated with new and are not
// freed here.
Node *Huffman::buildTrie(int f[])
{
    priority_queue<Node *, vector<Node *>, CmpNode> pq;

    // one leaf per symbol that actually occurs
    for (int i = 0; i < R; ++i)
        if (f[i] > 0)
            pq.push(new Node(static_cast<char>(i), f[i], NULL, NULL));

    // Empty input: calling top() on an empty queue would be undefined
    // behaviour, so report "no trie" instead.
    if (pq.empty())
        return NULL;

    // Special case: only one distinct symbol. Add a zero-frequency dummy
    // leaf so that the real symbol ends up one level below the root and gets
    // a non-empty codeword. The dummy uses '\0' unless '\0' is itself the
    // real symbol, in which case it uses '\1'.
    if (1 == pq.size())
    {
        const char dummy = (0 == f[0]) ? '\0' : '\1';
        pq.push(new Node(dummy, 0, NULL, NULL));
    }

    // repeatedly merge the two lowest-frequency trees
    while (pq.size() > 1)
    {
        Node *l = pq.top();
        pq.pop();
        Node *r = pq.top();
        pq.pop();
        pq.push(new Node('\0', l->freq + r->freq, l, r));
    }
    return pq.top();
}
// Makes a lookup table from symbols to their codewords.
//   table: array of R strings, indexed by character code (0 .. R-1).
//   x:     current trie node; NULL is ignored.
//   s:     bits on the path from the root to x ("0" = left, "1" = right).
void Huffman::buildCode(string table[], Node *x, string s)
{
    if (NULL == x)
        return;

    if (x->isLeaf())
    {
        // Index through unsigned char: plain char may be signed, in which
        // case a symbol >= 0x80 would give a negative, out-of-bounds index.
        table[static_cast<unsigned char>(x->ch)] = s;
        return;
    }

    buildCode(table, x->left, s + "0");
    buildCode(table, x->right, s + "1");
}
// Encodes the stored input: counts symbol frequencies, builds the trie and
// the codeword table, then prints the table followed by the encoded bit
// string to standard output.
// NOTE: root is overwritten without freeing a previously built trie, so
// calling compress() more than once on the same object leaks the old nodes.
void Huffman::compress()
{
    // Tabulate frequency counts. Characters are converted to unsigned char
    // before indexing: plain char may be signed, and a negative value would
    // index outside f[].
    int f[R] = { 0 };
    const string::size_type len = input.length();
    for (string::size_type i = 0; i < len; ++i)
        ++f[static_cast<unsigned char>(input[i])];

    // build Huffman trie; it is stored in root so the destructor can free it
    root = buildTrie(f);

    // build code table (every entry starts out as the empty string)
    string table[R];
    buildCode(table, root, "");

    cout << "codeword table:\n";
    for (int c = 0; c < R; ++c)
        if (f[c] > 0)
            cout << char(c) << ": " << table[c] << endl;

    cout << "compressed bitstring:\n";
    for (string::size_type i = 0; i < len; ++i)
        cout << table[static_cast<unsigned char>(input[i])];
    cout << endl;
}
int main()
{
Huffman huffman("ABRACADABRA!");
huffman.compress();
return 0;
}
// Expected output for the input "ABRACADABRA!":
//
// codeword table:
// !: 1010
// A: 0
// B: 110
// C: 1011
// D: 100
// R: 111
// compressed bitstring:
// 0110111010110100011011101010