霍夫曼编码

霍夫曼编码算法:
将所有字符按频度放入最小优先队列(小根堆),每次取出频度最低的两棵树,合并为一棵新的二叉树,
其父节点频度为其两个子节点的频度之和,并将新树放回队列;重复此过程,直到队列中只剩一棵树,即最终的霍夫曼树。


#include <iostream>
#include <string.h>
#include <string>
#include <queue>
#include <vector>
#include <stack>

using std::cout;
using std::endl;
using std::string;
using std::priority_queue;
using std::vector;
using std::stack;

// alphabet size of extended ASCII
const int R = 256;

// A node of the Huffman trie.
// Leaves carry a symbol and its frequency; internal nodes carry the summed
// frequency of their two children.
struct Node
{
        char ch;        // symbol stored in the node
        int freq;       // frequency (weight) of this subtree
        Node *left;     // child reached by appending '0' to the code
        Node *right;    // child reached by appending '1' to the code

        // All arguments are optional; a default-constructed node is a
        // zero-frequency leaf holding '\0'.
        Node(char c='\0', int f=0, Node *l=NULL, Node *r=NULL) : ch(c), freq(f), left(l), right(r)
        {
        }

        // True when the node has no children.
        // Declared const so it can be queried through const pointers/references.
        bool isLeaf() const
        {
                return (NULL == left && NULL == right);
        }

};

// in order to build a minimum priority queue
struct CmpNode
{
        bool operator()(Node *a, Node *b)
        {
                if (a->freq == b->freq)
                        return a->ch > b->ch;

                return a->freq > b->freq;
        }
};

class Huffman
{
public:
        Huffman(string str) { input = str; }
        ~Huffman();
        void compress();

private:
        Node *buildTrie(int f[]);
        void buildCode(string table[], Node *x, string s);

private:
        Node *root;
        string input;

};

Huffman::~Huffman()
{
        Node *tmp = root;
        stack<Node *> st;
        vector<Node *> vec;
        while (NULL != tmp || !st.empty())
        {
                if (NULL != tmp)
                {
                        st.push(tmp);
                        vec.push_back(tmp);
                        tmp = tmp->left;
                }
                else
                {
                        tmp = st.top();
                              st.pop();
                        tmp = tmp->right;
                }
        }

        typename std::vector<Node *>::iterator it = vec.begin();
        for (;it != vec.end(); ++it)
        {
                delete (*it);
                *it = NULL;
        }
}

Node * Huffman::buildTrie(int f[])
{
        priority_queue<Node *, vector<Node *>, CmpNode> pq;

        Node *tmp = NULL;
        for (int i = 0; i < R; ++i)
                if (f[i] > 0)
                {
                        tmp = new Node(char(i), f[i], NULL, NULL);
                        pq.push(tmp);
                }

        // special case in case there is only one character with a nonzero frequency
        if (1 == pq.size())
        {
                if (0 == f['\0'-'0'+48])
                {
                        tmp = new Node('\0', 0, NULL, NULL);
                        pq.push(tmp);
                }
                else
                {
                        tmp = new Node('\1', 0, NULL, NULL);
                        pq.push(tmp);
                }
        }

        // merge two smallest trees
        while (pq.size() > 1)
        {
                Node *l = pq.top();
                         pq.pop();
                Node *r = pq.top();
                         pq.pop();
                tmp = new Node('\0', l->freq+r->freq, l, r);
                pq.push(tmp);
        }

        return pq.top();
}

// make a lookup table from symbols and their encoding
void Huffman::buildCode(string table[], Node *x, string s)
{
        if (NULL != x)
        {
                if (!x->isLeaf())
                {
                        buildCode(table, x->left, s+"0");
                        buildCode(table, x->right, s+"1");
                }
                else
                        table[x->ch-'0'+48] = s;
        }
}

void Huffman::compress()
{
        // tabulate frequency counts
        int i, f[R] = { 0 };
        int len = input.length();
        for (i = 0; i < len; ++i)
                ++f[input[i]-'0'+48];

        // build Huffman trie
        vector<Node> vec;
        Node *tmp = buildTrie(f);
        root = tmp;
        //build code table
        string table[R] = { "" };
        buildCode(table, tmp, "");

        cout << "codeword table:\n";
        for (i = 0; i < R; ++i)
                if (f[i] > 0)
                        cout << char(i) << ": " << table[i] << endl;

        cout << "compressed bitstring:\n";
        for (i = 0; i < len; ++i)
                cout << table[input[i]-'0'+48];
        cout << endl;
}

int main()
{
        Huffman huffman("ABRACADABRA!");
        huffman.compress();

        return 0;
}
 

codeword table:
!: 1010
A: 0
B: 110
C: 1011
D: 100
R: 111
compressed bitstring:
0110111010110100011011101010

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值