huffman编码

原创已于 2022-06-29 10:14:34 修改 · 2k 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#算法 #c++ #开发语言

于 2022-06-29 10:14:14 首次发布

算法专栏收录该内容

164 篇文章

订阅专栏

使用优先队列进行优化的haffman编码。

统计文章中的词频
使用优先队列(堆)创建haffman树
遍历Haffman树得到每一个字符的huffman编码，保存在enc(unordered_map)中
文章进行编码
使用huffman树和haffman编码进行解码。

创建huffman树的时间复杂度 $O (C l o g C)$ 其中C是单词种类的个数。
编码的时间复杂度 $O (n)$ ，只需要遍历一遍字符串就行了。
解码的时间复杂度 $O (m)$ ，只需要遍历一遍编码后的字符串就行了。

huffman编码是现代压缩技术的基础。应该写个输入输出测试一下对文件的压缩的情况的。
char是8个bit, 而bit数组中的一位是1个bit。所以肯定会有压缩的效果。
文件最后保存的是haffman树以及对应的huaffman编码。在解码的时候先读取haffmans树，然后对二进制文件进行解码。


#include <iostream>
#include <cstring>
#include <cstdio>
#include <algorithm>
#include <queue>
#include <unordered_map>

using namespace std;

// Encodes a URL to a shortened URL.
struct HuffmanTree {
    HuffmanTree *l, *r;
    int cnt;
    char ch;                              // 词频和单词
    HuffmanTree(int cnt, char ch, HuffmanTree *l, HuffmanTree *r):cnt(cnt), ch(ch), l(l), r(r) {}
};
class cmp{
public:
    /* Node::priority 大的优先, 重写小于号 */
    bool operator () (HuffmanTree* &a, HuffmanTree* &b) const
    {
        return a->cnt > b->cnt;
    }
};

typedef pair<int, char>PR;
priority_queue<HuffmanTree*, vector<HuffmanTree*>, cmp>que, bakcup;
unordered_map<char, int>mp;

HuffmanTree* buildHuffman() {
    bakcup = que;
    while (!bakcup.empty()) {
        printf("%d %c\n", bakcup.top()->cnt, bakcup.top()->ch);
        bakcup.pop();
    }

    while (que.size() >= 2) {
        HuffmanTree* t1 = que.top(); que.pop();
        HuffmanTree* t2 = que.top(); que.pop();
        HuffmanTree *fa = new HuffmanTree(t1->cnt + t2->cnt, '#', t1, t2);
        que.push(fa);
    }
    return que.top();
}
HuffmanTree *root;
unordered_map<char, string>enc;
void travel(HuffmanTree *root, string s) {              // 获取每个结点的huffman编码
    if (root == NULL) return ;
    // 叶子结点, 统计编码
    if (root->l == NULL && root->r == NULL) {
        cout << root->ch << " " << s << endl;
        enc[root->ch] = s;
        return ;
    }
    travel(root->l, s + '0');
    travel(root->r, s + '1');
}
string encode(string longUrl) {
    for (char ch : longUrl) {
        mp[ch]++;
    }
    for (auto [ch, cnt] : mp) {
        que.push(new HuffmanTree(cnt, ch, NULL, NULL)); // 存放的是指针。
    }

    root = buildHuffman();
    travel(root, "");                                   // 遍历haffman树获取Huffman编码
    string ans;
    for (char ch : longUrl) {
        ans += enc[ch];
    }
    return ans;
}


string treeDecode(HuffmanTree *root, int &cur, string &s) {
    if (root == NULL) return "";
    if (root->l == NULL && root->r == NULL) {
        string ans = "";
        ans += root->ch;
        return ans;
    }
    cur++;
    if (s[cur] == '0') return treeDecode(root->l, cur, s);
    else return treeDecode(root->r, cur, s);
}
// Decodes a shortened URL to its original URL.
string decode(string shortUrl) {
    int cur = -1;
    string ans;
    int n = shortUrl.size();
    for (cur; cur < n - 1;) {
        printf("cur = %d\n", cur);
        string s = treeDecode(root, cur, shortUrl);
        cout << s << endl;
        ans += s;
    }
    return ans;
}

int main() {

    string s = "aaabbc";
    s = encode(s);
    cout << s << endl;
    cout << decode(s) << endl;
}