读取一个 txt 文本文件，并对其内容进行哈夫曼编码压缩

最新推荐文章于 2024-05-11 10:30:00 发布

原创 · 最新推荐文章于 2024-05-11 10:30:00 发布 · 239 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#c++ #开发语言

开发项目模块功能专栏收录该内容

2 篇文章

订阅专栏

该程序读取文本，包括空格和换行符，使用哈夫曼编码进行压缩，然后解码还原原始文本。它首先统计字符频率，构建哈夫曼树，生成哈夫曼编码表，然后进行编码和解码操作。

读取文本时，空格和换行符也会一并读入并参与哈夫曼编码，因此解码后可以完整还原原文的段落格式。

#include <algorithm>
#include <fstream>
#include <iostream>
#include <queue>
#include <string>
#include <unordered_map>
#include <vector>

#include <windows.h>
#include <commdlg.h>

using namespace std;

// A node of the Huffman tree.
//
// A node owns its children: destroying a node destroys the whole subtree
// below it. Copying a node only copies the raw child pointers, so two copies
// of a node with children would both try to delete them; handle nodes through
// pointers instead of copying them.
class Node {
public:
    char ch = '\0';         // character stored in this node
    int freq = 0;           // frequency associated with this node
    Node* left = nullptr;   // left child (owned), nullptr when absent
    Node* right = nullptr;  // right child (owned), nullptr when absent

    // Every member has an in-class initializer, so a default-constructed node
    // is an empty leaf that is safe to destroy.
    Node() {}

    // Creates a leaf holding character `c` with frequency `f`.
    Node(char c, int f) : ch(c), freq(f) {}

    // Releases both subtrees; deleting a null child is a no-op.
    ~Node() {
        delete left;
        delete right;
    }
};

// Ordering predicate for the priority queue of tree nodes.
//
// Returns true when `a` has a strictly higher frequency than `b`. Used as the
// comparator of std::priority_queue, this puts the node with the lowest
// frequency on top. The call operator is const so the predicate can also be
// invoked through a const comparator object.
struct cmp {
    bool operator() (const Node* a, const Node* b) const {
        return a->freq > b->freq;
    }
};

// Builds a Huffman tree from a character -> frequency table.
//
// freqMap: frequency of every character that must receive a code.
// Returns the root of a newly allocated tree, or nullptr when freqMap is
// empty. The caller owns the result and frees it with `delete` on the root
// (the Node destructor releases the children). A table with exactly one
// entry yields a tree that consists of a single leaf.
Node* buildHuffmanTree(unordered_map<char, int>& freqMap) {
    // With no symbols the queue below would stay empty and calling top() on
    // it is undefined behaviour, so report "no tree" explicitly.
    if (freqMap.empty()) {
        return nullptr;
    }

    priority_queue<Node*, vector<Node*>, cmp> pq;

    // One leaf per distinct character.
    for (const auto& entry : freqMap) {
        pq.push(new Node(entry.first, entry.second));
    }

    // Repeatedly merge the two lowest-frequency subtrees until one remains.
    while (pq.size() > 1) {
        Node* left = pq.top(); pq.pop();
        Node* right = pq.top(); pq.pop();

        // '$' is only a placeholder character for the internal node.
        Node* parent = new Node('$', left->freq + right->freq);
        parent->left = left;
        parent->right = right;

        pq.push(parent);
    }

    return pq.top();
}

// Fills the character -> bit-string table by walking the Huffman tree.
//
// root:        subtree to visit; a null pointer is ignored.
// code:        bit string accumulated on the path from the tree root to `root`
//              ("0" is appended for a left edge, "1" for a right edge).
// huffmanCode: output table; receives one entry per leaf reached.
//
// Note: when the whole tree is a single leaf, that character is stored with
// an empty code, because no edge is traversed to reach it.
void buildHuffmanCode(Node* root, string code, unordered_map<char, string>& huffmanCode) {
    if (root == nullptr) {
        return;
    }

    const bool isLeaf = (root->left == nullptr) && (root->right == nullptr);
    if (isLeaf) {
        huffmanCode[root->ch] = code;
        return;
    }

    buildHuffmanCode(root->left, code + '0', huffmanCode);
    buildHuffmanCode(root->right, code + '1', huffmanCode);
}

// Encodes `text` with the given Huffman code table.
//
// text:        characters to encode.
// huffmanCode: character -> bit-string table.
// Returns the concatenation of the codes of all characters of `text`, as a
// string made of the characters stored in the table.
//
// A character that has no entry in the table contributes nothing to the
// output. The table is only read: a lookup never inserts an empty entry for
// an unknown character.
string compress(const string& text, const unordered_map<char, string>& huffmanCode) {
    string compressedText;

    for (char c : text) {
        const auto entry = huffmanCode.find(c);
        if (entry != huffmanCode.end()) {
            compressedText += entry->second;
        }
    }

    return compressedText;
}

// 哈夫曼解码，返回原始文本
string decompress(string compressedText, Node* root) {
    string text = "";
    Node* cur = root;

    for (char c : compressedText) {
        if (c == '0') {
            cur = cur->left;
        }
        else {
            cur = cur->right;
        }

        if (!cur->left && !cur->right) {   // 叶子结点
            text += cur->ch;
            cur = root;     // 重置为根节点
        }
    }

    return text;
}


// Whole input text. readInfo() appends every line of the chosen file to it
// (each followed by '\n'); Hafuman() reads it as the data to encode.
string text;
// Buffer for the line currently being read by readInfo().
string line;


// Runs the whole Huffman demo on the global `text`: counts character
// frequencies, builds the tree and the code table, prints the encoded bit
// string and the table (highest frequency first), then decodes the bit string
// again and prints the result.
void Hafuman() {

    // Input comes from the global `text`; an earlier version read one line
    // from stdin instead.
    cout << endl;

    // Count how often each character occurs (spaces and newlines included).
    unordered_map<char, int> freqMap;
    for (char c : text) {
        freqMap[c]++;
    }
    // Make sure the space character has an entry without touching a count the
    // loop above already produced: emplace() does nothing when the key exists.
    // An unconditional assignment here would reset the real space count to 1.
    // The entry also keeps the table non-empty when `text` is empty.
    // NOTE(review): a text without spaces still gets a space entry with
    // count 1 in the table - confirm whether that is wanted.
    freqMap.emplace(' ', 1);

    // Build the Huffman tree; this function owns it until the delete below.
    Node* root = buildHuffmanTree(freqMap);

    // Derive the character -> bit-string table from the tree.
    unordered_map<char, string> huffmanCode;
    buildHuffmanCode(root, "", huffmanCode);

    // Encode the text.
    string compressedText = compress(text, huffmanCode);

    // Print the encoded text and the code table.
    cout << "压缩后的文本: " << compressedText << endl;

    cout << "哈夫曼编码:" << endl;  // rows are sorted by descending frequency

    vector<pair<char, int>> sortedFreqMap(freqMap.begin(), freqMap.end());
    sort(sortedFreqMap.begin(), sortedFreqMap.end(), [](const auto& a, const auto& b) {
        return a.second > b.second;
        });

    // One row per character that received a code.
    for (const auto& entry : sortedFreqMap) {
        const auto codeIt = huffmanCode.find(entry.first);
        if (codeIt != huffmanCode.end()) {
            cout << entry.first << " :Numbers: " << entry.second << " Huffman Code: " << codeIt->second << endl;
        }
    }

    // Decode the bit string again.
    string decompressedText = decompress(compressedText, root);

    // Print the decoded text.
    cout << "解压后的原始文本: " << decompressedText << endl;

    delete root;
}

int readInfo() {
    // 打开文件选择对话框
    OPENFILENAMEA ofn;
    char szFile[260] = { 0 };
    ZeroMemory(&ofn, sizeof(ofn));
    ofn.lStructSize = sizeof(ofn);
    ofn.lpstrFile = szFile;
    ofn.nMaxFile = sizeof(szFile);
    ofn.lpstrFilter = "Text Files (*.txt)\0*.txt\0All Files (*.*)\0*.*\0";
    ofn.nFilterIndex = 1;
    ofn.Flags = OFN_PATHMUSTEXIST | OFN_FILEMUSTEXIST | OFN_NOCHANGEDIR;

    if (GetOpenFileNameA(&ofn) == TRUE) {
        // 将 char 类型字符串转换为 wchar_t 类型字符串
        int len = MultiByteToWideChar(CP_UTF8, 0, szFile, -1, NULL, 0);
        wchar_t* wstr = new wchar_t[len];
        MultiByteToWideChar(CP_UTF8, 0, szFile, -1, wstr, len);
        wstring filename(wstr);
        delete[] wstr;

        ifstream infile(filename);

        if (!infile.is_open()) {
            cout << "Failed to open file." << endl;
            return 1;
        }

        
        while (getline(infile, line)) {
            text += line + "\n";
        }

        cout << "The content of the file is:\n" << text;

        infile.close();

        return 0;
    }
    else {
        cout << "User canceled the file selection dialog." << endl;
        return 1;
    }
}

// Program entry point: load a text file chosen by the user, then run the
// Huffman encode/decode demo on it.
int main() {
    // readInfo() returns non-zero when no file was loaded (dialog cancelled
    // or file could not be opened); there is nothing to encode in that case,
    // so stop and report the failure through the exit code.
    if (readInfo() != 0) {
        return 1;
    }

    Hafuman();

    return 0;
}