利用Huffman树进行文本编码解码的实现

霍夫曼编码压缩与解压实现
本文介绍了一个基于霍夫曼编码的文本压缩与解压程序的实现细节,包括构建霍夫曼树、编码解码算法及其实现流程等关键技术点。

---------------------------------

功能:利用Huffman树进行文本编码解码的实现
环境:WinXP,VC6.0
输入:C:\\in.txt
输出:C:\\in.z(压缩结果)、C:\\in.dec(解压结果)
注:代码中仍保留了输出到控制台的调试信息,如不需要,删除相应的 cout<<...<<endl; 语句即可
----------------------------------
步骤:
1.创建Win32 Console Application 工程HuffmanEncoder,添加头文件Huffmancoding.h和工程文件HuffmanEncoder.cpp
2.创建Win32 Console Application 工程HuffmanDecoder,添加头文件Huffmancoding.h和工程文件HuffmanDecoder.cpp
3.运行HuffmanEncoder,输入要压缩文件路径,这里用C:\\in.txt,产生压缩文件in.z
4.运行HuffmanDecoder,输入要解压文件路径,这里用C:\\in.z,产生解压文件in.dec
5.文件路径可以自行指定,产生的压缩文件和解压文件与源文件位于同一路径下
-----------------------------------
Huffmancoding.h内容:
-----------------------------------
#include <vector>
#include <algorithm>

// One node of the Huffman tree.
// A leaf describes a run: `symbol` repeated `runLen` times, seen `freq` times,
// with its code stored in `codeword` / `codewordLen`. An interior node only
// carries the combined `freq` and its two children.
class HuffmanNode
{
public:
    char symbol;                        // character of the run (leaves only)
    unsigned long codeword, freq;       // assigned bit pattern; occurrence count
    unsigned int runLen, codewordLen;   // run length; number of bits in codeword
    HuffmanNode *left, *right;          // children; both 0 for a leaf

    // Creates an empty leaf. Every field is zeroed so that no member is ever
    // read while indeterminate.
    HuffmanNode()
        : symbol('\0'), codeword(0), freq(0), runLen(0), codewordLen(0),
          left(0), right(0)
    {
    }

    // Creates a node for symbol `s` with frequency `f` and run length `r`,
    // optionally attaching children `lt` / `rt`. The codeword fields start
    // at zero until a codeword is assigned.
    HuffmanNode(char s, unsigned long f, unsigned int r,
                HuffmanNode *lt = 0, HuffmanNode *rt = 0)
        : symbol(s), codeword(0), freq(f), runLen(r), codewordLen(0),
          left(lt), right(rt)
    {
    }
};

class ListNode
{
public:
    HuffmanNode *tree;
    ListNode *next, *prev;
    ListNode()
    {
        next = prev = 0;
    }
    ListNode(ListNode *p, ListNode *n)
    {
        prev = p;
        next = n;
    }
};

// Frequency-table entry: one distinct run (`symbol` repeated `runLen` times)
// together with how often that run occurs.
class DataRec
{
public:
    char symbol;            // character of the run
    unsigned int runLen;    // number of consecutive repetitions
    unsigned long freq;     // how many times this run was seen

    // Zero-initialises every field so a default-constructed record can be
    // compared or copied safely.
    DataRec()
        : symbol('\0'), runLen(0), freq(0)
    {
    }

    // Identity of a run (used by find()): same symbol and same run length.
    // The frequency is deliberately not part of the comparison.
    bool operator== (const DataRec& dr) const
    {
        return symbol == dr.symbol && runLen == dr.runLen;
    }

    // Ordering (used by sort()): ascending frequency only.
    bool operator< (const DataRec& dr) const
    {
        return freq < dr.freq;
    }
};

class HuffmanCoding
{
public:
    HuffmanCoding() : mask(0xff), bytes(4), bits(8), ASCII(256)
    {
        chars = new HuffmanNode*[ASCII+1];
    }
    void compress(char*,ifstream&);
    void decompress(char*,ifstream&);
private:
    const unsigned int bytes, bits, ASCII;
    unsigned int dataSize;
    const unsigned long mask;
    unsigned long charCnt;
    ofstream fOut;
    HuffmanNode *HuffmanTree, **chars;
    vector<DataRec> data;
    void error(char *s)
    {
        cerr << s << endl;
        exit(1);
    }
    void output(unsigned long pack);
    void garnerData(ifstream&);
    void outputFrequencies(ifstream&);
    void read2ByteNum(unsigned int&,ifstream&);
    void read4ByteNum(unsigned long&,ifstream&);
    void inputFrequencies(ifstream&);
    void createHuffmanTree();
    void createCodewords(HuffmanNode*,unsigned long,int);
    void transformTreeToArrayOfLists(HuffmanNode*);
    void encode(ifstream&);
    void decode(ifstream&);
};

void HuffmanCoding::output(unsigned long pack)
{
    char *s = new char[bytes];
    int i;
    for (i = bytes - 1; i >= 0; i--)
    {
        s[i] = pack & mask;
        pack >>= bits;
    }
    for (i = 0; i < bytes; i++)
        fOut.put(s[i]);
}

void HuffmanCoding::garnerData(ifstream& fIn)
{
    char ch, ch2;
    DataRec r;
    vector<DataRec>::iterator i;//定义迭代器
    r.freq = 1;
     for(i = data.begin();i != data.end();i++)
    {
        cout<<i->symbol<<" "<<i->runLen<<" "<<i->freq<<endl;
    }
    for (fIn.get(ch); !fIn.eof(); ch = ch2)
    {
        for (r.runLen = 1, fIn.get(ch2); !fIn.eof() && ch2 == ch; r.runLen++)
            fIn.get(ch2);
        r.symbol = ch;//记录一个run
        if ((i = find(data.begin(),data.end(),r)) == data.end()&& r.symbol != '\n')
            data.push_back(r);//如果run当前不存在,则压入向量中
        else i->freq++;//并将相应的run频率加1
    }
    sort(data.begin(),data.end());//然后重新排序
    cout<<"向量data里的字符与频率"<<endl;
    for(i = data.begin();i != data.end();i++)
    {
        cout<<i->symbol<<" "<<i->runLen<<" "<<i->freq<<endl;
    }
}

// Writes the header of the compressed file to fOut:
//   2 bytes  number of table entries (high byte first)
//   4 bytes  current read position of fIn after clearing its state flags
//   then per entry: 1 byte symbol, 2 bytes run length, 4 bytes frequency.
void HuffmanCoding::outputFrequencies(ifstream& fIn)
{
    const unsigned int entryCount = data.size();
    fOut.put(char(entryCount >> bits));
    fOut.put(char(entryCount));

    fIn.clear();    // the preceding scan left eof set; tellg() needs a good stream
    output((unsigned long)fIn.tellg());

    for (vector<DataRec>::const_iterator rec = data.begin(); rec != data.end(); ++rec)
    {
        const unsigned int run = rec->runLen;
        fOut.put(rec->symbol);
        fOut.put(char(run >> bits));
        fOut.put(char(run));
        output(rec->freq);
    }
}

void HuffmanCoding::read2ByteNum(unsigned int& num, ifstream& fIn)
{
    num = fIn.get();
    num <<= bits;
    num |= fIn.get();//赋值位或
}

void HuffmanCoding::read4ByteNum(unsigned long& num, ifstream& fIn)
{
    num = (unsigned long) fIn.get();
    for (int i = 1; i < 4; i++)
    {
        num <<= bits;
        num |= (unsigned long) fIn.get();
    }
}

// Reads the header of a compressed file: the entry count into dataSize, the
// character count into charCnt, then up to dataSize (symbol, run length,
// frequency) records, which are appended to `data`.
void HuffmanCoding::inputFrequencies(ifstream& fIn)
{
    read2ByteNum(dataSize,fIn);
    read4ByteNum(charCnt,fIn);
    data.reserve(dataSize);

    DataRec entry;
    unsigned int loaded = 0;
    while (!fIn.eof() && loaded < dataSize)
    {
        entry.symbol = fIn.get();
        read2ByteNum(entry.runLen,fIn);
        read4ByteNum(entry.freq,fIn);
        data.push_back(entry);
        ++loaded;
    }
}

void HuffmanCoding::createHuffmanTree()
{
    ListNode *p, *newNode, *head, *tail;
    unsigned long newFreq;
    head = tail = new ListNode;             // initialize list pointers;
    head->tree = new HuffmanNode(data[0].symbol,data[0].freq,data[0].runLen);
    //′′?¨μ¥á′±í
    for (int i = 1; i < data.size(); i++)   // create the rest of the list;
    {
        tail->next = new ListNode(tail,0);
        tail = tail->next;
        tail->tree =
            new HuffmanNode(data[i].symbol,data[i].freq,data[i].runLen);
    }
    int k;
    for(k = 0,p = head; p != 0 ; p = p->next)
    {
        cout<<++k<<p->tree->symbol<<" "<<endl;
    }
    while (head != tail)                    // create one Huffman tree;
    {
        newFreq = head->tree->freq + head->next->tree->freq; // two lowest frequencies
        for (p = tail; p != 0 && p->tree->freq > newFreq; p = p->prev);
        newNode = new ListNode(p,p->next);
        p->next = newNode;
        if (p == tail)
             tail = newNode;
        else newNode->next->prev = newNode;
        newNode->tree = new HuffmanNode('\0',newFreq,0,head->tree,head->next->tree);//插入新生成的结点
        head = head->next->next;
        delete head->prev->prev;
        delete head->prev;
        head->prev = 0;
    }
    HuffmanTree = head->tree;
    while(head->prev!= 0 || head->next!= 0)
    {
        cout<<head->tree->symbol<<" "<<head->tree->runLen<<endl;
        head = head->prev;
    }
    delete head;
}

void HuffmanCoding::createCodewords(HuffmanNode *p, unsigned long codeword, int level)
{
    cout<<p->symbol<<" "<<endl;
    if (p->left == 0 && p->right == 0)           // if p is a leaf,
    {
        p->codeword    = codeword;
        cout<<codeword<<endl;             // store codeword
        p->codewordLen = level;                 // and its lenght,
    }
    else                                         // otherwise add 0
    {
        createCodewords(p->left,  codeword<<1,   level+1); // for left branch

        createCodewords(p->right,(codeword<<1)+1,level+1); // and 1 for right;

        }
}

void HuffmanCoding::transformTreeToArrayOfLists(HuffmanNode *p)
{
    if (p->left == 0 &&  p->right == 0)         // if p is a leaf,
    {
        //由symbol的ASCII码来确定位置
        p->right = chars[(char)p->symbol]; // include it in
        chars[(unsigned char)p->symbol] = p;  // a list associated
    }                                          // with symbol found in p;
    else
    {
        transformTreeToArrayOfLists(p->left);
        transformTreeToArrayOfLists(p->right);
    }
}

void HuffmanCoding::encode(ifstream& fIn)
{
    unsigned long packCnt = 0, hold, maxPack = bytes*bits, pack = 0;//32位的pack
    char ch, ch2;
    int bitsLeft, runLength;
    HuffmanNode *p;
    for (fIn.get(ch); !fIn.eof()&&ch !='\n'; )
    {
        for (runLength = 1, fIn.get(ch2); !fIn.eof() && ch2 == ch; runLength++)
            fIn.get(ch2);
        for (p = chars[(unsigned char) ch];p != 0 && runLength != p->runLen; p = p->right);////获取字符的指针找到相应树结点
        if (p == 0)
            error("A problem in encode()");
        if (p->codewordLen < maxPack - packCnt)     // if enough room in
        {
            //将pack左移codewordLen位,来将codeword加到pack末尾
            pack = (pack << p->codewordLen) | p->codeword; // pack to store new
            //改变当前pack长度
            packCnt += p->codewordLen;             // codeword, shift its
        }                                           // content to the left
        // and attach new codeword;
        else                                        // otherwise move
        {
            //如果pack空间不足
            bitsLeft = maxPack - packCnt;          // pack's content to
            pack <<= bitsLeft;                     // the left by the
            if (bitsLeft != p->codewordLen)        // number of left
            {
                hold = p->codeword;               // spaces and if new
                hold >>= p->codewordLen - bitsLeft;// codeword is longer than
                pack |= hold;                     // room left, transfer
            }                                      // only as many bits as
            // can be fitted in pack;
            else pack |= p->codeword;              // if new codeword
            // exactly fits in
            // pack, transfer it;
            output(pack);                          // output pack as
            cout<<pack<<endl;
            // four chars;
            if (bitsLeft != p->codewordLen)        // transfer
            {
                pack = p->codeword;               // unprocessed bits
                packCnt = maxPack - (p->codewordLen - bitsLeft); // of new
                packCnt = p->codewordLen - bitsLeft; // codeword to pack;
            }
            else packCnt = 0;
        }
                  ch = ch2;
    }
    if (packCnt != 0)
    {
        pack <<= maxPack - packCnt; // transfer leftover codewords and some 0's
        output(pack);
    }
}

void HuffmanCoding::compress(char *inFileName, ifstream& fIn)
{
    char outFileName[30];
    strcpy(outFileName,inFileName);
    if (strchr(outFileName,'.'))               // if there is an extension
        strcpy(strchr(outFileName,'.')+1,"z");// overwrite it with 'z'
    else strcat(outFileName,".z");             // else add extension '.z';
    fOut.open(outFileName,ios::out);//产生压缩文件
//use this line on a PC:
//    fOut.open(outFileName,ios::out|ios::binary);
    garnerData(fIn);
    outputFrequencies(fIn);
    createHuffmanTree();
    createCodewords(HuffmanTree,0,0);
    for (int i = 0; i <= ASCII; i++)
        chars[i] = 0;
    transformTreeToArrayOfLists(HuffmanTree);
     for ( i = 0; i <= ASCII; i++)
     {
         if(chars[i]!= 0)
         cout<<i<<" "<<chars[i]<<endl;
     }

    fIn.clear();      // clear especially the eof flag;
    fIn.seekg(0,ios::beg);
    encode(fIn);
    fIn.clear();      // clear especially the eof flag;
    cout.precision(2);
    cout<<"tellg"<<fIn.tellg()<<endl;
    cout<<"tellp"<<fOut.tellp()<<endl;

    cout << fixed << "Compression rate = " <<
         100.0*(fIn.tellg()-fOut.tellp())/fIn.tellg() << "%\n"
         << "Compression rate without table = " <<
         100.0*(fIn.tellg()-fOut.tellp()+long(data.size()*(2+4)))/fIn.tellg() << endl;
    fOut.close();
}

void HuffmanCoding::decode(ifstream& fIn)
{
    unsigned long chars;
    char ch, bitCnt = 1, mask = 1;
    mask <<= bits - 1;        // change 00000001 to 100000000;
    for (chars = 0, fIn.get(ch); !fIn.eof() && chars < charCnt; )
    {
        for (HuffmanNode *p = HuffmanTree; ; )
        {
            if (p->left == 0 && p->right == 0)
            {
                for (int j = 0; j < p->runLen; j++)
                    fOut.put(p->symbol);
                chars += p->runLen;
                break;
            }
            else if ((ch & mask) == 0)
                p = p->left;
            else p = p->right;
            if (bitCnt++ == bits)   // read next character from fIn
            {
                fIn.get(ch);       // if all bits in ch are checked;
                bitCnt = 1;
            }                       // otherwise move all bits in ch
            else ch <<= 1;          // to the left by one position;
        }
    }
}

void HuffmanCoding::decompress(char *inFileName, ifstream& fIn)
{
    char outFileName[30];
    strcpy(outFileName,inFileName);
    if (strchr(outFileName,'.'))                  // if there is an extension
        strcpy(strchr(outFileName,'.')+1,"dec"); // overwrite it with 'z'
    else strcat(outFileName,".dec");              // else add extension '.z';
    fOut.open(outFileName,ios::out);
    //use this line on a PC:
    //    fOut.open(outFileName,ios::out|ios::binary);
    inputFrequencies(fIn);
    createHuffmanTree();
    createCodewords(HuffmanTree,0,0);
    decode(fIn);
    fOut.close();
}
----------------------------------------
HuffmanEncoder.cpp内容:
------------------------------------------
#include <iostream>
#include <fstream>
#include <cstring>

using namespace std;

#include "HuffmanCoding.h"

int main(int argc, char* argv[]) {
    char fileName[30];
    HuffmanCoding Huffman;
    if (argc != 2) {
        cout << "Enter a file name: ";
        cin  >> fileName;
    }
    else strcpy(fileName,argv[1]);
    //ifstream fIn(fileName);
    //use this line on a PC:
    //ifstream fIn(fileName,ios::binary);
    ifstream fIn;
    fIn.open(fileName);
    if (fIn.fail()) {
        cerr << "Cannot open " << fileName << endl;
        return 0;
    }
    Huffman.compress(fileName,fIn);//压缩文件中的信息
    fIn.close();
    return 0;
}
------------------------------------------
HuffmanDecoder.cpp内容:
-------------------------------------------
#include <iostream>
#include <fstream>
#include <cstring>

using namespace std;

#include "Huffmancoding.h"

int main(int argc, char* argv[]) {
    char fileName[30];
    HuffmanCoding Huffman;
    if (argc != 2) {
        cout << "Enter a file name: ";
        cin  >> fileName;
    }
    else strcpy(fileName,argv[1]);
    //ifstream fIn(fileName);
    //use this line on a PC:
    ifstream fIn(fileName,ios::binary);
    if (fIn.fail()) {
        cerr << "Cannot open " << fileName << endl;
        return 0;
    }
    Huffman.decompress(fileName,fIn);//解码
    fIn.close();
    return 0;
}
-----------------------------------------------
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值