Huffman压缩解压器

//数据类型:

MAX_FILE_SIZE 0x3f3f3f  // 最大文件长度
MAX_NODE_SIZE 300   //最大节点数
priority_queue<int , vector<int>,  Hufode> N //堆排序
char node[MAX_NODE_SIZE + 1];   //节点元素
char nodeNum[MAX_NODE_SIZE + 1];  //节点元素值
Int tbuf[MAX_NODE_SIZE + 1];    //树缓冲区
char code[MAX_NODE_SIZE][]; //节点路径
char filename[30];  //文件名
struct HufNode{     //节点信息
    int weight, parents, lchild, rchild;
    HufNode(){}; //自定义节点操作,用于优先队列
}HUFTREE[MAX_NODE_SIZE];

//功能描述:

COMPRESS:
Creat_Node(fbuf, flen);     //得到节点权值
            Creat_Tree(count);      //构建树
            Code_Node(count);           //构建码表
            Compress(count);                //创建压缩文件
EXTRACT:
Restore_Tree(char *t, char v, int &pos) //还原树
Extract(int zero, int len, char *str)       //解压文件

这次课程设计我选择的题目是哈弗曼编/译器,本来我觉得哈弗曼进行压缩解码并不是很难,只要你理解了哈夫曼算法,但在实际编写中,我发现并不是这样的。
首先,对任意文件进行压缩,要理解到任何类型的文件实质上都是可以进行压缩的。一开始为考虑这个钻了牛角尖,想着去统一用wchar_t保存或是转为Unicode等等什么的。但其实不必那么复杂,因为汉字(不仅仅汉字, 任何字符都是这样的)都是以字节为单位的,由多个字节组成的,将其分开对待,因为最终解压时恢复原串还是按照原有顺序组装,所以和纯英文文件的实现没有什 么区别);那么,实质上,所构建的哈夫曼树最只有256个叶子节点.。这样程序应该可以针对图片,各种内容的文本文件等进行压缩。还原后应该与源文件是相同的。
其次,在进行树的构造的时候,因为要附加开辟一个大于源文件3,4倍的空间。我是在堆中分配空间,对程序中资源分配不是很好,这样就对压缩的目标文件有一定的要求,文件不能太大,否则会造成内存溢出。对于小文件来说,压缩有些得不偿失,因为在压缩时必须将构建的树的码表同时存储进去,文件太小但树可能是文件的好几倍大。不同内容的文件压缩率也不同,相对来说字符类型压缩比率高一些,汉子类型压缩比率比较低。
压缩中,对于一个文件,必须要遍历一次利用统排得到每个节点的权值。相对于比较耗费资源的是构建树的时候,要每次选择其中最小的两个操作,后放回新数据再进行排序。这是十分耗费的,感觉相对来说利用利用优先队列构建大根堆是比较好的选择,堆排的时间复杂度是而二维阶,对数据要求没有那么高。因为是所得到的压缩码都是01字符串,尽可能的减少存储空间,将所得到的01码进行或运算,使得其八位构成一个字节,不够八位的01码在其后补0,并记录个数,将这些写入压缩文件中,码表应放在压缩文件首。
解压相对来说比较简单,用文件中有的码表还原树还原文件。感觉代码还可以优化很多,而且运行时不太稳定,通过这次课程设计收获了很多。

参考资料
http://blog.youkuaiyun.com/rushkid02/article/details/7687125
http://blog.youkuaiyun.com/tsdl2009/article/details/6514057
http://blog.youkuaiyun.com/ns_code/article/details/20227303
http://blog.youkuaiyun.com/to_be_better/article/details/50431352

但是因为我是用数组开的,所以不压缩大文件,图片也不可以。��

压缩

#include <iostream>
#include <cstdio>
#include <algorithm>
#include <queue>
#include <fstream>
#include <cstring>
using namespace std;

#define MAX_FILE_SIZE 1000000
#define MAX_NODE_SIZE 300

unsigned char *node = new unsigned char[MAX_NODE_SIZE + 1];
int *nodeNum = new int[MAX_FILE_SIZE + 1];
int *tbuf = new int[MAX_NODE_SIZE + 1];
char *code[MAX_NODE_SIZE];
char filename[30];

struct HufNode
{
    int weight, parents, lchild, rchild;
    HufNode(){};
}HUFTREE[MAX_NODE_SIZE];

struct NodeGreat{
    bool operator()(int a, int b){
        return HUFTREE[a].weight > HUFTREE[b].weight;
    }
};


//compress, return boolean, leaves count
bool Compress(int count, long flen, unsigned char *fbuf){


    ofstream fout(filename, ios::out);
    if(!fout.good())
        throw "open temp file filed";
            //write node count
    fout << count << endl;
            //write code list in file.temp, node and code
    for (int i = 1; i < count; ++i)
    {
        fout << node[i] << endl;
        fout << code[i] << endl;
    }

            //set temp file pos
    long  tempPos = 0;
            //set a temp file buffer
    char *tempBuf = new char[MAX_FILE_SIZE];
            // %
    int bit = 7;

    for(long i = 0; i < flen; i++)
    {
        int k = 0;
                //fbuf[i]:file content; tbuf[fbuf[i]]:node; code[tbuf[fbuf[i]]]:node code;
        while(code[nodeNum[fbuf[i]]][k] != '\0')
        {
                    //turn char to a bit
            tempBuf[tempPos] |= (code[nodeNum[fbuf[i]]][k]-'0') << bit--;
                    //every eight bit for a Byte,
            if(bit < 0)
            {
                bit = 7;
                tempPos++;
            }
            ++k;
        }
    }
            //note the number of zero
    int zero = 0;
    if(bit != 7)
        zero = bit % 7 + 1, tempPos++;
            //write 0 and space in temp file;
    fout << zero << " " << tempPos << endl;
            //write all the tempbuf in the temp file
            //cout << tempBuf << endl;
    fout.write(tempBuf, sizeof(char)*tempPos);

//    cout << zero <<tempBuf << endl;

    delete[] tempBuf;
    fout.close();
}

//get the Node code with 0 and 1, leaves count
void Code_Node(int count){
            //temp path
    char *path = new char[count];
    path[count-1] = '\0';

            //get leaves path in Code
    for (int i = 1; i < count; ++i)
    {
        int child = i;
        int parent = HUFTREE[child].parents;
        int num = count-1;
                //from leaf to root
        while(parent){
                    //path wont long as leaves count;
            num--;
            if(HUFTREE[parent].rchild == child){
                path[num] = '0';
            }
            else if(HUFTREE[parent].lchild == child){
                path[num] = '1';
            }
            else{
                throw "code tree error";
            }

            child = parent;
            parent = HUFTREE[parent].parents;
        }

                //copy to Code
        code[i] = new char[count - num];
        strcpy(code[i], &path[num]);
        // << code[i] << endl;
    }
}



//build tree
void Creat_Tree(int count){

    printf("%ld\n", count);
            //priority queues save the index
    priority_queue<int, vector<int>, NodeGreat> N;
    for (int i = 1; i < count; ++i)
    {
        N.push(i);
    }

    printf("he;;op");
    int len = count;
    for (int i = 0; i < count - 2; i++)
    {
                //get the minimum two;
        int lc, rc;
        lc = N.top();
        N.pop();
        rc = N.top();
        N.pop();
                //make node
        HUFTREE[len].weight = HUFTREE[lc].weight + HUFTREE[rc].weight;
        HUFTREE[len].lchild = lc;
        HUFTREE[len].rchild = rc;
        HUFTREE[lc].parents = HUFTREE[rc].parents = len;

        N.push(len++);
    }

            //set the root parents;
    printf("he;;op");
    int root;
    root = N.top();
    HUFTREE[root].parents = 0;
}


//get tree leaves, return leaves count;
int Creat_Node(unsigned char *fbuf, long flen){

            //get tree node value;
    for (int i = 0; i < flen; ++i)
    {
        tbuf[fbuf[i]]++;
    }
            //tree leaves
            //
    int count = 1;
    for (int i = 0; i < flen; ++i)
    {
        if(tbuf[fbuf[i]]){

            HUFTREE[count].weight = tbuf[fbuf[i]];
            HUFTREE[count].lchild = 0;
            HUFTREE[count].rchild = 0;
            HUFTREE[count].parents = 0;
            tbuf[fbuf[i]] = 0;

                    //note the node number
            if(!nodeNum[fbuf[i]]){
                nodeNum[fbuf[i]] = count;
            }
                    // note the node
            node[count] = fbuf[i];
            ++count;
            //cout << fbuf[i] << endl;
        }
    }

    //cout << count << endl;
            //make tbuf values;
    for (int i = 0; i < flen; ++i)
    {
        tbuf[fbuf[i]]++;
    }
    return count;
}


int main(int argc, char* argv[]){


            //copy filename
    sprintf(filename, "%s.temp", argv[1]);

    ifstream fin;

            // file check, put pointer on the file end
    fin.open(argv[1], ios::ate | ios::in);
    if(!fin.good())
        throw "open file filed";

            //get file length, put pointer on the file begin
    long flen = fin.tellg();
    if(flen > MAX_FILE_SIZE)
        throw "file too big";
    fin.seekg(0, ios::beg);

            //copy file to file buffer
    unsigned char *fbuf = new unsigned char[flen + 1];
    fin.read((char*)fbuf, flen);
    fin.close();
            //file operater done;
    cout << "FILE: " << argv[1] << "  LENHTH: " << flen << "Btye" << endl;

            //creaat huffuman Node, get node count
    int count = 0;

    count = Creat_Node(fbuf, flen);
            // build tree
    Creat_Tree(count);
            // get the node code
    printf("not here\n");
    Code_Node(count);
    printf("not here\n");
            //compress the file
    Compress(count, flen, fbuf);

    printf("not here\n");
    delete[] fbuf;

    return 0;
}

解压

#include <iostream>
#include <cstring>
#include <cstdio>
#include <algorithm>
#include <queue>
#include <fstream>
#include <cstring>
using namespace std;

#define MAX_FILE_SIZE 0x3f3f3f
#define MAX_NODE_SIZE 300

char *fbuf = new char[MAX_FILE_SIZE + 1];
char *ext = new char[MAX_FILE_SIZE + 1];
char filename[30];

struct HufNode
{
    int weight, parents, lchild, rchild;
    char elem;
    HufNode(){};
}HUFTREE[MAX_NODE_SIZE];

struct NodeGreat{
    bool operator()(int a, int b){
        return HUFTREE[a].weight > HUFTREE[b].weight;
    }
};



//creat the tree with code list,
void Restore_Tree(char node, char* code, int &nodePos){
            //the tree root;
    int NODE = 0;
    for (int i = 0; code[i] != '\0' ; ++i)
    {
        if(code[i] == '1'){
            if(HUFTREE[NODE].rchild == 0){
                HUFTREE[NODE].rchild = nodePos++;
            }
            NODE = HUFTREE[NODE].rchild;
        }
        else{
            if(HUFTREE[NODE].lchild == 0){
                HUFTREE[NODE].lchild = nodePos++;
            }
            NODE = HUFTREE[NODE].lchild;
        }
   // num++;
    }
    //cout << "ss" <<NODE << node << endl;
    HUFTREE[NODE].elem = node;
}


void Extract(int zero, long flen, char *fbuf){
            //set file pos, NODE, end 0 count
    long long filePos = 0;
    int NODE = 0;
    int end = 0;
    for(int i = 0; i < flen; i++)
    {
                //temp for temp file element
        char temp = fbuf[i];
                //in case 0
        if(i == flen - 1)
            end = zero;
                //analyse file element
        for(int j = 7; j >= end; j--)
        {
                    //& operator, return 1 or 0
            if((1 << j) & temp)
                        //if 1
                NODE = HUFTREE[NODE].rchild;
            else
                        // if 0
                NODE = HUFTREE[NODE].lchild;
                    // when it is the leaf
            if(HUFTREE[NODE].rchild == 0 && HUFTREE[NODE].lchild == 0)
            {
                //cout << HUFTREE[NODE].elem << endl;
                ext[filePos++] = HUFTREE[NODE].elem;
                NODE = 0;
            }
        }
    }

   // cout<< "len::" << filePos <<endl;
    ofstream out(filename, ios::out);
   // cout << ext << endl;
    out.write(ext, sizeof(char)*(filePos));
    out.close();
}




int main(int argc, char *argv[]){

            //copy filename
    int tl = strlen(argv[1]);
    memcpy(filename, argv[1], tl-1-4);
    sprintf(filename, "%s.out", filename);
            //open compress file
    ifstream fin(argv[1], ios::in);
    if(!fin.good())
        throw "open file error";
            //get node count, node and code;
    int count;
    char *code = new char[count + 1];
    char *node = new char[5];
            //get count
    fin >> count;
    fin.getline(node, count);
            //node pos
    int nodePos = 1;
            //each loop get a node and a code
    for (int i = 1; i < count; ++i)
    {
        fin.getline(node, count);
                //if node is blank, get code again
        if(node[0] == '\0'){
            fin.getline(node, count);
            node[0] = '\n';
        }
        fin.getline(code, count);
                //creat a tree with list code
        Restore_Tree(node[0], code, nodePos);
                //reset code ????
        node[0] = 0;
    }

            //the zero count, temp file length;
    int zero = 0;
    long long flen;
    char* tbuf = new char[flen + 1];
            //get zero count, get temp file length
    fin>>zero;
    fin>>flen;
    //cout << flen << endl;
            //get temp file content
    fin.getline(tbuf, count);
            //copy temp file content to file buffer
    fin.read(fbuf,sizeof(char)*flen);
            //translate the temp file
    Extract(zero, flen, fbuf);
    return 0;
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值