先回顾一下哈夫曼树
huffman树即最优二叉树,是带权路径长度最短的二叉树。哈夫曼树的构建使用贪心算法。
每次选择该集合中权值最小的两个作为叶子结点,父亲节点的权值为叶子节点权值之和。然后又将其父亲重新放进此集合里。重复前面的做法,直到完成哈夫曼树的建立。
每次都要在集合中找出2个权值最小的。这里我们就可以建立一个小堆:堆顶就是当前权值最小的元素,取出堆顶后向下调整堆,放入新结点后向上调整堆就行了。
那么哈夫曼树是怎么实现文件压缩的呢?
1.统计文件中字符出现的个数
2.每个字符的个数作为权值构建哈夫曼树,这样每个字符对应的权值为叶子结点,然后获取每个叶子节点的哈夫曼编码。
3.进行压缩,哈夫曼编码满8位写进压缩文件。
4.解压缩。
代码实现
//heap.h
#define _CRT_SECURE_NO_WARNINGS 1
#pragma once
#include <iostream>
#include<vector>
using namespace std;
// Ordering functor: reports whether the left operand compares less than the
// right one using T's operator<. It is the default comparator of Heap.
template<class T>
struct Less
{
    // Returns true when l < r. Declared const so the functor can also be
    // invoked through a const object or a const reference.
    bool operator()(const T& l, const T& r) const
    {
        return l < r;
    }
};
// Binary heap stored in a vector. compare(a, b) == true means `a` has to sit
// closer to the top than `b`; with the default Less<T> the smallest element is
// therefore at the top.
template<class T, class Compare = Less<T>>
class Heap
{
public:
    // Creates an empty heap.
    Heap()
    {}

    // Builds a heap from the n elements starting at a.
    Heap(T* a, size_t n)
    {
        _a.reserve(n);
        for (size_t i = 0; i < n; i++)
        {
            _a.push_back(a[i]);
        }
        // Sift down every parent, last parent first. The index is computed in
        // signed arithmetic: the unsigned form (size - 2) / 2 wraps around
        // when the heap holds fewer than two elements.
        for (int i = (static_cast<int>(_a.size()) - 2) / 2; i >= 0; i--)
        {
            _AdjustDown(i);
        }
    }

    // Inserts x and restores the heap order.
    void Push(const T& x)
    {
        _a.push_back(x);
        _AdjustUp(static_cast<int>(_a.size()) - 1);
    }

    // Removes the top element. Does nothing on an empty heap.
    void Pop()
    {
        if (_a.empty())
        {
            return;
        }
        if (_a.size() > 1)
        {
            using std::swap;
            // Move the top to the end so it can be dropped with pop_back.
            swap(_a.front(), _a.back());
        }
        _a.pop_back();
        _AdjustDown(0);
    }

    // Returns the top element. Precondition: the heap is not empty.
    T& Top()
    {
        return _a[0];
    }

    // Read-only access to the top element. Precondition: the heap is not empty.
    const T& Top() const
    {
        return _a[0];
    }

    // Number of elements currently stored.
    size_t Size() const
    {
        return _a.size();
    }

protected:
    // Sifts the element at index root down until neither child has to sit
    // above it.
    void _AdjustDown(int root)
    {
        using std::swap;
        Compare compare;
        const int size = static_cast<int>(_a.size());
        int parent = root;
        int child = parent * 2 + 1;
        while (child < size)
        {
            // Pick whichever child has to be closer to the top.
            if (child + 1 < size && compare(_a[child + 1], _a[child]))
            {
                child++;
            }
            if (!compare(_a[child], _a[parent]))
            {
                break;
            }
            swap(_a[parent], _a[child]);
            parent = child;
            child = parent * 2 + 1;
        }
    }

    // Sifts the element at index root up until its parent no longer has to
    // sit below it.
    void _AdjustUp(int root)
    {
        using std::swap;
        Compare compare;
        int child = root;
        while (child > 0)
        {
            const int parent = (child - 1) / 2;
            if (!compare(_a[child], _a[parent]))
            {
                break;
            }
            swap(_a[parent], _a[child]);
            child = parent;
        }
    }

protected:
    std::vector<T> _a; // heap storage; the children of index i are 2i+1 and 2i+2
};
void TestHeap()
{
int a[] = { 1, 3, 9, 3, 54, 87, 21, 15, 8 };
int n = sizeof(a) / sizeof(a[0]);
Heap<int> hp(a, n);
hp.Push(2);
}
//huffman.h
#define _CRT_SECURE_NO_WARNINGS 1
#pragma once
#include"heap.h"
// Node of a Huffman tree: a weight plus links to both children and to the
// parent. A freshly constructed node has no links.
template<class T>
struct HuffmanTreeNode
{
    T _w;                                   // weight carried by this node
    HuffmanTreeNode<T>* _left = nullptr;    // left child, null when absent
    HuffmanTreeNode<T>* _right = nullptr;   // right child, null when absent
    HuffmanTreeNode<T>* _parent = nullptr;  // parent, null when absent

    // Creates an unlinked node whose weight is a copy of x.
    HuffmanTreeNode(const T& x)
        : _w(x)
    {}
};
// Huffman tree over weights of type T. The tree owns its nodes and frees them
// in the destructor. T must provide operator!=, operator< and operator+.
template<class T>
class HuffmanTree
{
    typedef HuffmanTreeNode<T> Node;
public:
    // Creates an empty tree.
    HuffmanTree()
        :_root(nullptr)
    {}

    ~HuffmanTree()
    {
        _Destory(_root);
        _root = nullptr;
    }

    // The tree owns raw nodes, so a member-wise copy would free them twice.
    HuffmanTree(const HuffmanTree&) = delete;
    HuffmanTree& operator=(const HuffmanTree&) = delete;

    // Builds the tree from the n weights starting at a, skipping every entry
    // that does not differ from `invalid` (tested with operator!=). When no
    // entry is left the tree stays empty and GetRoot() yields null.
    HuffmanTree(T* a, size_t n, const T& invalid)
        :_root(nullptr)
    {
        // Orders nodes by weight so the heap top is the lightest node.
        struct Compare
        {
            bool operator()(Node* l, Node* r) const
            {
                return l->_w < r->_w;
            }
        };
        Heap<Node*, Compare> minHeap;
        for (size_t i = 0; i < n; i++)
        {
            if (a[i] != invalid)
            {
                minHeap.Push(new Node(a[i]));
            }
        }
        // Greedy step: repeatedly merge the two lightest nodes under a new
        // parent whose weight is their sum, until a single node is left.
        while (minHeap.Size() > 1)
        {
            Node* left = minHeap.Top();
            minHeap.Pop();
            Node* right = minHeap.Top();
            minHeap.Pop();
            Node* parent = new Node(left->_w + right->_w);
            parent->_left = left;
            parent->_right = right;
            left->_parent = parent;
            right->_parent = parent;
            minHeap.Push(parent);
        }
        // Reading Top() of an empty heap is undefined, so only take the root
        // when at least one valid weight was supplied.
        if (minHeap.Size() > 0)
        {
            _root = minHeap.Top();
        }
    }

    // Returns a reference to the root pointer (null for an empty tree).
    Node*& GetRoot()
    {
        return _root;
    }

public:
    // Deletes the subtree rooted at root, children first.
    void _Destory(Node* root)
    {
        if (root == nullptr)
        {
            return;
        }
        _Destory(root->_left);
        _Destory(root->_right);
        delete root;
    }

protected:
    Node* _root; // owned; null when the tree is empty
};
/*void TestHuffManTree()
{
int a[] = { 0, 4, 2, 1, 3, 0};
int n = sizeof(a) / sizeof(a[0]);
HuffmanTree<int> t(a, n, 0);
cout << endl;
}*/
//FileCompress.h
#define _CRT_SECURE_NO_WARNINGS 1
#include"heap.h"
#include"HuffManTree.h"
#include<string>
#include<assert.h>
#include<algorithm>
typedef long long longtype;

// Per-byte record used by the compressor. It also serves as the weight type
// of the Huffman tree, so every operator below looks at _count only.
struct CharInfo
{
    longtype _count;   // number of times the byte occurs
    std::string _code; // Huffman code of the byte as a string of '0'/'1' characters
    char _ch;          // the byte itself

    // Creates a record with the given count, an empty code and _ch set to 0,
    // so that no member is left uninitialised.
    CharInfo(const longtype x = 0)
        :_count(x)
        , _ch(0)
    {}

    // Two records differ when their counts differ.
    bool operator!=(const CharInfo& info) const
    {
        return _count != info._count;
    }

    // Yields a record whose count is the sum of both counts; the result
    // carries no code and no meaningful byte.
    CharInfo operator+(const CharInfo& info) const
    {
        return CharInfo(_count + info._count);
    }

    // Orders records by count.
    bool operator<(const CharInfo& info) const
    {
        return _count < info._count;
    }
};
// Record layout of the ".config" file: one byte value and how often it
// occurs. FileCompess writes and reads these records as raw memory.
struct CountInfo
{
    char _ch;        // the byte value
    longtype _count; // number of occurrences; -1 marks the terminating record

    // Zero-initialises both members so a record that is written without an
    // explicit _ch (the terminator) does not contain an indeterminate value.
    CountInfo()
        :_ch(0)
        , _count(0)
    {}
};
class FileCompess
{
public:
FileCompess()
{
for (int i = 0; i < 256; i++)
{
_infos[i]._ch = i;
_infos[i]._count = 0;
}
}
void Compess(const char* filename)
{
//统计字符个数
assert(filename);
FILE* fout = fopen(filename, "rb");
assert(fout);
char ch = fgetc(fout);
while (!feof(fout))
{
_infos[(unsigned char)ch]._count++;
ch = fgetc(fout);
}
//构建哈夫曼树
CharInfo invalid;
invalid._count = 0;
HuffmanTree<CharInfo> tree(_infos, 256, invalid);
//获取哈夫曼编码
GetHuffmanCode(tree.GetRoot());
//压缩
string compressfile = filename;
compressfile += ".huffman";
FILE* fin = fopen(compressfile.c_str(), "wb");
assert(fin);
fseek(fout, 0, SEEK_SET);//将指针偏移到文件开始
ch = fgetc(fout);
int pos = 0;
char value = 0;
while (!feof(fout))
{
string& code = _infos[(unsigned char)ch]._code;
for (size_t i = 0; i < code.size(); i++)
{
value <<= 1;
if (code[i] == '1')
{
value |= 1;
}
else
{
value |= 0;
}
pos++;
if (pos == 8)
{
fputc(value, fin);
pos = 0;
value = 0;
}
}
ch = fgetc(fout);
}
if (pos)
{
value <<= (8 - pos);
fputc(value, fin);
}
//写配置文件为解压缩
string configfile = filename;
configfile += ".config";
FILE* fcon = fopen(configfile.c_str(), "wb");
assert(fcon);
CountInfo info;
for (size_t i = 0; i < 256; i++)
{
if (_infos[i]._count)
{
info._ch = _infos[i]._ch;
info._count = _infos[i]._count;
fwrite(&info, sizeof(info), 1, fcon);
}
}
CountInfo info2;
info2._count = -1;
fwrite(&info2, sizeof(info2), 1, fcon);
fclose(fout);
fclose(fin);
fclose(fcon);
}
void UnCompess(const char* filename)
{
//读配置文件
string configfile = filename;
configfile += ".config";
FILE* confile = fopen(configfile.c_str(), "rb");
CountInfo info;
while (1)
{
fread(&info, sizeof(info), 1, confile);
if (info._count == -1)
{
break;
}
_infos[(unsigned char)info._ch]._ch = info._ch;
_infos[(unsigned char)info._ch]._count = info._count;
}
string uncompressfile(filename);
size_t pos = uncompressfile.rfind('.');
assert(pos != string::npos);
uncompressfile = uncompressfile.substr(0,pos);
uncompressfile += ".unhaffman";
//还原的文件
FILE* fin = fopen(uncompressfile.c_str(), "wb");
assert(fin);
//压缩的文件
string file = filename;
file += ".huffman";
FILE* fout = fopen(file.c_str(), "rb");
assert(fout);
//重建哈夫曼树
CharInfo invalid;
invalid._count = 0;
HuffmanTree<CharInfo>tree(_infos, 256, invalid);
HuffmanTreeNode<CharInfo>* root = tree.GetRoot();
HuffmanTreeNode<CharInfo>* cur = root;
longtype count = root->_w._count;
char value = fgetc(fout);
while (!feof(fout))
{
int pos = 7;
char test = 1;
while (pos >= 0)
{
if (value & (test << pos))//找出读出字符的每一位
{
cur = cur->_right;
}
else
{
cur = cur->_left;
}
if (cur->_left == NULL&&cur->_right == NULL)
{
fputc(cur->_w._ch, fin);
cur = root;
count--;
}
pos--;
}
if (count == 0)//循环从这退出,当压缩的时候,最后不够8位就会补0,count是字符的个数,当解压缩的count==0时,说明已解压缩完成,就不会把无效的字符解压说出来。
{
break;
}
value = fgetc(fout);
}
fclose(fout);
fclose(fin);
}
protected:
//构建哈夫曼编码
void GetHuffmanCode(HuffmanTreeNode<CharInfo>* root)
{
if (root == NULL)
{
return;
}
if (root->_left == NULL&&root->_right == NULL)
{
HuffmanTreeNode<CharInfo>* cur = root;
HuffmanTreeNode<CharInfo>* parent = cur->_parent;
string& code = _infos[(unsigned char)root->_w._ch]._code;
while (parent)
{
if (cur == parent->_left)
{
code += '0';
}
else
{
code += '1';
}
cur = parent;
parent = cur->_parent;
}
reverse(code.begin(), code.end());
}
GetHuffmanCode(root->_left);
GetHuffmanCode(root->_right);
}
protected:
CharInfo _infos[256];
};
void test()
{
//TestHeap();
//TestHuffManTree();
FileCompess t;
t.Compess("x.jpg");
//cout << "压缩成功" << endl;
//t.Compess("xs.jpg");
//t.UnCompess("xs.jpg");
t.UnCompess("x.jpg");
cout << "解压成功" << endl;
}
注:在做的时候哈夫曼树一定先要测试好,要不然就是你调试的一大坑。还有要用到的仿函数都要一一实现。记得一定要强转成unsigned char,否则程序会崩掉。这里我用的是二进制读写文件,所以用feof来判断文件结束。写配置文件考虑到精度的丢失,用了结构体写进读出。