Heap.h:
#pragma once
#include<assert.h>
#include<stddef.h>
#include<utility>
#include<vector>
// Comparison functors used to choose the heap ordering.

/// Strict "less than" comparison; using it as the heap comparator gives a min-heap.
template<class T>
struct Less
{
    /// @param l left operand
    /// @param r right operand
    /// @return true when `l < r`.
    /// The call operator is const so that const comparator objects can be invoked.
    bool operator()(const T& l, const T& r) const
    {
        return l < r;
    }
};
/// Strict "greater than" comparison; using it as the heap comparator gives a max-heap.
template<class T>
struct Greater
{
    /// @param l left operand
    /// @param r right operand
    /// @return true when `l > r`.
    /// The call operator is const so that const comparator objects can be invoked.
    bool operator()(const T& l, const T& r) const
    {
        return l > r;
    }
};
// The default comparator is the Less functor defined earlier in this header.
// Redeclaring it is harmless and lets this class template be parsed on its own.
template<class T>
struct Less;

/// Binary heap stored in a contiguous array (children of index i live at
/// 2*i+1 and 2*i+2).
///
/// `Compare()(a, b)` must return true when `a` has to sit closer to the top
/// than `b`: Less<T> (the default) yields a min-heap, Greater<T> a max-heap.
/// Compare must be default-constructible; its call operator may be non-const.
template<class T, class Compare = Less<T>>
class Heap
{
public:
    /// Creates an empty heap.
    Heap()
    {}

    /// Builds a heap from the first `size` elements of `a`.
    /// @param a    source array; may be null only when `size` is 0
    /// @param size number of elements to copy
    Heap(const T* a, size_t size)
    {
        assert(a != nullptr || size == 0);
        // reserve (not resize): resize would prepend `size` default-constructed
        // elements and the copied values would end up outside the heapified range.
        _a.reserve(size);
        for (size_t i = 0; i < size; ++i)
        {
            _a.push_back(a[i]);
        }
        // Heapify: sift down every internal node, starting from the last parent
        // (index size/2 - 1). Counting i from size/2 down to 1 avoids unsigned
        // underflow when the heap has fewer than two elements.
        for (size_t i = _a.size() / 2; i > 0; --i)
        {
            _AdjustDown(i - 1, _a.size());
        }
    }

    /// Inserts `x` and restores the heap property.
    void Push(const T& x)
    {
        _a.push_back(x);
        _AdjustUp(static_cast<int>(_a.size() - 1));
    }

    /// Removes the top element. The heap must not be empty.
    void Pop()
    {
        assert(!_a.empty());
        using std::swap;
        // Move the top to the back, drop it, then repair from the root.
        swap(_a.front(), _a.back());
        _a.pop_back();
        _AdjustDown(0, _a.size());
    }

    /// @return the top element. The heap must not be empty.
    T& Top()
    {
        assert(!_a.empty());
        return _a[0];
    }

    /// Read-only access to the top element. The heap must not be empty.
    const T& Top() const
    {
        assert(!_a.empty());
        return _a[0];
    }

    /// @return the number of stored elements.
    int Size() const
    {
        return static_cast<int>(_a.size());
    }

    /// @return true when the heap holds no elements.
    bool Empty() const
    {
        return _a.empty();
    }

    /// Sift-down: moves the element at `parent` towards the leaves until
    /// neither child has to be above it.
    /// @param parent index of the element to sink
    /// @param size   number of leading elements of the array that form the heap
    void _AdjustDown(size_t parent, size_t size)
    {
        assert(size <= _a.size());
        using std::swap;
        Compare com;
        size_t child = 2 * parent + 1; // left child
        while (child < size)
        {
            // Pick the right child when it exists and must be above the left one.
            if (child + 1 < size && com(_a[child + 1], _a[child]))
            {
                ++child;
            }
            // Parent already in place relative to its preferred child: done.
            if (!com(_a[child], _a[parent]))
            {
                break;
            }
            swap(_a[child], _a[parent]);
            parent = child;
            child = 2 * parent + 1;
        }
    }

    /// Sift-up: moves the element at `child` towards the root while it has to
    /// be above its parent. Does nothing for `child <= 0`.
    /// @param child index of the element to raise
    void _AdjustUp(int child)
    {
        using std::swap;
        Compare com;
        while (child > 0)
        {
            const int parent = (child - 1) / 2;
            if (!com(_a[child], _a[parent]))
            {
                break;
            }
            swap(_a[child], _a[parent]);
            child = parent;
        }
    }

    std::vector<T> _a; // heap storage; index 0 is the top
};
#####################################################################################
Huffman.h:
#pragma once
#include<assert.h>
#include"Heap.h"
// A single Huffman tree node: a weight plus links to its two subtrees.
// A freshly constructed node has no children.
template<class T>
struct HuffmanNode
{
    HuffmanNode<T>* _left = nullptr;  // left subtree, null when absent
    HuffmanNode<T>* _right = nullptr; // right subtree, null when absent
    T _weight;                        // payload the tree is ordered by

    // Creates an unlinked node that stores a copy of `w`.
    HuffmanNode(const T& w)
        : _weight(w)
    {}
};
template<class T>
class HuffmanTree
{
typedef HuffmanNode <T> Node;
public:
HuffmanTree()
:_root( NULL)
{}
HuffmanTree( const T * a, size_t size,const T& invalid)
{
_root = CreateTree( a, size , invalid);
}
Node* CreateTree(const T* a, size_t size,const T& invalid)
{
//仿函数
struct Compare
{
bool operator()(const Node* l,const Node* r)
{
return l ->_weight < r->_weight;
}
};
assert(a);
//小堆
Heap<Node *, Compare> minHeap;
for (size_t i = 0; i < size; ++i)
{
if (a [i] != invalid)
{
minHeap.Push( new Node (a[i]));
}
}
//生成HuffmanTree
while (minHeap.Size()>1)
{
//选出当前最小的两个节点
Node* left = minHeap.Top();
minHeap.Pop();
Node* right = minHeap.Top();
minHeap.Pop();
//权值相加,生成父节点
Node* parent = new Node(left->_weight + right->_weight);
parent->_left = left;
parent->_right = right;
minHeap.Push(parent);
}
return minHeap.Top();
}
//获得根节点
Node* GetRootNode()
{
return _root;
}
protected:
Node* _root;
};
#########################################################################################
FileCompress.h:
#include<string>
#include"Huffman.h"
#include<assert.h>
typedef unsigned long long LongType;

/// Statistics kept for one byte value: the byte itself, how many times it
/// occurs and the Huffman code assigned to it (a string of '0'/'1').
/// All comparisons and the sum look at `_count` only.
struct CharInfo
{
    unsigned char _ch;  // byte value
    LongType _count;    // number of occurrences
    std::string _code;  // Huffman code as text, empty until assigned

    /// Creates an entry for byte 0 with the given occurrence count.
    /// Intentionally not explicit: existing code relies on building a CharInfo
    /// straight from a count.
    CharInfo(const LongType count = 0)
        : _ch(0)
        , _count(count)
    {}

    /// @return a new entry whose count is the sum of both counts; the byte
    /// value and code of the result are left at their defaults.
    CharInfo operator+(const CharInfo& fi) const
    {
        return CharInfo(_count + fi._count);
    }

    /// @return true when the counts differ. Takes its argument by reference
    /// so that comparing does not copy the code string.
    bool operator!=(const CharInfo& fi) const
    {
        return _count != fi._count;
    }

    /// @return true when this entry occurs fewer times than `fi`.
    bool operator<(const CharInfo& fi) const
    {
        return _count < fi._count;
    }
};
template<class T>
class FileCompress
{
public:
FileCompress()
{
for (int i = 0; i < 256; i++)
{
_infos[i]._ch = i;
_infos[i]._count = 0;
}
}
public:
//压缩
void Compress(const char* filename)
{
//统计字符个数
assert(filename);
FILE* fOut = fopen(filename , "rb");
assert(fOut);//判断是否读取成功
char ch = fgetc(fOut);//unsigned char ch=fgetc(fOut);不可行
while (!feof(fOut))//EOF值为-1,与无符号char比较会造成死循环
{
_infos[( unsigned char )ch]._count++;
ch = fgetc(fOut);
}
//构建HuffmanTree
CharInfo invalid(0);
HuffmanTree<CharInfo > tree(_infos, 256, invalid);
//生成Huffman code
string code;//编码
GenerateHuffmanCode(tree.GetRootNode(), code);
//读取源文件,压缩
string compress = filename ;
compress += ".compress";//加上压缩文件后缀
FILE* fIn = fopen(compress.c_str(), "wb" );
assert(fIn);
fseek(fOut, 0, SEEK_SET);//文件指针
char value = 0;
int pos = 0;
ch = fgetc(fOut); //读取字符
while (!feof(fOut))
{
//取出Huffman code
string& code = _infos[(unsigned char)ch]._code;
for (size_t i = 0; i < code.size(); ++i)
{
value <<= 1;
//将字符串转化成对应的码,存入fIn压缩文件中
if (code[i] == '1' )
{
value |= 1;
}
//8个字节
if (++pos == 8)
{
fputc(value, fIn);
value = 0;
pos = 0;
}
}
ch = fgetc(fOut);
}
if (pos)//不足8为,后补0
{
value <<= (8 - pos);
fputc(value, fIn);
}
//配置文件
string config = filename ;
config += ".config";//配置文件后缀
FILE* fConfig = fopen(config.c_str(), "wb");
assert(fConfig);
char countStr[128];//存储转化后的编码
string str;
for (size_t i = 0; i < 256; ++i)
{
//记录字符出现次数
if (_infos[i]._count>0)
{
str += _infos[i]._ch;
str += ',';
//将字符记录转换为10进制,存在countStr中
_itoa(_infos[i]._count, countStr, 10);
str += countStr;
str += '\n';
}
//将字符串写入配置文件
fputs(str.c_str(), fConfig);
str.clear();
}
//关闭文件
fclose(fOut);
fclose(fIn);
fclose(fConfig);
}
//解压缩
void UnCompress(const char* filename)
{
//配置文件
string config = filename ;
config += ".config";
FILE* fConfig = fopen(config.c_str(), "rb");
assert(fConfig);
string tmp;
while (ReadLine(fConfig, tmp))
{
if (!tmp.empty())
{
//收集字符
_infos[( unsigned char )tmp[0]]._count = atoi(tmp.substr(2).c_str());
tmp.clear();
}
else
{
tmp += '\n';
}
}
//重建Huffman树
CharInfo invalid(0);
HuffmanTree<CharInfo >ht(_infos, 256, invalid);
//读压缩文件
string compress = filename ;
compress += ".compress";
FILE* fOut = fopen(compress.c_str(), "rb");
assert(fOut);
//生成解压文件
string UnCompress = filename ;
UnCompress += ".uncompress";
FILE* fIn = fopen(UnCompress.c_str(), "wb");
assert(fIn);
unsigned char ch = fgetc(fOut);
HuffmanNode<CharInfo >* root = ht.GetRootNode();
HuffmanNode<CharInfo >* cur = root;
int pos = 8;
LongType charCount = root->_weight._count;
while (!feof(fOut))
{
if (ch & 1 << (pos-1))
{
cur = cur->_right;
}
else
{
cur = cur->_left;
}
if (cur->_left == NULL && cur->_right == NULL)
{
fputc(cur->_weight._ch, fIn);
cur = root;
if (--charCount == 0)
{
break;
}
}
--pos;
if ( pos == 0)
{
pos = 8;
ch = fgetc(fOut);
}
}
fclose(fConfig);
fclose(fIn);
fclose(fOut);
}
//生成Huffman编码
void GenerateHuffmanCode(HuffmanNode <CharInfo>* root,string code)
{
if (root == NULL)
{
return;
}
//递归左右节点,生成Huffman Code
if (root ->_left)
{
GenerateHuffmanCode( root->_left, code + '0');
}
if (root ->_right)
{
GenerateHuffmanCode( root->_right, code + '1');
}
if (root ->_left == NULL&& root->_right == NULL )
{
//将得到的叶结点编码存入数组中节点位置的Huffman code中
_infos[ root->_weight._ch]._code = code ;
}
//cout << _infos[root->_weight._ch]._ch << code << endl;
}
//按行读
bool ReadLine(FILE * fConfig, string& tmp )
{
assert(fConfig);
char ch = fgetc(fConfig );
if (feof(fConfig ))
{
return false ;
}
while (ch != '\n' )
{
tmp += ch;
ch = fgetc( fConfig);
}
return true ;
}
protected:
CharInfo _infos[256];
};
######################################################################################
test.cpp:
#include<iostream>
using namespace std;
#include"FileCompress.h"
#include"Heap.h"
void test()
{
/*FileCompress<CharInfo> fc;
fc.Compress("input.txt");
cout << "压缩成功" << endl;
fc.UnCompress("input.txt");
cout << "解压成功" << endl;
*/
FileCompress<CharInfo > fcb;
fcb.Compress( "Input.BIG");
cout << "压缩成功" << endl;
fcb.UnCompress( "Input.BIG");
cout << "解压成功" << endl;
}
// Program entry point: runs the compression round trip, then runs the shell
// command "pause" before exiting.
int main()
{
    test();

    // Manual heap check that used to be enabled here:
    //   Heap<int> hp;
    //   hp.Push(20); hp.Push(30); hp.Push(10); hp.Push(50);

    // The command's exit status is deliberately ignored.
    static_cast<void>(system("pause"));
    return 0;
}
转载于:https://blog.51cto.com/hah11yn/1846014