---------------------------------
功能:利用Huffman树进行文本编码解码的实现环境:WinXP,VC6.0
输入:C:\\in.txt
输出:C:\\out.dec
注:控制台输出的调试信息还在,只要将相应的cout<<""<<endl;去掉即可
----------------------------------
步骤:
1.创建Win32 Console Application 工程HuffmanEncoder,添加头文件Huffmancoding.h和工程文件HuffmanEncoder.cpp
2.创建Win32 Console Application 工程HuffmanDecoder,添加头文件Huffmancoding.h和工程文件HuffmanDecoder.cpp
3.运行HuffmanEncoder,输入要压缩文件路径,这里用C:\\in.txt,产生压缩文件in.z
4.运行HuffmanDECoder,输入要解压文件路径,这里用C:\\in.z,产生解压文件in.dec
5.文件路径可以随自己随便输入,产生的压缩解压缩文件与源文件在同一路径下
-----------------------------------
Huffmancoding.h内容:
-----------------------------------
#include <vector>
#include <algorithm>
class HuffmanNode
{
public:
char symbol;
unsigned long codeword, freq;
unsigned int runLen, codewordLen;
HuffmanNode *left, *right;
HuffmanNode()
{
left = right = 0;
}
HuffmanNode(char s, unsigned long f, unsigned int r,
HuffmanNode *lt = 0, HuffmanNode *rt = 0)
{
symbol = s;
freq = f;
runLen = r;
left = lt;
right = rt;
}
};
class ListNode
{
public:
HuffmanNode *tree;
ListNode *next, *prev;
ListNode()
{
next = prev = 0;
}
ListNode(ListNode *p, ListNode *n)
{
prev = p;
next = n;
}
};
class DataRec
{
public:
char symbol;
unsigned int runLen;
unsigned long freq;
DataRec()
{
}
bool operator== (const DataRec& dr) const // used by find();
{
return symbol == dr.symbol && runLen == dr.runLen;
}
bool operator< (const DataRec& dr) const // used by sort();
{
return freq < dr.freq;
}
};
class HuffmanCoding
{
public:
HuffmanCoding() : mask(0xff), bytes(4), bits(8), ASCII(256)
{
chars = new HuffmanNode*[ASCII+1];
}
void compress(char*,ifstream&);
void decompress(char*,ifstream&);
private:
const unsigned int bytes, bits, ASCII;
unsigned int dataSize;
const unsigned long mask;
unsigned long charCnt;
ofstream fOut;
HuffmanNode *HuffmanTree, **chars;
vector<DataRec> data;
void error(char *s)
{
cerr << s << endl;
exit(1);
}
void output(unsigned long pack);
void garnerData(ifstream&);
void outputFrequencies(ifstream&);
void read2ByteNum(unsigned int&,ifstream&);
void read4ByteNum(unsigned long&,ifstream&);
void inputFrequencies(ifstream&);
void createHuffmanTree();
void createCodewords(HuffmanNode*,unsigned long,int);
void transformTreeToArrayOfLists(HuffmanNode*);
void encode(ifstream&);
void decode(ifstream&);
};
void HuffmanCoding::output(unsigned long pack)
{
char *s = new char[bytes];
int i;
for (i = bytes - 1; i >= 0; i--)
{
s[i] = pack & mask;
pack >>= bits;
}
for (i = 0; i < bytes; i++)
fOut.put(s[i]);
}
void HuffmanCoding::garnerData(ifstream& fIn)
{
char ch, ch2;
DataRec r;
vector<DataRec>::iterator i;//定义迭代器
r.freq = 1;
for(i = data.begin();i != data.end();i++)
{
cout<<i->symbol<<" "<<i->runLen<<" "<<i->freq<<endl;
}
for (fIn.get(ch); !fIn.eof(); ch = ch2)
{
for (r.runLen = 1, fIn.get(ch2); !fIn.eof() && ch2 == ch; r.runLen++)
fIn.get(ch2);
r.symbol = ch;//记录一个run
if ((i = find(data.begin(),data.end(),r)) == data.end()&& r.symbol != '\n')
data.push_back(r);//如果run当前不存在,则压入向量中
else i->freq++;//并将相应的run频率加1
}
sort(data.begin(),data.end());//然后重新排序
cout<<"向量data里的字符与频率"<<endl;
for(i = data.begin();i != data.end();i++)
{
cout<<i->symbol<<" "<<i->runLen<<" "<<i->freq<<endl;
}
}
void HuffmanCoding::outputFrequencies(ifstream& fIn)
{
unsigned long temp4;//无符号整型temp4
char ch = data.size();//记录有多少个run
unsigned int temp2 = data.size();//记录有多少个run
temp2 >>= bits;
fOut.put(char(temp2)).put(ch);
fIn.clear();
output((unsigned long)fIn.tellg());
for (int j = 0; j<data.size(); j++)
{
fOut.put(data[j].symbol);//字符
ch = temp2 = data[j].runLen;
temp2 >>= bits;
fOut.put(char(temp2)).put(ch);
temp4 = data[j].freq;//频率
output(temp4);
}
}
void HuffmanCoding::read2ByteNum(unsigned int& num, ifstream& fIn)
{
num = fIn.get();
num <<= bits;
num |= fIn.get();//赋值位或
}
void HuffmanCoding::read4ByteNum(unsigned long& num, ifstream& fIn)
{
num = (unsigned long) fIn.get();
for (int i = 1; i < 4; i++)
{
num <<= bits;
num |= (unsigned long) fIn.get();
}
}
void HuffmanCoding::inputFrequencies(ifstream& fIn)
{
DataRec r;
read2ByteNum(dataSize,fIn);
read4ByteNum(charCnt,fIn);
data.reserve(dataSize);
for (int j = 0; !fIn.eof() && j <dataSize; j++)
{
r.symbol = fIn.get();
read2ByteNum(r.runLen,fIn);
read4ByteNum(r.freq,fIn);
data.push_back(r);
}
}
void HuffmanCoding::createHuffmanTree()
{
ListNode *p, *newNode, *head, *tail;
unsigned long newFreq;
head = tail = new ListNode; // initialize list pointers;
head->tree = new HuffmanNode(data[0].symbol,data[0].freq,data[0].runLen);
//′′?¨μ¥á′±í
for (int i = 1; i < data.size(); i++) // create the rest of the list;
{
tail->next = new ListNode(tail,0);
tail = tail->next;
tail->tree =
new HuffmanNode(data[i].symbol,data[i].freq,data[i].runLen);
}
int k;
for(k = 0,p = head; p != 0 ; p = p->next)
{
cout<<++k<<p->tree->symbol<<" "<<endl;
}
while (head != tail) // create one Huffman tree;
{
newFreq = head->tree->freq + head->next->tree->freq; // two lowest frequencies
for (p = tail; p != 0 && p->tree->freq > newFreq; p = p->prev);
newNode = new ListNode(p,p->next);
p->next = newNode;
if (p == tail)
tail = newNode;
else newNode->next->prev = newNode;
newNode->tree = new HuffmanNode('\0',newFreq,0,head->tree,head->next->tree);//插入新生成的结点
head = head->next->next;
delete head->prev->prev;
delete head->prev;
head->prev = 0;
}
HuffmanTree = head->tree;
while(head->prev!= 0 || head->next!= 0)
{
cout<<head->tree->symbol<<" "<<head->tree->runLen<<endl;
head = head->prev;
}
delete head;
}
void HuffmanCoding::createCodewords(HuffmanNode *p, unsigned long codeword, int level)
{
cout<<p->symbol<<" "<<endl;
if (p->left == 0 && p->right == 0) // if p is a leaf,
{
p->codeword = codeword;
cout<<codeword<<endl; // store codeword
p->codewordLen = level; // and its lenght,
}
else // otherwise add 0
{
createCodewords(p->left, codeword<<1, level+1); // for left branch
createCodewords(p->right,(codeword<<1)+1,level+1); // and 1 for right;
}
}
void HuffmanCoding::transformTreeToArrayOfLists(HuffmanNode *p)
{
if (p->left == 0 && p->right == 0) // if p is a leaf,
{
//由symbol的ASCII码来确定位置
p->right = chars[(char)p->symbol]; // include it in
chars[(unsigned char)p->symbol] = p; // a list associated
} // with symbol found in p;
else
{
transformTreeToArrayOfLists(p->left);
transformTreeToArrayOfLists(p->right);
}
}
void HuffmanCoding::encode(ifstream& fIn)
{
unsigned long packCnt = 0, hold, maxPack = bytes*bits, pack = 0;//32位的pack
char ch, ch2;
int bitsLeft, runLength;
HuffmanNode *p;
for (fIn.get(ch); !fIn.eof()&&ch !='\n'; )
{
for (runLength = 1, fIn.get(ch2); !fIn.eof() && ch2 == ch; runLength++)
fIn.get(ch2);
for (p = chars[(unsigned char) ch];p != 0 && runLength != p->runLen; p = p->right);////获取字符的指针找到相应树结点
if (p == 0)
error("A problem in encode()");
if (p->codewordLen < maxPack - packCnt) // if enough room in
{
//将pack左移codewordLen位,来将codeword加到pack末尾
pack = (pack << p->codewordLen) | p->codeword; // pack to store new
//改变当前pack长度
packCnt += p->codewordLen; // codeword, shift its
} // content to the left
// and attach new codeword;
else // otherwise move
{
//如果pack空间不足
bitsLeft = maxPack - packCnt; // pack's content to
pack <<= bitsLeft; // the left by the
if (bitsLeft != p->codewordLen) // number of left
{
hold = p->codeword; // spaces and if new
hold >>= p->codewordLen - bitsLeft;// codeword is longer than
pack |= hold; // room left, transfer
} // only as many bits as
// can be fitted in pack;
else pack |= p->codeword; // if new codeword
// exactly fits in
// pack, transfer it;
output(pack); // output pack as
cout<<pack<<endl;
// four chars;
if (bitsLeft != p->codewordLen) // transfer
{
pack = p->codeword; // unprocessed bits
packCnt = maxPack - (p->codewordLen - bitsLeft); // of new
packCnt = p->codewordLen - bitsLeft; // codeword to pack;
}
else packCnt = 0;
}
ch = ch2;
}
if (packCnt != 0)
{
pack <<= maxPack - packCnt; // transfer leftover codewords and some 0's
output(pack);
}
}
void HuffmanCoding::compress(char *inFileName, ifstream& fIn)
{
char outFileName[30];
strcpy(outFileName,inFileName);
if (strchr(outFileName,'.')) // if there is an extension
strcpy(strchr(outFileName,'.')+1,"z");// overwrite it with 'z'
else strcat(outFileName,".z"); // else add extension '.z';
fOut.open(outFileName,ios::out);//产生压缩文件
//use this line on a PC:
// fOut.open(outFileName,ios::out|ios::binary);
garnerData(fIn);
outputFrequencies(fIn);
createHuffmanTree();
createCodewords(HuffmanTree,0,0);
for (int i = 0; i <= ASCII; i++)
chars[i] = 0;
transformTreeToArrayOfLists(HuffmanTree);
for ( i = 0; i <= ASCII; i++)
{
if(chars[i]!= 0)
cout<<i<<" "<<chars[i]<<endl;
}
fIn.clear(); // clear especially the eof flag;
fIn.seekg(0,ios::beg);
encode(fIn);
fIn.clear(); // clear especially the eof flag;
cout.precision(2);
cout<<"tellg"<<fIn.tellg()<<endl;
cout<<"tellp"<<fOut.tellp()<<endl;
cout << fixed << "Compression rate = " <<
100.0*(fIn.tellg()-fOut.tellp())/fIn.tellg() << "%\n"
<< "Compression rate without table = " <<
100.0*(fIn.tellg()-fOut.tellp()+long(data.size()*(2+4)))/fIn.tellg() << endl;
fOut.close();
}
void HuffmanCoding::decode(ifstream& fIn)
{
unsigned long chars;
char ch, bitCnt = 1, mask = 1;
mask <<= bits - 1; // change 00000001 to 100000000;
for (chars = 0, fIn.get(ch); !fIn.eof() && chars < charCnt; )
{
for (HuffmanNode *p = HuffmanTree; ; )
{
if (p->left == 0 && p->right == 0)
{
for (int j = 0; j < p->runLen; j++)
fOut.put(p->symbol);
chars += p->runLen;
break;
}
else if ((ch & mask) == 0)
p = p->left;
else p = p->right;
if (bitCnt++ == bits) // read next character from fIn
{
fIn.get(ch); // if all bits in ch are checked;
bitCnt = 1;
} // otherwise move all bits in ch
else ch <<= 1; // to the left by one position;
}
}
}
void HuffmanCoding::decompress(char *inFileName, ifstream& fIn)
{
char outFileName[30];
strcpy(outFileName,inFileName);
if (strchr(outFileName,'.')) // if there is an extension
strcpy(strchr(outFileName,'.')+1,"dec"); // overwrite it with 'z'
else strcat(outFileName,".dec"); // else add extension '.z';
fOut.open(outFileName,ios::out);
//use this line on a PC:
// fOut.open(outFileName,ios::out|ios::binary);
inputFrequencies(fIn);
createHuffmanTree();
createCodewords(HuffmanTree,0,0);
decode(fIn);
fOut.close();
}
----------------------------------------
HuffmanEncoder.cpp内容:
------------------------------------------
#include <iostream>
#include <fstream>
#include <cstring>
using namespace std;
#include "HuffmanCoding.h"
int main(int argc, char* argv[]) {
char fileName[30];
HuffmanCoding Huffman;
if (argc != 2) {
cout << "Enter a file name: ";
cin >> fileName;
}
else strcpy(fileName,argv[1]);
//ifstream fIn(fileName);
//use this line on a PC:
//ifstream fIn(fileName,ios::binary);
ifstream fIn;
fIn.open(fileName);
if (fIn.fail()) {
cerr << "Cannot open " << fileName << endl;
return 0;
}
Huffman.compress(fileName,fIn);//压缩文件中的信息
fIn.close();
return 0;
}
------------------------------------------
HuffmanDecoder.cpp内容:
-------------------------------------------
#include <iostream>
#include <fstream>
#include <cstring>
using namespace std;
#include "Huffmancoding.h"
int main(int argc, char* argv[]) {
char fileName[30];
HuffmanCoding Huffman;
if (argc != 2) {
cout << "Enter a file name: ";
cin >> fileName;
}
else strcpy(fileName,argv[1]);
//ifstream fIn(fileName);
//use this line on a PC:
ifstream fIn(fileName,ios::binary);
if (fIn.fail()) {
cerr << "Cannot open " << fileName << endl;
return 0;
}
Huffman.decompress(fileName,fIn);//解码
fIn.close();
return 0;
}
-----------------------------------------------