由于做一道作业,整理了一下——张安春
下面是4个头文件只要包含在需要进行压缩的代码里就行了
比如说入口文件为fin,出口文件为fout
压缩:Compress(fin,bufout);
Huffman_Compression("in","out");
bufin.open("out",ios::binary);
RLE_Compression(bufin,fout);
解压:bufout.open("in",ios::binary);
RLE_Decompression(fin,bufout);
bufout.close();
Huffman_Decompression("in","out");
bufin.open("out",ios::binary);
Decompress(bufin,fout);
头文件定义:
//common.h
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream.h>
#include <fstream.h>
// Input and output files shared by the Huffman routines; both are assigned
// with fopen() in Huffman_Compression() and Huffman_Decompression().
FILE *ifile,*ofile;
// Global variables and function prototypes used by the Huffman coder.
// Size in bytes of the uncompressed data. It is written at the start of the
// compressed file and read back when decompressing.
unsigned long file_size;
// Entries 0..255 count how often each byte value occurs in the input.
// build_code_tree() also stores the combined count of each merged node at
// index heap_length + 255.
long frequency_count[1024];
// father[i] is the parent node index of node i. build_code_tree() stores it
// negated for one of the two children; generate_code_table() emits a 1 bit
// for a negative entry and stops walking when it reaches 0.
short father[512];
// code[b] is the bit pattern assigned to byte b; heap_length is the number of
// entries currently held in heap[].
unsigned short code[256], heap_length;
// compress_charcount is set by compress_image() to the number of bytes it
// wrote. heap[] holds indices into frequency_count[] and is used from index 1
// (reheap(1) treats heap[1] as the top).
unsigned long compress_charcount,heap[257];
// code_length[b] is the number of bits in code[b]; 0 means byte b is unused.
unsigned char code_length[256];
// Decoding table built by build_decomp_tree(): a positive entry is the number
// of the next node, an entry <= 0 is the negated decoded byte value.
short decomp_tree[512];
// Fills code[] and code_length[] from father[]; returns 0 when a code would
// be longer than 16 bits, otherwise 1.
unsigned short generate_code_table ();
// Huffman-compresses infilename into outfilename; returns 1 on success, 0 on failure.
int Huffman_Compression(char * infilename, char * outfilename);
// Huffman-decompresses infilename into outfilename; returns 1 on success, 0 on failure.
int Huffman_Decompression(char * infilename, char * outfilename);
// Collects the byte values that occur in the input into heap[] and orders it.
void build_initial_heap ();
// Repeatedly merges the two least frequent nodes, recording parents in father[].
void build_code_tree ();
// NOTE(review): no definition named compress_file() is visible in this file;
// the Huffman code below defines compress_image() instead - confirm.
void compress_file ();
// Counts how often each byte value occurs in ifile.
void get_frequency_count ();
// Builds decomp_tree[] from code[] and code_length[].
void build_decomp_tree ();
// NOTE(review): no definition named decompress_file() is visible in this file;
// the Huffman code below defines decompress_image() instead - confirm.
void decompress_file ();
// Sifts the heap entry at the given position down to restore heap order.
void reheap (unsigned short);
//the base class and variable in lzw encode
// Dictionary entry used by the LZW compressor: `key` identifies a
// (prefix code, next byte) pair and `code` is the code assigned to it.
// An element converts implicitly to its key, which lets the hash table
// compare and hash entries as plain unsigned long values.
struct element {
    int code;
    unsigned long key;

    // Use the key whenever an element is needed as an integer.
    operator unsigned long() const { return this->key; }

    // Assigning an integer replaces the key only; `code` is left untouched.
    element& operator=(unsigned long y)
    {
        this->key = y;
        return *this;
    }
};
// Dictionary entry used by the LZW decompressor: the string for a code is the
// string of `prefix` followed by the single byte `suffix`.
struct Delement {
    int prefix;
    unsigned char suffix;
};
// Constants for the 12-bit LZW coder. The original declaration omitted the
// type ("const D = ..."), which relies on implicit int and is not valid C++.
const int D = 4099,   // divisor (bucket count) of the compressor's hash table
codes = 4096,         // number of available codes (12 bits)
ByteSize = 8,         // bits per byte
excess = 4,           // bits of a 12-bit code that do not fit in one byte
alpha = 256,          // number of single-byte codes (0..255)
mask1 = 255,          // selects the low 8 bits
mask2 = 15,           // selects the low 4 bits (used by Output)
mask = 15;            // selects the low 4 bits (used by GetCode)
// size: after Doutput() runs, s[0..size] holds the decoded string in reverse
// order, so s[size] is its first byte.
int size,
// LeftOver: the 4 bits carried from one Output()/GetCode() call to the next.
LeftOver,
// status: 0 when the next 12-bit code starts on a byte boundary, 1 when
// LeftOver currently holds half a byte.
status = 0;
// Decoder dictionary: ht[c] gives the prefix code and suffix byte of code c.
Delement ht[codes];
// Scratch buffer in which Doutput() assembles a decoded string.
unsigned char s[codes];
// LZW-compresses `in` to `out` and closes both streams.
// NOTE(review): the streams are passed by value; standard <fstream> streams
// are not copyable, so this depends on the pre-standard library - confirm.
void Compress(ifstream in,ofstream out);
// LZW-decompresses `in` to `out` and closes both streams.
void Decompress(ifstream in,ofstream out);
// Global constants and function prototypes used by the run-length coder.
// Largest run or sequence length; also the mask that extracts the length
// from a packet header byte.
#define MAX_LEN (0x7f)
// Same values as max_run_header / max_seq_header below; the visible code
// uses the const int versions.
#define MAX_RUN_HEADER (0xff)
#define MAX_SEQ_HEADER (0x7f)
// Header flag bit: set for a run packet, clear for a literal sequence packet.
#define RUN (0x80)
#define SEQ (0x00)
// Header byte of a run packet of maximum length (RUN | MAX_LEN).
const int max_run_header = 0xff;
// Header byte of a sequence packet of maximum length (SEQ | MAX_LEN).
const int max_seq_header = 0x7f;
// Run-length compresses `in` to `out`; returns 0.
int RLE_Compression(ifstream in,ofstream out);
// Run-length decompresses `in` to `out`; returns 0 on success, 1 on error.
int RLE_Decompression(ifstream in,ofstream out);
// Dispatches compression / decompression from command-line arguments.
// NOTE(review): no definition of process() is visible in this file - confirm.
void process(int argc,char* args[]);
//lzw.h
#include<iostream.h>
#include<FSTREAM.H>
#include"common.h"
//the implementation of lzw
// One node of a singly linked SortedChain: the stored value plus a pointer to
// the next node (0 at the end of the chain).
template <class T, class K>
struct SortedChainNode {
    T data;
    SortedChainNode<T,K> *link;
};
template<class E, class K>
class SortedChain {
public:
SortedChain() {first = 0;}
~SortedChain(){
};
bool IsEmpty() const {return first == 0;}
int Length() const;
bool Search(const K& k, E& e) const;
SortedChain<E,K>& Delete(const K& k, E& e);
SortedChain<E,K>& Insert(const E& e);
SortedChain<E,K>& DistinctInsert(const E& e);
void Output(ostream& out) const;
private:
SortedChainNode<E,K> *first;
};
// Looks for the element whose value equals k. On success the element is
// copied into e and true is returned; otherwise e is untouched and the
// result is false. The scan stops at the first element that is not < k.
template<class E, class K>
bool SortedChain<E,K>::Search(const K& k, E& e) const
{
    SortedChainNode<E,K> *cur = first;
    for (; cur != 0; cur = cur->link) {
        if (!(cur->data < k))
            break;              // reached the first candidate that is >= k
    }
    if (cur == 0)
        return false;           // ran off the end of the chain
    if (cur->data == k) {
        e = cur->data;
        return true;
    }
    return false;               // candidate is larger than k
}
// Removes the element equal to k from the chain and returns it through e.
// Throws BadInput when no element matches.
template<class E, class K>
SortedChain<E,K>& SortedChain<E,K>::Delete(const K& k, E& e)
{
    // `slot` always points at the link field that leads to the current node,
    // so unlinking works the same for the first node and for later ones.
    SortedChainNode<E,K> **slot = &first;
    while (*slot != 0 && (*slot)->data < k)
        slot = &(*slot)->link;

    SortedChainNode<E,K> *victim = *slot;
    if (victim == 0 || !(victim->data == k))
        throw BadInput();       // no match

    e = victim->data;
    *slot = victim->link;       // unlink the node from the chain
    delete victim;
    return *this;
}
// Inserts a copy of e in front of the first element that is not smaller
// than e, so the chain stays sorted. Equal elements are allowed.
template<class E, class K>
SortedChain<E,K>& SortedChain<E,K>::Insert (const E& e)
{
    // Walk the link fields until the one that should point at the new node.
    SortedChainNode<E,K> **slot = &first;
    while (*slot != 0 && (*slot)->data < e)
        slot = &(*slot)->link;

    SortedChainNode<E,K> *fresh = new SortedChainNode<E,K>;
    fresh->data = e;
    fresh->link = *slot;
    *slot = fresh;
    return *this;
}
// Inserts a copy of e at its sorted position unless an element equal to e is
// already in the chain, in which case the chain is left unchanged.
template<class E, class K>
SortedChain<E,K>& SortedChain<E,K>::DistinctInsert (const E& e)
{
    // Walk the link fields until the one that should point at the new node.
    SortedChainNode<E,K> **slot = &first;
    while (*slot != 0 && (*slot)->data < e)
        slot = &(*slot)->link;

    // The node at the insertion point is the only possible duplicate.
    if (*slot != 0 && (*slot)->data == e)
        return *this;

    SortedChainNode<E,K> *fresh = new SortedChainNode<E,K>;
    fresh->data = e;
    fresh->link = *slot;
    *slot = fresh;
    return *this;
}
template<class E, class K>
class ChainHashTable {
public:
ChainHashTable(int divisor = 11)
{
D = divisor;
ht = new SortedChain<E,K> [D];
}
~ChainHashTable()
{
delete [] ht;
}
bool Search(const K& k, E& e)const
{
return ht[k % D].Search(k, e);
}
ChainHashTable<E,K>& Insert(const E& e)
{
ht[e % D].DistinctInsert(e);
return *this;
}
ChainHashTable<E,K>& Delete(const K& k, E& e)
{
ht[k % D].Delete(k, e);
return *this;
}
void Output() const;
public:
int D;
SortedChain<E,K> *ht;
};
/*
void SetFiles(int argc, char* argv[])
{
cout <<"LZW compression and depression Utility"<<endl;
if(argc != 4 )
{
cout <<"/nUsage : LZW -c|d sourcefilename targetfilename/n";
exit(0);
}
in.open(argv[2],ios::binary);
out.open(argv[3],ios::binary);
if (in.fail())
{
cerr << "Cannot open " << argv[2]<< argv[2] << endl;
exit(1);
}
if (out.fail())
{
cerr << "Cannot open " << argv[3]<< argv[3] << endl;
exit(1);
}
if (! strcmp(argv[1], "-c"))
{
cout <<"Copressing ..."<<endl;
Compress();
}
else
{
if(! strcmp(argv[1], "-d"))
{
cout <<"Depressing ..."<<endl;
Decompress();
}
}
}*/
// Writes one 12-bit code. Codes are packed two to every three bytes, so the
// work alternates: the first code of a pair emits its high 8 bits and parks
// the low 4 bits in LeftOver; the second code emits those 4 bits together
// with its own high 4 bits, then its low 8 bits.
void Output(ofstream out,unsigned long pcode)
{
    if (!status) {
        // First code of a pair: keep the low nibble for the next call.
        LeftOver = pcode & mask2;
        const unsigned char high = (unsigned char)(pcode >> excess);
        out.put(high);
        status = 1;
        return;
    }

    // Second code of a pair: flush the parked nibble along with this code.
    const unsigned char low = (unsigned char)pcode&mask1;
    const unsigned char joined = (unsigned char)((LeftOver<<excess)|(pcode>>ByteSize));
    out.put(joined);
    out.put(low);
    status = 0;
}
// LZW-compresses everything readable from `in` into `out` as packed 12-bit
// codes, then closes both streams. An empty input produces an empty output.
//
// Fixes relative to the original:
//  * The packing state (status / LeftOver) is reset on entry, so a previous
//    Compress()/Decompress() run in the same process cannot corrupt this one.
//  * Dictionary keys for (prefix, byte) pairs are now ((prefix + 1) << 8) | byte.
//    The original used (prefix << 8) + byte, which for prefix code 0 equals
//    the key of the single byte itself; a 0x00 byte followed by any byte was
//    therefore "found" in the table and the 0x00 was silently dropped.
//    Only the compressor's private hash keys change; the emitted code stream
//    is what the decompressor already expects.
void Compress(ifstream in,ofstream out)
{
    status = 0;
    LeftOver = 0;

    // Seed the dictionary with the 256 single-byte strings (keys 0..255).
    ChainHashTable<element, unsigned long> h(D);
    element e;
    for (int i = 0; i < alpha; i++)
    {
        e.key = i;
        e.code = i;
        h.Insert(e);
    }
    int used = alpha;               // next free code

    unsigned char c;
    in.get(c);
    if (!in.eof()) {
        unsigned long pcode = c;    // code of the longest match so far
        for (;;) {
            in.get(c);
            if (in.eof()) break;
            // Pair keys start at 256, so they never collide with byte keys.
            const unsigned long k = ((pcode + 1) << ByteSize) | c;
            if ( h.Search(k, e) ) {
                pcode = e.code;     // extend the current match
            }
            else
            {
                Output(out,pcode);
                if (used < codes){  // dictionary not yet full
                    e.code = used++;
                    e.key = k;
                    h.Insert(e);
                }
                pcode = c;
            }
        }
        Output(out,pcode);
        // Flush a half-written byte if an odd number of codes was emitted.
        if (status) {c = LeftOver << excess; out.put(c);}
    }
    out.close();
    in.close();
}
// Writes the string that `code` stands for. The string is collected back to
// front in s[] by following prefix links until a single-byte code is reached,
// then written in forward order. On return s[size] is the string's first byte.
void Doutput(ofstream out,int code)
{
    int top = 0;
    for (; code >= alpha; code = ht[code].prefix)
        s[top++] = ht[code].suffix;
    s[top] = code;              // the remaining code is a plain byte value
    size = top;

    int i = top;
    while (i >= 0) {
        out.put(s[i]);
        --i;
    }
}
// Reads the next 12-bit code from `in` into `code`.
// Returns false when no further complete code is available.
//
// Codes are packed two to every three bytes. When status is 0 the code is the
// next byte plus the high nibble of the byte after it (whose low nibble is
// kept in LeftOver); when status is 1 the code is LeftOver plus the next byte.
//
// Fix: the second byte of a status-0 read is now checked. The original used
// it even when the read had hit end of file, i.e. an uninitialised value.
bool GetCode(ifstream in,int& code)
{
    unsigned char c, d;
    in.get(c);
    if (in.eof())
        return false;
    if (status) {
        code = (LeftOver << ByteSize) | c;
    }
    else {
        in.get(d);
        if (in.eof())
            return false;       // truncated input: half a code only
        code = (c << excess) | (d >> excess);
        LeftOver = d & mask;
    }
    status = 1 - status;
    return true;
}
// LZW-decompresses the 12-bit code stream in `in` to `out`, then closes both
// streams. The dictionary is rebuilt on the fly in ht[].
//
// Fixes relative to the original:
//  * The unpacking state (status / LeftOver) is reset on entry, so an earlier
//    Compress()/Decompress() run in the same process cannot shift the codes.
//  * Codes that cannot occur in a valid stream (a first code that is not a
//    single byte, or a code beyond the next free one) stop decoding instead
//    of indexing dictionary entries that were never filled in.
void Decompress(ifstream in,ofstream out)
{
    status = 0;
    LeftOver = 0;

    int used = alpha;   // codes used so far
    int pcode,          // previous code
    ccode;              // current code
    if (GetCode(in,pcode) && pcode < alpha)
    { // input is not empty and starts with a single-byte code
        s[0] = pcode;   // character for pcode
        out.put(s[0]);  // output string for pcode
        size = 0;       // s[size] is first character of last string output
        while( GetCode(in,ccode) )
        {
            if (ccode < used){ // ccode is defined
                Doutput(out,ccode);
                if (used < codes)
                {
                    ht[used].prefix = pcode;
                    ht[used++].suffix = s[size];
                }
            } else {
                // Only the very next free code may be referenced before it
                // is defined; anything else means the input is corrupt.
                if (ccode != used || used >= codes)
                    break;
                ht[used].prefix = pcode;
                ht[used++].suffix = s[size];
                Doutput(out,ccode);
            }
            pcode = ccode;
        }
    }
    out.close();
    in.close();
}
//RLC.h
void rlc_setFiles(int argc,char * argv[])//设置文件
{
cout <<"RLE compression and decompression utility/n";
if (4 != argc)
{
cout << "/nUsage : rle -c|d sourcefilename targetfilename/n";
exit(0);
}
if (! strcmp(argv[1], "-c"))
{
cout << "/nCompress.../n";
RLE_Compression(argv[2], argv[3]);
}
else
if (! strcmp(argv[1], "-d"))
{
cout << "/nDecompress.../n";
RLE_Decompression(argv[2], argv[3]);
}
else
cout << "/nUnknow command./n";
}
int RLE_Compression(ifstream in,ofstream out)
{
unsigned int i; /* generic index variable */
unsigned short run_len = 0; /* length of character run so far */
unsigned int j; /* another index variable */
unsigned short seq_len=0; /* length of non-run sequence */
char seq[MAX_LEN]; /* buffer for uncompressible data */
char cur_char;
char run_char;
while(!in.eof())
{
in.get(cur_char);
if(in.eof())
continue;
if(seq_len == 0)
{
if(run_len == 0)
{
run_char = cur_char;
++ run_len;
continue;
}
if (run_char == cur_char) //读入字符与前一个相同
if (++run_len == MAX_LEN) //是否已经到最大长度
{
out.put((char)max_run_header);
out.put(run_char);
run_len = 0;
continue;
}
if (run_len > 2) //so write out the run and
// start a new one of the new
// character.
{
out.put((char)(RUN|run_len));
out.put(run_char);
run_len = 1;
run_char = cur_char;
continue;
}
for (j = 0; j < run_len; j++); // copy 1 or 2 char run to seq[]
{
seq[seq_len] = run_char;
++seq_len;
if (seq_len == MAX_LEN) // if seq[] is full, write to disk
{
out.put((char)max_seq_header);
for (i = 0; i < seq_len; i++)
out.put(seq[i]);
seq_len = 0;
}
}
run_len = 0;
seq[seq_len++] = cur_char;
if (seq_len == MAX_LEN) // if seq[] is full, write to disk
{
out.put((char)max_seq_header);
for (i = 0; i < seq_len; i++)
out.put(seq[i]);
seq_len = 0;
}
}
else
{
if(run_len != 0) // if a run exists
{
if (cur_char == run_char ) // add to run! Yay.
{
++run_len;
if (run_len == MAX_LEN) // if run is full
{
out.put((char)(SEQ|seq_len));
for (i = 0; i < seq_len; i++)
out.put(seq[i]);
// write run
out.put((char)(RUN|run_len));
out.put(run_char);
seq_len = run_len = 0;
}
continue;
}
out.put((char)(SEQ|seq_len));
for (i = 0; i < seq_len; i++)
out.put(seq[i]);
// write run
out.put((char)(RUN|run_len));
out.put(run_char);
// and start a new run w/ cur_char
seq_len = 0;
run_len = 1;
run_char = cur_char;
continue;
} // end can't add to existing run, and preceding seq exists
if (seq[seq_len - 1] == cur_char) // if we can make a run
{
run_char = cur_char;
run_len = 2;
--seq_len;
continue;
}
seq[seq_len++] = cur_char;
if (seq_len == MAX_LEN) // if the sequence is full, write out
{
out.put((char)max_seq_header);
for (i = 0; i < MAX_LEN; i++)
out.put(seq[i]);
seq_len = 0;
}
}
}
if (seq_len != 0) // write sequence that precedes run
{
out.put((char)(SEQ|seq_len));
for (i = 0; i < seq_len; i++)
out.put(seq[i]);
}
if (run_len != 0) // write run
{
out.put((char)(RUN|run_len));
out.put(run_char);
}
in.close();
out.close();
return 0;
}
// Expands the packet stream produced by RLE_Compression from `in` to `out`
// and closes both streams. Returns 0 on success, 1 on error.
//
// Each packet starts with a header byte: the low 7 bits are a length; if the
// RUN bit is set one byte follows and is repeated `length` times, otherwise
// `length` literal bytes follow.
//
// Fixes relative to the original:
//  * Write errors were tested with "put(...) && eof()", which never triggers
//    on an output stream; the stream's fail state is checked instead.
//  * A packet cut short by end of input is reported as an error; the original
//    wrote uninitialised or stale bytes in that case.
//  * The loop is driven by the result of get() instead of while(!eof()).
int RLE_Decompression(ifstream in,ofstream out)
{
    unsigned short i;
    char packet_hdr;
    bool truncated = false;

    while (!truncated && in.get(packet_hdr))
    {
        const unsigned short length = MAX_LEN & packet_hdr;
        if (packet_hdr & RUN) /* if it's a run... */
        {
            char value;
            if (!in.get(value))
                truncated = true;
            else
                for (i = 0; i < length; i++)
                    out.put(value);
        }
        else /* it's a sequence */
        {
            for (i = 0; i < length; i++)
            {
                char value;
                if (!in.get(value))
                {
                    truncated = true;
                    break;
                }
                out.put(value);
            }
        }
        if (out.fail())
        {
            cout <<"Error writing to "<<endl;
            in.close();
            out.close();
            return 1;
        }
    }

    in.close();
    out.close();
    if (truncated)
    {
        cout << "Error: truncated input" << endl;
        return 1;
    }
    return 0;
}
//huffman.h
#ifndef _HUFFMAN_H
#define _HUFFMAN_H
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream.h>
#include <fstream.h>
/* Function prototypes */
// Fills code[] and code_length[] from father[]; returns 0 when a code would
// be longer than 16 bits, otherwise 1.
unsigned short generate_code_table ();
// Huffman-compresses infilename into outfilename; returns 1 on success, 0 on failure.
int Huffman_Compression(char * infilename, char * outfilename);
// Huffman-decompresses infilename into outfilename; returns 1 on success, 0 on failure.
int Huffman_Decompression(char * infilename, char * outfilename);
// Collects the byte values that occur in the input into heap[] and orders it.
void build_initial_heap ();
// Repeatedly merges the two least frequent nodes, recording parents in father[].
void build_code_tree ();
// Encodes ifile to ofile using code[] / code_length[].
void compress_image ();
// Counts how often each byte value occurs in ifile.
void get_frequency_count ();
// Builds decomp_tree[] from code[] and code_length[].
void build_decomp_tree ();
// Decodes file_size bytes from ifile to ofile using decomp_tree[].
void decompress_image ();
// Sifts the heap entry at the given position down to restore heap order.
void reheap (unsigned short);
/*
short father[512];
unsigned short code[256], heap_length;
unsigned long compress_charcount, file_size, heap[257];
unsigned char code_length[256];
long frequency_count[512];
short decomp_tree[512];
FILE *ifile, *ofile;
*/
// Command-line front end for the Huffman coder.
// Expects argv = { program, "-c" | "-d", source file, target file }.
//
// Fix: the messages contained "/n", which was printed literally; it is now
// the newline escape "\n".
void huffman_setFiles(int argc,char* argv[])
{
    cout << "Huffman compression and decompression utility\n";
    if (4 != argc)
    {
        cout <<"\nUsage : huffman -c|d sourcefilename targetfilename\n";
        exit(0);
    }
    if (! strcmp(argv[1], "-c"))
    {
        cout << "\nCompress...\n";
        Huffman_Compression(argv[2], argv[3]);
    }
    else if (! strcmp(argv[1], "-d"))
    {
        cout << "\nDepress...\n";
        Huffman_Decompression(argv[2], argv[3]);
    }
    else
        cout << "\nUnknow command."<<endl;
}
int Huffman_Compression(char * infilename, char * outfilename)
{
if ((ifile = fopen (infilename, "rb")) != NULL)
{
fseek (ifile, 0L, 2);
file_size = (unsigned long) ftell (ifile);
fseek (ifile, 0L, 0);
get_frequency_count ();
build_initial_heap (); //get the minimum heap
build_code_tree (); //create the huffman tree
if (!generate_code_table ())
{
cout << "ERROR! Code Value Out of Range. Cannot Compress./n";
return 0;
}
else
{
if ((ofile = fopen (outfilename, "wb")) != NULL)
{
fwrite (&file_size, sizeof(file_size),1,ofile);
fwrite (code,sizeof(unsigned short),256,ofile);
fwrite (code_length,sizeof(char),256,ofile);
fseek (ifile, 0L, 0);
compress_image();
fclose (ofile);
}
else
{
cout << "/nERROR: Couldn't create output file "<<outfilename << endl;
return 0;
}
}
fclose (ifile);
}
else
{
printf ("/nERROR: %s -- File not found!/n", infilename);
return 0;
}
return 1;
}
void compress_image ()
{
unsigned int thebyte = 0;
short loop1;
unsigned short current_code;
unsigned long loop;
unsigned short current_length, dvalue;
unsigned long curbyte = 0;
short curbit = 7;
for (loop = 0L; loop<file_size; loop++)
{
dvalue = (unsigned short) getc(ifile);
current_code = code[dvalue]; //get the code of dvalue
current_length = (unsigned short) code_length[dvalue];//get the length of the code
for (loop1 = current_length-1; loop1 >= 0; --loop1)
//the current_length should subtract 1
{
if ((current_code >> loop1) & 1)
thebyte |= (char) (1 << curbit);
if (--curbit < 0)//if curbit < 0 means the code is more than 8 bits,write
{
putc (thebyte, ofile);
thebyte = 0;
curbyte++;
curbit = 7;
}
}
}
putc (thebyte, ofile);
compress_charcount = ++curbyte;
}
// Derives code[] and code_length[] for every byte value from the tree stored
// in father[]. Byte values that do not occur get code 0 and length 0.
// Returns 0 if some code would need more than 16 bits, otherwise 1.
//
// For each used byte the tree is walked from the leaf to the root: a negative
// father entry contributes a 1 bit, a positive one a 0 bit, and the walk ends
// at a father entry of 0. The bit nearest the root ends up as the highest bit.
//
// Fix: when only one distinct byte value occurs the tree has no edges, so the
// original produced a zero-length code. Nothing was then written per byte and
// the decoder emitted byte 0 instead of the real value. Such input now gets
// the one-bit code 0.
unsigned short generate_code_table ()
{
    unsigned short loop;
    unsigned short current_length;
    unsigned short current_bit;
    unsigned short bitcode;
    short parent;

    // Count the distinct byte values that actually occur.
    unsigned short symbols_used = 0;
    for (loop = 0; loop < 256; loop++)
        if (frequency_count[loop])
            symbols_used++;

    for (loop = 0; loop < 256; loop++)
    {
        if (!frequency_count[loop])
        {
            // unused byte value: no code
            code[loop] = 0;
            code_length[loop] = 0;
            continue;
        }
        if (symbols_used == 1)
        {
            // degenerate tree: give the only symbol a real one-bit code
            code[loop] = 0;
            code_length[loop] = 1;
            continue;
        }

        current_length = bitcode = 0;
        current_bit = 1;
        parent = father[loop];  // index of this leaf's parent node
        while (parent)
        {
            if (parent < 0)
            {
                // negative entry: this child contributes a 1 bit
                bitcode += current_bit;
                parent = -parent;
            }
            parent = father[parent];    // move one level towards the root
            current_bit <<= 1;
            current_length++;
        }
        if (current_length > 16)        // does not fit in the 16-bit code
            return (0);
        code[loop] = bitcode;
        code_length[loop] = (unsigned char) current_length;
    }
    return (1);
}
// File-scope variable with the same name as reheap()'s parameter; inside
// reheap() the parameter hides it.
unsigned short heap_entry;
// Sifts the value stored at heap[heap_entry] down until neither child has a
// smaller frequency. heap[] is 1-based and holds indices into
// frequency_count[]; the children of position i are 2*i and 2*i + 1.
void reheap (unsigned short heap_entry)
{
    const unsigned long moving = heap[heap_entry];  // value being placed

    // Only positions up to heap_length / 2 have children.
    while (heap_entry <= (heap_length >> 1))
    {
        unsigned short child = heap_entry << 1;
        // With two children, pick the right one unless the left is smaller.
        if (child < heap_length &&
            frequency_count[heap[child]] >= frequency_count[heap[child+1]])
            child++;
        // Stop once the value is smaller than its smallest child.
        if (frequency_count[moving] < frequency_count[heap[child]])
            break;
        heap[heap_entry] = heap[child];     // pull the child up
        heap_entry = child;                 // and continue one level down
    }
    heap[heap_entry] = moving;
}
// Builds the Huffman tree from the min-heap in heap[]. Each round removes the
// two least frequent nodes and replaces them with a new parent node whose
// frequency is their sum. Parent links are recorded in father[]: the first
// node removed gets the positive parent index, the second the negated one.
// The last node created is the root, at index 256.
//
// Fix: the loop ran "while (heap_length != 1)", so an empty heap (no input
// bytes) decremented the unsigned length past zero and corrupted memory.
// It now runs only while at least two nodes remain.
void build_code_tree ()
{
    unsigned short findex;
    unsigned long heap_value;
    while (heap_length > 1)
    {
        // Remove the smallest node and restore heap order.
        heap_value = heap[1];
        heap[1] = heap[heap_length--];
        reheap(1);
        // heap[1] is now the second smallest node; heap_value is the smallest.
        findex = heap_length + 255;     // index of the new parent node
        frequency_count[findex] = frequency_count[heap[1]] +
        frequency_count[heap_value];
        father[heap_value] = findex;
        father[heap[1]] = -findex;
        heap[1] = findex;               // the parent replaces the second node
        reheap(1);
    }
    father[256] = 0;                    // the root has no parent
}
// Prints heap[0..255] to cout, ten values per line, each followed by " : ".
static void dump_heap_slots ()
{
    for (unsigned short slot = 0; slot < 256; ++slot)
    {
        if (slot%10 == 0)
            cout << endl;
        cout <<heap[slot]<<" : ";
    }
}

// Collects every byte value with a non-zero frequency into heap[1..] and
// arranges the entries as a min-heap ordered by frequency. The heap contents
// are printed before and after the ordering step.
void build_initial_heap ()
{
    heap_length = 0;
    for (unsigned short symbol = 0; symbol < 256; ++symbol)
    {
        if (frequency_count[symbol] != 0)
        {
            ++heap_length;
            heap[heap_length] = (unsigned long) symbol;
        }
    }

    cout <<endl<<"the heap_length is :"<<heap_length<<endl;
    cout <<"Before Create the Heap : " <<endl;
    dump_heap_slots ();

    // Sift every entry down, starting from the last one.
    unsigned short entry = heap_length;
    while (entry > 0)
    {
        reheap (entry);
        --entry;
    }

    cout <<"After Create the Heap : " <<endl;
    dump_heap_slots ();
}
// Counts how often each byte value occurs in the first file_size bytes of
// ifile and prints the 256 counts to cout, ten per line.
//
// Fixes relative to the original:
//  * frequency_count[] is cleared first; it is a global, so counts from an
//    earlier compression in the same process used to be added to the new ones.
//  * The result of getc() is checked: EOF (-1) was used as an array index.
//    If the file ends early, file_size is reduced to the bytes actually read
//    so that later passes and the stored header agree with the data.
void get_frequency_count ()
{
    unsigned long loop;
    memset (frequency_count, 0, sizeof(frequency_count));
    for (loop = 0; loop < file_size; loop++)
    {
        const int ch = getc (ifile);
        if (ch == EOF)
        {
            file_size = loop;
            break;
        }
        frequency_count[ch]++;
    }
    for(loop = 0; loop < 256 ; loop++)
    {
        if(loop%10 == 0)
            cout << endl;
        cout <<frequency_count[loop]<<",";
    }
}
// Huffman-decompresses infilename into outfilename.
// Returns 1 on success and 0 on failure (input missing, header incomplete, or
// output not creatable).
//
// Expects the layout written by Huffman_Compression: file_size, code[256],
// code_length[256], then the encoded bit stream.
//
// Fixes relative to the original:
//  * The input file is closed when the output cannot be created (it leaked).
//  * The three header reads are checked, so a short or empty file is rejected
//    instead of being decoded with stale table contents.
//  * "/n" in the messages was printed literally; it is now "\n".
int Huffman_Decompression(char * infilename, char * outfilename)
{
    ifile = fopen (infilename, "rb");
    if (ifile == NULL)
    {
        printf ("\nERROR: %s -- File not found!\n", infilename);
        return 0;
    }

    if (fread (&file_size, sizeof(file_size),1, ifile) != 1 ||
        fread (code,sizeof(unsigned short), 256, ifile) != 256 ||
        fread (code_length,sizeof(char), 256, ifile) != 256)
    {
        printf ("\nERROR: %s -- Invalid or truncated header!\n", infilename);
        fclose (ifile);
        return 0;
    }

    build_decomp_tree ();

    ofile = fopen (outfilename, "wb");
    if (ofile == NULL)
    {
        printf ("\nERROR: Couldn't create output file %s\n",outfilename);
        fclose (ifile);
        return 0;
    }

    decompress_image();
    fclose (ofile);
    fclose (ifile);
    return 1;
}
// Builds the decoding table decomp_tree[] from code[] and code_length[].
//
// Nodes are numbered from 1 (the root). The two children of node n live in
// slots 2*n and 2*n + 1: a positive slot value is the number of the child
// node, a value <= 0 is the negated byte value of a leaf, and 0 also marks a
// slot that has not been assigned yet.
//
// Fixes relative to the original:
//  * decomp_tree[] is cleared first. It is a global, and the construction
//    relies on unassigned slots being 0, so a second decompression in the
//    same process was built on top of the previous tree.
//  * The `register` storage class (removed in C++17) is no longer used.
//  * Table entries that cannot come from a valid header (length above 16, or
//    more internal nodes than a 256-leaf tree can have) are not followed, so a
//    damaged header cannot write outside decomp_tree[].
void build_decomp_tree ()
{
    unsigned short loop1;
    unsigned short current_index;
    unsigned short loop;
    unsigned short current_node = 1;    // highest node number handed out

    memset (decomp_tree, 0, sizeof(decomp_tree));
    decomp_tree[1] = 1;
    for (loop = 0; loop < 256; loop++)
    {
        if (code_length[loop] == 0 || code_length[loop] > 16)
            continue;                   // unused byte value or invalid length

        // Follow all bits except the last, creating inner nodes on demand.
        current_index = 1;
        for (loop1 = code_length[loop] - 1; loop1 > 0; loop1--)
        {
            current_index = (decomp_tree[current_index] << 1) +((code[loop] >> loop1) & 1);
            if (!(decomp_tree[current_index]))
            {
                if (current_node >= 255)
                    return;             // malformed table: too many nodes
                decomp_tree[current_index] = ++current_node;
            }
        }
        // The last bit selects the leaf slot, which stores the negated byte.
        decomp_tree[(decomp_tree[current_index] << 1) +
        (code[loop] & 1)] = -loop;
    }
}
// Decodes the bit stream in ifile to ofile until file_size bytes have been
// written. Bits are consumed from bit 7 to bit 0 of each input byte and
// followed through decomp_tree[]: a slot value <= 0 is a leaf holding the
// negated output byte, a positive value is the next node number.
//
// Fixes relative to the original:
//  * The result of getc() is checked. On truncated input the original kept
//    decoding the EOF value as if it were data; decoding now stops.
//  * The `register` storage class (removed in C++17) is no longer used.
void decompress_image ()
{
    unsigned short cindex = 1;          // current node, 1 = root
    unsigned long charcount = 0L;       // bytes written so far

    while (charcount < file_size)
    {
        const int next = getc (ifile);
        if (next == EOF)
            break;                      // input ended early
        // Stop in mid-byte once the last symbol is out: the rest is padding.
        for (short bitshift = 7; bitshift >= 0 && charcount < file_size; --bitshift)
        {
            cindex = (cindex << 1) + ((next >> bitshift) & 1);
            if (decomp_tree[cindex] <= 0)
            {
                putc ((int) (-decomp_tree[cindex]), ofile);
                ++charcount;
                cindex = 1;             // restart at the root
            }
            else
                cindex = decomp_tree[cindex];
        }
    }
}
#endif