一个LZW+huffman+RLC实现的压缩器

由于做一道作业,整理了一下——张安春

下面是4个头文件,只要把它们包含在需要进行压缩的代码里就可以了。

比如说输入文件流为fin,输出文件流为fout(bufin、bufout为中间文件流):

压缩:Compress(fin,bufout);
   Huffman_Compression("in","out");
   bufin.open("out",ios::binary);
   RLE_Compression(bufin,fout);

解压:bufout.open("in",ios::binary);
   RLE_Decompression(fin,bufout);
   bufout.close();
   Huffman_Decompression("in","out");
   bufin.open("out",ios::binary);
   Decompress(bufin,fout);

头文件定义:

//common.h 

  #include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream.h>
#include <fstream.h>

// ---- Huffman coder: shared state and prototypes ----

// Streams the Huffman routines read from and write to.
FILE *ifile;
FILE *ofile;

// Number of bytes in the uncompressed input.
unsigned long file_size;
// Occurrence counts, indexed by byte value and by the tree-node indices
// that build_code_tree() creates above 255.
long frequency_count[1024];
// Parent index of every tree node; build_code_tree() stores it negated for
// one of the two children so the branch taken can be recovered later.
short father[512];
// Bit pattern assigned to each byte value.
unsigned short code[256];
// Number of live entries in heap[1..heap_length].
unsigned short heap_length;
// Number of bytes written by the compressor.
unsigned long compress_charcount;
// Heap of node indices; slot 0 is never filled by the heap routines.
unsigned long heap[257];
// Length in bits of each byte value's code (0 = byte does not occur).
unsigned char code_length[256];
// Decoding tree filled in by build_decomp_tree().
short decomp_tree[512];

unsigned short generate_code_table();   // derive code[] and code_length[]
int Huffman_Compression(char * infilename, char * outfilename);    // Huffman compression
int Huffman_Decompression(char * infilename, char * outfilename);  // Huffman decompression
void build_initial_heap();     // initialise the heap
void build_code_tree();        // build the coding tree
void compress_file();          // compress
void get_frequency_count();    // gather byte frequencies
void build_decomp_tree();      // build the decoding tree
void decompress_file();        // decompress
void reheap(unsigned short);

//the base class and variable in lzw encode

// Dictionary entry used by the LZW compressor: `key` identifies an entry
// and `code` is the code assigned to it.
class element {
public:
 int code;
 unsigned long key;

 // Assigning an integer replaces the key only; `code` is left untouched.
 element& operator=(unsigned long y) {
  this->key = y;
  return *this;
 }

 // An element converts to its key, which lets the chain and hash-table
 // templates compare elements and reduce them modulo the table size.
 operator unsigned long() const {
  return this->key;
 }
};
// Decoder dictionary entry: the text of a code is the text of `prefix`
// followed by the single byte `suffix`.
class Delement {
public:
 int prefix;            // code of the leading part
 unsigned char suffix;  // final byte
};
// LZW parameters. The type is spelled out explicitly because standard C++
// has no implicit-int rule; `const D = 4099` only compiles on very old
// compilers.
const int D = 4099;      // divisor handed to the compressor's hash table
const int codes = 4096;  // number of available codes (sizes ht[] and s[])
const int ByteSize = 8;  // shift used to combine or split a code and a byte
const int excess = 4;    // shift for the half byte left over when packing
const int alpha = 256;   // first code that does not stand for a single byte
const int mask1 = 255;   // selects the low 8 bits of a code
const int mask2 = 15;    // selects the low 4 bits of a code (encoder)
const int mask = 15;     // selects the low 4 bits of a byte (decoder)
// Mutable state shared by the LZW encoder and decoder.
int size;         // index in s[] of the first character of the last decoded string
int LeftOver;     // half byte carried over between two packed codes
int status = 0;   // non-zero while a half byte is pending in LeftOver

// Decoder dictionary, indexed by code.
Delement ht[codes];
// Scratch buffer in which a decoded string is assembled back to front.
unsigned char s[codes];

void Compress(ifstream in, ofstream out);
void Decompress(ifstream in, ofstream out);

//the global variable and function in rlc encode
// Packet layout used by the RLE coder: the top bit of a header byte tells a
// run from a literal sequence, the low seven bits carry the length.
#define MAX_LEN         (0x7f)   /* longest run or sequence in one packet */
#define MAX_RUN_HEADER  (0xff)   /* header of a run of MAX_LEN bytes      */
#define MAX_SEQ_HEADER  (0x7f)   /* header of a sequence of MAX_LEN bytes */
#define RUN    (0x80)            /* flag bit: packet is a run             */
#define SEQ    (0x00)            /* flag bit: packet is a sequence        */

// Typed counterparts of the two header macros above.
const int max_run_header = MAX_RUN_HEADER;
const int max_seq_header = MAX_SEQ_HEADER;
int RLE_Compression(ifstream in,ofstream out);// prototype of the compression routine
int RLE_Decompression(ifstream in,ofstream out);// prototype of the decompression routine
void process(int argc,char* args[]);// declared only; no definition is visible in this file

//lzw.h

#include<iostream.h>
#include<FSTREAM.H>
#include"common.h"
//the implementation of lzw
// One node of a SortedChain: a payload plus the link to the next node.
// K is not used by the node itself; it mirrors the chain's parameter list.
template <class T, class K>
class SortedChainNode {
public:
 T data;                      // stored element
 SortedChainNode<T, K> *link; // next node, or 0 at the end of the chain
};
template<class E, class K>
class SortedChain {
public:
 SortedChain() {first = 0;}
 ~SortedChain(){
 };
 bool IsEmpty() const {return first == 0;}
 int Length() const;
 bool Search(const K& k, E& e) const;
 SortedChain<E,K>& Delete(const K& k, E& e);
 SortedChain<E,K>& Insert(const E& e);
 SortedChain<E,K>& DistinctInsert(const E& e);
 void Output(ostream& out) const;
private:
 SortedChainNode<E,K> *first;
};
// Looks for the element whose key equals k. The chain is sorted, so the scan
// stops at the first element that is not smaller than k. On a match the
// element is copied into e and true is returned; otherwise e is untouched.
template<class E, class K>
bool SortedChain<E,K>::Search(const K& k, E& e) const
{
 for (SortedChainNode<E,K> *cur = first; cur; cur = cur->link) {
  if (cur->data < k)
   continue;            // still before the place where k would be
  if (cur->data == k) {
   e = cur->data;
   return true;
  }
  return false;         // passed the place where k would be
 }
 return false;          // ran off the end of the chain
}
// Unlinks and frees the element whose key equals k, handing a copy of it
// back through e. Throws BadInput when no such element exists.
template<class E, class K>
SortedChain<E,K>& SortedChain<E,K>::Delete(const K& k, E& e)
{
 SortedChainNode<E,K> *prev = 0;      // node just before cur, 0 at the head
 SortedChainNode<E,K> *cur = first;
 for (; cur && cur->data < k; cur = cur->link)
  prev = cur;

 if (!cur || !(cur->data == k))
  throw BadInput();                   // no element with this key

 e = cur->data;
 if (prev)
  prev->link = cur->link;
 else
  first = cur->link;                  // the head itself is removed
 delete cur;
 return *this;
}
// Inserts a copy of e in front of the first element that is not smaller
// than e. Equal elements are allowed.
template<class E, class K>
SortedChain<E,K>& SortedChain<E,K>::Insert (const E& e)
{
 // `slot` always points at the link that will have to refer to the new node.
 SortedChainNode<E,K> **slot = &first;
 while (*slot && (*slot)->data < e)
  slot = &(*slot)->link;

 SortedChainNode<E,K> *node = new SortedChainNode<E,K>;
 node->data = e;
 node->link = *slot;
 *slot = node;
 return *this;
}
// Like Insert, but leaves the chain unchanged when an element equal to e is
// already present.
template<class E, class K>
SortedChain<E,K>& SortedChain<E,K>::DistinctInsert (const E& e)
{
 // `slot` always points at the link that would have to refer to a new node.
 SortedChainNode<E,K> **slot = &first;
 while (*slot && (*slot)->data < e)
  slot = &(*slot)->link;

 if (*slot && (*slot)->data == e)
  return *this;                       // duplicate: nothing to do

 SortedChainNode<E,K> *node = new SortedChainNode<E,K>;
 node->data = e;
 node->link = *slot;
 *slot = node;
 return *this;
}
template<class E, class K>
class ChainHashTable {
public:
 ChainHashTable(int divisor = 11)
 {
  D = divisor;
  ht = new SortedChain<E,K> [D];
 }
 ~ChainHashTable()
 {
  delete [] ht;
 }
 bool Search(const K& k, E& e)const
 {
  return ht[k % D].Search(k, e);
 }
 ChainHashTable<E,K>& Insert(const E& e)
 {
  ht[e % D].DistinctInsert(e);
  return *this;
 }
 ChainHashTable<E,K>& Delete(const K& k, E& e)
 {
  ht[k % D].Delete(k, e);
  return *this;
 }
 void Output() const;
public:
 int D;
 SortedChain<E,K> *ht;
};
/*
void SetFiles(int argc, char* argv[])
{
 cout <<"LZW compression and depression Utility"<<endl;
 if(argc != 4 )
 {
  cout <<"/nUsage : LZW -c|d sourcefilename targetfilename/n";
        exit(0);
 }
 in.open(argv[2],ios::binary);
 out.open(argv[3],ios::binary);
 if (in.fail())
 {
  cerr << "Cannot open " << argv[2]<< argv[2] << endl;
  exit(1);
 }
 if (out.fail())
 {
  cerr << "Cannot open " << argv[3]<< argv[3] << endl;
  exit(1);
 }
 if (! strcmp(argv[1], "-c"))
 {
  cout <<"Copressing ..."<<endl;
  Compress();
 }
 else
 {
  if(! strcmp(argv[1], "-d"))
  {
   cout <<"Depressing ..."<<endl;
   Decompress();
  }
 }

}*/
// Packs one code into the output. Codes are written in pairs over three
// bytes: the first call of a pair emits the code shifted right by `excess`
// and keeps its low bits in LeftOver; the second call emits those bits
// together with the top of the new code, then the new code's low byte.
// `status` records which half of the pair comes next.
void Output(ofstream out,unsigned long pcode)
{
 if (!status) {
  // first code of a pair
  LeftOver = pcode & mask2;
  const unsigned char upper = (unsigned char)(pcode >> excess);
  out.put(upper);
  status = 1;
  return;
 }

 // second code of a pair
 const unsigned char lower = (unsigned char)pcode&mask1;
 const unsigned char joined = (unsigned char)((LeftOver<<excess)|(pcode>>ByteSize));
 out.put(joined);
 out.put(lower);
 status = 0;
}

// LZW-compresses `in` into `out` and closes both streams.
//
// The dictionary maps the key (prefix code << ByteSize) + next byte to the
// code assigned to that pair. Codes below `alpha` stand for single bytes and
// need no dictionary entry: a lone byte is its own code.
//
// Changes from the earlier version:
//  * The dictionary is no longer seeded with keys 0..alpha-1. Those keys are
//    exactly the pair keys whose prefix is byte 0, since (0 << ByteSize) + c
//    == c. With the seeds present, a zero byte followed by any byte c was
//    "found" as code c and the zero byte vanished from the output.
//  * The packer state (status, LeftOver) is reset on entry, so a previous
//    Compress or Decompress call in the same process cannot leave a pending
//    half byte behind.
//  * The read loop stops on any stream failure, not only on eof().
void Compress(ifstream in,ofstream out)
{
 status = 0;
 LeftOver = 0;

 ChainHashTable<element, unsigned long> h(D);
 element e;
 int used = alpha;            // next code to hand out
 unsigned char c;

 if (in.get(c)) {             // input is not empty
  unsigned long pcode = c;    // code of the longest match so far
  while (in.get(c)) {
   const unsigned long k = (pcode << ByteSize) + c;
   if ( h.Search(k, e) ) {
    pcode = e.code;           // match grows by one byte
   }
   else {
    Output(out,pcode);
    if (used < codes) {       // dictionary not full: remember the new pair
     e.code = used++;
     e.key = k;
     h.Insert(e);
    }
    pcode = c;
   }
  }
  Output(out,pcode);
  // flush the half byte still pending after an odd number of codes
  if (status) {c = LeftOver << excess; out.put(c);}
 }
 out.close();
 in.close();
}
// Writes the byte string that `code` stands for. The string is collected in
// s[] back to front by following prefix links until a single-byte code
// (below alpha) is reached, then emitted in reverse. Afterwards `size` is
// the index in s[] of the string's first character.
void Doutput(ofstream out,int code)
{
 int top = 0;
 for (; code >= alpha; code = ht[code].prefix)
  s[top++] = ht[code].suffix;
 s[top] = code;               // the chain ends in a plain byte
 size = top;

 while (top >= 0)
  out.put(s[top--]);
}

// Reads the next packed code from `in` into `code`. Two codes share three
// bytes; `status` tells which half of a pair is due and LeftOver carries the
// bits of the shared byte that belong to the second code.
// Returns false when no further code can be read. This now includes the
// case where the second byte of a code is missing (truncated input);
// previously that byte was used uninitialised.
bool GetCode(ifstream in,int& code)
{
 unsigned char c, d;
 in.get(c);
 if (in.eof())
  return false;
 if (status) code = (LeftOver << ByteSize) | c;
 else {
  if (!in.get(d))
   return false;             // half a code at the end of the stream
  code = (c << excess) | (d >> excess);
  LeftOver = d & mask;
 }
 status = 1 - status;
 return true;
}

// Expands an LZW stream read from `in` into `out` and closes both streams.
//
// Changes from the earlier version:
//  * The unpacker state (status, LeftOver) is reset on entry. Compress can
//    finish with status == 1, which made a following Decompress in the same
//    process misread its first code.
//  * A code larger than the next free code cannot come from the encoder;
//    decoding stops there instead of following uninitialised table entries.
void Decompress(ifstream in,ofstream out)
{
 status = 0;
 LeftOver = 0;

 int used = alpha; // codes used so far
 int pcode,        // previous code
     ccode;        // current code
 if (GetCode(in,pcode))
 { // file is not empty
  s[0] = pcode;    // character for pcode
  out.put(s[0]);   // output string for pcode
  size = 0;        // s[size] is first character of last string output
  while ( GetCode(in,ccode) )
  {
   if (ccode > used)
    break;         // corrupt input
   if (ccode < used) { // ccode is defined
    Doutput(out,ccode);
    if (used < codes)
    { // new entry: previous string + first character of the current one
     ht[used].prefix = pcode;
     ht[used++].suffix = s[size];
    }
   } else {
    // ccode is the entry about to be created: previous string followed
    // by its own first character
    ht[used].prefix = pcode;
    ht[used++].suffix = s[size];
    Doutput(out,ccode);
   }
   pcode = ccode;
  }
 }
 out.close();
 in.close();
}

//RLC.h


void rlc_setFiles(int argc,char * argv[])//设置文件
{
 cout <<"RLE compression and decompression utility/n";
    if (4 != argc)
    {
        cout << "/nUsage : rle -c|d sourcefilename targetfilename/n";
        exit(0);
    }
    if (! strcmp(argv[1], "-c"))
    {
        cout << "/nCompress.../n";
        RLE_Compression(argv[2], argv[3]);
    }
    else
  if (! strcmp(argv[1], "-d"))
  {
   cout << "/nDecompress.../n";
   RLE_Decompression(argv[2], argv[3]);
  }
  else
   cout << "/nUnknow command./n";
}
int RLE_Compression(ifstream in,ofstream out)

{
    unsigned int i;      /* generic index variable         */
    unsigned short run_len = 0;   /* length of character run so far */
    unsigned int j;                     /* another index variable         */
    unsigned short seq_len=0;           /* length of non-run sequence     */
    char seq[MAX_LEN];     /* buffer for uncompressible data */
 char cur_char;
 char run_char;
 while(!in.eof())
 {
  in.get(cur_char);
  if(in.eof())
   continue;
  if(seq_len == 0)
  {
   if(run_len == 0)
   {
    run_char = cur_char;
    ++ run_len;
    continue;
   }
            if (run_char == cur_char)  //读入字符与前一个相同
                if (++run_len == MAX_LEN) //是否已经到最大长度
                {
     out.put((char)max_run_header);
     out.put(run_char);
                    run_len = 0;
                    continue;
                }
   if (run_len > 2)    //so write out the run and     
           // start a new one of the new 
           // character.                  
            {
    out.put((char)(RUN|run_len));
    out.put(run_char);
                run_len = 1;
                run_char   = cur_char;
                continue;
            }
   for (j = 0; j < run_len; j++);    // copy 1 or 2 char run to seq[]
            {
                seq[seq_len] = run_char;
                ++seq_len;
                if (seq_len == MAX_LEN)       // if seq[] is full, write to disk
                {
     out.put((char)max_seq_header);
                    for (i = 0; i < seq_len; i++)
      out.put(seq[i]);
                    seq_len = 0;
                }
            }
   run_len = 0;
            seq[seq_len++] = cur_char;
            if (seq_len == MAX_LEN)        // if seq[] is full, write to disk
            {
    out.put((char)max_seq_header);
                for (i = 0; i < seq_len; i++)
     out.put(seq[i]);
                seq_len = 0;
            }
  }
  else
  {
          if(run_len != 0)      // if a run exists
            {
                if (cur_char == run_char )  // add to run!  Yay.
                {
                    ++run_len;
                    if (run_len == MAX_LEN)  // if run is full
                    {
      out.put((char)(SEQ|seq_len));
                        for (i = 0; i < seq_len; i++)
       out.put(seq[i]);
                        // write run                
      out.put((char)(RUN|run_len));
      out.put(run_char);
                        seq_len = run_len = 0;
                    }
     continue;
    }
    out.put((char)(SEQ|seq_len));
                for (i = 0; i < seq_len; i++)
     out.put(seq[i]);
                // write run
    out.put((char)(RUN|run_len));
    out.put(run_char);
                // and start a new run w/ cur_char
                seq_len = 0;
                run_len = 1;
                run_char = cur_char;
                continue;
            } // end can't add to existing run, and preceding seq exists
   if (seq[seq_len - 1] == cur_char)       // if we can make a run
            {
                run_char = cur_char;
                run_len = 2;
                --seq_len;
                continue;
            }
   seq[seq_len++] = cur_char;
            if (seq_len == MAX_LEN) // if the sequence is full, write out
            {
    out.put((char)max_seq_header);
                for (i = 0; i < MAX_LEN; i++)
     out.put(seq[i]);
                seq_len = 0;
            }

  }
 }
 if (seq_len != 0)  // write sequence that precedes run
    {
  out.put((char)(SEQ|seq_len));
        for (i = 0; i < seq_len; i++)
   out.put(seq[i]);
    }
    if (run_len != 0)  // write run
    {
  out.put((char)(RUN|run_len));
  out.put(run_char);
    }
 in.close();
 out.close();
    return 0;
}


int RLE_Decompression(ifstream in,ofstream out)

{

    unsigned short i;
    unsigned short length;
 char byte;
 char packet_hdr;
 char rd ;
 while(!in.eof())
 {
  in.get(packet_hdr);
  if(in.eof())
   continue;
  length = MAX_LEN & packet_hdr;
        if (packet_hdr & RUN)  /* if it's a run... */
        {
   in.get(byte);
            for (i = 0; i < length; i++)
                if (out.put(byte)&&out.eof())
                {
     cout <<"Error writing to "<<endl;
     in.close();
     out.close();
                    return 1;
                }

        }
        else /* it's a sequence */
            for (i = 0; i < length; i++)
            {
    in.get(rd);
    if ((out.put(rd))&&out.eof())
                {
     cout <<"Error writing to "<<endl;
     in.close();
     out.close();
                    return 1;
                }
   }
    }
 in.close();
 out.close();
    return 0;

}

//huffman.h

#ifndef _HUFFMAN_H
#define _HUFFMAN_H
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream.h>
#include <fstream.h>
/* Function prototypes */
unsigned short  generate_code_table ();// derives the code table from the coding tree
int    Huffman_Compression(char * infilename, char * outfilename);
int    Huffman_Decompression(char * infilename, char * outfilename);
void   build_initial_heap ();
void   build_code_tree ();// builds the coding tree
void   compress_image ();// compresses the input
void   get_frequency_count ();// counts how often each byte occurs
void   build_decomp_tree ();// builds the decoding tree
void   decompress_image ();// decompresses the input
void   reheap (unsigned short);
/*
short   father[512];
unsigned short  code[256], heap_length;
unsigned long   compress_charcount, file_size, heap[257];
unsigned char   code_length[256];
long            frequency_count[512];
short           decomp_tree[512];

FILE            *ifile, *ofile;
*/

void huffman_setFiles(int argc,char* argv[])
{
 cout << "Huffman compression and decompression utility/n";
    if (4 != argc)
    {
  cout <<"/nUsage : huffman -c|d sourcefilename targetfilename/n";
        exit(0);
    }
    if (! strcmp(argv[1], "-c"))
    {
        cout << "/nCompress.../n";
        Huffman_Compression(argv[2], argv[3]);
    }
    else if (! strcmp(argv[1], "-d"))
    {
        cout << "/nDepress.../n";
        Huffman_Decompression(argv[2], argv[3]);
    }
    else
        cout << "/nUnknow command."<<endl;
}

int Huffman_Compression(char * infilename, char * outfilename)
{
 
 if ((ifile = fopen (infilename, "rb")) != NULL)
    {
        fseek (ifile, 0L, 2);
        file_size = (unsigned long) ftell (ifile);
        fseek (ifile, 0L, 0);
        get_frequency_count ();
        build_initial_heap ();   //get the minimum heap
        build_code_tree ();    //create the huffman tree

        if (!generate_code_table ())
        {
            cout << "ERROR!  Code Value Out of Range. Cannot Compress./n";
            return 0;
        }
        else
        {
            if ((ofile = fopen (outfilename, "wb")) != NULL)
            {
                fwrite (&file_size, sizeof(file_size),1,ofile);
                fwrite (code,sizeof(unsigned short),256,ofile);
                fwrite (code_length,sizeof(char),256,ofile);
                fseek (ifile, 0L, 0);
                compress_image();
                fclose (ofile);
            }
            else
            {
                cout << "/nERROR: Couldn't create output file "<<outfilename << endl;
                return 0;
            }
        }
        fclose (ifile);
    }
    else
    {
        printf ("/nERROR:  %s -- File not found!/n", infilename);
        return 0;
    }
    return 1;
}
void compress_image ()
{
   unsigned int    thebyte = 0;
   short           loop1;
   unsigned short  current_code;
   unsigned long   loop;
   unsigned short  current_length, dvalue;
   unsigned long    curbyte = 0;
   short           curbit = 7;
   for (loop = 0L; loop<file_size; loop++)
   {
      dvalue = (unsigned short) getc(ifile);
      current_code = code[dvalue];     //get the code of dvalue
      current_length = (unsigned short) code_length[dvalue];//get the length of the code
      for (loop1 = current_length-1; loop1 >= 0; --loop1)
  //the current_length should subtract 1
      {
         if ((current_code >> loop1) & 1)
            thebyte |= (char) (1 << curbit);
         if (--curbit < 0)//if curbit < 0 means the code is more than 8 bits,write
         {
            putc (thebyte, ofile);
            thebyte = 0;
            curbyte++;
            curbit = 7;
         }
      }
   }
   putc (thebyte, ofile);
   compress_charcount = ++curbyte;
}

unsigned short  generate_code_table ()

{
   unsigned short  loop;
   unsigned short  current_length;
   unsigned short  current_bit;
   unsigned short  bitcode;
   short           parent;
   for (loop = 0; loop < 256; loop++)
      if (frequency_count[loop])
      {
         current_length = bitcode = 0;
         current_bit = 1;
         parent = father[loop];//parent is the index of loop's father node
         while (parent)
         {
            if (parent < 0) //parent less than 0 indicate it's the right node
       //at this moment we should add current_bit to bitcode
            { 
               bitcode += current_bit;
               parent = -parent;
            }
            parent = father[parent];//continue to process its parent node
            current_bit <<= 1;
            current_length++;
         }
         code[loop] = bitcode;
         if (current_length > 16)//if current_length the code more the 2^16
            return (0);
         else
            code_length[loop] = (unsigned char) current_length;
      }
      else
         code[loop] = code_length[loop] = 0;//if no node the node length is zero
   return (1);
}

unsigned short  heap_entry;
void reheap (unsigned short heap_entry)
{
   unsigned short  index;
   unsigned short  flag = 1;
   unsigned long   heap_value;
   heap_value = heap[heap_entry];
   //record the current value
   //use >> and << take place of div 2 and multi 2
   //check the heap_entry whether in the scope of the node having their children
   while ((heap_entry <= (heap_length >> 1)) && (flag))
   {
     index = heap_entry << 1;
      if (index < heap_length) //与两个孩子比较
  if (frequency_count[heap[index]] >= frequency_count[heap[index+1]])//如果左孩子大
            index++;
      if (frequency_count[heap_value] < frequency_count[heap[index]])//如果父亲较小
  flag--;
      else
      {
        heap[heap_entry] = heap[index]; //如果父亲大则交换
        heap_entry       = index;  //同时index设为其孩子结点,继续建堆
      }
   }
   heap[heap_entry] = heap_value;
}
void build_code_tree ()

{
   unsigned short  findex;
   unsigned long   heap_value;
   while (heap_length != 1)
   {
      heap_value = heap[1];
      heap[1]    = heap[heap_length--];
      reheap(1);
   //经过重新建立堆,此时heap[1]为次小值,heap_value是最小值(保存最小值的下标)
      findex = heap_length + 255;
      frequency_count[findex] = frequency_count[heap[1]] +
                                frequency_count[heap_value];
      father[heap_value] =  findex;
      father[heap[1]]    = -findex;
      heap[1]            =  findex;//存放父结点下标
      reheap(1);
   }
   father[256] = 0;
}


void build_initial_heap ()
{

 unsigned short  loop;
 heap_length = 0;
 for (loop = 0; loop < 256; loop++)
  if (frequency_count[loop])
   heap[++heap_length] = (unsigned long) loop;
 cout <<endl<<"the heap_length is :"<<heap_length<<endl;
 
 cout <<"Before Create the Heap : " <<endl;
 for(loop = 0 ; loop < 256 ; loop ++)
 {
  if(loop%10 == 0)
   cout << endl;
  cout <<heap[loop]<<" : ";
 }
 for (loop = heap_length; loop > 0; loop--)
  reheap (loop);
 //after this step we get the minimun heap on the frequency
 cout <<"After Create the Heap : " <<endl;
 for(loop = 0 ; loop < 256 ; loop ++)
 {
  if(loop%10 == 0)
   cout << endl;
  cout <<heap[loop]<<" : ";
 }
}
void get_frequency_count ()
{
   unsigned long  loop;
   for (loop = 0; loop < file_size; loop++)
      frequency_count[getc (ifile)]++;
   for(loop = 0; loop < 256 ; loop++)
   {
    if(loop%10 == 0)
     cout << endl;
    cout <<frequency_count[loop]<<",";
   }

}
int  Huffman_Decompression(char * infilename, char * outfilename)

{
    if ((ifile = fopen (infilename, "rb")) != NULL)
    {
        fread (&file_size, sizeof(file_size),1, ifile);
        fread (code,sizeof(unsigned short), 256, ifile);
        fread (code_length,sizeof(char), 256, ifile);
        build_decomp_tree ();
        if ((ofile = fopen (outfilename, "wb")) != NULL)
        {
            decompress_image();
            fclose (ofile);
        }
        else
        {
            printf ("/nERROR:  Couldn't create output file %s/n",outfilename);
            return 0;
        }
        fclose (ifile);
    }
    else
    {
        printf ("/nERROR:  %s -- File not found!/n", infilename);
        return 0;
    }
    return 1;
}
void  build_decomp_tree ()

{
   register unsigned short  loop1;
   register unsigned short  current_index;
   unsigned short  loop;
   unsigned short  current_node = 1;
   decomp_tree[1] = 1;
   for (loop = 0; loop < 256; loop++)
   {
      if (code_length[loop])
      {
   current_index = 1;
   for (loop1 = code_length[loop] - 1; loop1 > 0; loop1--)
   {
   current_index = (decomp_tree[current_index] << 1) +((code[loop] >> loop1) & 1);
   if (!(decomp_tree[current_index]))
      decomp_tree[current_index] = ++current_node;
   }
   decomp_tree[(decomp_tree[current_index] << 1) +
     (code[loop] & 1)] = -loop;
      }
   }
}
void  decompress_image ()

{
   register unsigned short  cindex = 1;
   register char            curchar;
   register short           bitshift;
   unsigned long  charcount = 0L;
   while (charcount < file_size)
   {
      curchar = (char) getc (ifile);
      for (bitshift = 7; bitshift >= 0; --bitshift)
      {
  cindex = (cindex << 1) + ((curchar >> bitshift) & 1);
  if (decomp_tree[cindex] <= 0)
   {
    putc ((int) (-decomp_tree[cindex]), ofile);
   if ((++charcount) == file_size)
       bitshift = 0;
    else
       cindex = 1;
   }
  else
   cindex = decomp_tree[cindex];
      }
   }
}

#endif


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值