一、实验项目名称
LZW 编解码算法实现与分析
二、实验目的
掌握词典编码的基本原理,实现LZW解码器并分析编解码算法。
三、实验原理
思想:
1、将词典初始化为包含所有的单字符,并将当前前缀串 P 置为空。
2、读入字符流中的下一个字符 C:
如果 P+C 在词典中,则令 P:=P+C,继续读入下一个字符;
如果 P+C 不在词典中,则输出 P 对应的码字,把 P+C 作为新词条加入词典,并令 P:=C。
3、字符流结束时,输出当前前缀 P 对应的码字。
代码LZWEncode
/*
 * LZW-encode the entire contents of fp into the bit stream bf.
 *
 * The stream starts with the input length written as a 32-bit field,
 * followed by one code per emitted phrase (written through output()).
 *
 * fp: input file; it is rewound to the start after its length is measured.
 * bf: bit-oriented output stream.
 *
 * Note: for an empty input the loop never runs, so the final output() call
 * emits the initial prefix value -1 unchanged.
 */
void LZWEncode( FILE *fp, BITFILE *bf)
{
    unsigned long total_bytes;
    int prefix_code = -1;   /* code of the phrase matched so far; -1 means empty */
    int symbol;

    /* Measure the input and store its length as the stream header. */
    fseek( fp, 0, SEEK_END);
    total_bytes = ftell( fp);
    fseek( fp, 0, SEEK_SET);
    BitsOutput( bf, total_bytes, 4*8);

    InitDictionary();

    for (;;) {
        int match;

        symbol = fgetc( fp);
        if (EOF == symbol)
            break;

        match = InDictionary( symbol, prefix_code);
        if (match >= 0) {
            /* prefix+symbol is already a phrase: keep extending it */
            prefix_code = match;
            continue;
        }

        /* prefix+symbol is new: emit the prefix, then learn the new phrase */
        output( bf, prefix_code);
        if (next_code < MAX_CODE)
            AddToDictionary( symbol, prefix_code);
        prefix_code = symbol;
    }

    /* Flush the phrase that was still being matched when input ended. */
    output( bf, prefix_code);
}
解码过程:
1、在开始译码时词典包含所有可能的前缀根。
2、令cw=码字流中的第一个码字。
3、输出当前缀-符串 string.cw 到字符流。
4、先前码字 pw := 当前码字 cw。
5、当前码字 cw := 码字流中的下一个码字。
6、判断当前缀-符串string.cw 是否在词典中。
(1)如果“是”,则把当前缀-符串 string.cw 输出到字符流。
当前前缀 P := 先前缀-符串 string.pw。 当前字符 C := 当前缀-符串 string.cw 的第一个字符。 把缀-符串 P+C 添加到词典。
(2)如果“否”,则当前前缀 P := 先前缀-符串 string.pw。 当前字符 C := 先前缀-符串 string.pw 的第一个字符(此时 cw 尚不在词典中,无法取它的首字符)。
输出缀-符串 P+C 到字符流,然后把它添加到词典中。
LZWDecode代码:
#include "LZWDecode.hpp"
/*
 * Decode an LZW code stream read from bf and write the recovered bytes to fp.
 *
 * Stream layout: a 32-bit byte count, followed by codes read through input().
 * Decoding stops once that many bytes have been written.
 *
 * bf: bit-oriented input stream positioned at the start of the header.
 * fp: output file receiving the decoded bytes.
 *
 * DecodeString() fills d_stack with a phrase in reverse order (last byte at
 * the lowest index), so the first byte of a phrase is at
 * d_stack[phrase_length-1] and output pops from the top downwards.
 */
void LZWDecode( BITFILE *bf, FILE *fp)
{
    int character = 0;          /* first byte of the most recently decoded phrase */
    int new_code, last_code;
    int phrase_length;
    unsigned long file_length;  /* bytes that still have to be written */

    file_length = BitsInput( bf, 4*8);
    /* Kept from the original: an all-ones length is treated as "nothing to decode". */
    if( (unsigned long)-1 == file_length)
        file_length = 0;
    InitDictionary();
    last_code = -1;             /* no previous phrase yet */
    while( 0<file_length)
    {
        new_code = input( bf);
        if( new_code >= next_code)
        {
            /*
             * The code is not in the dictionary yet. The phrase is taken to be
             * previous phrase + first byte of previous phrase. Because the
             * stack is reversed, that trailing byte goes to slot 0 and the
             * previous phrase is decoded starting from slot 1.
             */
            d_stack[0] = character;
            phrase_length = DecodeString( 1, last_code);
        }
        else
        {
            /* Known code: expand it directly. */
            phrase_length = DecodeString( 0, new_code);
        }
        /* Top of the reversed stack is the first byte of the current phrase. */
        character = d_stack[phrase_length-1];
        /*
         * Emit the phrase front-to-back. The extra file_length test stops a
         * corrupt stream from writing more bytes than the header declared and
         * from wrapping the unsigned counter below zero.
         */
        while( 0<phrase_length && 0<file_length)
        {
            phrase_length--;
            fputc( d_stack[phrase_length], fp);
            file_length--;
        }
        if( MAX_CODE>next_code)
        {
            /* New entry: previous phrase + first byte of the current phrase
               (AddToDictionary ignores the call while last_code is -1). */
            AddToDictionary( character, last_code);
        }
        last_code = new_code;   /* current code becomes the previous one */
    }
}
四、其他代码
bitio.c
#include <stdlib.h>
#include <stdio.h>
#include "bitio.h"
/*
 * Open a bit-level input stream.
 *
 * filename: path opened in binary read mode, or NULL to read from stdin.
 * Returns a newly allocated BITFILE, or NULL if the allocation or fopen
 * fails. Release it with CloseBitFileInput().
 */
BITFILE *OpenBitFileInput( char *filename)
{
    BITFILE *bf;

    bf = (BITFILE *)malloc( sizeof(BITFILE));
    if( NULL == bf)
        return NULL;
    if( NULL == filename)
        bf->fp = stdin;
    else
        bf->fp = fopen( filename, "rb");
    if( NULL == bf->fp)
    {
        /* do not leak the descriptor when the file cannot be opened */
        free( bf);
        return NULL;
    }
    bf->mask = 0x80;    /* start at the most significant bit */
    bf->rack = 0;
    return bf;
}
/*
 * Open a bit-level output stream.
 *
 * filename: path opened in binary write mode, or NULL to write to stdout.
 * Returns a newly allocated BITFILE, or NULL if the allocation or fopen
 * fails. Release it with CloseBitFileOutput(), which also writes out any
 * pending partial byte.
 */
BITFILE *OpenBitFileOutput( char *filename)
{
    BITFILE *bf;

    bf = (BITFILE *)malloc( sizeof(BITFILE));
    if( NULL == bf)
        return NULL;
    if( NULL == filename)
        bf->fp = stdout;
    else
        bf->fp = fopen( filename, "wb");
    if( NULL == bf->fp)
    {
        /* do not leak the descriptor when the file cannot be opened */
        free( bf);
        return NULL;
    }
    bf->mask = 0x80;    /* next bit written goes to the most significant position */
    bf->rack = 0;
    return bf;
}
/*
 * Close the stream wrapped by bf and free the BITFILE itself.
 * bf must not be used after this call.
 */
void CloseBitFileInput(BITFILE *bf)
{
    FILE *stream = bf->fp;

    fclose( stream);
    free( bf);
}
/*
 * Finish a bit-level output stream: write the partially filled byte (if any),
 * close the underlying file and free the BITFILE.
 * bf must not be used after this call.
 */
void CloseBitFileOutput(BITFILE *bf)
{
    /* mask is back at 0x80 only when no bits are waiting in rack */
    int partial_byte_pending = (bf->mask != 0x80);

    if (partial_byte_pending) {
        fputc( bf->rack, bf->fp);
    }
    fclose( bf->fp);
    free( bf);
}
/*
 * Read one bit from bf, most significant bit of each byte first.
 *
 * A new byte is fetched from the file whenever the mask is at 0x80.
 * Returns 0 or 1. If the file is exhausted when a new byte is needed,
 * a message is printed to stderr and the process exits with status -1.
 */
int BitInput( BITFILE *bf)
{
    int bit;

    if (bf->mask == 0x80) {
        /* previous byte fully consumed (or first call): load the next one */
        bf->rack = fgetc( bf->fp);
        if (bf->rack == EOF) {
            fprintf(stderr, "Read after the end of file reached\n");
            exit( -1);
        }
    }

    bit = ((bf->rack & bf->mask) != 0) ? 1 : 0;

    /* move to the next lower bit, wrapping back to the top bit after bit 0 */
    bf->mask = (bf->mask > 1) ? (bf->mask >> 1) : 0x80;

    return bit;
}
/*
 * Read `count` bits from bf and return them as an unsigned value, first bit
 * read being the most significant.
 *
 * bf:    bit-level input stream.
 * count: number of bits to read; must not exceed the width of unsigned long.
 *        A count of zero or less reads nothing and returns 0.
 *
 * The mask is built from an unsigned constant so that count == 32 does not
 * shift into the sign bit where long is 32 bits wide.
 */
unsigned long BitsInput( BITFILE *bf, int count)
{
    unsigned long mask;
    unsigned long value = 0UL;

    if( count <= 0)
        return 0UL;     /* a negative shift count would be undefined */
    mask = 1UL << (count-1);
    while( 0 != mask)
    {
        if( 1 == BitInput( bf))
            value |= mask;
        mask >>= 1;
    }
    return value;
}
/*
 * Append one bit to bf, filling each byte from its most significant bit down.
 *
 * bit: any non-zero value writes a 1, zero writes a 0.
 * The accumulated byte is written to the file as soon as it holds eight bits.
 */
void BitOutput( BITFILE *bf, int bit)
{
    if (bit) {
        bf->rack |= bf->mask;
    }

    bf->mask >>= 1;
    if (bf->mask != 0) {
        return;     /* byte not complete yet */
    }

    /* eight bits collected: write the byte and start a fresh one */
    fputc( bf->rack, bf->fp);
    bf->rack = 0;
    bf->mask = 0x80;
}
/*
 * Write the low `count` bits of `code` to bf, most significant of those bits
 * first.
 *
 * bf:    bit-level output stream.
 * code:  value whose low-order bits are written.
 * count: number of bits to write; must not exceed the width of unsigned long.
 *        A count of zero or less writes nothing.
 *
 * The mask is built from an unsigned constant so that count == 32 does not
 * shift into the sign bit where long is 32 bits wide.
 */
void BitsOutput( BITFILE *bf, unsigned long code, int count)
{
    unsigned long mask;

    if( count <= 0)
        return;         /* a negative shift count would be undefined */
    mask = 1UL << (count-1);
    while( 0 != mask)
    {
        BitOutput( bf, (int)(0==(code&mask)?0:1));
        mask >>= 1;
    }
}
#if 0
/*
 * Disabled manual test driver for the bit I/O routines: copies the input bit
 * by bit to the output while echoing the bits to stderr in groups of eight.
 *
 * usage: prog [ifile [ofile]]   (stdin / stdout are used when omitted)
 *
 * The open calls use the one-argument OpenBitFileInput/OpenBitFileOutput API
 * and store the returned handle.
 *
 * The copy loop has no exit of its own: it ends when BitInput() terminates
 * the process at end of input, so the trailing return is never reached and
 * the output stream is never closed through CloseBitFileOutput().
 */
int main( int argc, char **argv)
{
    BITFILE *bfi, *bfo;
    int bit;
    int count = 0;

    if( 1<argc)
    {
        bfi = OpenBitFileInput( argv[1]);
        if( NULL==bfi)
        {
            fprintf( stderr, "fail open the file\n");
            return -1;
        }
    }
    else
    {
        bfi = OpenBitFileInput( NULL);
        if( NULL==bfi)
        {
            fprintf( stderr, "fail open stdin\n");
            return -2;
        }
    }
    if( 2<argc)
    {
        bfo = OpenBitFileOutput( argv[2]);
        if( NULL==bfo)
        {
            fprintf( stderr, "fail open file for output\n");
            CloseBitFileInput( bfi);
            return -3;
        }
    }
    else
    {
        bfo = OpenBitFileOutput( NULL);
        if( NULL==bfo)
        {
            fprintf( stderr, "fail open stdout\n");
            CloseBitFileInput( bfi);
            return -4;
        }
    }
    while(1)
    {
        bit = BitInput( bfi);
        fprintf( stderr, "%d", bit);
        count ++;
        if( 0==(count&7))
            fprintf( stderr, " ");  /* visual gap after every eighth bit */
        BitOutput( bfo, bit);
    }
    return 0;
}
#endif
bitio.h
#ifndef __BITIO__
#define __BITIO__
#include <stdio.h>
/*
 * State of a bit-level stream layered on a stdio FILE.
 * The same structure is used for reading and for writing.
 */
typedef struct
{
FILE *fp; /* underlying byte stream */
unsigned char mask; /* single-bit mask for the next bit in rack; starts at 0x80 and moves right */
int rack; /* byte being read or assembled; int so it can also hold EOF from fgetc */
}BITFILE;
/* Open filename for bit-level reading (NULL selects stdin); returns NULL on failure. */
BITFILE *OpenBitFileInput(char *filename);
/* Open filename for bit-level writing (NULL selects stdout); returns NULL on failure. */
BITFILE *OpenBitFileOutput(char *filename);
/* Close the underlying file and free bf. */
void CloseBitFileInput( BITFILE *bf);
/* Write any partially filled byte, close the underlying file and free bf. */
void CloseBitFileOutput( BITFILE *bf);
/* Read one bit (0 or 1); exits the process if the file is exhausted. */
int BitInput( BITFILE *bf);
/* Read count bits, first bit read being the most significant of the result. */
unsigned long BitsInput( BITFILE *bf, int count);
/* Write one bit; any non-zero value of bit is written as 1. */
void BitOutput( BITFILE *bf, int bit);
/* Write the low count bits of code, most significant of them first. */
void BitsOutput( BITFILE *bf, unsigned long code, int count);
#endif // __BITIO__
主函数
#include <stdlib.h>
#include <stdio.h>
#include "bitio.h"
/* Upper bound on next_code: callers add entries only while MAX_CODE > next_code. */
#define MAX_CODE 65535
/*
 * LZW dictionary stored as a tree; the array index is the code.
 *   suffix      - last byte of the phrase represented by this code
 *   parent      - code of the phrase without its last byte, -1 for the
 *                 single-byte entries 0..255
 *   firstchild  - first entry that extends this phrase by one byte, -1 if none
 *   nextsibling - next entry in a chain linked through this field, -1 at the end
 *                 (entries added later share their parent; the initial
 *                 entries 0..255 are chained together)
 */
struct
{
int suffix;
int parent, firstchild, nextsibling;
} dictionary[MAX_CODE+1];
/* Code that the next AddToDictionary() call will assign; InitDictionary() sets it to 256. */
int next_code;
/* Scratch buffer filled by DecodeString(): phrase bytes in reverse order (last byte at the lowest index). */
int d_stack[MAX_CODE]; // stack for decoding a phrase
/* Codes are transferred as fixed 16-bit fields. */
#define input(f) ((int)BitsInput( f, 16))
#define output(f, x) BitsOutput( f, (unsigned long)(x), 16)
/* Forward declarations for functions used before their definitions. */
int DecodeString( int start, int code);
void InitDictionary( void);
/*
 * Print every learned dictionary entry (codes 256 .. next_code-1) to stdout,
 * one per line, as "<code>-><phrase>".
 */
void PrintDictionary( void)
{
    int code;

    for (code = 256; code < next_code; ++code) {
        /* DecodeString leaves the phrase reversed in d_stack */
        int pos = DecodeString( 0, code);

        printf( "%4d->", code);
        for (pos = pos - 1; pos >= 0; --pos) {
            printf("%c", (char)(d_stack[pos]));
        }
        printf( "\n");
    }
}
/*
 * Expand `code` into d_stack, starting at index `start`.
 *
 * The phrase is produced by walking parent links, so bytes are stored
 * last-byte-first: the first byte of the phrase ends up at the highest index.
 * A negative code stores nothing.
 *
 * Returns the index one past the last slot written (equal to `start` when
 * nothing was stored).
 */
int DecodeString(int start, int code)
{
    int top = start;

    for (; code >= 0; code = dictionary[code].parent) {
        d_stack[top++] = dictionary[code].suffix;
    }
    return top;
}
/*
 * Reset the dictionary to the 256 single-byte phrases.
 *
 * Entries 0..255 become roots (parent -1, no children) chained together
 * through nextsibling, with the chain ending at entry 255. next_code is set
 * to 256 so the first learned phrase gets that code.
 */
void InitDictionary(void)
{
    int byte = 0;

    while (byte < 256) {
        dictionary[byte].suffix = byte;
        dictionary[byte].parent = -1;
        dictionary[byte].firstchild = -1;
        dictionary[byte].nextsibling = (byte < 255) ? byte + 1 : -1;
        byte++;
    }
    next_code = 256;
}
/*
 * Look up the phrase formed by appending `character` to the phrase whose
 * code is `string_code`.
 *
 * Input:  character   - byte to append
 *         string_code - code of the existing phrase, negative for "empty"
 * Output: the code of string+character, or -1 if it is not in the dictionary.
 *         With an empty string the result is `character` itself.
 */
int InDictionary(int character, int string_code)
{
    int node;

    if (string_code < 0) {
        return character;
    }

    /* scan the children of string_code for one ending in `character` */
    for (node = dictionary[string_code].firstchild;
         node > -1;
         node = dictionary[node].nextsibling) {
        if (dictionary[node].suffix == character) {
            return node;
        }
    }
    return -1;
}
/*
 * Add the phrase string+character to the dictionary under code next_code,
 * then advance next_code.
 *
 * character:   byte appended to the existing phrase
 * string_code: code of the existing phrase; a negative value makes the call
 *              a no-op
 *
 * The new entry is linked as the last child of string_code. No capacity
 * check is done here; callers test next_code against MAX_CODE first.
 */
void AddToDictionary( int character, int string_code)
{
    int entry;
    int tail;

    if (string_code < 0) {
        return;
    }

    entry = next_code;
    dictionary[entry].suffix = character;
    dictionary[entry].parent = string_code;
    dictionary[entry].nextsibling = -1;
    dictionary[entry].firstchild = -1;

    tail = dictionary[string_code].firstchild;
    if (tail < 0) {
        /* parent had no children: the new entry becomes the first */
        dictionary[string_code].firstchild = entry;
    } else {
        /* walk to the end of the sibling chain and append there */
        while (dictionary[tail].nextsibling > -1) {
            tail = dictionary[tail].nextsibling;
        }
        dictionary[tail].nextsibling = entry;
    }

    next_code = entry + 1;
}
/*
 * Placeholder: the body is empty in this listing. A full LZWEncode with the
 * same signature is listed earlier in this document.
 */
void LZWEncode( FILE *fp, BITFILE *bf)
{}
/*
 * Placeholder: the body is empty in this listing. A full LZWDecode with the
 * same signature is listed earlier in this document.
 */
void LZWDecode( BITFILE *bf, FILE *fp)
{}
/*
 * Encode in_name into out_name.
 * The input is opened first so a missing input does not create the output.
 * Returns 0 on success, -1 if either file cannot be opened.
 */
static int EncodeFile( char *in_name, char *out_name)
{
    FILE *fp;
    BITFILE *bf;

    fp = fopen( in_name, "rb");
    if( NULL==fp)
    {
        fprintf( stderr, "cannot open input file %s\n", in_name);
        return -1;
    }
    bf = OpenBitFileOutput( out_name);
    if( NULL==bf)
    {
        fprintf( stderr, "cannot open output file %s\n", out_name);
        fclose( fp);
        return -1;
    }
    LZWEncode( fp, bf);
    fclose( fp);
    CloseBitFileOutput( bf);
    fprintf( stdout, "encoding done\n");
    return 0;
}

/*
 * Decode in_name into out_name.
 * The input is opened first so a missing input does not create the output.
 * Returns 0 on success, -1 if either file cannot be opened.
 */
static int DecodeFile( char *in_name, char *out_name)
{
    FILE *fp;
    BITFILE *bf;

    bf = OpenBitFileInput( in_name);
    if( NULL==bf)
    {
        fprintf( stderr, "cannot open input file %s\n", in_name);
        return -1;
    }
    fp = fopen( out_name, "wb");
    if( NULL==fp)
    {
        fprintf( stderr, "cannot open output file %s\n", out_name);
        CloseBitFileInput( bf);
        return -1;
    }
    LZWDecode( bf, fp);
    fclose( fp);
    CloseBitFileInput( bf);
    fprintf( stdout, "decoding done\n");
    return 0;
}

/*
 * Command-line entry point.
 *
 * usage: prog <o> <ifile> <ofile>
 *   <o> starting with 'E' encodes <ifile> into <ofile>;
 *   <o> starting with 'D' decodes <ifile> into <ofile>.
 *
 * Returns -1 when too few arguments are given or a file cannot be opened,
 * 0 otherwise (including for an unsupported operation letter, which only
 * prints a message).
 */
int main( int argc, char **argv)
{
    if( 4>argc)
    {
        fprintf( stdout, "usage: \n%s <o> <ifile> <ofile>\n", argv[0]);
        fprintf( stdout, "\t<o>: E or D reffers encode or decode\n");
        fprintf( stdout, "\t<ifile>: input file name\n");
        fprintf( stdout, "\t<ofile>: output file name\n");
        return -1;
    }
    if( 'E' == argv[1][0])
    {
        return EncodeFile( argv[2], argv[3]);
    }
    if( 'D' == argv[1][0])
    {
        return DecodeFile( argv[2], argv[3]);
    }
    fprintf( stderr, "not supported operation\n");
    return 0;
}
五、实验步骤
1、首先调试LZW的编码程序,以一个文本文件作为输入,得到输出的LZW编码文件。
2、以实验步骤一得到的编码文件作为输入,编写LZW的解码程序。在写解码程序时需要对关键语句加上注释,并说明进行何操作。在实验报告中重点说明当前码字在词典中不存在时应如何处理并解释原因。
编码转换成功
3、选择至少十种不同格式类型的文件,使用LZW编码器进行压缩得到输出的压缩比特流文件。对各种不同格式的文件进行压缩效率的分析。