// LZW.cpp : Defines the entry point for the console application.
//
- #include "stdafx.h"
- #pragma warning (disable:4996)
- /* Basic LZW Data Compression program published in DDJ October 1989 issue.
- * Original Author: Mark R. Nelson
- * Updated by: Shawn M. Regan, January 1990
- * Added: - Method to clear table when compression ratio degrades
- * - Self adjusting code size capability (up to 14 bits)
- * Updated functions are marked with "MODIFIED". main() has been updated also
- * Compile with -ml (large model) for MAX_BITS == 14 only
- */
/* ---- Codec parameters -------------------------------------------------- */
#define INIT_BITS 9                  /* Width, in bits, of the first codes */
#define MAX_BITS 14                  /* Do not exceed 14 with this program */
#define HASHING_SHIFT (MAX_BITS - 8) /* Parenthesized: safe in any expression */
#if MAX_BITS == 14                   /* Set the table size. Must be a prime */
#define TABLE_SIZE 18041             /* number somewhat larger than 2^MAX_BITS. */
#elif MAX_BITS == 13
#define TABLE_SIZE 9029
#else
#define TABLE_SIZE 5021
#endif
#define CLEAR_TABLE 256              /* Code to flush the string table */
#define TERMINATOR 257               /* Marks end of the compressed stream */
#define FIRST_CODE 258               /* First code available for table entries */
#define CHECK_TIME 100               /* Check comp ratio every CHECK_TIME chars input */
#define MAXVAL(n) ((1 << (n)) - 1)   /* Largest code representable in n bits */

/*
 * Every table entry extends an existing string by exactly one byte, so the
 * string for code c is at most (c - 256) bytes long.  One slot per possible
 * code is therefore always enough to hold any decoded string.
 */
#define DECODE_STACK_SIZE (1 << MAX_BITS)

/* ---- Shared state ------------------------------------------------------ */
int *currnt_code;                    /* Encoder hash table: code stored in each slot, -1 = empty */
unsigned int *prefix_code;           /* Prefix code of each entry */
unsigned char *append_char;          /* Byte appended to the prefix of each entry */
unsigned char decode_stack[DECODE_STACK_SIZE]; /* Holds one decoded string, in reverse */
unsigned long bytes_in = 0;          /* Input bytes seen, for the compression ratio */
unsigned long bytes_out = 0;         /* Output bytes written, for the compression ratio */
unsigned long checkpoint = CHECK_TIME; /* bytes_in value that triggers the next ratio check */
int num_bits = INIT_BITS;            /* Current code width; starts at 9 bits */
int max_code = 0;                    /* Largest code for num_bits; set by main() before use */

/* ---- Prototypes -------------------------------------------------------- */
void Encode(FILE *input, FILE *output);
void Decode(FILE *input, FILE *output);
void output_code(FILE *output, unsigned int code);
int find_match(int hash_prefix, unsigned int hash_character);
char *decode_string(unsigned char *buffer, unsigned int code);
unsigned int input_code(FILE *input);
- int main(int argc, char *argv[]){
- FILE *EncodeFile = NULL;
- FILE *DecodeFile = NULL;
- FILE *LZWTempFile = NULL;
- __try {
- //分配压缩缓冲区
- currnt_code = (int*)malloc( TABLE_SIZE * sizeof(unsigned int) );
- prefix_code = (unsigned int*)malloc( TABLE_SIZE * sizeof(unsigned int) );
- append_char = (unsigned char*)malloc( TABLE_SIZE * sizeof(unsigned char) );
- if ( currnt_code == NULL || prefix_code == NULL || append_char == NULL ){
- __leave;
- }
- if ( argc < 2 ){
- __leave;
- }
- //获取将要压缩的文件名称
- EncodeFile = fopen( argv[1], "rb" );
- LZWTempFile = fopen( "test.lzw","wb" );
- if ( EncodeFile == NULL || LZWTempFile == NULL ){
- __leave;
- }
- max_code = MAXVAL( num_bits ); /* Initialize max_value & max_code */
- Encode( EncodeFile, LZWTempFile ); /* Call compression routine */
- fclose( LZWTempFile );
- //重新打开压缩数据,还原成原始文件
- LZWTempFile = fopen( "test.lzw", "rb" );
- DecodeFile = fopen( "test.out", "wb" );
- if ( LZWTempFile == NULL || DecodeFile == NULL ){
- __leave;
- }
- num_bits = INIT_BITS; /* Re-initialize for expansion */
- max_code = MAXVAL(num_bits);
- Decode( LZWTempFile, DecodeFile ); /* Call expansion routine */
- }
- __finally {
- if( EncodeFile != NULL ){
- fclose( EncodeFile );
- }
- if( DecodeFile != NULL ){
- fclose( DecodeFile );
- }
- if( LZWTempFile != NULL ){
- fclose( LZWTempFile );
- }
- if( prefix_code != NULL ){
- free( prefix_code );
- }
- if( append_char != NULL ){
- free( append_char );
- }
- if( currnt_code != NULL ){
- free( currnt_code );
- }
- }
- }
- /* MODIFIED This is the new compression routine. The first two 9-bit codes
- * have been reserved for communication between the compressor and expander.
- */
- void Encode(FILE *input, FILE *output){
- unsigned int next_code = FIRST_CODE;
- unsigned int character;
- unsigned int string_code;
- unsigned int index;
- int ratio_new; /* New compression ratio as a percentage */
- int ratio_old=100; /* Original ratio at 100% */
- //初始化字符串表。
- for ( int i = 0; i < TABLE_SIZE; i++ ){
- currnt_code[i] = -1;
- }
- puts( "正在压缩中,请稍后..." );
- //获取原文件第一个字节码
- string_code = getc(input);
- while( ( character = getc(input) ) != (unsigned)EOF ){
- index = find_match( string_code, character );
- if( currnt_code[index] != -1 ){
- string_code = currnt_code[index];
- continue;
- }
- if ((int)next_code <= max_code ) {
- currnt_code[index] = next_code++;
- prefix_code[index] = string_code;
- append_char[index] = character;
- }
- output_code( output, string_code ); /* Send out current code */
- string_code = character;
- if( (int)next_code <= max_code ){ /* Is table Full? */
- continue;
- }
- if ( num_bits < MAX_BITS ){ /* Any more bits? */
- max_code = MAXVAL( ++num_bits ); /* Increment code size then */
- }
- if( bytes_in > checkpoint ){ /* At checkpoint? */
- if ( num_bits == MAX_BITS ){
- ratio_new = bytes_out * 100 / bytes_in; /* New compression ratio */
- if( ratio_new > ratio_old ){ /* Has ratio degraded? */
- output_code( output, CLEAR_TABLE ); /* YES,flush string table */
- num_bits = INIT_BITS;
- next_code = FIRST_CODE; /* Reset to FIRST_CODE */
- max_code = MAXVAL(num_bits); /* Re-Initialize this stuff */
- bytes_in = bytes_out = 0;
- ratio_old = 100; /* Reset compression ratio */
- for( int i = 0;i < TABLE_SIZE; i++ ){ /* Reset code value array */
- currnt_code[i] = -1;
- }
- }else{ /* NO, then save new */
- ratio_old = ratio_new; /* compression ratio */
- }
- }
- checkpoint= bytes_in + CHECK_TIME; /* Set new checkpoint */
- }
- }
- output_code( output, string_code ); /* Output the last code */
- if ( next_code == max_code ){ /* Handles special case for bit */
- ++num_bits; /* increment on EOF */
- }
- output_code( output, TERMINATOR ); /* Output the end of buffer code */
- output_code( output, 0 ); /* Flush the output buffer */
- output_code( output, 0 );
- output_code( output, 0 );
- puts("压缩数据完成,输出数据文件名:test.lzw");
- }
- int find_match(int hash_prefix, unsigned int hash_character){
- int offset;
- int index;
- index = hash_character << (HASHING_SHIFT) ^ hash_prefix;
- if ( !index ){
- offset = 1;
- }else{
- offset = TABLE_SIZE - index;
- }
- while( true ){
- if( currnt_code[index] == -1 ){
- return index;
- }
- if( prefix_code[index] == hash_prefix && append_char[index] == hash_character ){
- return index;
- }
- index -= offset;
- if( index < 0 ){
- index += TABLE_SIZE;
- }
- }
- }
- void Decode(FILE *input, FILE *output){
- unsigned char *string;
- unsigned int next_code=FIRST_CODE;
- unsigned int new_code;
- unsigned int old_code;
- int character = 0;
- int clear_flag = 1; /* Need to clear the code value array */
- puts( "正在解压缩中,请稍后..." );
- while( ( new_code = input_code( input ) ) != TERMINATOR ){
- if( clear_flag ){ /* Initialize or Re-Initialize */
- clear_flag = 0;
- old_code = new_code; /* The next three lines have been moved */
- character = old_code; /* from the original */
- putc( old_code, output );
- continue;
- }
- if ( new_code == CLEAR_TABLE ){ /* Clear string table */
- clear_flag = 1;
- num_bits = INIT_BITS;
- next_code = FIRST_CODE;
- max_code = MAXVAL(num_bits);
- continue;
- }
- if( new_code >= next_code ){ /* Check for string+char+string */
- *decode_stack = character;
- string = (unsigned char*)decode_string( decode_stack+1, old_code );
- }else{
- string = (unsigned char*)decode_string( decode_stack, new_code );
- }
- character = *string; /* Output decoded string in reverse */
- while( string >= decode_stack ){
- putc( *string--, output );
- }
- if( (int)next_code <= max_code ){ /* Add to string table if not full */
- prefix_code[next_code] = old_code;
- append_char[next_code++] = character;
- if( next_code == max_code && num_bits < MAX_BITS ){
- max_code = MAXVAL( ++num_bits );
- }
- }
- old_code = new_code;
- }
- puts("解压缩数据完成,输出数据文件名:test.out");
- }
- char *decode_string(unsigned char *buffer, unsigned int code){
- int i = 0;
- while( code > 255 ){
- *buffer++ = append_char[code];
- code = prefix_code[code];
- if ( i++ >= 4000 ) {
- puts( "Error during code expansion" );
- exit(1);
- }
- }
- *buffer = code;
- return (char*)buffer;
- }
- unsigned int input_code(FILE *input){
- unsigned int return_value;
- static int input_bit_count = 0;
- static unsigned long input_bit_buffer = 0L;
- while( input_bit_count <= 24 ){
- input_bit_buffer |= (unsigned long)getc( input ) << ( 24 - input_bit_count );
- input_bit_count += 8;
- }
- return_value = input_bit_buffer >> (32-num_bits);
- input_bit_buffer <<= num_bits;
- input_bit_count -= num_bits;
- return return_value;
- }
- void output_code(FILE *output, unsigned int code){
- static int output_bit_count = 0;
- static unsigned long output_bit_buffer = 0L;
- output_bit_buffer |= (unsigned long) code << (32 - num_bits - output_bit_count);
- output_bit_count += num_bits;
- while( output_bit_count >= 8 ){
- putc( output_bit_buffer >> 24, output );
- output_bit_buffer <<= 8;
- output_bit_count -= 8;
- bytes_out++;
- }
- }