>一书中哈夫曼编码代码及笔记

本文详细介绍了哈夫曼编码的基本原理与实现方法,包括哈夫曼树的构建过程及压缩解压流程。文中还提供了具体的源代码实现,帮助读者深入理解哈夫曼编码在数据压缩中的应用。

由于没能找到书中的源代码,且书中有许多印刷错误,所以我把通过编译的代码摘录如下

/******************Start of BITIO.c **********************/

/*
 * This utility file contains all of routines needed to implement
 * bit oriented routines under either ANSI or K&R C. It needs to
 * be linked with every program used in the book
 */

#include <stdio.h>
#include <stdlib.h>
#include "bitio.h"
#include "errhand.h"

/*
 * Opens the named file for binary writing and wraps it in a newly
 * allocated BIT_FILE.  The rack starts empty and the mask selects the
 * most significant bit, so the first bit written lands in bit 7.
 *
 * Returns NULL if either the allocation or the fopen() fails, so a
 * single NULL check in the caller covers both failures.
 */
BIT_FILE *OpenOutputBitFile ( name )
char *name;
{
 BIT_FILE *bit_file;

 bit_file = (BIT_FILE *) calloc( 1, sizeof(BIT_FILE) );
 if ( bit_file == NULL )
  return ( bit_file );
 bit_file->file = fopen( name, "wb" );
 if ( bit_file->file == NULL )
 {
  /* Never hand back a BIT_FILE that wraps a NULL stream. */
  free( (char *) bit_file );
  return ( NULL );
 }
 bit_file->rack = 0;
 bit_file->mask = 0x80;
 bit_file->pacifier_counter = 0;
 return ( bit_file );
}

/*
 * Opens the named file for binary reading and wraps it in a newly
 * allocated BIT_FILE.  The mask starts at 0x80, which is the state the
 * input routines use as the signal to fetch a new byte into the rack.
 *
 * Returns NULL if either the allocation or the fopen() fails, so a
 * single NULL check in the caller covers both failures.
 */
BIT_FILE *OpenInputBitFile( name )
char *name;
{
 BIT_FILE *bit_file;

 bit_file = (BIT_FILE *) calloc( 1, sizeof(BIT_FILE) );
 if ( bit_file == NULL )
  return ( bit_file );
 bit_file->file = fopen( name, "rb" );
 if ( bit_file->file == NULL )
 {
  /* Never hand back a BIT_FILE that wraps a NULL stream. */
  free( (char *) bit_file );
  return ( NULL );
 }
 bit_file->rack = 0;
 bit_file->mask = 0x80;
 bit_file->pacifier_counter = 0;
 return ( bit_file );
}

void CloseOutputBitFile( bit_file )
BIT_FILE *bit_file;
{
 if ( bit_file->mask != 0x80 )
  if ( putc( bit_file->rack, bit_file->file ) != bit_file->rack )
   fatal_error( "Fatal error in CloseBitFile!/n" );
 fclose( bit_file->file );
 free( (char *)bit_file );
}

/*
 * Closes the stream wrapped by an input BIT_FILE and releases the
 * BIT_FILE itself.  The pointer must not be used after this call.
 */
void CloseInputBitFile( bit_file )
BIT_FILE *bit_file;
{
 FILE *stream;

 stream = bit_file->file;
 fclose( stream );
 free( (char*) bit_file );
}

void OutputBit( bit_file, bit )
BIT_FILE *bit_file;
int bit;
{
 if ( bit )
  bit_file->rack |= bit_file->mask;
 bit_file->mask >>= 1;
 if ( bit_file->mask == 0 )
 {
  if ( putc( bit_file->rack, bit_file->file ) != bit_file->rack )
   fatal_error( "Fatal error in OutputBit!/n");
  else
   if ( (bit_file->pacifier_counter++ & 4095 ) == 0 ) /* 4095 eq 111111111111 */
    putc( '.', stdout);
  bit_file->rack = 0;
  bit_file->mask = 0x80;
 }
}

void OutputBits( bit_file, code, count )
BIT_FILE *bit_file;
unsigned long code;
int count;
{
 unsigned long mask;
 
 mask = 1L << ( count - 1 );
 while (mask != 0)
 {
  if ( mask & code )
   bit_file->rack |= bit_file->mask;
  bit_file->mask >>= 1;
  if ( bit_file->mask == 0 )
  {
   if ( putc( bit_file->rack, bit_file->file ) != bit_file->rack )
    fatal_error( "Fatal error in OutputBits!/n");
   else if ((bit_file->pacifier_counter++ & 2047)==0)
    putc( '.', stdout );
   bit_file->rack = 0;
   bit_file->mask = 0x80;
  }
  mask >>= 1;
 }
}

int InputBit( bit_file )
BIT_FILE *bit_file;
{
 int value;

 if ( bit_file->mask == 0x80 )
 {
  bit_file->rack = getc( bit_file->file );
  if ( bit_file->rack == EOF )
   fatal_error( "Fatal error in InputBit!/n");
  if ( ( bit_file->pacifier_counter++ & 2047 ) == 0 )
   putc( '.', stdout );
 }
 value = bit_file->rack & bit_file->mask;
 bit_file->mask >>= 1;
 if ( bit_file->mask == 0 )
  bit_file->mask = 0x80;
 return ( value ? 1 : 0 );
}

unsigned long InputBits( bit_file, bit_count )
BIT_FILE *bit_file;
int bit_count;
{
 unsigned long mask;
 unsigned long return_value;

 mask = 1L << ( bit_count -1 );
 return_value = 0;
 while ( mask != 0)
 {
  if ( bit_file->mask == 0x80 )
  {
   bit_file->rack = getc( bit_file->file );
   if ( bit_file->rack == EOF )
    fatal_error( "Fatal error in InputBit!/n" );
   if ( (bit_file->pacifier_counter++ & 2047 ) == 0 )
    putc( '.', stdout );
  }
  if ( bit_file->rack & bit_file->mask ) /* 如何相应位为1,则置1,恰好就是与mask或 */
   return_value |= mask;
  mask >>= 1;
  bit_file->mask >>=1;
  if ( bit_file->mask == 0 )
   bit_file->mask = 0x80;
 }
 return (return_value);
}

/*
 * Writes the low `bits` bits of `code` to `file` as the characters
 * '0' and '1', most significant of those bits first.
 *
 * bits: number of digits to print.  Zero or less prints nothing (the
 *       original shifted by a negative amount, which is undefined).
 *       Must not exceed the width of unsigned int.
 */
void FilePrintBinary( file, code, bits)
FILE *file;
unsigned int code;
int bits;
{
 unsigned int mask;

 if ( bits <= 0 )
  return;
 /* Unsigned constant so that selecting the top bit is well defined. */
 mask = 1U << (bits - 1);
 for ( ; mask != 0; mask >>= 1 )
  fputc( ( code & mask ) ? '1' : '0', file );
}

/***************************End of BITIO.C*********************/

 

/************************ Start of ERRHAND.C ***********************/

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include "errhand.h"

/*
 * Prints "Fatal error: " followed by the printf-style message built
 * from fmt and the remaining arguments to stdout, then terminates the
 * program with exit status -1.  This function does not return.
 */
#ifdef __STDC__
void fatal_error( char *fmt, ... )
#else
#ifdef __UNIX__
void fatal_error( fmt )
char *fmt;
va_dcl
#else
void fatal_error( fmt )
#endif
#endif
{
 va_list args;

 fputs( "Fatal error: ", stdout );
 va_start( args, fmt );
 vfprintf( stdout, fmt, args );
 va_end( args );
 exit(-1);
}
/************************ End of ERRHAND.C ***********************/

 

/********************** Start of HUFF.C  *************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "bitio.h"
#include "errhand.h"
#include "main.h"

/*
 * The NODE structure is a node in the Huffman decoding tree.  It has a
 * count, which is its weight in the tree, and the node numbers of its
 * two children.  The saved_count member of the structure is only
 * there for debugging purposes, and can be safely taken out at any
 * time.  It just holds the initial count for each of the symbols, since
 * the count member is continually being modified as the tree grows.
*/

/* One entry of the Huffman tree array; children are referenced by index. */
typedef struct tree_node {
 unsigned int count;        /* current weight; build_tree() sets it to 0 once the node has been merged */
 unsigned int saved_count;  /* copy of the weight taken just before count is zeroed (used by print_model) */
 int child_0;               /* index of the child reached by a 0 bit */
 int child_1;               /* index of the child reached by a 1 bit */
}NODE;

/*
 * A Huffman tree is set up for decoding, not encoding.  When encoding,
 * I first walk through the tree and build up a table of codes for
 * each symbol.  The codes are stored in this CODE structure.
*/

/* Encoding table entry for one symbol, filled in by convert_tree_to_code(). */
typedef struct code {
 unsigned int code;  /* bit pattern of the code, held in the low code_bits bits */
 int code_bits;      /* number of bits in the code */
}CODE;

/*
 * The special EOS symbol is 256, the first available symbol after all
 * of the possible bytes.  When decoding, reading this symbol
 * indicates that all of the data has been read in.
*/
#define END_OF_STREAM 256

/*
 * Local function prototypes, defined with or without ANSI prototypes.
*/

/*
 * Forward declarations of the routines private to HUFF.C.  Full
 * prototypes are used when the compiler defines __STDC__; otherwise
 * old-style declarations without parameter lists are used.
 */
#ifdef __STDC__

void count_bytes( FILE *input, unsigned long *long_counts );
void scale_counts( unsigned long *long_counts, NODE *nodes );
int build_tree( NODE *nodes );
void convert_tree_to_code( NODE *nodes,
         CODE *codes,
         unsigned int code_so_far,
         int bits,
         int node );
void output_counts( BIT_FILE *output, NODE *nodes );
void input_counts( BIT_FILE *input, NODE *nodes );
void print_model( NODE *nodes, CODE *codes );
void compress_data( FILE *input, BIT_FILE *output, CODE *codes );
void expand_data( BIT_FILE *input, FILE *output, NODE *nodes,
      int root_node );
void print_char( int c );
#else /* __STDC__ */

void count_bytes();
void scale_counts();
int build_tree();
void convert_tree_to_code();
void output_counts();
void input_counts();
void print_model();
void compress_data();
void expand_data();
void print_char();

#endif /* __STDC__ */


/*
 * These two strings are used by MAIN-C.C and MAIN-E.C to print
 * messages of importance to the use of the program.
*/

/* Name of the algorithm, printed by both driver programs. */
char *CompressionName = "static order 0 model with Huffman coding";
/* Argument summary appended to the program name in the usage message. */
char *Usage = "infile outfile [-d]\n\n Specifying -d will dump the modeling data\n";

/*
 * CompressFile is the compression routine called by MAIN-C.C.  It
 * looks for a single additional argument to be passed to it from
 * the command line:  "-d".  If a "-d" is present, it means the
 * user wants to see the model data dumped out for debugging
 * purposes.
 *
 * This routine works in a fairly straightforward manner.  First,
 * it has to allocate storage for three different arrays of data.
 * Next, it counts all the bytes in the input file.  The counts
 * are all stored in long int, so the next step is to scale them down
 * to single byte counts in the NODE array.  After the counts are
 * scaled, the Huffman decoding tree is built on top of the NODE
 * array.  Another routine walks through the tree to build a table
 * of codes, one per symbol. Finally, when the codes are all ready,
 * compressing the file is a simple matter.  After the file is
 * compressed, the storage is freed up, and the routine returns.
 *
*/
void CompressFile( input, output, argc, argv )
FILE *input;
BIT_FILE *output;
int argc;
char *argv[];
{
 unsigned long *counts;
 NODE *nodes;
 CODE *codes;
 int root_node;

 counts = ( unsigned long * )calloc(256, sizeof(unsigned long));
 if ( counts==NULL )
  fatal_error( "Error allocating counts array/n" );
 if ( ( nodes = (NODE *)calloc(514, sizeof( NODE ) ) ) == NULL )
  fatal_error( "Error allocating counts nodes/n" );
 if ( ( codes = (CODE *)calloc(257, sizeof( CODE ) ) ) == NULL )
  fatal_error( "Error allocating counts codes/n" );
 count_bytes( input, counts );
 scale_counts( counts, nodes );
 output_counts( output, nodes );
 root_node = build_tree( nodes );
 convert_tree_to_code( nodes, codes, 0, 0, root_node );
 if ( argc>0 && strcmp( argv[0], "-d" ) == 0 )
  print_model( nodes, codes );
 compress_data( input, output, codes );
 free( (char *) counts );
 free( (char *) nodes );
 free( (char *) codes );
}

/*
 * ExpandFile is the routine called by MAIN-E.C to expand a file that
 * has been compressed with order 0 Huffman coding.  This routine has
 * a simpler job than that of the Compression routine.  All it has to
 * do is read in the counts that have been stored in the compressed
 * file, then build the Huffman tree.  The data can then be expanded
 * by reading in a bit at a time from the compressed file.  Finally,
 * the node array is freed and the routine returns.
 *
*/

void ExpandFile( input, output, argc, argv )
BIT_FILE *input;
FILE *output;
int argc;
char *argv[];
{
 NODE *nodes;
 int root_node;

 if ( (nodes = (NODE *)calloc(514, sizeof( NODE ) ) ) ==  NULL )
  fatal_error( "Error allocating nodes array/n" );
 input_counts( input, nodes );
 root_node = build_tree( nodes );
 if ( argc>0 && strcmp( argv[0], "-d" ) == 0 )
  print_model( nodes, 0 );
 expand_data( input, output, nodes, root_node );
 free( (char *)nodes );
}
/*
 * In order for the compressor to build the same model, I have to
 * store the symbol counts in the compressed file so the expander can
 * read them in. In order to save space, I don't save all 256 symbols
 * unconditionally. The format used to store counts looks like this:
 *
 * start, stop, counts, start, stop, counts, ... 0
 *
 * This means that I store runs of counts, until all the non-zero
 * counts have been stored. At this time the list is terminated by
 * storing a start value of 0. Note that at least 1 run of counts has
 * to be stored, so even if the first start value is 0, I read it in.
 * It also means that even in an empty file that has no counts, I have
 * to pass at least one count, which will have a value of 0.
 *
 * In order to efficiently use this format, I have to identify runs of
 * non-zero counts. Because of the format used, I don't want to stop a
 * run because of just one or two zeros in the count stream. So I have
 * to sit in a loop looking for strings of three or more zero values
 * in a row.
 *
 * This is simple in concept, but it ends up being one of the most
 * complicated routines in the whole program. A routine that just
 * writes out 256 values without attempting to optimize would be much
 * simpler, but would hurt compression quite a bit on small files.
 *
*/
void output_counts( output, nodes )
BIT_FILE *output;
NODE *nodes;
{
 int first;
 int last;
 int next;
 int i;
 first = 0;
 while ( first<255 && nodes[first].count == 0 )
  first++;
/*
 * Each time I hit the start of the loop,  I assume that first is the
 * start of a run of non-zero values.  The rest of the loop is
 * concerned with finding the value for last, which is the end of the
 * run, and the value of next, which is the start of the next run.
 * At the end of the loop, I assign next to first, so it starts in on
 * the next run.
*/
 for ( ; first<256; first = next)
 {
  last = first + 1;
  for (;;)
  {
   for (; last<256; last++ )
    if ( nodes[last].count == 0 )
     break;
   last--;
   for ( next = last + 1; next<256; next++ )
    if ( nodes[next].count != 0 )
     break;
   if ( next > 255 )
    break;
   if ( (next-last)>3 )
    break;
   last = next;
  };
/*
 * Here is where I output first, last, and all the counts in between.
*/
  if ( putc( first, output->file ) != first )
   fatal_error( "Error writing byte counts/n" );
  if ( putc( last, output->file ) != last )
   fatal_error( "Error writing byte counts/n" );
  for ( i=first; i<=last; i++ )
  {
   if ( putc( nodes[i].count, output->file ) != (int) nodes[i].count )
    fatal_error( "Error writing byte counts/n" );
  }
 }
 if ( putc(0, output->file ) != 0 )
  fatal_error( "Error writing byte counts/n" );
}

/*
 * When expanding, I have to read in the same set of counts.  This is
 * quite a bit easier than the process of writing them out, since no
 * decision making needs to be done.  All I do is read in first, check
 * to see if I am all done, and if not, read in last and a string of
 * counts.
*/
void input_counts( input, nodes )
BIT_FILE *input;
NODE *nodes;
{
 int first;
 int last;
 int i;
 int c;

// for ( i=0; i<256; i++ ) //because use calloc ,here we may not operate
//  nodes[i].count = 0;
 
 if ( (first = getc( input->file ) ) == EOF )
  fatal_error( "Error reading byte counts/n");
 if ( (last = getc( input->file ) ) == EOF )
  fatal_error( "Error reading byte counts/n");
 for (;;)
 {
  for ( i=first; i<=last; i++)
   if ( (c=getc(input->file))==EOF )
    fatal_error( "Error reading byte counts/n");
   else
    nodes[i].count = (unsigned int)c;
  if ( (first = getc( input->file ) ) == EOF )
   fatal_error( "Error reading byte counts/n");
  if ( first == 0 )
   break;
  if ( (last = getc( input->file ) ) == EOF )
   fatal_error( "Error reading byte counts/n");
 }
 nodes[END_OF_STREAM].count = 1;
}

/*
 * This routine counts the frequency of occurrence of every byte in
 * the input file.  It marks the place in the input stream where it
 * started, counts up all the bytes, then returns to the place where
 * it started.  In most C implementations, the length of a file
 * cannot exceed an unsigned long, so this routine should always
 * work.
*/
#ifndef SEEK_SET
#define SEEK_SET 0
#endif

/*
 * Tallies how often each byte value occurs from the current position
 * of `input` to end of file, adding to counts[0..255], and then seeks
 * back to the position the stream had on entry.
 */
void count_bytes( input, counts )
FILE *input;
unsigned long *counts;
{
 long start_pos;   /* stream position on entry */
 int ch;

 start_pos = ftell( input );
 for ( ch = getc( input ); ch != EOF; ch = getc( input ) )
  counts[ch] += 1;
 fseek( input, start_pos, SEEK_SET );
}

/*
 * In order to limit the size of my Huffman codes to 16 bits, I scale
 * my counts down so they fit in an unsigned char, and then store them
 * all as initial weights in my NODE array.  The only thing to be
 * careful of is to make sure that a node with a non-zero count doesn't
 * get scaled down to 0.  Nodes with values of 0 don't get codes.
*/
/*
 * Scales the 256 raw byte counts so that each fits in one byte and
 * stores them as the initial weights nodes[0..255].count.  A symbol
 * that occurred at least once never scales to 0.  If every count is
 * zero, counts[0] is set to 1 first.  END_OF_STREAM always receives a
 * weight of 1.
 */
void scale_counts( counts, nodes )
unsigned long *counts;
NODE *nodes;
{
 unsigned long peak;
 unsigned long divisor;
 unsigned int scaled;
 int sym;

 /* Find the largest raw count. */
 peak = 0;
 sym = 0;
 while ( sym < 256 )
 {
  peak = ( counts[sym] > peak ) ? counts[sym] : peak;
  sym++;
 }
 if ( peak == 0 )
 {
  counts[0] = 1;
  peak = 1;
 }
 /* Smallest divisor that brings the largest count down to at most 255. */
 divisor = peak / 255 + 1;
 for ( sym = 0; sym < 256; sym++ )
 {
  scaled = (unsigned int)( counts[sym] / divisor );
  if ( scaled == 0 && counts[sym] != 0 )
   scaled = 1;
  nodes[sym].count = scaled;
 }
 nodes[ END_OF_STREAM ].count = 1;
}

/*
 * Building the Huffman tree is fairly simple.  All of the active nodes
 * are scanned in order to locate the two nodes with the minimum
 * weights.  These two weights are added together and assigned to a new
 * node.  The new node makes the two minimum nodes into its 0 child
 * and 1 child.  The two minimum nodes are then marked as inactive.
 * This process repeats until there is only one node left, which is
 * the root node.  The tree is done, and the root node is passed back
 * to the calling routine.
 *
 * Node 513 is used here to arbitrarily provide a node with a
 * guaranteed maximum value.  It starts off being min_1 and min_2.  After all
 * active nodes have been scanned, I can tell if there is only one
 * active node left by checking to see if min_1 is still 513.
*/
int build_tree( nodes )
NODE *nodes;
{
 int next_free;
 int i;
 int min_1;
 int min_2;

 nodes[513].count = 0xffff;
 for ( next_free = END_OF_STREAM + 1; ; next_free++ )
 {
  min_1 = 513;
  min_2 = 513;
  for ( i=0; i<next_free; i++ )
   if ( nodes[i].count!=0 )
   {
    if ( nodes[i].count<nodes[min_1].count )
    {
     min_2 = min_1;
     min_1 = i;
    }
    else if ( nodes[i].count<nodes[min_2].count)
     min_2 = i;
   }
  if ( min_2 == 513 )
   break;
  nodes[next_free].count = nodes[min_1].count+nodes[min_2].count;
  nodes[min_1].saved_count = nodes[min_1].count;
  nodes[min_1].count = 0;
  nodes[ min_2 ].saved_count = nodes[ min_2 ].count;
        nodes[ min_2 ].count = 0;
  nodes[next_free].child_0 = min_1;
  nodes[next_free].child_1 = min_2;
 }
 next_free--;
 nodes[next_free].saved_count = nodes[next_free].count;
 return (next_free);
}

/*
 * Since the Huffman tree is built as a decoding tree, there is
 * no simple way to get the encoding values for each symbol out of
 * it.  This routine recursively walks through the tree, adding the
 * child bits to each code until it gets to a leaf.  When it gets
 * to a leaf, it stores the code value in the CODE element, and
 * returns.
*/

/*
 * Recursively walks the tree below `node` and records the code of
 * every leaf it reaches in codes[].
 *
 * code_so_far: bits accumulated on the path from the root to `node`.
 * bits:        number of bits in code_so_far.
 * node:        index of the node to visit; indices up to and including
 *              END_OF_STREAM are leaves.
 */
void convert_tree_to_code( nodes, codes, code_so_far, bits, node )
NODE *nodes;
CODE *codes;
unsigned int code_so_far;
int bits;
int node;
{
 unsigned int prefix;

 if ( node > END_OF_STREAM )
 {
  /* Internal node: extend the path by one bit and descend both ways. */
  prefix = code_so_far << 1;
  convert_tree_to_code( nodes, codes, prefix, bits + 1,
         nodes[node].child_0 );
  convert_tree_to_code( nodes, codes, prefix | 1, bits + 1,
         nodes[node].child_1 );
  return;
 }
 /* Leaf: the path so far is this symbol's code. */
 codes[node].code_bits = bits;
 codes[node].code = code_so_far;
}

/*
 * If the -d command line option is specified, this routine is called
 * to print out some of the model information after the tree is built.
 * Note that this is the only place that the saved_count NODE element
 * is used for anything at all, and  in this case it is just for
 * diagnostic information.  By the time I get here, and the tree has
 * been built, every active element will have 0 in its count.
*/
/*
 * Dumps every node that has a non-zero saved_count to stdout, one per
 * line: node number, saved weight and both child indices.
 *
 * codes: optional code table.  When it is non-NULL the Huffman code of
 *        each leaf (index <= END_OF_STREAM) is printed as well; the
 *        expander passes 0 here.
 */
void print_model( nodes, codes )
NODE *nodes;
CODE *codes;
{
 int i;

 for ( i=0; i<513; i++ )
 {
  if ( nodes[i].saved_count != 0 )
  {
   printf( "node=");
   print_char(i);
   /* saved_count is unsigned, so it is printed with %u. */
   printf( " count=%3u", nodes[i].saved_count );
   printf( " child_0=" );
   print_char( nodes[i].child_0 );
   printf( " child_1=" );
   print_char( nodes[i].child_1 );
   if ( codes&&i<=END_OF_STREAM )
   {
    printf( " Huffman code=" );
    FilePrintBinary( stdout, codes[i].code,
         codes[i].code_bits );
   }
   printf( "\n" );
  }
 }
}

/*
 * The print_model routine uses this function to print out node
 * numbers.  The catch is if it is a printable character, it gets printed
 * out as a character.  This makes the debug output a little easier to
 * read.
*/
/*
 * Prints a node number to stdout: values from 0x20 up to 126 appear as
 * the character itself, everything else as a decimal number padded to
 * a width of three.
 */
void print_char( c )
int c;
{
 if ( c < 0x20 || c >= 127 )
 {
  printf( "%3d", c );
  return;
 }
 printf( "%c", c );
}

/*
* Once the tree gets built, and the CODE table is built, compressing
* the data is a breeze.  Each byte is read in, and its corresponding
* Huffman code is sent out.
*/
/*
 * Encodes the rest of `input`: every byte read is replaced by its
 * entry in the code table, and the END_OF_STREAM code is appended once
 * the input is exhausted.
 */
void compress_data( input, output, codes )
FILE *input;
BIT_FILE *output;
CODE *codes;
{
 int symbol;
 CODE *entry;

 for ( symbol = getc( input ); symbol != EOF; symbol = getc( input ) )
 {
  entry = &codes[symbol];
  OutputBits( output, (unsigned long) entry->code, entry->code_bits );
 }
 entry = &codes[END_OF_STREAM];
 OutputBits( output, (unsigned long) entry->code, entry->code_bits );
}

/*
 * Expanding compressed data is a little harder than the compression
 * phase.  As each new symbol is decoded, the tree is traversed,
 * starting at the root node, reading a bit in, and taking either the
 * child_0 or child_1 path.  Eventually, the tree winds down to a
 * leaf node, and the corresponding symbol is output.  If the symbol
 * is the END_OF_STREAM symbol, it doesn't get written out, and
 * instead the whole process terminates.
*/
/*
 * Decodes symbols from `input` until END_OF_STREAM is decoded.  For
 * each symbol the tree is walked from root_node, one input bit per
 * step, until a leaf (index <= END_OF_STREAM) is reached; the leaf's
 * index is then written to `output` as a byte.  A failed write is fatal.
 */
void expand_data( input, output, nodes, root_node )
BIT_FILE *input;
FILE *output;
NODE *nodes;
int root_node;
{
 int current;

 for(;;)
 {
  current = root_node;
  for (;;)
  {
   /* A 1 bit selects child_1, a 0 bit selects child_0. */
   current = InputBit(input) ? nodes[current].child_1
                             : nodes[current].child_0;
   if ( current <= END_OF_STREAM )
    break;
  }
  if ( current == END_OF_STREAM )
   return;
  if ( putc( current, output ) != current )
   fatal_error( "Error trying to write byte to output" );
 }
}
/******************************End of HUFF.C***************************/


/***********************Start of MAIN-C.C*****************************/
/*
 * This is the driver program used when testing compression algorithms.
 * In order to cut back on repetitive code, this version of main is
 * used with all of the compression routines. In order to turn it into
 * a real program, it needs to have another module that supplies one
 * routine and two strings, namely:
 *
 * void CompressFile( FILE *input, BIT_FILE *output,
 *      int argc, char *argv[] );
 * char *Usage;
 * char *CompressionName;
 *
 * The main() routine supplied here has the job of checking for valid
 * input and output files, opening them, and then calling the
 * compression routine. If the files are not present, or no arguments
 * are supplied, it prints out an error message, which includes the
 * Usage string supplied by the compression module. All of the
 * routines and strings needed by this routine are defined in the
 * main.h header file.
 *
 * After this is built into a compression program of any sort, the
 * program can be called like this:
 *
 * main-c infile outfile [ options ]
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "bitio.h"
#include "errhand.h"
#include "main.h"
/*
 * Forward declarations of the helpers defined later in MAIN-C.C, with
 * full prototypes when __STDC__ is defined and old-style declarations
 * otherwise.
 */
#ifdef __STDC__

void usage_exit( char *prog_name );
void print_ratios( char *input, char *output);
long file_size( char *name );

#else

void usage_exit();
void print_ratios();
long file_size();

#endif

int main( argc, argv )
int argc;
char *argv[];
{
 BIT_FILE *output;
 FILE *input;

 setbuf( stdout, NULL );
 if ( argc < 3 )
  usage_exit( argv[0] );
 input = fopen((char*)argv[1], "rb");
 if ( input == NULL )
  fatal_error( "Error opening %s for input/n", argv[1] );
 output = OpenOutputBitFile( argv[2] );
 if ( output == NULL )
  fatal_error( "Error opening %s for output/n", argv[2] );
 printf( "/nCompressing %s to %s/n", argv[1], argv[2] );
 printf( "Using %s/n", CompressionName );
// argc -= 3;
// argv += 3;
 CompressFile( input, output, argc-3, argv+3 );
 CloseOutputBitFile( output );
 fclose( input );
 print_ratios( argv[1], argv[2] );
 return 0;
}

/*
 * This routine just wants to print out the usage message that is
 * called for when the program is run with no parameters. The first
 * part of the Usage statement is supposed to be just the program
 * name. argv[0] generally holds the fully qualified path name
 * of the program being run. I make a half-hearted attempt to strip
 * out that path info and file extension before printing it. It should
 * get the general idea across.
 */

/*
 * Prints "Usage: <program> <Usage string>" and exits with status 0.
 *
 * prog_name: normally argv[0].  Everything up to the last '\\', '/'
 *            or ':' is skipped, and the name is cut off at its last
 *            '.' by writing a terminator into the caller's buffer.
 */
void usage_exit( prog_name )
char *prog_name;
{
 char *short_name;
 char *extension;

 /* Try the path separators in turn: backslash, slash, then colon. */
 short_name = strrchr( prog_name, '\\' );
 if ( short_name == NULL )
  short_name = strrchr( prog_name, '/' );
 if ( short_name == NULL )
  short_name = strrchr( prog_name, ':' );
 if ( short_name != NULL )
  short_name++;
 else
  short_name = prog_name;
 extension = strrchr( short_name, '.' );
 if ( extension != NULL )
  *extension = '\0';
 printf( "\nUsage: %s %s\n", short_name, Usage );
 exit( 0 );
}

/*
 * This routine is used by main to get the size of a file after it has
 * been closed. It does all the work, and returns a long. The main
 * program gets the file size for the plain text, and the size of the
 * compressed file, and prints the ratio.
 */
#ifndef SEEK_END
#define SEEK_END 2
#endif

/*
 * Returns the size in bytes of the named file, or 0 if the file cannot
 * be opened or its size cannot be determined.
 *
 * The file is opened in binary mode so that the offset reported by
 * ftell() at end of file is a plain byte count.
 */
long file_size( name )
char *name;
{
 long eof_ftell;
 FILE *file;

 file = fopen( name, "rb" );
 if ( file == NULL )
  return (0L);
 eof_ftell = 0L;
 if ( fseek( file, 0L, SEEK_END ) == 0 )
 {
  eof_ftell = ftell( file );
  /* ftell() reports failure as -1; treat that as "size unknown". */
  if ( eof_ftell < 0L )
   eof_ftell = 0L;
 }
 fclose( file );
 return (eof_ftell ) ;
}

/*
 * This routine prints out the compression ratios after the input and
 * output files have been closed.
 */
/*
 * Prints the sizes of the input and output files and the compression
 * ratio, computed as 100 - (output * 100 / input) percent.  A size
 * reported as 0 is replaced by 1 before it is printed and used.
 *
 * input, output: names of the two files, which are reopened here to
 *                measure them.
 */
void print_ratios( input, output )
char *input;
char *output;
{
 long input_size;
 long output_size;
 int ratio;

 input_size = file_size( input );
 if ( input_size == 0 )
  input_size = 1;          /* also keeps the division below safe */
 printf( "\nInput bytes:        %ld\n", input_size );
 output_size = file_size( output );
 if ( output_size == 0 )
  output_size = 1;
 printf( "Output bytes:      %ld\n", output_size );
 ratio = 100 - (int)(output_size * 100L / input_size );
 printf( "Compression ratio:  %d%%\n", ratio );
}

/*********************** End of MAIN-C.C *************************/

/******************Start of BITIO.h**************************/
#ifndef _BITIO_H
#define _BITIO_H
#include <stdio.h>

/* rack:
   contains the current byte of data either read in from
   the file or waiting to be written out to the file
   mask:
   contains a single bit mask used either to set or clear
   the current output bit or to mask in the current input bit.
*/

/* State kept for one file that is read or written a bit at a time. */
typedef struct bit_file {
     FILE *file;            /* underlying stdio stream */
     unsigned char mask;    /* single-bit mask for the current bit; starts at 0x80 and moves right */
     int rack;              /* byte currently being assembled (output) or consumed (input) */
     int pacifier_counter;  /* bytes transferred so far; drives the '.' progress output */
} BIT_FILE;


#ifdef __STDC__
/* Open a file for bit-oriented input; returns NULL on failure. */
BIT_FILE *OpenInputBitFile( char *name );

/* Open a file for bit-oriented output; returns NULL on failure. */
BIT_FILE *OpenOutputBitFile( char *name );

/* Write one bit, or the low `count` bits of `code`. */
void OutputBit( BIT_FILE *bit_file, int bit );
void OutputBits( BIT_FILE *bit_file, unsigned long code, int count);

/* Read one bit, or `bit_count` bits packed into an unsigned long. */
int  InputBit( BIT_FILE *bit_file );
unsigned long InputBits( BIT_FILE *bit_file, int bit_count );

/* Close the stream and free the BIT_FILE. */
void CloseInputBitFile( BIT_FILE *bit_file );
void CloseOutputBitFile( BIT_FILE *bit_file );

/* Print the low `bits` bits of `code` as '0'/'1' characters. */
void FilePrintBinary( FILE *file, unsigned int code, int bits );

#else /* __STDC__ */

BIT_FILE* OpenInputBitFile();
BIT_FILE* OpenOutputBitFile();
void OutputBit();
void OutputBits();
int  InputBit();
unsigned long InputBits();
void CloseInputBitFile();
void CloseOutputBitFile();
void FilePrintBinary();
#endif /* __STDC__ */

#endif /* _BITIO_H */

/**********************End of BITIO.h**********************/

/********************** Start of ERRHAND.H **********************/
#ifndef _ERRHAND_H
#define _ERRHAND_H

/*
 * fatal_error() takes a printf-style format followed by its arguments.
 * The variadic prototype is used when __STDC__ is defined.
 */
#ifdef __STDC__

void fatal_error( char *fmt, ... );

#else /* __STDC__ */

void fatal_error();

#endif /* __STDC__ */

#endif /* _ERRHAND_H */

/********************** End of ERRHAND.H *************************/

/********************** Start of MAIN.H ***********************/

#ifndef _MAIN_H
#define _MAIN_H

/*
 * Entry points supplied by the compression module and called by the
 * two driver programs.
 *
 * NOTE(review): the test below is spelled _STDC_ (single underscores),
 * while the #else/#endif comments and the other headers in this
 * listing use __STDC__.  This looks like a typo; as written the
 * prototype branch is presumably never selected and the old-style
 * declarations are used instead.  If it is corrected, BIT_FILE must be
 * declared (bitio.h included) before this header.
 */
#ifdef _STDC_
void CompressFile( FILE *input, BIT_FILE *output, int argc, char *argv[] );
void ExpandFile( BIT_FILE *input, FILE *output, int argc, char *argv[] );

#else /* __STDC__ */

void CompressFile();
void ExpandFile();

#endif /* __STDC__ */

extern char *Usage;
extern char *CompressionName;
#endif /* _MAIN_H */

/************************* End of MAIN.H ************************/

/********************************Start of MAIN_E.c******************************/
/* This driver program tests compression algorithms. To cut back on
 * repetitive code, this version of main is used with all the expansion
 * routines. The main() routine supplied here checks for valid input and
 * output files, opens them, then calls the compression routine.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "bitio.h"
#include "errhand.h"
#include "main.h"

/* Forward declaration of the usage helper defined later in this file. */
#ifdef __STDC__
void usage_exit( char *prog_name );
#else
void usage_exit();
#endif

int main( argc, argv )
int argc;
char *argv[];
{
 FILE *output;
 BIT_FILE *input;
 
 setbuf( stdout, NULL );
 if ( argc < 3 )
  usage_exit( argv[0] );
 input = OpenInputBitFile( argv[1] );
 if ( input == NULL )
  fatal_error( "Error opening %s for input/n", argv[1] );
 output = fopen( argv[2], "wb" );
 if ( output == NULL )
  fatal_error( "Error opening %s for output/n", argv[2] );
 printf( "/nExpanding %s to %s for output/n", argv[2], argv[1] );
 printf( "Using %s/n", CompressionName );
 //argc -= 3;
 //argv += 3;
 ExpandFile( input, output, argc-3, argv+3 );
 CloseInputBitFile( input );
 fclose( output );
 putc('/n', stdout );
 return 0;
}

/*
 * This routine wants to print out the usage message called for when the
 * program is run with no parameters. The first part of the Usage statement
 * is supposed to be just the programname. argv[0] generally holds
 * the fully qualified path name of the program being run.
 */
/*
 * Prints "Usage: <program> <Usage string>" and exits with status 0.
 *
 * prog_name: normally argv[0].  Everything up to the last '\\', '/'
 *            or ':' is skipped, and the name is cut off at its last
 *            '.' by writing a terminator into the caller's buffer.
 */
void usage_exit( prog_name )
char *prog_name;
{
 char *short_name;
 char *extension;

 /* Try the path separators in turn: backslash, slash, then colon. */
 short_name = strrchr( prog_name, '\\' );
 if ( short_name == NULL )
  short_name = strrchr( prog_name, '/' );
 if ( short_name == NULL )
  short_name = strrchr( prog_name, ':' );
 if ( short_name != NULL )
  short_name++;
 else
  short_name = prog_name;
 extension = strrchr( short_name, '.' );
 if ( extension != NULL )
  *extension = '\0';
 printf( "\nUsage: %s %s\n", short_name, Usage );
 exit( 0 );
}
/********************** End of MAIN_E.c **************************/

学习笔记:

一个字符的熵被定义为其出现概率的负对数.为了能以位为单位计算信息的熵,我们采用以2为底的对数来定义: Number of bits = - Log base 2 (probability),而整条信息的熵则是简单地把其中所有字符的熵加总.
假设A出现15次,而一条消息一共有39个字符,那么A的熵就是 -log base 2 (15/39) = 1.38 位.

统计模型:每次读入一个字符就使用该字符出现的概率来编码.

基于字典模型:使用一个编码代替字符串.

Order0静态表需要256字节,Order1静态表则需要65536(256*256)个字节.

因此,采用自适应的方法来解决静态表在输入字符流与其统计特性不相符时压缩性能不佳的问题.但是自适应也有缺点,就是自适应需要一个过程,一般要在读入约1000个字符之后,才能产生出好的压缩比率.而自适应模型有一个优点,就是能够很好地适应局部压缩条件.

在信息论发展初期,一个最流行的观念就是:如果一个信息中各符号的出现概率已知,那么应该存在一种符号编码方式,使得保存该信息所需的空间更少.

Given the probabilities, a table of codes could be constructed that has several important properties:

  Different codes have different numbers of bits.

  Codes for symbols with low probabilities have more bits, and codes for symbols with high probabilities have fewer bits.

  Though the codes are of different bit lengths, they can be uniquely decoded.

 

The Shannon-Fano Algorithm

A Shannon-Fano tree is built according to a specification designed to define an effective code table. The actual algorithm is simple:

1.  For a given list of symbols, develop a corresponding list of probabilities or frequency counts so that each symbol’s relative frequency of occurrence is known.

2.  Sort the lists of symbols according to frequency, with the most frequently occurring symbols at the top and the least common at the bottom.

3.  Divide the list into two parts, with the total frequency counts of the upper half being as close to the total of the bottom half as possible.

4.  The upper half of the list is assigned the binary digit 0, and the lower half is assigned the digit 1. This means that the codes for the symbols in the first half will all start with 0, and the codes in the second half will all start with 1.

5.  Recursively apply the steps 3 and 4 to each of the two halves, subdividing groups and adding bits to the codes until each symbol has become a corresponding code leaf on the tree.

 

Huffman codes have the unique prefix attribute, which means they can be correctly decoded despite being variable length.

 

The Shannon-Fano tree is built from the top down, starting by assigning the most significant bits to each code and working down the tree until finished. Huffman codes are built from the bottom up, starting with the leaves of the tree and working progressively closer to the root.

 

The procedure for building the tree is simple and elegant. The individual symbols are laid out as a string of leaf nodes that are going to be connected by a binary tree. Each node has a weight, which is simply the frequency or probability of the symbol’s appearance. The tree is then built with the following steps:

  The two free nodes with the lowest weights are located.

  A parent node for these two nodes is created. It is assigned a weight equal to the sum of the two child nodes.

  The parent node is added to the list of free nodes, and the two child nodes are removed from the list.

  One of the child nodes is designated as the path taken from the parent node when decoding a 0 bit. The other is arbitrarily set to the 1 bit.

  The previous steps are repeated until only one free node is left. This free node is designated the root of the tree.

 

To determine the code for a given symbol, we have to walk from the leaf node to the root of the Huffman tree, accumulating new bits as we pass through each parent node. Unfortunately, the bits are returned to us in the reverse order that we want them, which means we have to push the bits onto a stack, then pop them off to generate the code.

 

The codes have the unique prefix property: since no code is a prefix of another code, they can be decoded unambiguously.

 

And Huffman was able to prove that this coding method cannot be improved on with any other integral bit-width coding stream.

 

解决EOF(文件结束)问题的方法有两种,一种是采用特殊的代码处理这一问题,第二种就是把文件的大小当作压缩数据的一部分一起保存.

在哈夫曼编码中,由于压缩和解压的哈夫曼树是相同的,采用缩放并只保存每个字母对应的频率我们就可以在解压中重构哈夫曼树.同时由于每个字母的频率通过缩放后均采用无符号字符的精度保存,所以相对于一个4K的哈夫曼树结构是很省的.另一方面,我们也发现,文件中可能并不全部出现所有的字母,所以我们保存也只需要保存相应的字母即可,其余则频率值为0.这样我们就可以通过以下的结构来保存,首先指出范围,起始的ASCII,和终止的ASCII,接着就是在这个范围里面的每个字母对应的频率值.最后字母表以0为终结.所以一张以压缩文件头部的字母表可以如下所示:

 

next free node will be at 257.

There is an extra member in the node structure called saved_count. When a node is taken off the active list by having its count set to zero, the previous count is stored in saved_count.

Nodes with a weight of 0 are considered to be unused and will never again be selected to represent a minimum.

 

Using the Tree

解压

- Starting at the root node, a single bit at a time is read in by the decoder.

- If the bit is a 0, the next node is the one pointed to by the child_0 index.

- If the bit is a 1, the next node is the one pointed to by the child_1 index.

- If the new node is 256 or less, we have reached a leaf of the tree and can output the corresponding symbol.

- If the symbol was the special end-of-stream symbol, we can exit instead of sending it out.

 

压缩

这里存在一个问题就是对于每个字符要想得到其压缩码,我们就必须从树的叶子结点开始向上一直找到父亲结点,这可以通过增加一个指向双亲结点的指针实现,然而这还要增加一个栈用于保存在遍历过程中的01编码,然后在到达根结点时,再一次性出栈输出压缩码.这又增加缓冲空间.

存在更好解决这个问题的方法,那就是通过一次的递归回溯遍历树,得到每个字符所对应的压缩码表,那么在压缩过程中就是简单的对表的查找,这就能提高效率.

 

 

 

【最优潮流】直流最优潮流(OPF)课设(Matlab代码实现)内容概要:本文档主要围绕“直流最优潮流(OPF)课设”的Matlab代码实现展开,属于电力系统优化领域的教学与科研实践内容。文档介绍了通过Matlab进行电力系统最优潮流计算的基本原理与编程实现方法,重点聚焦于直流最优潮流模型的构建与求解过程,适用于课程设计或科研入门实践。文中提及使用YALMIP等优化工具包进行建模,并提供了相关资源下载链接,便于读者复现与学习。此外,文档还列举了大量与电力系统、智能优化算法、机器学习、路径规划等相关的Matlab仿真案例,体现出其服务于科研仿真辅导的综合性平台性质。; 适合人群:电气工程、自动化、电力系统及相关专业的本科生、研究生,以及从事电力系统优化、智能算法应用研究的科研人员。; 使用场景及目标:①掌握直流最优潮流的基本原理与Matlab实现方法;②完成课程设计或科研项目中的电力系统优化任务;③借助提供的丰富案例资源,拓展在智能优化、状态估计、微电网调度等方向的研究思路与技术手段。; 阅读建议:建议读者结合文档中提供的网盘资源,下载完整代码与工具包,边学习理论边动手实践。重点关注YALMIP工具的使用方法,并通过复现文中提到的多个案例,加深对电力系统优化问题建模与求解的理解。
本程序为针对江苏省中医院挂号系统设计的自动化预约工具,采用Python语言编写。项目压缩包内包含核心配置文件与主执行文件。 配置文件conf.ini中,用户需根据自身情况调整身份验证参数:可填写用户名与密码,或直接使用有效的身份令牌(若提供令牌则无需填写前两项)。其余配置项通常无需更改。 主文件main.py包含两项核心功能: 1. 预约测试模块:用于验证程序运行状态及预约流程的完整性。执行后将逐步引导用户选择院区、科室类别、具体科室、医师、就诊日期、时段及具体时间,最后确认就诊卡信息。成功预约后将返回包含预约编号及提示信息的结构化结果。 2. 监控预约模块:可持续监测指定医师在设定日期范围内的可预约时段。旦检测到空闲号源,将自动完成预约操作。该模块默认以10秒为间隔循环检测,成功预约后仍会持续运行直至手动终止。用户需注意在预约成功后及时完成费用支付以确认挂号。 程序运行时会显示相关技术支持信息,包括采用的验证码识别组件及训练数据来源。操作界面采用分步交互方式,通过输入序号完成各环节选择。所有网络请求均经过结构化处理,返回结果包含明确的状态码与执行耗时。 资源来源于网络分享,仅用于学习交流使用,请勿用于商业,如有侵权请联系我删除!
### 哈夫曼编码 PDF 文档获取方式 对于希望下载有关哈夫曼编码的 PDF 文档的学习者来说,可以通过多种途径找到高质量的相关资料。以下是几种常见的方法: #### 1. 学术资源平台 许多学术网站提供免费或付费的技术文档和论文,这些资源通常具有较高的权威性和准确性。例如,在 Google Scholar 中输入关键词“Huffman Coding”可以检索到大量研究文章和技术说明[^1]。 #### 2. 开源教育项目 像 MIT OpenCourseWare 或 Stanford Online 提供的课程材料中可能包含详细的 Huffman 编码讲解及其应用实例。这类开放教育资源不仅限于理论部分还涉及实际编程练习指导[^2]。 #### 3. 技术博客与个人主页分享 些技术爱好者会在自己的博客或者 GitHub 上发布精心整理过的学习笔记甚至是完整的讲义形式文件(PDF),通过搜索引擎尝试查找特定作者的作品也是种有效手段[^3]。 #### 示例代码片段展示基本原理 下面给出段简单的 Python 实现来帮助理解该算法的核心概念: ```python import heapq from collections import defaultdict, namedtuple Node = namedtuple('Node', ['frequency', 'char', 'left', 'right']) def huffman_tree(string): freq_dict = defaultdict(int) for char in string: freq_dict[char] += 1 heap = [] for value, frequency in freq_dict.items(): node = Node(frequency=frequency, char=value, left=None, right=None) heapq.heappush(heap, (frequency, node)) while len(heap) != 1: freql, nodel = heapq.heappop(heap) freqr, noder = heapq.heappop(heap) merged_node = Node(freql + freqr, None, nodel, noder) heapq.heappush(heap, (freql + freqr, merged_node)) return heap[0][1] tree_root = huffman_tree("this is an example of a huffman tree") print(tree_root) ```
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值