常见的无损压缩方法有:香农编码、行程编码、霍夫曼编码、LZW编码、算术编码等。其核心思想无非是降低数据冗余,把重复的、较长的编码用较短的编码来替换,实现整体尺寸的下降。下面以zlib中的压缩核心代码
为例加以说明:
/*
 * deflate -- drive one compression step on the stream `strm`.
 *
 * strm   stream descriptor; its next_in/avail_in, next_out/avail_out,
 *        adler, total_in and state fields are read and/or updated here.
 * flush  requested flush mode, must lie in the range 0 .. Z_FINISH.
 *
 * Steps performed, in order:
 *   1. validate the arguments and the stream state;
 *   2. on the first call (INIT_STATE) emit the stream header -- a gzip
 *      header when built with GZIP and s->wrap == 2, otherwise a two-byte
 *      zlib header, followed by the dictionary checksum if one was preset;
 *   3. drain output that is still pending from an earlier call;
 *   4. run the block function selected by s->level and honour the
 *      requested flush mode;
 *   5. for Z_FINISH on a wrapped stream, append the trailer exactly once.
 *
 * Returns Z_STREAM_ERROR for invalid arguments or state, Z_BUF_ERROR when
 * no progress is possible, Z_STREAM_END once everything (including the
 * trailer) has been written after Z_FINISH, and Z_OK otherwise.
 */
int ZEXPORT deflate (strm, flush)
    z_streamp strm;
    int flush;
{
    deflate_state *s;
    int prev_flush;     /* flush value given to the previous deflate() call */

    /* ---- argument and state validation ---- */
    if (strm == Z_NULL || strm->state == Z_NULL) {
        return Z_STREAM_ERROR;
    }
    if (flush > Z_FINISH || flush < 0) {
        return Z_STREAM_ERROR;
    }
    s = strm->state;

    if (strm->next_out == Z_NULL) {
        ERR_RETURN(strm, Z_STREAM_ERROR);
    }
    if (strm->next_in == Z_NULL && strm->avail_in != 0) {
        ERR_RETURN(strm, Z_STREAM_ERROR);
    }
    /* Once finishing has started, only Z_FINISH is acceptable. */
    if (s->status == FINISH_STATE && flush != Z_FINISH) {
        ERR_RETURN(strm, Z_STREAM_ERROR);
    }
    if (strm->avail_out == 0) {
        ERR_RETURN(strm, Z_BUF_ERROR);
    }

    s->strm = strm;                 /* just in case */
    prev_flush = s->last_flush;
    s->last_flush = flush;

    /* ---- stream header (first call only) ---- */
    if (s->status == INIT_STATE) {
#ifdef GZIP
        if (s->wrap == 2) {
            int xfl;    /* extra-flags byte derived from level/strategy */

            if (s->level == 9) {
                xfl = 2;
            } else if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2) {
                xfl = 4;
            } else {
                xfl = 0;
            }

            /* Ten-byte gzip header (layout per RFC 1952). */
            put_byte(s, 31);        /* ID1 */
            put_byte(s, 139);       /* ID2 */
            put_byte(s, 8);         /* compression method */
            put_byte(s, 0);         /* flags */
            put_byte(s, 0);         /* modification time, 4 bytes of zero */
            put_byte(s, 0);
            put_byte(s, 0);
            put_byte(s, 0);
            put_byte(s, xfl);
            put_byte(s, 255);       /* OS code */
            s->status = BUSY_STATE;
            strm->adler = crc32(0L, Z_NULL, 0);
        }
        else
#endif
        {
            uInt hdr = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8;
            uInt flevel = 3;        /* default: level above 6 */

            if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2) {
                flevel = 0;
            } else if (s->level < 6) {
                flevel = 1;
            } else if (s->level == 6) {
                flevel = 2;
            }
            hdr |= (flevel << 6);
            if (s->strstart != 0) {
                hdr |= PRESET_DICT;
            }
            /* Pad so that the 16-bit header is a multiple of 31. */
            hdr += 31 - (hdr % 31);

            s->status = BUSY_STATE;
            putShortMSB(s, hdr);

            /* Save the adler32 of the preset dictionary: */
            if (s->strstart != 0) {
                putShortMSB(s, (uInt)(strm->adler >> 16));
                putShortMSB(s, (uInt)(strm->adler & 0xffff));
            }
            strm->adler = adler32(0L, Z_NULL, 0);
        }
    }

    /* ---- drain pending output, or detect a call with nothing to do ---- */
    if (s->pending == 0) {
        /* Make sure there is something to do and avoid duplicate
         * consecutive flushes. Repeated, useless calls with Z_FINISH are
         * exempt so they keep returning Z_STREAM_END rather than
         * Z_BUF_ERROR.
         */
        if (strm->avail_in == 0 && flush <= prev_flush &&
            flush != Z_FINISH) {
            ERR_RETURN(strm, Z_BUF_ERROR);
        }
    } else {
        flush_pending(strm);
        if (strm->avail_out == 0) {
            /* The output buffer is full, so deflate will be called again
             * with more room, possibly with both pending and avail_in
             * equal to zero. That is not an error: resetting last_flush
             * makes the next call return Z_OK instead of Z_BUF_ERROR.
             */
            s->last_flush = -1;
            return Z_OK;
        }
    }

    /* User must not provide more input after the first FINISH: */
    if (s->status == FINISH_STATE && strm->avail_in != 0) {
        ERR_RETURN(strm, Z_BUF_ERROR);
    }

    /* ---- start a new block or continue the current one ---- */
    if (strm->avail_in != 0 || s->lookahead != 0 ||
        (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) {
        block_state result;

        result = (*(configuration_table[s->level].func))(s, flush);

        if (result == finish_started || result == finish_done) {
            s->status = FINISH_STATE;
        }
        if (result == need_more || result == finish_started) {
            if (strm->avail_out == 0) {
                /* avoid BUF_ERROR next call, see above */
                s->last_flush = -1;
            }
            /* If flush != Z_NO_FLUSH && avail_out == 0, the next call of
             * deflate should use the same flush parameter to make sure
             * that the flush is complete. So we don't have to output an
             * empty block here, this will be done at next call. This also
             * ensures that for a very small output buffer, we emit at
             * most one empty block.
             */
            return Z_OK;
        }
        if (result == block_done) {
            if (flush == Z_PARTIAL_FLUSH) {
                _tr_align(s);
            } else {
                /* FULL_FLUSH or SYNC_FLUSH: emit an empty stored block.
                 * For a full flush, this empty block will be recognized
                 * as a special marker by inflate_sync().
                 */
                _tr_stored_block(s, (char*)0, 0L, 0);
                if (flush == Z_FULL_FLUSH) {
                    CLEAR_HASH(s);      /* forget history */
                }
            }
            flush_pending(strm);
            if (strm->avail_out == 0) {
                /* avoid BUF_ERROR at next call, see above */
                s->last_flush = -1;
                return Z_OK;
            }
        }
    }
    Assert(strm->avail_out > 0, "bug2");

    if (flush != Z_FINISH) {
        return Z_OK;
    }
    if (s->wrap <= 0) {
        /* No wrapper, or the trailer has already been written. */
        return Z_STREAM_END;
    }

    /* ---- trailer ---- */
#ifdef GZIP
    if (s->wrap == 2) {
        int shift;

        /* strm->adler (seeded with crc32() in the header path above),
         * then total_in, each as four bytes, least significant first.
         */
        for (shift = 0; shift < 32; shift += 8) {
            put_byte(s, (Byte)((strm->adler >> shift) & 0xff));
        }
        for (shift = 0; shift < 32; shift += 8) {
            put_byte(s, (Byte)((strm->total_in >> shift) & 0xff));
        }
    }
    else
#endif
    {
        /* strm->adler as two 16-bit halves, high half first. */
        putShortMSB(s, (uInt)(strm->adler >> 16));
        putShortMSB(s, (uInt)(strm->adler & 0xffff));
    }
    flush_pending(strm);

    /* If avail_out is zero, the application will call deflate again
     * to flush the rest. Negating wrap ensures the trailer is written
     * only once.
     */
    if (s->wrap > 0) {
        s->wrap = -s->wrap;
    }
    if (s->pending != 0) {
        return Z_OK;
    }
    return Z_STREAM_END;
}
其核心算法(deflate)采用的是LZ77与霍夫曼编码相结合的方式。
…
为例加以说明:
/*
 * deflate -- drive one compression step on the stream `strm`.
 *
 * strm   stream descriptor; its next_in/avail_in, next_out/avail_out,
 *        adler, total_in and state fields are read and/or updated here.
 * flush  requested flush mode, must lie in the range 0 .. Z_FINISH.
 *
 * Steps performed, in order:
 *   1. validate the arguments and the stream state;
 *   2. on the first call (INIT_STATE) emit the stream header -- a gzip
 *      header when built with GZIP and s->wrap == 2, otherwise a two-byte
 *      zlib header, followed by the dictionary checksum if one was preset;
 *   3. drain output that is still pending from an earlier call;
 *   4. run the block function selected by s->level and honour the
 *      requested flush mode;
 *   5. for Z_FINISH on a wrapped stream, append the trailer exactly once.
 *
 * Returns Z_STREAM_ERROR for invalid arguments or state, Z_BUF_ERROR when
 * no progress is possible, Z_STREAM_END once everything (including the
 * trailer) has been written after Z_FINISH, and Z_OK otherwise.
 */
int ZEXPORT deflate (strm, flush)
    z_streamp strm;
    int flush;
{
    deflate_state *s;
    int prev_flush;     /* flush value given to the previous deflate() call */

    /* ---- argument and state validation ---- */
    if (strm == Z_NULL || strm->state == Z_NULL) {
        return Z_STREAM_ERROR;
    }
    if (flush > Z_FINISH || flush < 0) {
        return Z_STREAM_ERROR;
    }
    s = strm->state;

    if (strm->next_out == Z_NULL) {
        ERR_RETURN(strm, Z_STREAM_ERROR);
    }
    if (strm->next_in == Z_NULL && strm->avail_in != 0) {
        ERR_RETURN(strm, Z_STREAM_ERROR);
    }
    /* Once finishing has started, only Z_FINISH is acceptable. */
    if (s->status == FINISH_STATE && flush != Z_FINISH) {
        ERR_RETURN(strm, Z_STREAM_ERROR);
    }
    if (strm->avail_out == 0) {
        ERR_RETURN(strm, Z_BUF_ERROR);
    }

    s->strm = strm;                 /* just in case */
    prev_flush = s->last_flush;
    s->last_flush = flush;

    /* ---- stream header (first call only) ---- */
    if (s->status == INIT_STATE) {
#ifdef GZIP
        if (s->wrap == 2) {
            int xfl;    /* extra-flags byte derived from level/strategy */

            if (s->level == 9) {
                xfl = 2;
            } else if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2) {
                xfl = 4;
            } else {
                xfl = 0;
            }

            /* Ten-byte gzip header (layout per RFC 1952). */
            put_byte(s, 31);        /* ID1 */
            put_byte(s, 139);       /* ID2 */
            put_byte(s, 8);         /* compression method */
            put_byte(s, 0);         /* flags */
            put_byte(s, 0);         /* modification time, 4 bytes of zero */
            put_byte(s, 0);
            put_byte(s, 0);
            put_byte(s, 0);
            put_byte(s, xfl);
            put_byte(s, 255);       /* OS code */
            s->status = BUSY_STATE;
            strm->adler = crc32(0L, Z_NULL, 0);
        }
        else
#endif
        {
            uInt hdr = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8;
            uInt flevel = 3;        /* default: level above 6 */

            if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2) {
                flevel = 0;
            } else if (s->level < 6) {
                flevel = 1;
            } else if (s->level == 6) {
                flevel = 2;
            }
            hdr |= (flevel << 6);
            if (s->strstart != 0) {
                hdr |= PRESET_DICT;
            }
            /* Pad so that the 16-bit header is a multiple of 31. */
            hdr += 31 - (hdr % 31);

            s->status = BUSY_STATE;
            putShortMSB(s, hdr);

            /* Save the adler32 of the preset dictionary: */
            if (s->strstart != 0) {
                putShortMSB(s, (uInt)(strm->adler >> 16));
                putShortMSB(s, (uInt)(strm->adler & 0xffff));
            }
            strm->adler = adler32(0L, Z_NULL, 0);
        }
    }

    /* ---- drain pending output, or detect a call with nothing to do ---- */
    if (s->pending == 0) {
        /* Make sure there is something to do and avoid duplicate
         * consecutive flushes. Repeated, useless calls with Z_FINISH are
         * exempt so they keep returning Z_STREAM_END rather than
         * Z_BUF_ERROR.
         */
        if (strm->avail_in == 0 && flush <= prev_flush &&
            flush != Z_FINISH) {
            ERR_RETURN(strm, Z_BUF_ERROR);
        }
    } else {
        flush_pending(strm);
        if (strm->avail_out == 0) {
            /* The output buffer is full, so deflate will be called again
             * with more room, possibly with both pending and avail_in
             * equal to zero. That is not an error: resetting last_flush
             * makes the next call return Z_OK instead of Z_BUF_ERROR.
             */
            s->last_flush = -1;
            return Z_OK;
        }
    }

    /* User must not provide more input after the first FINISH: */
    if (s->status == FINISH_STATE && strm->avail_in != 0) {
        ERR_RETURN(strm, Z_BUF_ERROR);
    }

    /* ---- start a new block or continue the current one ---- */
    if (strm->avail_in != 0 || s->lookahead != 0 ||
        (flush != Z_NO_FLUSH && s->status != FINISH_STATE)) {
        block_state result;

        result = (*(configuration_table[s->level].func))(s, flush);

        if (result == finish_started || result == finish_done) {
            s->status = FINISH_STATE;
        }
        if (result == need_more || result == finish_started) {
            if (strm->avail_out == 0) {
                /* avoid BUF_ERROR next call, see above */
                s->last_flush = -1;
            }
            /* If flush != Z_NO_FLUSH && avail_out == 0, the next call of
             * deflate should use the same flush parameter to make sure
             * that the flush is complete. So we don't have to output an
             * empty block here, this will be done at next call. This also
             * ensures that for a very small output buffer, we emit at
             * most one empty block.
             */
            return Z_OK;
        }
        if (result == block_done) {
            if (flush == Z_PARTIAL_FLUSH) {
                _tr_align(s);
            } else {
                /* FULL_FLUSH or SYNC_FLUSH: emit an empty stored block.
                 * For a full flush, this empty block will be recognized
                 * as a special marker by inflate_sync().
                 */
                _tr_stored_block(s, (char*)0, 0L, 0);
                if (flush == Z_FULL_FLUSH) {
                    CLEAR_HASH(s);      /* forget history */
                }
            }
            flush_pending(strm);
            if (strm->avail_out == 0) {
                /* avoid BUF_ERROR at next call, see above */
                s->last_flush = -1;
                return Z_OK;
            }
        }
    }
    Assert(strm->avail_out > 0, "bug2");

    if (flush != Z_FINISH) {
        return Z_OK;
    }
    if (s->wrap <= 0) {
        /* No wrapper, or the trailer has already been written. */
        return Z_STREAM_END;
    }

    /* ---- trailer ---- */
#ifdef GZIP
    if (s->wrap == 2) {
        int shift;

        /* strm->adler (seeded with crc32() in the header path above),
         * then total_in, each as four bytes, least significant first.
         */
        for (shift = 0; shift < 32; shift += 8) {
            put_byte(s, (Byte)((strm->adler >> shift) & 0xff));
        }
        for (shift = 0; shift < 32; shift += 8) {
            put_byte(s, (Byte)((strm->total_in >> shift) & 0xff));
        }
    }
    else
#endif
    {
        /* strm->adler as two 16-bit halves, high half first. */
        putShortMSB(s, (uInt)(strm->adler >> 16));
        putShortMSB(s, (uInt)(strm->adler & 0xffff));
    }
    flush_pending(strm);

    /* If avail_out is zero, the application will call deflate again
     * to flush the rest. Negating wrap ensures the trailer is written
     * only once.
     */
    if (s->wrap > 0) {
        s->wrap = -s->wrap;
    }
    if (s->pending != 0) {
        return Z_OK;
    }
    return Z_STREAM_END;
}
其核心算法(deflate)采用的是LZ77与霍夫曼编码相结合的方式。
…