LZW编码解码 C++

Neo Wordsworth

已于 2022-04-26 11:26:47 修改

阅读量1.5k

点赞数

CC 4.0 BY-SA版权

文章标签： lzw c++ arduino esp

于 2022-04-24 14:42:06 首次发布

本文链接：https://blog.youkuaiyun.com/neo_Wordsworth/article/details/124382241

作者改进了用于单片机的LZW压缩算法，通过动态调整16位bit数据的使用，减少存储浪费。展示了编码和解码过程，并分享了关键代码段。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

20220406修改

#1 !=号错写为==

为了给单片机的一些常量数据压缩下，找到了简单的lzw压缩算法

网上找了一个源码，发现没达到最优的压缩，又改了下，

主要是bit数据的存放统一用16位，太浪费，变为动态调整

测试代码如下

    uint8_t org[]={
        //'q','q','q','q','q','q','q','w','c'
        //'O','n','e','d','a','y'
		//'X','y','m','y','m','o','t'
		'A','B','R','A','C','A','D','A','B','R','A','B','R','A','B','R','A'
        ,0x00
        };
    //为方便测试，这里只测试了字母，用0x00结尾。
    //其实可以是任意16进制数，数据结尾自定义
    uint8_t* c=org;
	int orgSize=0;
	while(*c){
		orgSize++;
		c++;
	}
	printf("orgSize:%d\r\n",orgSize);

    //把org[]编码压缩到endatas[]中
    uint8_t endatas[300];
    Biter bitPuter(endatas);

    LzwCode lzwCode(200);
    lzwCode.Encode(org,orgSize,&bitPuter);
    
    Serial.printf("encode data:\r\n");
    for(int i=0;i<bitPuter.Size();i++){
		Serial.printf("%02x ",endatas[i]);
	}
    Serial.printf("\r\n");
    
    Biter bitGeter(endatas,bitPuter.Size());
    //把endatas[]解码到dedatas[]中
    uint8_t dedatas[300];
    int size=lzwCode.Decode(&bitGeter,dedatas);
    Serial.printf("code data:%d\r\n",size);
    for(int i=0;i<size;i++){
		Serial.printf("%c",dedatas[i]);
	}
    Serial.printf("\r\n");

源文件如下：

编解码代码 lzw.h lzw.cpp

#ifndef _MY_LZW_H
#define _MY_LZW_H

#include <Arduino.h>
#include "Biter.h"

class DictItem {
  public:
    //0x00-0xff为【基本元素】，不超过8位
    uint8_t suffix;

    int parent;
    int firstchild;
    int nextsibling;
  private:
};

class LzwCode {
  public:
    LzwCode(uint16_t dsize0);
    ~LzwCode();

    void Init(uint16_t dsize0);
    void Dispose();

    void Encode(uint8_t* fp,int size, Biter *bf);
    int Decode(Biter *bf,uint8_t *fp);

    void PrintDictionary();

  private:
    DictItem* dictionary=0;
    uint16_t dictSize;
    //下一个字典的编码
    uint16_t next_code;

    uint8_t bitw;//读取、写入的bit位数
    uint32_t bitm;//掩码

    void InitDictionary();
    int DecodeString(int start, int code,uint8_t* list);
    int InDictionary(int character, int string_code);
    void AddToDictionary(int character, int string_code,uint8_t decode=0);
};

#endif

#include "lzw.h"

LzwCode::LzwCode(uint16_t dsize0){
    Init(dsize0);
}

LzwCode::~LzwCode(){
    Dispose();
}

/**
 * @brief 初始化LZW
 * 
 * @param {dsize0} 基本表外的字典大小
 **/
void LzwCode::Init(uint16_t dsize0){
    Dispose();

	dictSize=dsize0+256;
    dictionary=new DictItem[dictSize];
}

void LzwCode::Dispose(){
    if(dictionary){
        delete[] dictionary;
        dictionary=0;
    }
}

/**
 * @brief 初始化 基础压缩表
 **/
void LzwCode::InitDictionary(){
    //以0x00-0xff为【基本元素】，所以有256个元素
	for(int i=0; i<256; i++){
		dictionary[i].suffix = i;
		dictionary[i].parent = -1;
		dictionary[i].firstchild = -1;
		dictionary[i].nextsibling = i+1;
	}
	dictionary[255].nextsibling = -1;

	next_code = 256;
}

/**
 * @brief 打印基本表之外的扩展压缩表
 **/
void LzwCode::PrintDictionary(){
	int n;
	int count;
	//存放扩展元素（每个是基本元素），
	//单个扩展元素包含的基本元素不超过50个
	uint8_t list[50];

	Serial.printf("dict:\r\n");
	for(n=256; n<next_code; n++){
		Serial.printf("%4d->", n);
		count = DecodeString(0, n, list);
		while(count-- > 0)
			Serial.printf("%c", (char)(list[count]));
		Serial.printf( "\r\n");
	}
}

/**
 * @brief 获取扩展元素的所有 基本元素
 * 
 * @param {start} list起始存放位置
 * @param {code} 基节点
 **/
int LzwCode::DecodeString(int start, int code,uint8_t* list){
	int index = start;

	while(code>=0){
		list[index] = dictionary[code].suffix;
		code = dictionary[code].parent;
		index++;
	}
	return index;
}

/**
 * @brief 判断{string_code}+{character}是否在字典
 * 		判断{编码对应的元素}+{当前元素}是否在字典
 * 
 * @param {character} 当前字符
 * @param {string_code} 上一次的字典编码
 * 
 * @return 未找到：-1  找到：字典编码
 **/
int LzwCode::InDictionary(int character, int string_code){
	int sibling;//下一个
	if(string_code<0)
        return character;

	sibling = dictionary[string_code].firstchild;
	while(sibling>-1){
		if(character == dictionary[sibling].suffix)
            return sibling;
		sibling = dictionary[sibling].nextsibling;
	}
	return -1;
}

/**
 * @brief {string_code}+{character}加入字典
 * 		{编码对应的元素}+{当前元素}加入字典
 * 
 * @param {character} 当前字符
 * @param {string_code} 上一次的字典编码
 * @param {decode} 是否处于解码操作
 **/
void LzwCode::AddToDictionary(int character, int string_code,uint8_t decode){
	int firstsibling, nextsibling;

	if(decode){
		if((next_code&bitm)!=0){
			bitm<<=1;
			bitw++;
			Serial.printf("bitw %d\r\n", bitw);
		}
	}

	if(0>string_code)
        return;

	if(next_code>=dictSize){
		Serial.printf("Dictionary overflow!!!!\r\n");
		return;
	}

	dictionary[next_code].suffix = character;
	dictionary[next_code].parent = string_code;
	dictionary[next_code].nextsibling = -1;
	dictionary[next_code].firstchild = -1;
	firstsibling = dictionary[string_code].firstchild;
	if(-1<firstsibling){	// the parent has child
		nextsibling = firstsibling;
		while( -1<dictionary[nextsibling].nextsibling ) 
			nextsibling = dictionary[nextsibling].nextsibling;
		dictionary[nextsibling].nextsibling = next_code;
	}else{// no child before, modify it to be the first
		dictionary[string_code].firstchild = next_code;
	}

	if(!decode){
		if((next_code&bitm)!=0){
			bitm<<=1;
			bitw++;
			Serial.printf("bitw %d\r\n", bitw);
		}
	}

	next_code++;
}

/**
 * @brief 编码、压缩
 * 
 * @param {datas} 要压缩的数据
 * @param {dsize} 要压缩数据的大小
 * @param {bp} 压缩后的数据
 **/
void LzwCode::Encode(uint8_t* datas,int dsize, Biter *bp){
	int character;
	int string_code;
	int index;
	int n=0;

	bitw=8;
	bitm=1<<8;

    bp->Put(dsize,3*8);
	
	InitDictionary();

	string_code = -1;
	while(n<dsize){
		character=datas[n++];
		
		//Serial.printf("%d\r\n", character);
		index = InDictionary(character, string_code);
		if(0<=index){
            // 在字典中
			string_code = index;
		}else{
            // 不在字典中
            bp->Put(string_code,bitw);

            AddToDictionary(character, string_code);
			
			string_code = character;
		}
	}
    bp->Put(string_code,bitw);
	bp->Flush();
}

/**
 * @brief 解码、解压缩
 * 
 * @param {bf} 压缩的数据
 * @param {fp} 解压的数据
 **/
int LzwCode::Decode(Biter *bf,uint8_t *fp){
	int character;
	int new_code, last_code;
	int phrase_length;

	//存放扩展元素（每个是基本元素），
	//单个扩展元素包含的基本元素不超过50个
	uint8_t list[50];
	
	int n=0;
	bitw=8;
	bitm=1<<8;

	int size=bf->Get(3*8);
	int orgsize=size;
	Serial.printf("size:%d\r\n", size);
	
	InitDictionary();
	last_code = -1;
	while (size>0) {
		new_code = (int)bf->Get(bitw);
		if (new_code >= next_code) { // this is the case CSCSC( not in dict)
			list[0] = character;
			phrase_length = DecodeString(1, last_code,list);
		} else {
			phrase_length = DecodeString(0, new_code,list);
		}
		character = list[phrase_length - 1];
		while (0 < phrase_length) {
			phrase_length--;
			fp[n++]=list[phrase_length];
			size--;
		}

        AddToDictionary(character, last_code,1);
            
		last_code = new_code;
	}

	return orgsize;
}

比特操作代码 Biter.h Biter.cpp

#ifndef _MY_BITER_H
#define _MY_BITER_H

#include <Arduino.h>

class Biter {
  public:
    Biter(uint8_t* datas0,uint32_t datasize0=0);
    void Init(uint8_t* datas0,uint32_t datasize0=0);

    uint32_t Get(int bitWidth);
    void Put(uint32_t data, int bitWidth);
    void Flush();
    int Size();

  private:
    uint8_t* datas;
	  uint32_t datasize;

    uint32_t index;//datas的存放位置
    uint8_t mask;//当前bit存放的位置

    uint8_t cache;//写入的缓存，满8位存入datas、或一次读取8位
    
    uint8_t GetBit();
    void PutBit(uint8_t bit);
};

#endif

#include "Biter.h"

Biter::Biter(uint8_t *datas0, uint32_t datasize0)
{
	Init(datas0, datasize0);
}

void Biter::Init(uint8_t *datas0, uint32_t datasize0)
{
	this->datas = datas0;
	this->datasize = datasize0;
	this->index = 0;
	this->mask = 0x80;
	this->cache = 0;
}

/*******************Get*************************/

/**
 * @brief 获取1bit数据
 *
 * @return 0或1
 **/
uint8_t Biter::GetBit()
{
	if (this->mask == 0x80)
	{
		if (this->index == this->datasize)
		{
			return 0;
		}
		this->cache = this->datas[this->index++];
	}

	uint8_t value = this->mask & this->cache;

	if (this->mask == 0x01)
		this->mask = 0x80;
	else
		this->mask >>= 1;

	return value ? 1 : 0;
}

/**
 * @brief 获取n比特数据
 *
 * @param {bitWidth} 低bitWidth位
 **/
uint32_t Biter::Get(int bitWidth)
{
	uint32_t maskv;
	uint32_t value;

	maskv = 1L << (bitWidth - 1);
	value = 0L;
	while (maskv != 0)
	{
		if (GetBit())
			value |= maskv;
		maskv >>= 1;
	}

	return value;
}

/*******************Set*************************/

/**
 * @brief 放入1bit数据
 *
 * @param {bit} 0或1
 **/
void Biter::PutBit(uint8_t bit)
{
	// 1则写入，0不变
	if (bit != 0)
		this->cache |= this->mask;

	this->mask >>= 1;
	if (this->mask == 0)
	{
		// 满8位存入
		this->datas[this->index++] = this->cache;
		this->cache = 0;
		this->mask = 0x80;
	}
}

/**
 * @brief 放入n比特数据
 *
 * @param {data} 数据
 * @param {bitWidth} 低bitWidth位
 **/
void Biter::Put(uint32_t data, int bitWidth)
{
	uint32_t maskv;

	maskv = 1L << (bitWidth - 1);
	while (maskv != 0)
	{
		PutBit((data & maskv) ? 1 : 0);

		maskv >>= 1;
	}
}

void Biter::Flush()
{
	// 如果填充没完成，把剩余的放入数据
	if (this->mask != 0x80)//#1 ==改为!=
	{
		this->datas[this->index++] = this->cache;
	}
}

/**
 * @brief 获取存入数据的字节大小
 **/
int Biter::Size()
{
	return this->index;
}