Huffman编码是数据压缩中常见的编码方法,即根据字符出现的概率,用不同长度的二进制位对字符进行编码:出现概率越高的字符使用越短的编码,出现概率越低的字符使用越长的编码。
下面是保存字符出现概率的文件CharactorSheet.txt。每一行的左边是字符出现的概率(单位是0.01,即百分比),右边是要编码的字符。文件内容如下:
45 a
13 b
12 c
16 d
9 e
5 f
本篇代码实现对a、b、c、d、e、f六个出现概率不同的字符进行编码。
实现思路是:先构建一个最小堆,取出最小堆中最小的两个元素构成一棵二叉树,该二叉树的根结点的概率为两个元素的概率和;然后将该二叉树的根结点存入最小堆中。再从最小堆中取出两个最小的元素构建二叉树,如此重复,直至堆中元素被取光。
代码如下:
/*
*huffman编码
*使用最小堆排序来获取概率最小的元素
* Author: StoryMonster
*last change date: 2016/6/28
*/
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
/*
 * One node record, used for two linked structures at the same time:
 *  - the min-heap, linked through father / left / right and addressed by `index`;
 *  - the Huffman tree, linked through HuffFather / HuffLeft / HuffRight.
 */
struct HuffmanNode
{
    int percent;              /* weight of the node; for merged nodes, the sum of both children */
    char charactor;           /* symbol being encoded (only set for nodes read from the sheet) */
    int index;                /* position in the heap; children of i are looked up at 2*i and 2*i+1 */
    unsigned char code;       /* code bits of the node, filled in by PrintHuffmanCode */
    HuffmanNode *father;      /* heap parent */
    HuffmanNode *left;        /* heap left child */
    HuffmanNode *right;       /* heap right child */
    HuffmanNode *HuffFather;  /* Huffman-tree parent */
    HuffmanNode *HuffLeft;    /* Huffman-tree left child; NULL marks a leaf when printing codes */
    HuffmanNode *HuffRight;   /* Huffman-tree right child */
};
/* Forward declarations of the file-local helpers defined further down. */
/* Reads "percent character" pairs from CharactorSheet.txt and inserts one heap node per pair. */
static void ReadCharactorSheet(void);
/* Counts the node and links it into the heap rooted at the global `root`. */
static void InsertToMinPeal(HuffmanNode *node);
/* Searches the heap under `r` for the parent whose index matches node->index/2 and links `node` there. */
static bool InsertRecursion(HuffmanNode *r, HuffmanNode *node);
/* Swaps payloads between `r` and its children wherever a child has the smaller percent. */
static void FixMinPeal(HuffmanNode *r);
/* Copies the payload of heap root `r` into `node`, then removes one node from the heap. */
static void GetMinNode(HuffmanNode *r, HuffmanNode *node);
/* Finds the heap node whose index equals NodeCount. */
static HuffmanNode* GetMaxIndexNode(HuffmanNode *r);
/* Repeatedly merges the two nodes taken from the heap and stores the result in HuffmanRoot. */
static void BuildHuffmanTree(void);
/* Walks the Huffman tree and prints percent, code bits and character of every leaf. */
static void PrintHuffmanCode(HuffmanNode *r, int hight);
/* Debug dump of the heap: one line per node with the percents of its children. */
static void PrintThePeal(HuffmanNode *r);
/*
 * Root of the min-heap. It starts out empty: InsertToMinPeal installs the first node when it
 * finds this pointer NULL, so allocating a placeholder here would only leak it.
 */
HuffmanNode *root = NULL;
/* Root of the finished Huffman tree; assigned by BuildHuffmanTree. */
HuffmanNode *HuffmanRoot = NULL;
/* Number of nodes currently stored in the heap; also the index of the heap's last node. */
int NodeCount = 0;
/*
 * Assigns a code to every node below `r` and prints one entry per leaf.
 *
 * r     - subtree root; a NULL pointer is accepted and prints nothing.
 * hight - depth of `r` in the Huffman tree, i.e. the number of code bits of `r` (0 for the root).
 *
 * A node without HuffLeft is treated as a leaf and printed as
 *   "Percent:<percent>" newline "<bits> <charactor>" newline.
 * Otherwise the left child receives the parent's code with a 0 appended and the right child the
 * parent's code with a 1 appended.
 *
 * Limitation: `code` is an unsigned char, so only the lowest 8 bits of a code are stored; a leaf
 * deeper than 8 levels prints 0 for every bit above the eighth.
 */
void PrintHuffmanCode(HuffmanNode *r,int hight)
{
    /* Same NULL tolerance as PrintThePeal / PrintTheHuffmanTree. */
    if(r == NULL) return;

    if(r->HuffLeft == NULL)
    {
        std::cout <<"Percent:"<<r->percent<<std::endl;
        /* Most significant bit first. */
        for(int bit = hight-1; bit >= 0; bit--)
        {
            /* Bits 8 and above do not exist in the 8-bit code; testing `bit < 8` avoids an
               out-of-range shift and prints them as 0. */
            bool bitSet = (bit < 8) && ((((r->code) >> bit) & 0x01) != 0);
            if(bitSet) std::cout << "1";
            else std::cout << "0";
        }
        std::cout << " "<<r->charactor<<std::endl;
        return ;
    }

    (r->HuffLeft)->code = (r->code)<<1;
    PrintHuffmanCode(r->HuffLeft,hight+1);

    if(r->HuffRight != NULL)
    {
        (r->HuffRight)->code = ((r->code)<<1)+1;
        PrintHuffmanCode(r->HuffRight,hight+1);
    }
}
/*
 * Debug helper: dumps the heap below `r` in pre-order.
 * Each node produces one line "<percent>:<left percent> <right percent>", where a missing child
 * is written as NULL. A NULL `r` prints nothing.
 */
void PrintThePeal(HuffmanNode *r)
{
    if(r != NULL)
    {
        HuffmanNode *leftChild = r->left;
        HuffmanNode *rightChild = r->right;

        std::cout << r->percent << ":";

        if(leftChild == NULL) std::cout << "NULL";
        else std::cout << leftChild->percent;

        if(rightChild == NULL) std::cout << " NULL";
        else std::cout << " " << rightChild->percent;

        std::cout << std::endl;

        PrintThePeal(leftChild);
        PrintThePeal(rightChild);
    }
}
/*
 * Prints the percents of the Huffman tree in in-order (left subtree, node, right subtree),
 * each followed by a single space. Note that the parameter hides the global heap `root`.
 */
void PrintTheHuffmanTree(HuffmanNode *root)
{
    /* Recurse into the left subtree, print the node, then continue with the right subtree. */
    for(HuffmanNode *current = root; current != NULL; current = current->HuffRight)
    {
        PrintTheHuffmanTree(current->HuffLeft);
        std::cout << current->percent << " ";
    }
}
void BuildHuffmanTree(void)
{
while(1)
{
HuffmanNode *NewRoot = (HuffmanNode *)malloc(sizeof(HuffmanNode));
HuffmanNode *p1 = (HuffmanNode *)malloc(sizeof(HuffmanNode));
GetMinNode(root,p1);
std::cout << "p1 "<<p1->percent<<std::endl;
HuffmanNode *p2 = (HuffmanNode *)malloc(sizeof(HuffmanNode));
GetMinNode(root,p2);
std::cout << "p2 "<<p2->percent<<std::endl;
NewRoot->HuffFather = NULL;
p1->HuffFather = NewRoot;
NewRoot->HuffLeft = p1;
p2->HuffFather = NewRoot;
NewRoot->HuffRight = p2;
NewRoot->right = NULL;
NewRoot->left = NULL;
NewRoot->father= NULL;
NewRoot->percent= p1->percent + p2->percent;
NewRoot->index = NodeCount+1;
HuffmanRoot = NewRoot;
std::cout << "father:" << NewRoot->percent <<std::endl;
if(NewRoot->percent >= 100) break;
InsertToMinPeal(NewRoot);
std::cout << "---------------------------"<<std::endl;
}
}
/*
 * Depth-first search below `r` for the heap node whose index equals NodeCount, i.e. the last
 * node of the heap.
 *
 * Returns the node, or NULL when `r` is NULL or no node below it carries that index.
 * Every path returns a value; previously a failed search fell off the end of the function,
 * which is undefined behaviour for a function returning a pointer.
 */
HuffmanNode *GetMaxIndexNode(HuffmanNode *r)
{
    if(r == NULL) return NULL;
    if(r->index == NodeCount) return r;

    HuffmanNode *found = GetMaxIndexNode(r->left);
    if(found != NULL) return found;

    return GetMaxIndexNode(r->right);
}
/*
 * Takes the element stored at the heap root out of the heap.
 *
 * r    - root of the heap (callers pass the global `root`).
 * node - caller-allocated node that receives a copy of the root's payload (charactor, index,
 *        percent, code and the three Huffman-tree links). Its heap links are set to NULL because
 *        it never becomes part of the heap.
 *
 * The heap shrinks by one: the payload of the last node (index == NodeCount) is moved to the
 * root, the last node is unlinked and freed, and FixMinPeal(r) re-checks the ordering.
 * When the heap holds a single node, that node is freed and the global `root` is reset to NULL
 * so no pointer to freed memory is left behind or read afterwards.
 *
 * If the heap is empty (or its last node cannot be found) `node` is returned as an empty leaf
 * with percent 0 and nothing is removed.
 */
void GetMinNode(HuffmanNode *r,HuffmanNode *node)
{
    if(node == NULL) return;

    /* Start from a fully defined, empty result. */
    node->percent = 0;
    node->charactor = 0;
    node->index = 0;
    node->code = 0;
    node->father = NULL;
    node->left = NULL;
    node->right = NULL;
    node->HuffFather = NULL;
    node->HuffLeft = NULL;
    node->HuffRight = NULL;

    if(r == NULL || NodeCount <= 0) return;

    HuffmanNode *last = GetMaxIndexNode(r);
    if(last == NULL) return;

    /* Copy the root's payload out to the caller. */
    node->charactor = r->charactor;
    node->index = r->index;
    node->percent = r->percent;
    node->code = r->code;
    node->HuffLeft = r->HuffLeft;
    node->HuffRight = r->HuffRight;
    node->HuffFather = r->HuffFather;

    if(last == r)
    {
        /* The root is the only node left: the heap becomes empty. */
        NodeCount--;
        if(root == r) root = NULL;
        free(r);
        return;
    }

    /* Unlink the last node from its parent. */
    HuffmanNode *lastParent = last->father;
    if(lastParent != NULL)
    {
        if(lastParent->left == last) lastParent->left = NULL;
        else lastParent->right = NULL;
        last->father = NULL;
    }

    /* Move the last node's payload into the root slot; the root keeps its own index. */
    r->percent = last->percent;
    r->charactor = last->charactor;
    r->code = last->code;
    r->HuffLeft = last->HuffLeft;
    r->HuffRight = last->HuffRight;
    r->HuffFather = NULL;

    NodeCount--;
    free(last);
    FixMinPeal(r);
}
/*
 * Links `node` into the heap below `r` at the position given by node->index: the node becomes
 * the left child of the node with index node->index/2 when its index is even, the right child
 * when it is odd.
 *
 * Returns true when the node was linked, false when no matching parent exists below `r`
 * (or when either argument is NULL). Linking a right child now also reports true; it used to
 * report false, which made the callers keep searching the remaining subtrees for nothing.
 */
bool InsertRecursion(HuffmanNode *r, HuffmanNode *node)
{
    if(r == NULL || node == NULL) return false;

    if(node->index == 2*(r->index))
    {
        r->left = node;
        node->father = r;
        return true;
    }
    if(node->index == (2*(r->index)+1))
    {
        r->right = node;
        node->father = r;
        return true;
    }

    /* A node with a free child slot that is not the wanted parent cannot have the wanted parent
       below it, so the search stops here. */
    if(r->left == NULL||r->right == NULL) return false;

    return InsertRecursion(r->left,node) || InsertRecursion(r->right,node);
}
void FixMinPeal(HuffmanNode *r)
{
HuffmanNode *r_left = r->left;
HuffmanNode *r_right = r->right;
if(r_left == NULL) return ;
else
{
if(r_left->percent < r->percent)
{
int temp = r_left->percent;
char ch = r_left->charactor;
HuffmanNode* p_HuffLeft = r_left->HuffLeft;
HuffmanNode* p_HuffRight= r_left->HuffRight;
HuffmanNode* p_HuffFather=r_left->HuffFather;
r_left->percent = r->percent;
r_left->charactor = r->charactor;
r_left->HuffLeft = r->HuffLeft;
r->HuffLeft = p_HuffLeft;
r_left->HuffRight = r->HuffRight;
r->HuffRight = p_HuffRight;
r_left->HuffFather = r->HuffFather;
r->HuffFather = p_HuffFather;
r->percent = temp;
r->charactor = ch;
if(r->father != NULL) FixMinPeal(r->father);
}
FixMinPeal(r->left);
}
if(r_right == NULL) return ;
else
{
if(r_right->percent < r->percent)
{
int temp = r_right->percent;
char ch = r_right->charactor;
HuffmanNode* p_HuffLeft = r_right->HuffLeft;
HuffmanNode* p_HuffRight= r_right->HuffRight;
HuffmanNode* p_HuffFather=r_right->HuffFather;
r_right->percent = r->percent;
r_right->charactor = r->charactor;
r_right->HuffLeft = r->HuffLeft;
r->HuffLeft = p_HuffLeft;
r_right->HuffRight = r->HuffRight;
r->HuffRight = p_HuffRight;
r_right->HuffFather = r->HuffFather;
r->HuffFather = p_HuffFather;
r->percent = temp;
r->charactor = ch;
if(r->father != NULL) FixMinPeal(r->father);
}
FixMinPeal(r->right);
}
}
/*
 * Adds `node` to the heap rooted at the global `root`.
 *
 * The caller must set node->index to NodeCount+1 (the slot of the new last node) and leave its
 * heap links NULL. NodeCount is incremented, the node is linked at that slot, and its payload is
 * then moved up past every ancestor with a larger percent. Without that last step a freshly
 * inserted small node would sit at the bottom while GetMinNode hands out the root as minimum.
 * Nodes with equal percent are not reordered.
 *
 * A NULL `node` is ignored.
 */
void InsertToMinPeal(HuffmanNode *node)
{
    if(node == NULL) return;

    /* NodeCount == 0 means the heap is logically empty, whatever `root` still points at. */
    bool heapIsEmpty = (root == NULL || NodeCount == 0);
    NodeCount++;
    if(heapIsEmpty)
    {
        node->father = NULL;
        root = node;
        return ;
    }

    InsertRecursion(root,node);

    /* Sift up: exchange payloads with the parent while the parent is larger. Only the fields
       FixMinPeal exchanges are moved; index and code stay with the heap position. */
    HuffmanNode *current = node;
    while(current->father != NULL && current->percent < current->father->percent)
    {
        HuffmanNode *parent = current->father;

        int percent = current->percent;
        char charactor = current->charactor;
        HuffmanNode *huffLeft = current->HuffLeft;
        HuffmanNode *huffRight = current->HuffRight;
        HuffmanNode *huffFather = current->HuffFather;

        current->percent = parent->percent;
        current->charactor = parent->charactor;
        current->HuffLeft = parent->HuffLeft;
        current->HuffRight = parent->HuffRight;
        current->HuffFather = parent->HuffFather;

        parent->percent = percent;
        parent->charactor = charactor;
        parent->HuffLeft = huffLeft;
        parent->HuffRight = huffRight;
        parent->HuffFather = huffFather;

        current = parent;
    }
}
void ReadCharactorSheet()
{
FILE *fp = fopen("CharactorSheet.txt","rb");
if(!fp)
{
std::cout << "Open CharactorSheet.txt failed!" << std::endl;
fp = NULL;
fclose(fp);
}
int index = 1;
while(1)
{
char ch;
int percent;
int n = fscanf(fp,"%d %c",&percent,&ch);
if(n <= 1) break;
HuffmanNode *node = (HuffmanNode *)malloc(sizeof(HuffmanNode));
node->percent = percent;
node->charactor = ch;
node->index = index++;
node->father = NULL;
node->left = NULL;
node->right = NULL;
node->code = 0;
node->HuffFather = NULL;
node->HuffLeft = NULL;
node->HuffRight= NULL;
InsertToMinPeal(node);
}
fclose(fp);
fp = NULL;
}
/*
 * Program entry: loads the symbol table, orders the heap, builds the Huffman tree and prints
 * first the tree's percents in in-order and then the code of every symbol.
 * Returns 1 without building anything when no symbol could be read.
 */
int main()
{
    /* Release whatever the globals were initialised with before starting from an empty heap;
       free() accepts NULL, so this is also correct when they already start out empty. */
    free(root);
    root = NULL;
    free(HuffmanRoot);
    HuffmanRoot = NULL;

    ReadCharactorSheet();
    if(root == NULL)
    {
        /* The heap helpers below dereference the root, so stop here on empty input. */
        std::cout << "No characters to encode." << std::endl;
        return 1;
    }

    FixMinPeal(root);
    BuildHuffmanTree();
    PrintTheHuffmanTree(HuffmanRoot);
    PrintHuffmanCode(HuffmanRoot,0);
    return 0;
}
这篇代码写得有些糟糕,但是功能是实现了的。之后找时间再重新修改一下代码。