小白进阶之贪心算法-赫夫曼编码

本文介绍了一种有效的数据压缩方法——赫夫曼编码，并提供了详细的实现代码。通过使用字符出现频率构建二叉树，文章展示了如何为不同字符生成唯一的编码。此外，还分享了在实现过程中遇到的问题及解决技巧。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

赫夫曼编码是根据字符的使用频率对字符进行编码的一种编码方法，其用于压缩数据的效果非常好。一般用二叉树表示赫夫曼编码，使用频率越低的字符，其深度越大。在下面的实现中，q表示一个按频率从小到大排序的优先队列。

在赫夫曼编码的表示中，通常采用前缀码。前缀码即没有任何码字是其他码字的前缀，其作用是简化解码过程。用二叉树表示前缀码时，0表示转向左孩子，1表示转向右孩子。文件的最优编码方案总是对应一棵满二叉树（这里指每个非叶结点都恰有两个孩子的二叉树）。

下面是源码：

huffman.h

#include<stdio.h>
#include<stdlib.h>
// One node of the Huffman binary tree.
// A node whose left and right are both NULL is treated as a leaf by print().
struct node{
	struct node *left;	// left child, NULL when absent
	struct node *right;	// right child, NULL when absent
	int freq;	// frequency; huffman() sets it to the sum of both children for merged nodes
	char e;	// the character; merged nodes created by huffman() use ' '
};// a single node of the binary tree
// One cell of the singly linked queue of tree nodes.
// The first cell doubles as the queue handle: extractMin() and insert()
// treat a head cell whose con is NULL as an empty queue.
struct queue{
	struct node *con;	// tree node carried by this cell
	struct queue *next;	// following cell, NULL at the end of the queue
};// the queue structure
// Queue and tree operations used to build the Huffman tree.
// The queue is expected to be ordered by ascending freq, so its head is the minimum.
struct node * extractMin(struct queue *q);// returns the node at the head of queue q (its minimum) and removes it
void insert(struct queue *q,struct node *z);// inserts node z into queue q; placement is decided by comparing freq
void huffman(struct queue *q,int n);// builds the Huffman tree from queue q and prints it; n is the number of characters
void print(struct node *q);// prints the Huffman binary tree rooted at q (in-order)

huffman.cpp

#include"huffman.h"
// Removes the front entry of queue q and returns the tree node it carried.
//
// The queue is kept in ascending freq order, so the front entry is the
// minimum. The head cell q itself is never released: when more cells
// follow, the second cell's contents are moved into the head and the
// second cell is freed; when the head is the only cell, the queue is
// marked empty by setting q->con to NULL.
//
// q      head cell of the queue; every cell after the head must have been
//        obtained from malloc, because the unlinked cell is freed here.
// return the node that was at the front, or NULL when q is NULL or the
//        queue is already empty. The caller owns the returned node.
struct node * extractMin(struct queue *q){
	struct node *front;
	struct queue *second;
	if(q==NULL){
		return NULL;
	}
	// No allocation is needed: we only hand back the pointer already stored.
	front=q->con;
	second=q->next;
	if(second==NULL){
		// Last entry removed: con==NULL is the empty-queue marker.
		q->con=NULL;
	}
	else{
		// Pull the second cell forward into the head, then drop the spare cell.
		q->con=second->con;
		q->next=second->next;
		free(second);
	}
	return front;
}
// Inserts tree node z into queue q, keeping the queue in ascending freq order.
//
// Entries with a freq equal to z->freq stay in front of z, so insertion is
// stable. The head cell q is owned by the caller and cannot be replaced
// through this interface; when z belongs in front, the current head contents
// are moved into a new cell placed right after the head and z takes the head
// slot instead.
//
// q  head cell of the queue; q->con==NULL means the queue is empty.
// z  node to insert; the queue stores the pointer, it does not copy the node.
//
// Does nothing when q or z is NULL. Terminates the program if the new cell
// cannot be allocated.
void insert(struct queue *q,struct node *z){
	struct queue *cell,*prev,*cur;
	if(q==NULL||z==NULL){
		return;
	}
	if(q->con==NULL){
		// Empty queue: the head cell itself holds the first entry.
		q->con=z;
		q->next=NULL;
		return;
	}
	cell=(struct queue *)malloc(sizeof(struct queue));
	if(cell==NULL){
		fprintf(stderr,"insert: out of memory\n");
		exit(EXIT_FAILURE);
	}
	if(q->con->freq>z->freq){
		// z is the new minimum. Reassigning q would only change the local
		// copy of the pointer, so shift the old head into the new cell.
		cell->con=q->con;
		cell->next=q->next;
		q->con=z;
		q->next=cell;
		return;
	}
	// Walk past every entry whose freq is <= z->freq; the head already qualifies.
	prev=q;
	cur=q->next;
	while(cur!=NULL&&cur->con->freq<=z->freq){
		prev=cur;
		cur=cur->next;
	}
	// Link the new cell between prev and cur (cur is NULL at the tail).
	cell->con=z;
	cell->next=cur;
	prev->next=cell;
}
// Prints the tree rooted at q in in-order (left subtree, node, right subtree).
//
// Each node is written as "<char>-><freq> " to stdout. A NULL pointer is an
// empty subtree and prints nothing, so leaves, nodes with a single child and
// an empty tree are all handled without dereferencing NULL.
//
// q  root of the (sub)tree to print; may be NULL.
void print(struct node *q){
	if(q==NULL){
		return;
	}
	print(q->left);
	printf("%c->%d ",q->e,q->freq);
	print(q->right);
}
// Builds the Huffman tree from the nodes in queue q and prints it.
//
// Repeats n-1 times: take the two lowest-frequency nodes off the queue, join
// them under a new parent whose freq is their sum and whose character is ' ',
// and put the parent back into the queue. Afterwards the node left at the
// head of the queue is the root, which is printed with print().
//
// q  head cell of a queue ordered by ascending freq.
// n  number of characters (leaf nodes) in the queue.
//
// If the queue holds fewer than n nodes, merging stops as soon as two nodes
// can no longer be extracted and whatever tree exists is printed. If a parent
// node cannot be allocated, an error is reported on stderr and the function
// returns without printing.
void huffman(struct queue *q,int n){
	int i;
	if(q==NULL){
		return;
	}
	for(i=1;i<n;i++){
		struct node *z,*x,*y;
		// Only the parent needs fresh storage; x and y come from the queue.
		z=(struct node *)malloc(sizeof(struct node));
		if(z==NULL){
			fprintf(stderr,"huffman: out of memory\n");
			return;
		}
		x=extractMin(q);
		y=extractMin(q);
		if(x==NULL||y==NULL){
			// Fewer than two nodes were left: restore the queue and stop merging.
			if(x!=NULL){
				insert(q,x);
			}
			free(z);
			break;
		}
		z->left=x;
		z->right=y;
		z->freq=x->freq+y->freq;
		z->e=' ';
		insert(q,z);
	}
	// An empty queue has con==NULL; there is nothing to print in that case.
	if(q->con!=NULL){
		print(q->con);
	}
}

main.cpp

#include"huffman.h"
// Demo driver: builds a queue of six sample characters and runs huffman() on it.
//
// The sample table is listed in ascending frequency order, which is the order
// the queue must have before huffman() is called. Returns 0 on success and
// EXIT_FAILURE if memory for the leaves or queue cells cannot be allocated.
int main(){
	enum { SYMBOL_COUNT = 6 };
	static const char symbols[SYMBOL_COUNT]={'f','e','c','b','d','a'};
	static const int freqs[SYMBOL_COUNT]={5,9,12,13,16,45};
	struct queue *head=NULL,*tail=NULL;
	int i;
	for(i=0;i<SYMBOL_COUNT;i++){
		struct node *leaf;
		struct queue *cell;
		leaf=(struct node *)malloc(sizeof(struct node));
		cell=(struct queue *)malloc(sizeof(struct queue));
		if(leaf==NULL||cell==NULL){
			fprintf(stderr,"main: out of memory\n");
			return EXIT_FAILURE;
		}
		// Every sample character starts out as a leaf of the tree.
		leaf->left=NULL;
		leaf->right=NULL;
		leaf->freq=freqs[i];
		leaf->e=symbols[i];
		// Append the cell at the tail so the queue keeps the table's order.
		cell->con=leaf;
		cell->next=NULL;
		if(tail==NULL){
			head=cell;
		}
		else{
			tail->next=cell;
		}
		tail=cell;
	}
	huffman(head,SYMBOL_COUNT);
#ifdef _WIN32
	// "pause" is a Windows shell built-in; it keeps the console window open.
	system("pause");
#endif
	return 0;
}

收获有三个：

第一个是各种结构体、结构体数组、结构体指针调来调去，很容易混乱。而且结构体指针在使用之前一定要初始化，让它指向有效的对象：要么为它分配内存空间，要么把已有对象的地址赋给它（后一种情况不需要再malloc，否则先分配的那块内存会泄漏）；

第二个是&&（逻辑与）表达式中各个条件的求值顺序，顺序写反了有时候会导致程序出错。比如