目录
概念
哈夫曼树(Huffman Tree)是一种用于数据压缩的最优二叉树,基于哈夫曼编码算法构建。它是一种带权路径长度最短的二叉树,常用于文件压缩和编码等场景。
相关概念
带权路径长度
WPL(Weighted Path Length,带权路径长度) 是哈夫曼树的一个重要性质,指的是所有叶子节点的权值与其到根节点路径长度的乘积之和。
公式为:$WPL = \sum_{i=1}^{n} w_i \cdot l_i$,其中 $n$ 为叶子节点的个数:
- $w_i$: 第 i 个叶子节点的权值。
- $l_i$: 第 i 个叶子节点到根节点的路径长度。
哈夫曼树的特点是其 WPL 是最小的,因此被称为最优二叉树。
若想要 WPL 最小,就要让权值越大的结点离根节点越近。所以在构造哈夫曼树的时候采用自底向上的方式:让权值小的结点先合并到一起,再依次往上构建,即可满足该要求
构造步骤
- 将每个权值看作一个节点。
- 找出当前权值最小的两个节点,作为左右子节点,构造一个新的父节点。父节点的权值为左右子节点权值之和;这两个节点不再参与后续选择,由新的父节点代替它们。
- 重复步骤 2,直到所有节点合并成一棵树。
性质
Ⅰ)n个结点合并成一棵树需要合并n-1次——>每合并一次需要新增1个结点——>合成一棵哈夫曼树需要新增n-1个结点,加上原有的n个叶子结点,共2n-1个结点
Ⅱ)构建的哈夫曼树的wpl是唯一的,但树不唯一
应用场景:
哈夫曼编码:与定长编码不同,哈夫曼编码是一种变长编码,为频率较高的字符分配较短的编码,频率较低的字符分配较长的编码,以节省空间
代码实现
这里我们省略了统计每个字符出现次数的过程,直接用字符数组记录每个字符,用权值数组记录对应的权值
// Characters to encode; the code below uses 1-based indices, so slot 0 stays unused.
char s[105];
// Weight paired with each character: w[i] belongs to s[i].
int w[105];
为减少不必要麻烦,下标我们这里从1开始(0号位置不使用,因为结点中的下标0用来表示“无”)
构建结构体存储结点信息
// One node of the Huffman tree. Nodes live in a flat array and refer to each
// other by array index; index 0 is used as the "no node" marker.
typedef struct {
int weight;// weight of this node
// Note: the links below store array indices, which is why they are int
int left;// index of the left child
int right;// index of the right child
int parent;// index of the parent
}HuffmanNode,*HuffmanTree;
哈夫曼树的创建
由前面的性质可知,n个数据创建一个哈夫曼树需要m=2n-1个结点,所以在申请大小的时候为m+1个(下标从1开始,所以+1)
// Builds a Huffman tree for the weights w[1..n].
//
// The tree is returned as a malloc'ed array of 2n - 1 nodes stored in slots
// 1..2n-1 (slot 0 is allocated but unused): slots 1..n are the leaves in input
// order, slots n+1..2n-1 are the merged nodes, and the last slot is the root.
// The caller releases the array with free(). Exits the process if the
// allocation fails.
HuffmanTree Create_HuffmanTree(int w[], int n) {
    const int total = 2 * n - 1; // n leaves plus n - 1 merged nodes
    HuffmanNode* nodes = (HuffmanNode*)malloc((total + 1) * sizeof(HuffmanNode));
    if (nodes == NULL) {
        cerr << "内存分配失败" << endl;
        exit(EXIT_FAILURE);
    }

    // Leaves take their weight from the input; merged nodes start at weight 0.
    // Every link starts at 0, i.e. "none".
    for (int k = 1; k <= total; ++k) {
        nodes[k].weight = (k <= n) ? w[k] : 0;
        nodes[k].left = 0;
        nodes[k].right = 0;
        nodes[k].parent = 0;
    }

    // Each round merges the two lightest parentless nodes among the slots
    // filled so far into the next free slot.
    for (int next = n + 1; next <= total; ++next) {
        int lightest;
        int runnerUp;
        find_Node(nodes, next - 1, &lightest, &runnerUp);

        nodes[next].left = lightest;
        nodes[next].right = runnerUp;
        nodes[next].weight = nodes[lightest].weight + nodes[runnerUp].weight;
        nodes[runnerUp].parent = next;
        nodes[lightest].parent = next;
    }
    return nodes;
}
// Finds the two lightest nodes that have no parent yet among tree[1..length].
//
// On return *s1 holds the index of the lightest such node and *s2 the index of
// the second lightest; on equal weights the node seen first keeps its place.
// An output stays -1 when there are not enough parentless nodes to fill it.
void find_Node(HuffmanTree tree, int length, int* s1, int* s2) {
    int best = -1;   // lightest parentless node seen so far
    int second = -1; // runner-up
    for (int idx = 1; idx <= length; ++idx) {
        if (tree[idx].parent != 0) {
            continue; // already merged into a subtree
        }
        const int wt = tree[idx].weight;
        if (best == -1 || wt < tree[best].weight) {
            second = best; // the previous best becomes the runner-up
            best = idx;
        } else if (second == -1 || wt < tree[second].weight) {
            second = idx;
        }
    }
    *s1 = best;
    *s2 = second;
}
构建编码的时候,采用的是自底向上(从叶子走到根)的方法,得到的编码位是逆序的,所以这里的start从缓冲区末尾向前移动。而在保存编码这里,一个编码的存储采用一维字符数组,所以多个编码采用了二维字符数组
整体代码:
#define _CRT_SECURE_NO_DEPRECATE
#include <iostream>
#include <cstdlib>
#include <cstring>
using namespace std;
char s[105]; // characters to encode; main fills s[1..n], slot 0 is unused
int w[105]; // weights; w[i] is the weight of s[i]
// One node of the Huffman tree. Nodes live in a flat array and refer to each
// other by array index; index 0 is used as the "no node" marker.
typedef struct {
int weight; // weight of this node
int left; // index of the left child
int right; // index of the right child
int parent; // index of the parent
} HuffmanNode, * HuffmanTree;
// Forward declarations; the definitions follow main().
// Picks the two lightest parentless nodes among tree[1..length].
void find_Node(HuffmanTree tree, int length, int* s1, int* s2);
// Builds the Huffman tree for the weights w[1..n].
HuffmanTree Create_HuffmanTree(int w[], int n);
// Derives a '0'/'1' code string for each of the n leaves.
char** Create_HuffmanCodes(HuffmanTree tree, int n);
int main() {
cout << "请输入结点个数:";
int n;
cin >> n;
// 输入字符和对应的权值
for (int i = 1; i <= n; i++) {
cin >> s[i] >> w[i];
}
// 构造哈夫曼树
HuffmanTree tree = Create_HuffmanTree(w, n);
// 输出哈夫曼树的权值
cout << "哈夫曼树的权值数组:";
for (int i = 1; i < 2 * n; i++) {
cout << tree[i].weight << " ";
}
cout << endl;
// 创建哈夫曼编码
char** codes = Create_HuffmanCodes(tree, n);
cout << "哈夫曼编码:" << endl;
for (int i = 1; i <= n; i++) {
cout << s[i] << ": " << codes[i] << endl;
}
// 释放动态分配的内存
for (int i = 1; i <= n; i++) {
free(codes[i]);
}
free(codes);
free(tree);
return 0;
}
// Finds the two lightest nodes that have no parent yet among tree[1..length].
//
// On return *s1 holds the index of the lightest such node and *s2 the index of
// the second lightest; on equal weights the node seen first keeps its place.
// An output stays -1 when there are not enough parentless nodes to fill it.
void find_Node(HuffmanTree tree, int length, int* s1, int* s2) {
    int best = -1;   // lightest parentless node seen so far
    int second = -1; // runner-up
    for (int idx = 1; idx <= length; ++idx) {
        if (tree[idx].parent != 0) {
            continue; // already merged into a subtree
        }
        const int wt = tree[idx].weight;
        if (best == -1 || wt < tree[best].weight) {
            second = best; // the previous best becomes the runner-up
            best = idx;
        } else if (second == -1 || wt < tree[second].weight) {
            second = idx;
        }
    }
    *s1 = best;
    *s2 = second;
}
// Builds a Huffman tree for the weights w[1..n].
//
// The tree is returned as a malloc'ed array of 2n - 1 nodes stored in slots
// 1..2n-1 (slot 0 is allocated but unused): slots 1..n are the leaves in input
// order, slots n+1..2n-1 are the merged nodes, and the last slot is the root.
// The caller releases the array with free(). Exits the process if the
// allocation fails.
HuffmanTree Create_HuffmanTree(int w[], int n) {
    const int total = 2 * n - 1; // n leaves plus n - 1 merged nodes
    HuffmanNode* nodes = (HuffmanNode*)malloc((total + 1) * sizeof(HuffmanNode));
    if (nodes == NULL) {
        cerr << "内存分配失败" << endl;
        exit(EXIT_FAILURE);
    }

    // Leaves take their weight from the input; merged nodes start at weight 0.
    // Every link starts at 0, i.e. "none".
    for (int k = 1; k <= total; ++k) {
        nodes[k].weight = (k <= n) ? w[k] : 0;
        nodes[k].left = 0;
        nodes[k].right = 0;
        nodes[k].parent = 0;
    }

    // Each round merges the two lightest parentless nodes among the slots
    // filled so far into the next free slot.
    for (int next = n + 1; next <= total; ++next) {
        int lightest;
        int runnerUp;
        find_Node(nodes, next - 1, &lightest, &runnerUp);

        nodes[next].left = lightest;
        nodes[next].right = runnerUp;
        nodes[next].weight = nodes[lightest].weight + nodes[runnerUp].weight;
        nodes[runnerUp].parent = next;
        nodes[lightest].parent = next;
    }
    return nodes;
}
// Produces the Huffman code of every leaf tree[1..n] as a NUL-terminated
// string of '0'/'1' characters.
//
// Returns a malloc'ed table of n + 1 pointers; entry 0 is NULL and entry i is
// the separately malloc'ed code of leaf i. The caller frees each entry and
// then the table. Exits the process if any allocation fails.
char** Create_HuffmanCodes(HuffmanTree tree, int n) {
    // Scratch buffer: a code has at most n - 1 bits, plus the terminator.
    char* scratch = (char*)malloc(n * sizeof(char));
    char** table = (char**)malloc((n + 1) * sizeof(char*));
    if (scratch == NULL || table == NULL) {
        cerr << "内存分配失败" << endl;
        exit(EXIT_FAILURE);
    }
    for (int k = 0; k <= n; ++k) {
        table[k] = NULL;
    }

    for (int leaf = 1; leaf <= n; ++leaf) {
        // Walking leaf -> root yields the bits last-to-first, so the buffer is
        // filled from its end towards its start.
        int pos = n - 1;
        scratch[pos] = '\0';
        for (int child = leaf, up = tree[leaf].parent; up != 0;
             child = up, up = tree[up].parent) {
            // A step taken from a left child emits '1', any other step '0'.
            scratch[--pos] = (tree[up].left == child) ? '1' : '0';
        }

        // n - pos covers the bits written plus the terminator.
        table[leaf] = (char*)malloc((n - pos) * sizeof(char));
        if (table[leaf] == NULL) {
            cerr << "内存分配失败" << endl;
            exit(EXIT_FAILURE);
        }
        strcpy(table[leaf], scratch + pos);
    }

    free(scratch);
    return table;
}
运行结果:
也可以结合easyx库绘制图形
#define _CRT_SECURE_NO_DEPRECATE
#include <iostream>
#include <cstdlib>
#include <cstring>
#include <graphics.h> // 引入 EasyX 库
using namespace std;
char s[105]; // characters to encode; main fills s[1..n], slot 0 is unused
int w[105]; // weights; w[i] is the weight of s[i]
// One node of the Huffman tree. Nodes live in a flat array and refer to each
// other by array index; index 0 is used as the "no node" marker.
typedef struct {
int weight; // weight of this node
int left; // index of the left child
int right; // index of the right child
int parent; // index of the parent
} HuffmanNode, * HuffmanTree;
// Forward declarations; the definitions follow main().
// Picks the two lightest parentless nodes among tree[1..length].
void find_Node(HuffmanTree tree, int length, int* s1, int* s2);
// Builds the Huffman tree for the weights w[1..n].
HuffmanTree Create_HuffmanTree(int w[], int n);
// Derives a '0'/'1' code string for each of the n leaves.
char** Create_HuffmanCodes(HuffmanTree tree, int n);
// Recursively draws the subtree rooted at index `root`, with that node at (x, y).
void DrawHuffmanTree(HuffmanTree tree, int root, int x, int y, int xOffset, int n);
// Draws the whole tree, starting from the root at index 2n - 1.
void DrawTree(HuffmanTree tree, int n);
int main() {
cout << "请输入结点个数:";
int n;
cin >> n;
// 输入字符和对应的权值
for (int i = 1; i <= n; i++) {
cin >> s[i] >> w[i];
}
// 构造哈夫曼树
HuffmanTree tree = Create_HuffmanTree(w, n);
// 初始化 EasyX 图形窗口
initgraph(800, 600); // 创建 800x600 的绘图窗口
settextstyle(20, 0, _T("Consolas")); // 设置字体样式和大小
cleardevice(); // 清屏
// 绘制哈夫曼树
DrawTree(tree, n);
// 暂停,等待用户关闭窗口
system("pause");
closegraph();
// 输出哈夫曼树的权值
cout << "哈夫曼树的权值数组:";
for (int i = 1; i < 2 * n; i++) {
cout << tree[i].weight << " ";
}
cout << endl;
// 创建哈夫曼编码
char** codes = Create_HuffmanCodes(tree, n);
cout << "哈夫曼编码:" << endl;
for (int i = 1; i <= n; i++) {
cout << s[i] << ": " << codes[i] << endl;
}
// 释放动态分配的内存
for (int i = 1; i <= n; i++) {
free(codes[i]);
}
free(codes);
free(tree);
return 0;
}
// Finds the two lightest nodes that have no parent yet among tree[1..length].
//
// On return *s1 holds the index of the lightest such node and *s2 the index of
// the second lightest; on equal weights the node seen first keeps its place.
// An output stays -1 when there are not enough parentless nodes to fill it.
void find_Node(HuffmanTree tree, int length, int* s1, int* s2) {
    int best = -1;   // lightest parentless node seen so far
    int second = -1; // runner-up
    for (int idx = 1; idx <= length; ++idx) {
        if (tree[idx].parent != 0) {
            continue; // already merged into a subtree
        }
        const int wt = tree[idx].weight;
        if (best == -1 || wt < tree[best].weight) {
            second = best; // the previous best becomes the runner-up
            best = idx;
        } else if (second == -1 || wt < tree[second].weight) {
            second = idx;
        }
    }
    *s1 = best;
    *s2 = second;
}
// Builds a Huffman tree for the weights w[1..n].
//
// The tree is returned as a malloc'ed array of 2n - 1 nodes stored in slots
// 1..2n-1 (slot 0 is allocated but unused): slots 1..n are the leaves in input
// order, slots n+1..2n-1 are the merged nodes, and the last slot is the root.
// The caller releases the array with free(). Exits the process if the
// allocation fails.
HuffmanTree Create_HuffmanTree(int w[], int n) {
    const int total = 2 * n - 1; // n leaves plus n - 1 merged nodes
    HuffmanNode* nodes = (HuffmanNode*)malloc((total + 1) * sizeof(HuffmanNode));
    if (nodes == NULL) {
        cerr << "内存分配失败" << endl;
        exit(EXIT_FAILURE);
    }

    // Leaves take their weight from the input; merged nodes start at weight 0.
    // Every link starts at 0, i.e. "none".
    for (int k = 1; k <= total; ++k) {
        nodes[k].weight = (k <= n) ? w[k] : 0;
        nodes[k].left = 0;
        nodes[k].right = 0;
        nodes[k].parent = 0;
    }

    // Each round merges the two lightest parentless nodes among the slots
    // filled so far into the next free slot.
    for (int next = n + 1; next <= total; ++next) {
        int lightest;
        int runnerUp;
        find_Node(nodes, next - 1, &lightest, &runnerUp);

        nodes[next].left = lightest;
        nodes[next].right = runnerUp;
        nodes[next].weight = nodes[lightest].weight + nodes[runnerUp].weight;
        nodes[runnerUp].parent = next;
        nodes[lightest].parent = next;
    }
    return nodes;
}
// Produces the Huffman code of every leaf tree[1..n] as a NUL-terminated
// string of '0'/'1' characters.
//
// Returns a malloc'ed table of n + 1 pointers; entry 0 is NULL and entry i is
// the separately malloc'ed code of leaf i. The caller frees each entry and
// then the table. Exits the process if any allocation fails.
char** Create_HuffmanCodes(HuffmanTree tree, int n) {
    // Scratch buffer: a code has at most n - 1 bits, plus the terminator.
    char* scratch = (char*)malloc(n * sizeof(char));
    char** table = (char**)malloc((n + 1) * sizeof(char*));
    if (scratch == NULL || table == NULL) {
        cerr << "内存分配失败" << endl;
        exit(EXIT_FAILURE);
    }
    for (int k = 0; k <= n; ++k) {
        table[k] = NULL;
    }

    for (int leaf = 1; leaf <= n; ++leaf) {
        // Walking leaf -> root yields the bits last-to-first, so the buffer is
        // filled from its end towards its start.
        int pos = n - 1;
        scratch[pos] = '\0';
        for (int child = leaf, up = tree[leaf].parent; up != 0;
             child = up, up = tree[up].parent) {
            // A step taken from a left child emits '1', any other step '0'.
            scratch[--pos] = (tree[up].left == child) ? '1' : '0';
        }

        // n - pos covers the bits written plus the terminator.
        table[leaf] = (char*)malloc((n - pos) * sizeof(char));
        if (table[leaf] == NULL) {
            cerr << "内存分配失败" << endl;
            exit(EXIT_FAILURE);
        }
        strcpy(table[leaf], scratch + pos);
    }

    free(scratch);
    return table;
}
// Recursively draws the subtree rooted at node index `root`.
//
// The node is drawn as a circle of radius 20 centred at (x, y), labelled with
// its weight and, when root <= n (a leaf slot), with its character from s.
// Each child is placed 80 pixels lower and xOffset pixels to the left or
// right, joined to this node by a line, and drawn with half the offset.
// A root of 0 ("no node") draws nothing.
void DrawHuffmanTree(HuffmanTree tree, int root, int x, int y, int xOffset, int n) {
    if (root == 0) {
        return;
    }

    // Node outline.
    circle(x, y, 20);

    // Weight label.
    wchar_t weightText[10];
    swprintf(weightText, 10, L"%d", tree[root].weight);
    outtextxy(x - 10, y - 25, weightText);

    // Character label, only for the leaf slots 1..n.
    if (root <= n) {
        wchar_t symbolText[10];
        swprintf(symbolText, 10, L"%c", s[root]);
        outtextxy(x - 10, y + 5, symbolText);
    }

    // Left child first, then right child: same row, mirrored horizontally.
    const int childY = y + 80;
    const int childIndex[2] = { tree[root].left, tree[root].right };
    const int childX[2] = { x - xOffset, x + xOffset };
    for (int side = 0; side < 2; ++side) {
        if (childIndex[side] == 0) {
            continue; // no child on this side
        }
        line(x, y, childX[side], childY); // edge from this node to the child
        DrawHuffmanTree(tree, childIndex[side], childX[side], childY, xOffset / 2, n);
    }
}
// Draws the whole Huffman tree, with the root centred near the top of the
// 800-pixel-wide window.
void DrawTree(HuffmanTree tree, int n) {
    // Create_HuffmanTree fills slots 1..2n-1 and the last merge lands in the
    // final slot, so the root is at index 2n - 1.
    DrawHuffmanTree(tree, 2 * n - 1, 400, 50, 200, n);
}