基于哈夫曼树的数据压缩算法
发布时间: 2017年10月30日 19:30 时间限制: 1000ms 内存限制: 128M
输入一串字符串,根据给定的字符串中字符出现的频率建立相应哈夫曼树,构造哈夫曼编码表,在此基础上可以对待压缩文件进行压缩(即编码),同时可以对压缩后的二进制编码文件进行解压(即译码)。
多组数据,每组数据一行,为一个字符串(只考虑26个小写字母即可)。当输入字符串为“0”时,输入结束。
每组数据输出2n+3行(n为输入串中字符类别的个数)。第一行为统计出来的字符出现频率(只输出存在的字符,格式为:字符:频度),每两组字符之间用一个空格分隔,字符按照ASCII码从小到大的顺序排列。第二行至第2n行为哈夫曼树的存储结构的终态(形如教材139页表5.2(b),一行当中的数据用空格分隔)。第2n+1行为每个字符的哈夫曼编码(只输出存在的字符,格式为:字符:编码),每两组字符之间用一个空格分隔,字符按照ASCII码从小到大的顺序排列。第2n+2行为编码后的字符串,第2n+3行为解码后的字符串(与输入的字符串相同)。
复制
aaaaaaabbbbbccdddd
aabccc
0
a:7 b:5 c:2 d:4
1 7 7 0 0
2 5 6 0 0
3 2 5 0 0
4 4 5 0 0
5 6 6 3 4
6 11 7 2 5
7 18 0 1 6
a:0 b:10 c:110 d:111
00000001010101010110110111111111111
aaaaaaabbbbbccdddd
a:2 b:1 c:3
1 2 4 0 0
2 1 4 0 0
3 3 5 0 0
4 3 5 2 1
5 6 0 3 4
a:11 b:10 c:0
111110000
aabccc
代码如下:
#include<iostream>
#include<cstring>
#include<string>
using namespace std;
// One row of the letter-frequency table: a character and its occurrence count.
struct Num
{
    char ch;  // the character this row describes
    int num;  // number of times that character was counted
};

// Global frequency table. main() fills the leading slots (one per lowercase
// letter) and CreateHuffmanTree() reads the leaf weights from it.
Num an[7000];

// Spare scratch entry; not referenced by the visible code.
Num temp;
// Sorts the first 26 entries of bn by descending count (num).
// Neighbours are exchanged only when the left count is strictly smaller than
// the right one, so entries with equal counts keep their relative order.
void sort1(Num *bn)
{
    // After each pass the smallest remaining count has sunk to index `last`,
    // so the next pass can stop one position earlier.
    for (int last = 25; last > 0; --last)
    {
        bool swapped = false;
        for (int pos = 0; pos < last; ++pos)
        {
            if (bn[pos + 1].num > bn[pos].num)
            {
                Num held = bn[pos + 1];
                bn[pos + 1] = bn[pos];
                bn[pos] = held;
                swapped = true;
            }
        }
        // A pass without exchanges means the range is already in order.
        if (!swapped)
            break;
    }
}
// Reorders the first 26 entries of bn by ascending character, but only lets an
// entry move left when its count is positive. Entries whose count is zero are
// therefore never pulled in front of their left neighbour.
// The parameter n is accepted but not used by this routine.
void sort2(Num *bn, int n)
{
    for (int pass = 0; pass < 26; ++pass)
    {
        bool moved = false;
        for (Num *left = bn; left != bn + 25; ++left)
        {
            Num *right = left + 1;
            if (right->num > 0 && left->ch > right->ch)
            {
                Num held = *left;
                *left = *right;
                *right = held;
                moved = true;
            }
        }
        // Once a full pass changes nothing, every later pass would be a no-op.
        if (!moved)
            break;
    }
}
// One node of the Huffman tree, stored in an array and linked by indices.
// Index 0 is used as the "no node" value for parent/lchild/rchild.
struct HTNode
{
    int weight;     // leaf: character frequency; internal node: sum of both children
    int parent;     // index of the parent node, 0 while the node has none
    int lchild;     // index of the left child, 0 for a leaf
    int rchild;     // index of the right child, 0 for a leaf
    int vis;        // set to 1 by Select() once the node has been merged
    char ch;        // character represented by a leaf
    char strc[60];  // NUL-terminated code string written by code()
    int len;        // reset to 0 when the tree is initialised
    int num;        // the node's own index, used by Select() to order ties
};

// A Huffman tree is handled as a pointer to an array of nodes.
typedef HTNode *HuffmanTree;
// Chooses the two not-yet-merged nodes (vis == 0) of smallest weight among
// HT[1..k], returns their indices through s1 and s2 and marks both as merged.
//
// Each scan compares with <=, so among equal weights the highest index wins.
// If the two chosen nodes end up with the same weight, they are ordered so
// that s1 is the one with the smaller num. Nodes heavier than 999999 are never
// chosen; if a scan finds no candidate its result stays 0.
void Select(HuffmanTree &HT, int k, int &s1, int &s2)
{
    const int kNoWeight = 999999;

    // First scan: lightest unvisited node.
    int first = 0;
    int firstWeight = kNoWeight;
    for (int idx = 1; idx <= k; ++idx)
    {
        if (HT[idx].vis != 0)
            continue;
        if (HT[idx].weight <= firstWeight)
        {
            firstWeight = HT[idx].weight;
            first = idx;
        }
    }

    // Second scan: lightest unvisited node other than the first pick.
    int second = 0;
    int secondWeight = kNoWeight;
    for (int idx = 1; idx <= k; ++idx)
    {
        if (idx == first || HT[idx].vis != 0)
            continue;
        if (HT[idx].weight <= secondWeight)
        {
            secondWeight = HT[idx].weight;
            second = idx;
        }
    }

    // Equal weights: report the node with the smaller num first.
    if (firstWeight == secondWeight && HT[first].num > HT[second].num)
    {
        int swapIdx = first;
        first = second;
        second = swapIdx;
    }

    s1 = first;
    s2 = second;
    HT[s1].vis = 1;
    HT[s2].vis = 1;
}
void code(HuffmanTree &HT, int len, char str[], int n, char cc)
{
int i;
for (i = 0; i < len - 1; i++)
HT[n].strc[i] = str[i];
HT[n].strc[i] = cc;
HT[n].strc[i + 1] = '\0';
if (HT[n].lchild != 0)
code(HT, len + 1, HT[n].strc, HT[n].lchild, '0');
if (HT[n].rchild != 0)
code(HT, len + 1, HT[n].strc, HT[n].rchild, '1');
}
// Builds a Huffman tree for the first n entries of the global table `an` and
// assigns every node below the root its code string.
//
// HT   receives a newly allocated array of 2n nodes (slot 0 is the "no node"
//      slot, slots 1..n are leaves, slots n+1..2n-1 are internal nodes and
//      slot 2n-1 is the root). Any previous value of HT is overwritten without
//      being freed; the caller owns the array and must delete[] it.
// n    number of distinct symbols. For n <= 1 nothing is allocated and HT is
//      left untouched, so the caller has to handle that case itself.
// str, n1  accepted for interface compatibility; not used here.
void CreateHuffmanTree(HuffmanTree &HT, int n, char *str, int n1)
{
    if (n <= 1)
        return;

    const int m = 2 * n - 1;
    HT = new HTNode[m + 1];

    // Put every slot, including slot 0, into a defined state so that no later
    // read (node copies, code strings, tie-breaks) sees indeterminate data.
    for (int i = 0; i <= m; i++)
    {
        HT[i].weight = 0;
        HT[i].parent = 0;
        HT[i].lchild = 0;
        HT[i].rchild = 0;
        HT[i].vis = 0;
        HT[i].len = 0;
        HT[i].num = i;
        HT[i].ch = '\0';
        HT[i].strc[0] = '\0';
    }

    // Leaves take their weight and character from the frequency table.
    for (int i = 1; i <= n; i++)
    {
        HT[i].weight = an[i - 1].num;
        HT[i].ch = an[i - 1].ch;
    }

    // Repeatedly merge the two lightest unmerged nodes into a new parent.
    for (int i = n + 1; i <= m; i++)
    {
        int s1 = 0;
        int s2 = 0;
        Select(HT, i - 1, s1, s2);
        HT[s1].parent = i;
        HT[s2].parent = i;
        HT[i].lchild = s1;
        HT[i].rchild = s2;
        HT[i].weight = HT[s1].weight + HT[s2].weight;
    }

    // The root keeps an empty code; its children start from an empty prefix.
    // A writable array is used because code() may be declared with a
    // non-const char parameter, which a string literal must not be passed to.
    char rootPrefix[] = "";
    if (HT[m].lchild)
        code(HT, 1, rootPrefix, HT[m].lchild, '0');
    if (HT[m].rchild)
        code(HT, 1, rootPrefix, HT[m].rchild, '1');
}
int main()
{
HuffmanTree h;
int n, n1;
char str[700], str2[700];
while (cin >> str)
{
if (str[0] == '0'&&str[1] == '\0')
break;
n1 = 0;
for (int i = 0; i < 27; i++)
{
an[i].num = 0;
an[i].ch = 'a' + i;
}
n = strlen(str);
for (int i = 0; i < n; i++)
{
an[str[i] - 'a'].num++;
str2[i] = str[i];
str2[i + 1] = '\0';
}
sort1(an);
for (int i = 0; i < 26; i++)
{
if (an[i].num != 0)
n1++;
}
sort2(an, n1);
CreateHuffmanTree(h, n1, str, n1);
if (n1 == 1)
{
h = new HTNode[3];
h[1].weight = n; h[1].parent = h[1].lchild = h[1].rchild = 0;
h[1].ch = str[0]; h[1].strc[0] = '0'; h[1].strc[1] = '\0';
}
int f = 0;
for (int i = 0; i < 26; i++)
if (an[i].num > 0)
{
f++;
cout << an[i].ch << ":" << an[i].num ;
if (f == n1)
cout << endl;
else
cout << " ";
}
for (int i = 1; i <= 2 * n1 - 1; i++)
{
cout << i << " " << h[i].weight << " " << h[i].parent << " " << h[i].lchild << " " << h[i].rchild << endl;
}
for (int i = 1; i < n1; i++)
cout << h[i].ch << ":" << h[i].strc << " ";
cout << h[n1].ch << ":" << h[n1].strc;
cout << endl;
for (int i = 0; i < n; i++)
{
for (int j = 1; j <= n1; j++)
if (h[j].ch == str[i])
cout << h[j].strc;
}
cout << endl;
cout << str << endl;
for (int i = 1; i <= 2 * n1 - 1; i++)
{
h[i].parent = 0; h[i].lchild = 0; h[i].rchild = 0; h[i].vis = 0; h[i].len = 0; h[i].num = i;
}
}
return 0;
}