In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters 'a', 'x', 'u' and 'z' are 4, 2, 1 and 1, respectively. We may either encode the symbols as {'a'=0, 'x'=10, 'u'=110, 'z'=111}, or in another way as {'a'=1, 'x'=01, 'u'=001, 'z'=000}, both compress the string into 14 bits. Another set of code can be given as {'a'=0, 'x'=11, 'u'=100, 'z'=101}, but {'a'=0, 'x'=01, 'u'=011, 'z'=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.
Input Specification:
Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:
c[1] f[1] c[2] f[2] ... c[N] f[N]
where c[i] is a character chosen from {'0' - '9', 'a' - 'z', 'A' - 'Z', '_'}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:
c[i] code[i]
where c[i] is the i-th character and code[i] is a non-empty string of no more than 63 '0's and '1's.
Output Specification:
For each test case, print in each line either "Yes" if the student's submission is correct, or "No" if not.
Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.
Sample Input:
7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11
Sample Output:
Yes
Yes
No
No
题目要求判断学生给出的编码是否为最优编码,但可以不是哈夫曼编码。
这要求学生提交的编码满足:
1.带权路径长度最小(跟哈夫曼编码一样小);
2.无歧义编码——是前缀码:数据仅存在于叶子结点中;
3.没有度为1的结点
因为满足1,2必然有3,所以我们只要证明学生提交的编码满足1,2条件即可。
#include<stdbool.h>
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
/*
 * Binary tree node shared by the Huffman tree and by the code trees that
 * Judge() builds from student submissions.
 *   HuffmanNode - the node type itself
 *   HuffmanTree - pointer to a node (a NULL pointer is an empty tree)
 */
struct TreeNode;
typedef struct TreeNode HuffmanNode;
typedef HuffmanNode *HuffmanTree;
struct TreeNode {
    char ch;            /* character being encoded ('\0' for nodes created by CreateTree) */
    int Weight;         /* weight (frequency) stored in the node */
    HuffmanNode *Left;  /* left child */
    HuffmanNode *Right; /* right child */
};
/* Weight given to the sentinel stored at data[0]; it must be smaller than
 * every weight that can be put into the heap. */
#define MinData (-1)

/* Array-based min-heap of tree nodes, ordered by node weight. */
struct HeapStruct {
    HuffmanTree *data; /* element array; real elements start at index 1 */
    int Size;          /* number of elements currently stored */
    int Capacity;      /* maximum number of elements */
};
typedef struct HeapStruct *MinHeap;
/* Number of slots in the per-symbol tables below. */
#define MaxN 64

int N;           /* number of distinct characters in the test case */
int w[MaxN];     /* w[i] is the frequency read for ch[i] */
char ch[MaxN];   /* the characters, in input order */
int code_length; /* weighted path length of the optimal (Huffman) code */
/* Forward declarations for the functions defined after main(). */

/* Tree helpers */
HuffmanTree CreateTree(void);
void PreOrderTraversal(HuffmanTree BST);
int WPL(HuffmanTree BST, int depth);

/* Min-heap operations */
MinHeap CreateMinHeap(int MaxSize);
bool Insert(MinHeap H, HuffmanTree item);
HuffmanTree DeleteMin(MinHeap H);

/* Huffman construction and submission checking */
HuffmanTree Huffman(MinHeap H);
int Judge(void);
int main()
{
int i,M;
MinHeap h;
HuffmanTree T,BT = NULL;
scanf("%d",&N);
h = CreateMinHeap(2*N); //创建最小堆 //N个叶子节点最终形成的哈夫曼树最多有2N-1个树结点
for(i=0; i<N; i++){/*最小堆元素赋值*/
T = CreateTree();
getchar();//吸收换行符及空格
scanf("%c %d",&ch[i],&w[i]);
T->ch = ch[i];
T->Weight = w[i];
Insert(h,T);
}
BT = Huffman(h); //构造哈夫曼树
//PreOrderTraversal(BT);
code_length = WPL(BT,0);
scanf("%d",&M);
while(M--){
if(Judge()) printf("Yes\n");
else printf("No\n");
}
return 0;
}
/*
 * Pre-order traversal (debug helper).
 * Prints the weight of every node followed by a space: the node itself
 * first, then its left subtree, then its right subtree.
 */
void PreOrderTraversal(HuffmanTree BST)
{
    if (BST == NULL) {
        return; /* empty subtree: nothing to print */
    }
    printf("%d ", BST->Weight);
    PreOrderTraversal(BST->Left);
    PreOrderTraversal(BST->Right);
}
/*
 * Allocates a new tree node with ch = '\0', Weight = 0 and no children.
 *
 * Returns the new node; the caller owns it. If the allocation fails, a
 * message is written to stderr and the program exits with EXIT_FAILURE,
 * so the returned pointer is never NULL.
 */
HuffmanTree CreateTree()
{
    HuffmanTree node = (HuffmanTree)malloc(sizeof(HuffmanNode));
    if (node == NULL) {
        /* Every caller dereferences the result immediately, so stop here
         * with a clear message rather than crash on a NULL pointer. */
        fprintf(stderr, "CreateTree: out of memory\n");
        exit(EXIT_FAILURE);
    }
    node->ch = '\0';
    node->Weight = 0;
    node->Left = NULL;
    node->Right = NULL;
    return node;
}
/*
 * Huffman tree construction.
 * Repeatedly removes the two lightest nodes from min-heap H, joins them under
 * a new parent whose weight is their sum, and inserts the parent back.
 * Returns the last node removed from the heap, i.e. the root of the tree.
 */
HuffmanTree Huffman(MinHeap H)
{
    /* The number of merges is fixed up front because DeleteMin() and
     * Insert() change H->Size while the loop runs. */
    int merges_left = H->Size - 1;

    while (merges_left-- > 0) {
        HuffmanTree parent = CreateTree();
        parent->Left = DeleteMin(H);   /* lightest node becomes the left child */
        parent->Right = DeleteMin(H);  /* next lightest becomes the right child */
        parent->Weight = parent->Left->Weight + parent->Right->Weight;
        Insert(H, parent);
    }
    return DeleteMin(H);
}
/*
 * Weighted path length of the tree rooted at BST.
 * depth is the depth of BST itself (pass 0 for the root). Each leaf
 * contributes depth * Weight; an empty tree contributes 0.
 */
int WPL(HuffmanTree BST,int depth)
{
    if (BST == NULL) {
        return 0;
    }
    if (BST->Left == NULL && BST->Right == NULL) {
        return depth * BST->Weight; /* leaf */
    }
    /* internal node: sum both subtrees one level deeper */
    int below = depth + 1;
    return WPL(BST->Left, below) + WPL(BST->Right, below);
}
/* Frees every node of a code tree built by Judge(). */
static void FreeCodeTree(HuffmanTree node)
{
    if (node == NULL) {
        return;
    }
    FreeCodeTree(node->Left);
    FreeCodeTree(node->Right);
    free(node);
}

/*
 * Reads one student submission (N lines of "character code") from stdin and
 * checks it against the globals N, ch[], w[] and code_length.
 *
 * Returns 1 when the codes form a prefix code whose weighted path length
 * equals code_length, and 0 otherwise.
 *
 * All N lines are always consumed, even after the submission is known to be
 * wrong, so that the next call starts at the next submission.
 */
int Judge()
{
    /* Both buffers have MaxN (64) bytes; the %63s widths below keep scanf
     * within them. */
    char sym[MaxN], code[MaxN];
    int valid = 1;                  /* cleared as soon as any rule is violated */
    HuffmanTree root = CreateTree();
    int i;

    for (i = 0; i < N; i++) {
        int idx;
        size_t k, len;
        HuffmanTree pt;

        if (scanf("%63s %63s", sym, code) != 2) {
            valid = 0;              /* input ended early */
            break;
        }
        if (!valid) {
            continue;               /* already rejected: just skip the line */
        }

        /* With N leaves, a Huffman code word is at most N-1 bits long. */
        len = strlen(code);
        if (len > (size_t)(N - 1)) {
            valid = 0;
            continue;
        }

        /* Bounded lookup of the character's frequency. */
        for (idx = 0; idx < N && ch[idx] != sym[0]; idx++) {
        }
        if (idx == N) {
            valid = 0;              /* character not in the alphabet */
            continue;
        }

        /* Walk or extend the code tree from the root along the code word. */
        pt = root;
        for (k = 0; k < len; k++) {
            HuffmanTree *next;
            if (code[k] == '0') {
                next = &pt->Left;
            } else if (code[k] == '1') {
                next = &pt->Right;
            } else {
                valid = 0;          /* not a binary digit */
                break;
            }
            if (*next == NULL) {
                *next = CreateTree();
            }
            pt = *next;
            /* Nodes from CreateTree() have ch == '\0', so a non-zero ch marks
             * the end of an earlier code word: that code is a prefix of (or
             * equal to) this one. */
            if (pt->ch != '\0') {
                valid = 0;
                break;
            }
        }
        if (!valid) {
            continue;
        }

        /* The final node must be a leaf; otherwise this code is a prefix of
         * an earlier one. */
        if (pt->Left || pt->Right) {
            valid = 0;
            continue;
        }
        pt->ch = sym[0];
        pt->Weight = w[idx];
    }

    /* A prefix code is accepted only if its weighted path length is optimal. */
    if (valid && WPL(root, 0) != code_length) {
        valid = 0;
    }

    FreeCodeTree(root);
    return valid;
}
MinHeap CreateMinHeap(int MaxSize)
{ /*创建容量为MaxSize的最小堆*/
MinHeap H = (MinHeap)malloc(sizeof(struct HeapStruct));
H->data = (HuffmanTree *)malloc((MaxSize+1) * sizeof(HuffmanTree));
H->Size = 0;
H->Capacity = MaxSize;
HuffmanTree T = CreateTree();
T->Weight = MinData; /*定义哨兵-为小于堆中所有可能元素权值的值,便于以后更快操作*/
H->data[0] = T;
return H;
}
/* Returns true when the heap's element count equals its capacity. */
bool IsFull(MinHeap H)
{
    if (H->Size == H->Capacity) {
        return true;
    }
    return false;
}
/* Returns true when the heap holds no elements. */
bool IsEmpty(MinHeap H)
{
    if (H->Size == 0) {
        return true;
    }
    return false;
}
/*
 * Heap insertion.
 * Places item on the path from the new last slot up to the root so that the
 * weights along that path stay ordered. H->data[0] must hold the sentinel.
 * Returns false (after printing a message) when the heap is full, true
 * otherwise.
 */
bool Insert(MinHeap H,HuffmanTree item)
{
    if (H->Size == H->Capacity) {
        printf("最小堆已满\n");
        return false;
    }

    /* Open a hole at the new last position. */
    int slot = H->Size + 1;
    H->Size = slot;

    /* Move heavier ancestors down into the hole. The sentinel at index 0
     * ends the loop at the root; without it an extra slot > 1 test would be
     * needed. */
    while (H->data[slot / 2]->Weight > item->Weight) {
        H->data[slot] = H->data[slot / 2];
        slot /= 2;
    }
    H->data[slot] = item;
    return true;
}
/*
 * Removes and returns the element with the smallest weight from min-heap H.
 * Returns NULL (after printing a message) when the heap is empty.
 */
HuffmanTree DeleteMin(MinHeap H)
{
    if (H->Size == 0) {
        printf("最小堆为空\n");
        return NULL;
    }

    HuffmanTree smallest = H->data[1];   /* the root holds the minimum */
    HuffmanTree last = H->data[H->Size]; /* element to be re-seated */
    H->Size -= 1;

    /* Sift a hole down from the root: while the lighter child is lighter
     * than 'last', move that child up and continue from its old slot. */
    int hole = 1;
    while (hole * 2 <= H->Size) {
        int kid = hole * 2;
        /* A right child exists and is lighter than the left one. */
        if (kid < H->Size && H->data[kid + 1]->Weight < H->data[kid]->Weight) {
            kid += 1;
        }
        if (last->Weight <= H->data[kid]->Weight) {
            break; /* 'last' fits here */
        }
        H->data[hole] = H->data[kid];
        hole = kid;
    }
    H->data[hole] = last;

    return smallest;
}

本文探讨哈夫曼编码的原理,展示如何构造哈夫曼树并计算最优编码长度。通过实例,讲解如何判断学生提交的编码是否满足哈夫曼编码的特性,即带权路径长度最小和无歧义编码。
715

被折叠的 条评论
为什么被折叠?



