完全二叉树也可以用数组表示的。
/*Compressor.java -- 压缩一个数组内的数据,使用 Huffman 方法
*/
/**
 * Fixed-capacity binary min-heap that lives inside a caller-supplied array.
 *
 * <p>Array layout of the complete binary tree: the root is at index 0; the node at index
 * {@code i} has its left child at {@code 2*i+1}, its right child at {@code 2*i+2} and its
 * parent at {@code (i-1)/2}. Every node compares less than or equal to its children.
 *
 * <p>The heap occupies {@code arr[0, last)}. Because the heap only ever writes inside that
 * range, a caller may push the elements of the backing array itself, in index order, to
 * heapify it in place. {@link #pop()} parks every removed element in the slot that the heap
 * has just vacated, so popping everything leaves the backing array in descending order.
 *
 * @param <T> element type, ordered by its natural ordering
 */
class BinaryHeap<T extends Comparable<T>> {
    /** Backing storage, shared with the caller (not copied). */
    T[] arr;
    /** Maximum number of elements, i.e. the length of the backing array. */
    int cap;
    /** Current heap size; the heap is {@code arr[0, last)}. */
    int last;

    /**
     * Creates an empty heap on top of {@code _arr}. Existing array contents are ignored
     * until they are pushed.
     *
     * @param _arr backing array; its length is the heap capacity
     * @throws NullPointerException if {@code _arr} is null
     */
    public BinaryHeap(T[] _arr) {
        if (_arr == null) {
            throw new NullPointerException("backing array must not be null");
        }
        cap = _arr.length;
        arr = _arr;
        last = 0;
    }

    /** Prints the heap elements in array order on one line, each preceded by a space. */
    void dump() {
        for (int i = 0; i < last; ++i) {
            System.out.print(" " + arr[i]);
        }
        System.out.println();
    }

    /**
     * Inserts {@code val} into the heap.
     *
     * @param val element to insert; must not be null
     * @throws NullPointerException if {@code val} is null ({@link #pop()} uses null to
     *         report an empty heap, so a stored null would be ambiguous)
     * @throws ArrayIndexOutOfBoundsException if the heap already holds {@code cap} elements
     */
    public void push(T val) {
        if (val == null) {
            throw new NullPointerException("cannot push null into the heap");
        }
        if (last >= cap) {
            // Same exception type an unchecked overflow produced, but raised up front
            // and with a message that names the real problem.
            throw new ArrayIndexOutOfBoundsException("heap is full (capacity " + cap + ")");
        }
        // Walk from the new leaf towards the root, moving every larger ancestor down
        // one level, then drop the value into the hole that remains.
        int hole = last;
        while (hole > 0) {
            int parent = (hole - 1) / 2;
            if (val.compareTo(arr[parent]) >= 0) {
                break;
            }
            arr[hole] = arr[parent];
            hole = parent;
        }
        arr[hole] = val;
        ++last;
    }

    /**
     * Removes and returns the smallest element.
     *
     * <p>The returned element is also written to {@code arr[last]} (the slot just past the
     * shrunken heap), which is what makes repeated pops sort the array in place.
     *
     * @return the minimum element, or {@code null} if the heap is empty
     */
    public T pop() {
        if (last < 1) {
            return null;
        }
        int end = last - 1;          // index of the final leaf; heap shrinks to [0, end)
        T ret = arr[0];
        T moved = arr[end];          // element that has to find a new position
        arr[end] = ret;              // park the minimum in the vacated slot (in-place sort)

        // Walk from the root downwards, moving the smaller child up one level while it is
        // smaller than the displaced element.
        int hole = 0;
        int child = 1;
        while (child < end) {
            // Pick the right child if it exists inside the shrunken heap and is smaller.
            if (child + 1 < end && arr[child].compareTo(arr[child + 1]) > 0) {
                child = child + 1;
            }
            if (moved.compareTo(arr[child]) <= 0) {
                break;
            }
            arr[hole] = arr[child];
            hole = child;
            child = 2 * hole + 1;
        }
        arr[hole] = moved;
        --last;
        return ret;
    }
}
/**
 * Demonstration of Huffman coding over a byte array, using {@code BinaryHeap} as the
 * priority queue. The code table is printed to standard output; no compressed stream is
 * produced.
 */
public class Compressor {
    /**
     * Value stored in {@code Node.val} for internal (non-leaf) nodes. It lies outside the
     * 0..255 symbol range, so it cannot be mistaken for a real byte value.
     */
    private static final int INTERNAL_NODE = -1;

    /** Huffman tree node, ordered by occurrence count only. */
    static class Node implements Comparable<Node> {
        /** Symbol value (0..255) for a leaf. */
        int val;
        /** Number of occurrences of the symbol, or the sum over the subtree. */
        int cnt;
        /** Children; both are null for a leaf. */
        Node left, right;

        public Node(int _val, int _cnt, Node _l, Node _r) {
            val = _val;
            cnt = _cnt;
            left = _l;
            right = _r;
        }

        @Override
        public int compareTo(Node b) {
            return Integer.compare(this.cnt, b.cnt);
        }

        @Override
        public String toString() {
            return "" + this.val + ":" + this.cnt;
        }
    }

    /**
     * Prints one {@code symbol:code} line per leaf below {@code root}, appending "0" for
     * every step to a left child and "1" for every step to a right child.
     *
     * @param root subtree to print; a null subtree prints nothing
     * @param code bit string accumulated on the path from the tree root to {@code root}
     */
    static void dumpCode(Node root, String code) {
        if (root == null) {
            return;
        }
        if (root.left == null && root.right == null) {
            System.out.println(root.val + ":" + code);
            return;
        }
        dumpCode(root.left, code + "0");
        dumpCode(root.right, code + "1");
    }

    /**
     * Builds the Huffman tree for {@code data} and prints, to standard output, first the
     * initial heap contents ({@code symbol:count} pairs) and then the code of every symbol.
     *
     * <p>Bytes are treated as unsigned values 0..255. Empty input prints only the (empty)
     * heap line. Input that contains a single distinct symbol assigns that symbol the
     * one-bit code "0".
     *
     * @param data bytes to analyse; must not be null
     */
    public static void huffmanEncoding(byte[] data) {
        // Pass 1: frequency table. Mask to 0..255 because byte is signed in Java and a
        // value >= 0x80 would otherwise become a negative array index.
        int[] cnt = new int[256];
        for (byte b : data) {
            cnt[b & 0xFF]++;
        }

        // One leaf per symbol that actually occurs, packed at the front of the array.
        Node[] arr = new Node[cnt.length];
        int diff = 0;
        for (int sym = 0; sym < cnt.length; ++sym) {
            if (cnt[sym] != 0) {
                arr[diff++] = new Node(sym, cnt[sym], null, null);
            }
        }

        // Heapify in place: the heap uses arr as its storage and only ever occupies the
        // slots before the element currently being pushed.
        BinaryHeap<Node> bh = new BinaryHeap<Node>(arr);
        for (int i = 0; i < diff; ++i) {
            bh.push(arr[i]);
        }
        bh.dump();

        // Repeatedly merge the two least frequent subtrees until one tree remains.
        for (int i = 0; i < diff - 1; ++i) {
            Node x = bh.pop();
            Node y = bh.pop();
            bh.push(new Node(INTERNAL_NODE, x.cnt + y.cnt, x, y));
        }

        Node root = bh.pop();
        if (root == null) {
            return; // empty input: there is nothing to encode
        }
        // A lone leaf has no path from the root, so give it an explicit one-bit code.
        boolean singleSymbol = root.left == null && root.right == null;
        dumpCode(root, singleSymbol ? "0" : "");
    }

    /** Runs the Huffman demo on a small fixed sample. */
    public static void main(String[] arg) {
        byte[] data = {48, 49, 50, 50, 50, 51, 51, 52, 52, 52};
        huffmanEncoding(data);
    }

    /**
     * Alternative entry point that exercises {@code BinaryHeap} alone: heapifies an array
     * in place (dumping the heap after each push), pops everything in ascending order and
     * finally prints the backing array, which the pops have left in descending order.
     */
    public static void main_(String[] arg) {
        Integer[] arr = {5, 10, 50, 11, 20, 52, 55, 25, 22};
        int len = arr.length;
        BinaryHeap<Integer> bh = new BinaryHeap<Integer>(arr);
        for (int i = 0; i < len; ++i) {
            bh.push(arr[i]);
            bh.dump();
        }
        while (len-- > 0) {
            int x = bh.pop();
            System.out.print(x + ", ");
        }
        System.out.println();
        for (int i = 0; i < bh.cap; ++i) {
            System.out.print(bh.arr[i] + ", ");
        }
    }
}
/*
$ javac -encoding UTF-8 Compressor.java && java Compressor
48:1 49:1 50:3 51:2 52:3
51:00
48:010
49:011
50:10
52:11
*/
/*
上面的编码过程有个缺点,就是要读两遍原始数据。
第一遍是为了统计频率建表。对于实时流,无法预先读完全部数据,所以在实际的编码标准(例如 MPEG-1/2、H.264)中,这个表是规范根据普遍情形统计后预先规定死的。运用 Huffman 编码其实是最后一步,在此之前针对变换后的系数块,还做了其他压缩工作:MPEG[1] 有 (run, level),run 是非零值 level 前面零的个数;H.264 有 CAVLC (TotalCoeff, TrailingOnes)[2],在毕厚杰的书里面,把 8x8 Bytes 的块压缩到了 25 bits。
编解码时虽说我们只查表就行了,实际上不这么简单,因为不止一张表要查,并且bit位宽可达16。对于[2], 采用三层分级表,我也没完全理解。
[1]http://www.bretl.com/mpeghtml/huffRL.HTM
[2]ffmpeg/h264_cavlc.c: decode_residual()/get_vlc2()
*/