理解与实现赫夫曼编码-优快云博客

本文链接：https://blog.youkuaiyun.com/touteng55/article/details/115260738

赫夫曼编码是基于频率的变长编码方法，常用于数据压缩。它通过构建赫夫曼树来生成无前缀编码，确保高频字符编码短，低频字符编码长。在Java中，实现赫夫曼编码包括统计字符频率、构建赫夫曼树及生成编码。该过程涉及将字符串转为字节、构建赫夫曼树节点、遍历树生成编码等步骤。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

赫夫曼编码

赫夫曼编码也翻译为哈夫曼编码(Huffman Coding)，又称霍夫曼编码，是一种编码方式, 属于一种程序算法。
赫夫曼编码是赫夫曼树在电讯通信中的经典应用之一。
赫夫曼编码广泛地用于数据文件压缩。其压缩率通常在 20%～90%之间。
赫夫曼码是可变字长编码(VLC)的一种。Huffman 于 1952 年提出一种编码方法，称之为最佳编码。

定长编码：直接把每个字符翻译成对应的 ASCII 编码，然后再把 ASCII 编码翻译成二进制，每个字符占用的位数固定。
在这里插入图片描述
变长编码：先统计各字符出现的次数，再根据次数进行编码，出现次数越多的字符，编码越短。
前缀编码：指每个字符的编码都不能是其他字符编码的前缀，这就保证了解码结果唯一，不会出现同一串编码对应多种译法的情况。

赫夫曼编码的思路：
赫夫曼编码是一种前缀编码。它的思路是：首先统计要传递的各字符的出现次数，然后根据次数创建赫夫曼树，再根据赫夫曼树给每一个字符编码，比如指定向左是0、向右是1，把从根节点到该字符的路径作为它的编码。这样，由于赫夫曼树的特点，出现次数多的字符离根节点更近，这就保证了它的编码较短；而每个字符都位于叶子节点，这就保证了任何一个字符的编码都不会是另一个字符编码的前缀。

代码

将一句英文句子转为赫夫曼编码主要分为几步：

将字符串转化为bytes数组。
统计各字符的出现次数。使用map，循环bytes数组，字节值作为key，出现的次数作为value。
将得到的map转化为list，这样便于创建赫夫曼树。需要创建好树节点，然后遍历map，把数据放到树节点里，再把树节点放到list里。
创建赫夫曼树。根据赫夫曼树的规则创建树，新建的根节点值为null，最后返回树的根节点。
根据树的路径，得到编码的map。左0右1，根据StringBuilder拼接路径，寻找叶子节点。
根据编码表map，先获得编码字符串，再将字符串按每8位一组转为byte数组。

package hufffmanTree;

import java.util.*;

/**
 * 赫夫曼编码压缩
 */
public class HuffmanCodeDemo {
    /** Code table filled by {@link #getCode(TreeNode)}: byte value -> its code as a string of '0'/'1' characters. */
    static Map<Byte,String> codeMap=new HashMap<>();
    /** Empty starting prefix handed to the recursive tree walk; this class never appends to it. */
    static StringBuilder stringBuilder=new StringBuilder();

    /**
     * Demo entry point: encodes a fixed sentence and prints the raw bytes followed by the encoded bytes.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Original text
        String str="i like like like java do you like a java";
        // Use an explicit charset so the bytes do not depend on the platform default
        byte[] bytes=str.getBytes(java.nio.charset.StandardCharsets.UTF_8);
        System.out.println(Arrays.toString(bytes));
        // Huffman-encoded result
        byte[] code = huffmanCode(bytes);
        System.out.println(Arrays.toString(code));
    }

    /**
     * Runs the whole pipeline: count frequencies, build the tree, derive the code table
     * (stored in {@link #codeMap}) and pack the encoded bit string into bytes.
     *
     * @param bytes the data to encode; must not be null
     * @return the packed encoded bytes; an empty array when {@code bytes} is empty
     */
    public static byte[] huffmanCode(byte[] bytes){
        // Nothing to encode: there is no tree to build, so leave an empty code table behind
        if(bytes.length==0){
            codeMap.clear();
            return new byte[0];
        }
        // Occurrence count of every byte value
        Map<Byte, Integer> map = getMap(bytes);
        // One leaf node per distinct byte value
        List<TreeNode> nodes = getList(map);
        // Root of the Huffman tree
        TreeNode root=getTree(nodes);
        // Fill codeMap from the tree
        getCode(root);
        // Translate the input through the code table and pack it
        return getBytes(bytes, codeMap);
    }

    /**
     * Concatenates the code of every input byte into one bit string and packs it 8 bits per byte.
     * The last group may hold fewer than 8 bits; it is parsed as-is, and its bit length is not
     * recorded in the returned array.
     *
     * @param bytes   the original data
     * @param codeMap code table mapping each byte to a string of '0'/'1' characters; every byte
     *                in {@code bytes} is expected to have an entry (a missing entry is appended
     *                as the text "null", which then fails binary parsing)
     * @return the packed bytes, {@code ceil(bits / 8)} in length
     */
    public static byte[] getBytes(byte[] bytes,Map<Byte,String> codeMap){
        // StringBuilder avoids re-copying the whole bit string on every append
        StringBuilder code=new StringBuilder();
        for(byte b:bytes){
            code.append(codeMap.get(b));
        }
        int total=code.length();
        // 8 bits per byte, rounded up
        byte[] codeBytes=new byte[(total+7)/8];
        int index=0;
        for (int i = 0; i < total; i+=8) {
            String group=code.substring(i,Math.min(i+8,total));
            // parseInt yields 0..255; the cast keeps the low 8 bits
            codeBytes[index++]=(byte)Integer.parseInt(group,2);
        }
        return codeBytes;
    }

    /**
     * Rebuilds {@link #codeMap} from the given tree. Entries from a previous call are discarded first.
     * A tree consisting of a single leaf gets the one-bit code "0", so that its symbol still
     * produces output bits.
     *
     * @param root root of the Huffman tree; when null a message is printed and the table stays empty
     */
    public static void getCode(TreeNode root){
        // Drop codes left over from an earlier tree
        codeMap.clear();
        if(root==null){
            System.out.println("空树");
            return;
        }
        // Root is itself a leaf (only one distinct byte): an empty path would encode to nothing
        if(root.value!=null){
            codeMap.put(root.value,"0");
            return;
        }
        getCode(root,"",stringBuilder);
    }

    /**
     * Recursive walk that records the path to every leaf in {@link #codeMap}.
     * The passed builder is copied, never modified.
     *
     * @param node current node; null is ignored
     * @param path edge taken to reach {@code node}: "0" for left, "1" for right, "" for the root
     * @param stringBuilder path accumulated down to the parent of {@code node}
     */
    public static void getCode(TreeNode node,String path,StringBuilder stringBuilder){
        if(node==null){
            return;
        }
        // Work on a copy so sibling branches do not see each other's suffix
        StringBuilder current=new StringBuilder(stringBuilder);
        current.append(path);
        if(node.value==null){
            // Internal node: keep descending
            getCode(node.left,"0",current);
            getCode(node.right,"1",current);
        }else {
            // Leaf: the accumulated path is this byte's code
            codeMap.put(node.value,current.toString());
        }
    }

    /**
     * Builds the Huffman tree by repeatedly merging the two lowest-count nodes.
     * The list is sorted with the nodes' natural ordering and is modified in place;
     * on return it contains only the root.
     *
     * @param nodes leaf nodes; must contain at least one element
     * @return the root of the tree
     * @throws IndexOutOfBoundsException if {@code nodes} is empty
     */
    public static TreeNode getTree(List<TreeNode> nodes){
        Collections.sort(nodes);
        while (nodes.size()>1){
            // After sorting, the two smallest nodes sit at the front; remove them by position
            TreeNode leftNode = nodes.remove(0);
            TreeNode rightNode = nodes.remove(0);
            // Internal nodes carry no byte value, only the combined count
            TreeNode root = new TreeNode(null,leftNode.count + rightNode.count);
            root.left=leftNode;
            root.right=rightNode;
            nodes.add(root);
            Collections.sort(nodes);
        }
        return nodes.get(0);
    }

    /**
     * Turns the frequency map into a list holding one tree node per entry.
     *
     * @param map byte value -> occurrence count
     * @return a new mutable list of nodes, in the map's iteration order
     */
    public static List<TreeNode> getList(Map<Byte,Integer> map){
        List<TreeNode> nodes=new ArrayList<>(map.size());
        for(Map.Entry<Byte,Integer> entry:map.entrySet()){
            nodes.add(new TreeNode(entry.getKey(),entry.getValue()));
        }
        return nodes;
    }

    /**
     * Counts how many times each byte value occurs.
     *
     * @param bytes the data to scan
     * @return a new map from byte value to occurrence count
     */
    public static Map<Byte,Integer> getMap(byte[] bytes){
        Map<Byte,Integer> map=new HashMap<>();
        for(byte b:bytes){
            Integer seen=map.get(b);
            map.put(b,seen==null?1:seen+1);
        }
        return map;
    }
}
//树节点
/**
 * Node of the Huffman tree. The encoder in this file treats a node whose {@code value}
 * is null as an internal node and any other node as a leaf.
 */
class TreeNode implements Comparable<TreeNode>{
    /** Byte stored in this node, or null when the node carries no byte. */
    Byte value;
    /** Weight of the node, used for ordering. */
    Integer count;
    /** Left child, or null. */
    TreeNode left;
    /** Right child, or null. */
    TreeNode right;

    /**
     * Creates a node without children.
     *
     * @param value the byte stored in the node, may be null
     * @param count the node's weight
     */
    public TreeNode(Byte value, Integer count) {
        this.value = value;
        this.count = count;
    }

    @Override
    public String toString() {
        return "TreeNode{" +
                "value=" + value +
                ", count=" + count +
                '}';
    }

    /**
     * Orders nodes by ascending {@code count}.
     *
     * @param o the node to compare with
     * @return negative, zero or positive when this count is smaller than, equal to or larger than the other
     */
    @Override
    public int compareTo(TreeNode o) {
        // Integer.compare cannot overflow, unlike subtracting the two counts
        return Integer.compare(this.count, o.count);
    }
}