1、赫夫曼编码
2、使用赫夫曼编码生成编码数据
3、将编码数据解压回原文



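压缩思路分析
1、统计原始byte数组中每个byte出现的次数,并为每个byte创建一个带权值的Node
2、根据这些Node构建赫夫曼树
3、遍历赫夫曼树生成赫夫曼编码表(向左为0,向右为1)
4、按照编码表把原始byte数组转换为二进制字符串,再每8位压缩成一个byte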
解压思路分析
1、把压缩后得到的byte转换为二进制字符串
2、把字符串按照指定的编码进行解码(把赫夫曼编码表进行一次调换,因为要反向查询)
3、创建一个集合存放byte
4、通过new String(byte[])构造方法把解码得到的byte数组转回原字符串
package Tree.HuffManTree.HuffManCode;
import java.util.*;
import static java.lang.Integer.*;
/**
 * Huffman compression demo: builds a Huffman tree from a byte array, derives the code
 * table, packs the encoded bit string into bytes, and decodes those bytes again.
 *
 * <p>The code table and the encoded bit length are kept in static fields, so
 * {@code decode} must be called after the {@code huffmanZip} call that produced its
 * input, and the class is not safe for concurrent use.
 */
public class demo02 {
    /**
     * Huffman code table built by the most recent {@code getCodes(Node)} call:
     * byte value -> bit string, where '0' means "go left" and '1' means "go right".
     */
    static Map<Byte, String> huffmanCodes = new HashMap<>();

    /** Retained because it is package-visible; this class no longer reads or writes it. */
    static StringBuilder stringBuilder = new StringBuilder();

    /**
     * Number of bits in the bit string packed by the most recent {@code zip} call.
     * {@code decode} needs it to know how many bits of the last byte are real data.
     */
    private static int encodedBitLength = 0;

    /**
     * Compresses a sample sentence, prints the intermediate structures, then
     * decompresses the result and prints the recovered text.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        java.nio.charset.Charset charset = java.nio.charset.StandardCharsets.UTF_8;
        String str = "i like like like java do you like a java";
        byte[] strbytes = str.getBytes(charset);

        // Compress once; this also fills huffmanCodes, which decode needs below.
        byte[] huffmanCodeTypes = huffmanZip(strbytes);
        System.out.println(Arrays.toString(huffmanCodeTypes));

        // Shown for illustration only: the leaf nodes and the tree built from them.
        // These calls do not touch the static code table.
        List<Node> nodes = getNodes(strbytes);
        System.out.println(nodes);
        Node root = createTree(nodes);
        preOrder(root);

        System.out.println(huffmanCodes);

        byte[] sourceBytes = decode(huffmanCodes, huffmanCodeTypes);
        System.out.println(new String(sourceBytes, charset));
    }

    /**
     * Prints the tree in pre-order, or a notice when the tree is empty.
     *
     * @param root root of the tree, may be {@code null}
     */
    public static void preOrder(Node root) {
        if (root == null) {
            System.out.println("空树,不能遍历");
            return;
        }
        root.preOrder();
    }

    /**
     * Builds a fresh Huffman code table for the tree rooted at {@code root} and stores
     * it in {@link #huffmanCodes}.
     *
     * @param root root of the Huffman tree, may be {@code null}
     * @return the new code table; empty when {@code root} is {@code null}
     */
    private static Map<Byte, String> getCodes(Node root) {
        // A new map per call: codes from a previously processed input must not leak
        // into this table, and tables handed out earlier stay untouched.
        huffmanCodes = new HashMap<>();
        if (root == null) {
            return huffmanCodes;
        }
        if (root.data != null) {
            // Only one distinct byte: the root itself is the leaf. It would otherwise
            // receive no code at all, so give it the one-bit code "0".
            huffmanCodes.put(root.data, "0");
            return huffmanCodes;
        }
        StringBuilder emptyPath = new StringBuilder();
        getCodes(root.left, "0", emptyPath);
        getCodes(root.right, "1", emptyPath);
        return huffmanCodes;
    }

    /**
     * Records the Huffman code of every leaf below {@code node} in {@link #huffmanCodes}.
     *
     * @param node          current node, may be {@code null}
     * @param code          edge taken to reach {@code node}: "0" for left, "1" for right
     * @param stringBuilder path from the root to the parent of {@code node}; not modified
     */
    private static void getCodes(Node node, String code, StringBuilder stringBuilder) {
        if (node == null) {
            return;
        }
        // Copy the prefix so sibling branches do not see each other's path.
        StringBuilder path = new StringBuilder(stringBuilder).append(code);
        if (node.data == null) {
            // Internal node: keep descending.
            getCodes(node.left, "0", path);
            getCodes(node.right, "1", path);
        } else {
            // Leaf: the accumulated path is this byte's code.
            huffmanCodes.put(node.data, path.toString());
        }
    }

    /**
     * Encodes {@code bytes} with the given code table and packs the resulting bit string
     * into bytes, eight bits per byte. The final byte holds the remaining bits in its
     * low-order positions. The total bit count is remembered for {@code decode}.
     *
     * @param bytes        original data
     * @param huffmanCodes code table covering every value in {@code bytes}
     * @return the packed Huffman-encoded bytes
     * @throws IllegalArgumentException if a byte has no entry in the code table
     */
    private static byte[] zip(byte[] bytes, Map<Byte, String> huffmanCodes) {
        StringBuilder bits = new StringBuilder();
        for (byte b : bytes) {
            String code = huffmanCodes.get(b);
            if (code == null) {
                throw new IllegalArgumentException("no Huffman code for byte value " + b);
            }
            bits.append(code);
        }
        encodedBitLength = bits.length();

        byte[] huffmanCodeBytes = new byte[(bits.length() + 7) / 8];
        for (int index = 0; index < huffmanCodeBytes.length; index++) {
            int from = index * 8;
            int to = Math.min(from + 8, bits.length());
            // parseInt yields 0..255; the narrowing cast keeps the low eight bits.
            huffmanCodeBytes[index] = (byte) Integer.parseInt(bits.substring(from, to), 2);
        }
        return huffmanCodeBytes;
    }

    /**
     * Counts how often each byte value occurs and wraps every distinct value in a leaf
     * node weighted by its count.
     *
     * @param bytes original data
     * @return one node per distinct byte value
     */
    public static List<Node> getNodes(byte[] bytes) {
        Map<Byte, Integer> counts = new HashMap<>();
        for (byte b : bytes) {
            Integer seen = counts.get(b);
            counts.put(b, seen == null ? 1 : seen + 1);
        }
        List<Node> nodes = new ArrayList<>(counts.size());
        for (Map.Entry<Byte, Integer> entry : counts.entrySet()) {
            nodes.add(new Node(entry.getKey(), entry.getValue()));
        }
        return nodes;
    }

    /**
     * Builds a Huffman tree by repeatedly merging the two lightest nodes.
     * The passed list is modified: on return it holds only the root.
     *
     * @param nodes leaf nodes to combine
     * @return the root of the tree, or {@code null} when {@code nodes} is empty
     */
    public static Node createTree(List<Node> nodes) {
        if (nodes.isEmpty()) {
            return null;
        }
        while (nodes.size() > 1) {
            Collections.sort(nodes);
            // After sorting, the two lightest nodes sit at the front.
            Node leftNode = nodes.remove(0);
            Node rightNode = nodes.remove(0);
            // The parent is a new internal node (no data) carrying the combined weight.
            Node parent = new Node(null, leftNode.weight + rightNode.weight);
            parent.left = leftNode;
            parent.right = rightNode;
            nodes.add(parent);
        }
        return nodes.get(0);
    }

    /**
     * Runs the whole compression pipeline: count bytes, build the tree, derive the code
     * table, and pack the encoded data.
     *
     * @param bytes original data
     * @return the Huffman-compressed bytes; empty when {@code bytes} is empty
     */
    public static byte[] huffmanZip(byte[] bytes) {
        List<Node> nodes = getNodes(bytes);
        Node huffmanTreeRoot = createTree(nodes);
        Map<Byte, String> codes = getCodes(huffmanTreeRoot);
        return zip(bytes, codes);
    }

    /**
     * Restores the original bytes from Huffman-compressed data produced by the most
     * recent {@code zip} call.
     *
     * @param huffmanCodes     code table that was used for compression
     * @param huffmanCodeBytes compressed bytes
     * @return the original, uncompressed bytes
     * @throws IllegalStateException    if the recorded bit length does not fit the array
     * @throws IllegalArgumentException if the bit string ends in the middle of a code
     */
    private static byte[] decode(Map<Byte, String> huffmanCodes, byte[] huffmanCodeBytes) {
        if (huffmanCodeBytes.length == 0) {
            return new byte[0];
        }
        int lastIndex = huffmanCodeBytes.length - 1;
        int lastByteBits = encodedBitLength - lastIndex * 8;
        if (lastByteBits < 1 || lastByteBits > 8) {
            throw new IllegalStateException("recorded bit length " + encodedBitLength
                    + " does not match " + huffmanCodeBytes.length + " compressed bytes");
        }

        // Step 1: rebuild the exact bit string, keeping leading zeros of every byte.
        StringBuilder bits = new StringBuilder(huffmanCodeBytes.length * 8);
        for (int i = 0; i < lastIndex; i++) {
            bits.append(byteToBitString(huffmanCodeBytes[i], 8));
        }
        bits.append(byteToBitString(huffmanCodeBytes[lastIndex], lastByteBits));

        // Step 2: invert the table so a bit string can be looked up directly.
        Map<String, Byte> bytesByCode = new HashMap<>();
        for (Map.Entry<Byte, String> entry : huffmanCodes.entrySet()) {
            bytesByCode.put(entry.getValue(), entry.getKey());
        }

        // Step 3: extend the candidate code one bit at a time until it matches.
        List<Byte> decoded = new ArrayList<>();
        StringBuilder current = new StringBuilder();
        for (int i = 0; i < bits.length(); i++) {
            current.append(bits.charAt(i));
            Byte value = bytesByCode.get(current.toString());
            if (value != null) {
                decoded.add(value);
                current.setLength(0);
            }
        }
        if (current.length() > 0) {
            throw new IllegalArgumentException(
                    "trailing bits \"" + current + "\" do not match any Huffman code");
        }

        byte[] bytes = new byte[decoded.size()];
        for (int i = 0; i < bytes.length; i++) {
            bytes[i] = decoded.get(i);
        }
        return bytes;
    }

    /**
     * Returns the lowest {@code bitCount} bits of {@code b} as a string of '0'/'1'
     * characters, including leading zeros.
     *
     * @param b        byte to convert
     * @param bitCount number of low-order bits to return, 1 to 8
     * @return exactly {@code bitCount} binary digits
     */
    private static String byteToBitString(byte b, int bitCount) {
        // Masking removes sign extension; OR-ing in bit 8 forces a nine-digit string,
        // so the low eight digits always carry their leading zeros.
        String padded = Integer.toBinaryString((b & 0xFF) | 0x100);
        return padded.substring(padded.length() - bitCount);
    }
}
class Node implements Comparable<Node> {
Byte data;//存放数据本身,如:a=>97
int weight;//权值,表示字符出现的次数
Node left;
Node right;
public Node(Byte data, int weight) {
this.weight = weight;
this.data = data;
}
@Override
public String toString() {
return "Node{" +
"data=" + data +
", weight=" + weight +
'}';
}
@Override
public int compareTo(Node o) {
return this.weight - o.weight;
}
public void preOrder() {
System.out.println(this);
if (this.left != null) {
this.left.preOrder();
}
if (this.right != null) {
this.right.preOrder();
}
}
}