1)步骤一:统计文本中每个字符(字节)出现的次数,并以出现次数作为权值
2)步骤二:利用得到的权值构建哈夫曼树
3)步骤三:通过哈夫曼树获得哈夫曼编码
4)步骤四:通过哈夫曼编码构建一个哈夫曼编码表(哈希表 HashMap),key = 字符对应的 ASCII 码,value = 对应的哈夫曼编码
5)步骤五:通过哈夫曼编码获得哈夫曼编码字符串(就是将所有的哈夫曼编码字符串拼接)
6)步骤六:按照"每 8 位 01 串切割一次,转换成十进制数后存入 byte[] 数组"的规则得到压缩后的 byte 数组,即压缩完毕。需要注意:如果最后剩下的 01 串不足 8 位,就把剩余的部分全部取出并转换
package 算法and数据结构.数据结构.哈夫曼树;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.*;
import java.util.List;
/**
 * A node of a Huffman tree.
 *
 * <p>A node with no children is treated as a leaf (see {@link #pre()}). Nodes are
 * ordered by {@link #weigh} so that a list of them can be sorted in ascending
 * weight order.
 */
public class Node implements Comparable<Node>{
    /** Left child, or {@code null} when absent. */
    Node left ;
    /** Right child, or {@code null} when absent. */
    Node right;
    /** Code string assigned to this node, or {@code null} if none has been assigned. */
    String coding;
    /** Weight used for ordering; stays {@code null} unless set by a constructor or caller. */
    Integer weigh;
    /** Byte value carried by this node; {@code null} unless set. */
    Byte date;

    /** Creates a node with no children and no code; weight and byte value stay {@code null}. */
    public Node(){
        left = right = null;
        coding = null ;
    }

    /**
     * Creates a node that only carries a weight.
     *
     * @param val the weight of the node
     */
    public Node(Integer val){
        this.weigh =val;
    }

    /**
     * Creates a node carrying a weight and a byte value.
     *
     * @param val  the weight of the node
     * @param date the byte value stored in the node
     */
    public Node(Integer val,Byte date){
        this.weigh =val;
        this.date =date;
    }

    /**
     * Creates a node with the given children. The weight is NOT computed here and
     * stays {@code null} until the caller assigns it.
     *
     * @param left  the left child
     * @param right the right child
     */
    public Node(Node left , Node right){
        this.left = left;
        this.right = right;
    }

    /**
     * Prints this node, then its left subtree, then its right subtree (pre-order)
     * to standard output. Each line shows the code, the weight and whether the
     * node has no children.
     */
    public void pre(){
        System.out.println("coding--> "+coding+" weight--> "+weigh+" isLeaf--> "+(this.left==null&&this.right==null));
        if(this.left != null){
            this.left.pre();
        }
        if(this.right != null){
            this.right.pre();
        }
    }

    /**
     * Orders nodes by ascending weight.
     *
     * @param o the node to compare with
     * @return a negative value, zero or a positive value when this node's weight is
     *         less than, equal to or greater than {@code o}'s weight
     * @throws NullPointerException if either weight is {@code null}
     */
    @Override
    public int compareTo(Node o) {
        // Integer.compare instead of subtraction: "a - b" overflows when the operands
        // are far apart (e.g. Integer.MIN_VALUE - 1) and then reports the wrong order.
        return Integer.compare(this.weigh, o.weigh);
    }
}
/**
 * Huffman-coding demo: counts byte frequencies, builds a Huffman tree from them,
 * derives a byte-to-code table from the tree and packs the encoded bit string
 * into a byte array.
 */
class HuffmanCoding{
    /**
     * Byte-frequency table filled by {@link #getValue(byte[])}. Counts accumulate
     * across calls to {@code getValue}; {@link #creatHuffmanCoding(byte[])} clears
     * the table before counting.
     */
    HashMap<Byte,Integer> map = new HashMap<>();

    /**
     * Demo entry point: builds the tree and code table for a fixed sample sentence,
     * prints them, then prints the packed bytes.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String path = "/Users/lemt/Desktop/CP_Text/CP.text";
        byte[] bytes = null;
        // try-with-resources closes the stream even when available()/read() throws.
        try (FileInputStream inputStream = new FileInputStream(path)) {
            bytes = new byte[inputStream.available()];
            int i = 0;
            int b;
            // Bounded by the buffer length so a growing file cannot overrun the array.
            while (i < bytes.length && (b = inputStream.read()) != -1) {
                bytes[i++] = (byte) b;
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        // NOTE(review): the file contents read above are discarded; the demo always
        // runs on the literal below.
        String s = "i like like like java do dou like java ?";
        bytes = s.getBytes();
        HuffmanCoding h = new HuffmanCoding();
        Node root = h.creatHuffmanCoding(bytes);
        System.out.println("以下为构建出的哈夫曼树:");
        HashMap<Byte,String> codetable = new HashMap<>();
        h.Coding(root,"",codetable);
        root.pre();
        Set<Map.Entry<Byte,String>> ste = codetable.entrySet();
        System.out.println("哈夫曼编码表如下 :");
        System.out.println(ste.toString());
        byte[] D = h.zip(bytes,codetable);
        System.out.println("压缩得到的编码如下 : ");
        System.out.println(Arrays.toString(D));
    }

    /**
     * Encodes {@code bytes} with {@code codetable} and packs the resulting bit
     * string into bytes, eight bits per byte. The bit string is also printed to
     * standard output.
     *
     * <p>A final group shorter than eight bits is parsed as-is, so its leading zeros
     * are not recoverable from the returned array alone (a decoder would presumably
     * need the total bit count as well).
     *
     * @param bytes     the data to encode
     * @param codetable maps each byte value to its string of '0'/'1' characters
     * @return the packed bytes; empty when the encoded bit string is empty
     * @throws IllegalArgumentException if a byte of {@code bytes} has no entry in
     *                                  {@code codetable}
     */
    public byte[] zip(byte[] bytes,HashMap<Byte,String> codetable){
        StringBuilder stringBuilder = new StringBuilder();
        for(byte date : bytes){
            String code = codetable.get(date);
            if (code == null) {
                // Without this check the text "null" would be appended and only fail
                // later inside parseInt with a misleading message.
                throw new IllegalArgumentException("no Huffman code for byte value " + date);
            }
            stringBuilder.append(code);
        }
        System.out.println("哈夫曼编码字符串 :");
        System.out.println(stringBuilder.toString());
        int bitCount = stringBuilder.length();
        // Round up so a trailing partial group still gets its own byte.
        byte[] packed = new byte[(bitCount + 7) / 8];
        for (int index = 0, i = 0; i < bitCount; index++, i += 8) {
            int end = Math.min(i + 8, bitCount);
            packed[index] = (byte) Integer.parseInt(stringBuilder.substring(i, end), 2);
        }
        return packed;
    }

    /**
     * Walks the tree rooted at {@code root}, appending "0" for a left edge and "1"
     * for a right edge, and records the code of every leaf both on the leaf itself
     * and in {@code codetable}.
     *
     * @param root      the subtree to walk; {@code null} is ignored
     * @param coding    the code prefix accumulated so far ("" for the tree root)
     * @param codetable receives one entry per leaf: byte value to code
     */
    public void Coding(Node root,String coding,HashMap<Byte,String> codetable){
        if(root == null)return;
        if(root.left == null && root.right == null){
            // A leaf reached with an empty prefix is the whole tree (only one distinct
            // byte in the input). Give it a one-bit code, otherwise every symbol would
            // encode to nothing and the packed output would be empty.
            root.coding = "".equals(coding) ? "0" : coding;
            codetable.put(root.date,root.coding);
            return;
        }
        Coding(root.left,coding+"0",codetable);
        Coding(root.right, coding + "1",codetable);
    }

    /**
     * Adds the occurrences of every byte in {@code s} to {@link #map}.
     *
     * @param s the bytes to count
     * @return the entry-set view of {@link #map} (byte value to count)
     */
    public Set<Map.Entry<Byte,Integer>> getValue(byte[] s){
        for (byte b : s) {
            Integer count = map.get(b);
            map.put(b, count == null ? 1 : count + 1);
        }
        return map.entrySet();
    }

    /**
     * Turns each (byte, count) entry into a leaf node whose weight is the count.
     *
     * @param k the frequency entries
     * @return a new list with one node per entry, in the iteration order of {@code k}
     */
    public List<Node> getNode(Set<Map.Entry<Byte, Integer>> k){
        List<Node> list = new ArrayList<>();
        for (Map.Entry<Byte,Integer> e : k){
            list.add(new Node(e.getValue(),e.getKey()));
        }
        return list;
    }

    /**
     * Builds the Huffman tree for {@code s}: repeatedly sorts the remaining nodes by
     * weight, removes the two lightest and replaces them with a parent whose weight
     * is their sum.
     *
     * @param s the bytes whose frequencies define the tree
     * @return the root of the tree, or {@code null} when {@code s} is empty
     */
    public Node creatHuffmanCoding(byte[] s){
        // Start from a clean table so counts left over from an earlier call on this
        // instance do not leak into this tree.
        map.clear();
        Set<Map.Entry<Byte, Integer>> coding_number = getValue(s);
        List<Node> HNode = getNode(coding_number);
        if (HNode.isEmpty()) {
            // Nothing to encode; Coding() treats a null root as an empty tree.
            return null;
        }
        while (HNode.size() > 1){
            // Re-sorting (stable) each round keeps tie-breaking, and therefore the
            // generated codes, deterministic.
            Collections.sort(HNode);
            Node left_node = HNode.remove(0);
            Node right_node = HNode.remove(0);
            Node node = new Node(left_node.weigh + right_node.weigh);
            node.left = left_node;
            node.right = right_node;
            HNode.add(node);
        }
        return HNode.get(0);
    }
}