应工作需要,研究了下压缩算法思想,分享如下:
-
Compression ratio = compressedSize / origSize
-
重复现象是具有局部性的,这是压缩算法思想的基础
-
基础操作:
8bit转byte:
// Parse a string of binary digits (radix 2) into an Integer.
// NOTE(review): "8bitStr" stands in for the 8-character bit string; a Java identifier
// cannot begin with a digit, so it has to be renamed before this line will compile.
Integer i = Integer.parseInt(8bitStr, 2);
// byte 与 int 的相互转换
/**
 * Narrows an {@code int} to a {@code byte} by keeping only its low-order 8 bits.
 *
 * @param x value to narrow; every bit above the lowest 8 is discarded
 * @return the low 8 bits of {@code x}, interpreted as a signed byte
 */
public static byte intToByte(int x) {
    final byte lowEightBits = (byte) x;
    return lowEightBits;
}
byte中取bit:
public static String getBit(byte by, int n) {
StringBuffer sb = new StringBuffer();
for (int i = n - 1; i >= 0; i–) {
sb.append((by >> i) & 0x1);
}
return sb.toString();
} -
io流
// Open the input file f through a DataInputStream.
dis = new DataInputStream(new FileInputStream(f)); // instantiate the data input stream object
// Read up to fileSize bytes into orig and encode after every read, until read() reports
// end of stream (-1).
// NOTE(review): read() may deliver fewer than fileSize bytes per call, yet encode(orig)
// is always handed the whole array — confirm this is intended.
while (-1 != (len = dis.read(orig, 0, fileSize))) {
encode(orig);
}
OR
// Read the whole file into memory. A single read() call may return fewer bytes than
// requested, so keep reading until the buffer is full or end of stream is reached.
byte[] buffer = new byte[(int) fileSize];
int offset = 0;
int numRead = 0;
while (offset < buffer.length
        && (numRead = fi.read(buffer, offset, buffer.length - offset)) >= 0) {
    offset += numRead;
}
// Make sure all of the data was read.
if (offset != buffer.length) {
    fi.close();
    throw new IOException("Could not completely read file " + file.getName());
}
-
LZ77算法代码:
/**
 * Encodes {@code orig} token by token and writes the result via {@code writeFile()}.
 *
 * <p>Each loop iteration advances the window, asks {@code findMaxMatch} for the best
 * match ({@code info[0]} = offset, {@code info[1]} = length) and emits one token:
 * <ul>
 *   <li>no match: flag bit {@code "1"} followed by the next raw byte as 8 bits;</li>
 *   <li>match: flag bits {@code "01"}, then the offset in 6 bits and the length in 3 bits.</li>
 * </ul>
 * The token is placed in {@code nextEncode} and flushed by {@code commit()}.
 *
 * @param orig the uncompressed input bytes
 */
public void encode(byte[] orig) {
    int origLength = orig.length;
    while (!isLookHeadEmpty(origLength)) {
        winMove();
        int[] info = findMaxMatch(orig);
        // A zero-length match still consumes one byte (the literal).
        setMoveSteps(info[1]);
        if (info[1] == 0) {
            byte next = orig[windSpiltIndex];
            // Literals are written with an explicit 8-bit width, which is what the
            // decoder reads back after a "1" flag.
            nextEncode = "1" + getBit(next, 8);
        } else {
            // Only the low 6 / 3 bits of offset / length are kept by getBit.
            nextEncode = "01" + getBit(intToByte(info[0]), 6) + getBit(intToByte(info[1]), 3);
        }
        commit();
    }
    writeFile();
}
/**
 * Tells whether applying the pending {@code move} to the split index would step past
 * the last valid index of the input.
 *
 * @param length total length of the input
 * @return {@code true} when {@code windSpiltIndex + move} exceeds {@code length - 1}
 */
private boolean isLookHeadEmpty(int length) {
    return windSpiltIndex + move > length - 1;
}
/** Advances the window split index by the current {@code move} step. */
private void winMove() {
    windSpiltIndex = windSpiltIndex + move;
}
/**
 * Looks for the longest run starting at {@code windSpiltIndex} that also occurs in the
 * search window, growing the candidate one byte at a time until a lookup fails.
 *
 * <p>Side effects: prints debug output for every candidate and stores the winning
 * length in {@code move}.
 *
 * @param orig the input bytes
 * @return a two-element array: {@code [0]} the match offset, {@code [1]} the match
 *         length; both are 0 when no candidate matched
 */
public int[] findMaxMatch(byte[] orig) {
    int bestOffset = 0;
    int bestLength = 0;
    System.out.println(windSpiltIndex);
    // candidateEnd is the exclusive end of the look-ahead slice being tested.
    int candidateEnd = windSpiltIndex + minMatchNum;
    while (candidateEnd <= minEdge(orig.length)) {
        System.out.print("str1: ");
        print(orig, getWinStartIndex(), windSpiltIndex - 1);
        System.out.print("str2: ");
        print(orig, windSpiltIndex, candidateEnd - 1);
        // NOTE(review): lastIndexOf is defined elsewhere; it is treated here as returning
        // a position inside the search window, or a negative value on a miss — confirm.
        int found = lastIndexOf(orig, getWinStartIndex(), windSpiltIndex - getWinStartIndex(), orig,
                windSpiltIndex, candidateEnd - windSpiltIndex, windSpiltIndex);
        System.out.println("tmp = " + found);
        if (found <= -1) {
            // A longer candidate cannot match once a shorter one has failed.
            break;
        }
        bestOffset = windSpiltIndex - getWinStartIndex() - found - 1;
        bestLength = candidateEnd - windSpiltIndex;
        candidateEnd++;
    }
    move = bestLength;
    return new int[] { bestOffset, bestLength };
}
/**
 * Computes where the search window begins.
 *
 * @return {@code windSpiltIndex - searchSize}, or 0 when that difference is below 1
 */
private int getWinStartIndex() {
    final int start = windSpiltIndex - searchSize;
    return (start < 1) ? 0 : start;
}
/**
 * Computes the end of the look-ahead buffer, capped at the input length.
 *
 * @param length total length of the input
 * @return {@code windSpiltIndex + lookAheadBufferSize}, or {@code length} when that sum
 *         reaches or exceeds it
 */
private int minEdge(int length) {
    final int lookAheadEnd = windSpiltIndex + lookAheadBufferSize;
    return (lookAheadEnd >= length) ? length : lookAheadEnd;
}
/**
 * Formats the {@code n} low-order bits of {@code by} as text, highest bit first.
 *
 * @param by source byte
 * @param n  how many low-order bits to output; nothing is emitted when {@code n <= 0}
 * @return the selected bits as a string of {@code '0'} and {@code '1'} characters
 */
public static String getBit(byte by, int n) {
    final StringBuilder out = new StringBuilder();
    int shift = n - 1;
    while (shift >= 0) {
        out.append((by >> shift) & 0x1);
        shift--;
    }
    return out.toString();
}
/**
 * Appends {@code nextEncode} to the leftover bits in {@code restBuf}, hands every
 * complete group of 8 bits to {@code putToArray}, and keeps the remaining bits
 * (fewer than 8) in {@code restBuf} for the next call.
 */
private void commit() {
    final String pending = restBuf + nextEncode;
    System.out.println("tmp length : " + pending.length());
    final int flushedBits = (pending.length() / 8) * 8;
    for (int start = 0; start < flushedBits; start += 8) {
        putToArray(pending.substring(start, start + 8));
    }
    restBuf = pending.substring(flushedBits);
}
/**
 * Converts a binary-digit string to a byte, logs it, and stores it at the next free
 * slot of {@code byteArr}, advancing {@code cur}.
 *
 * @param tmp string of binary digits to pack (parsed with radix 2)
 */
private void putToArray(String tmp) {
    System.out.print("byte : " + tmp);
    final int value = Integer.parseInt(tmp, 2);
    System.out.print(" " + value);
    final byte packed = intToByte(value);
    System.out.print(" " + packed + " \n");
    byteArr[cur++] = packed;
}
/**
 * Sets how far the window advances next: the given match length, or one byte when the
 * length is 0.
 *
 * @param i match length reported for the current position
 */
private void setMoveSteps(int i) {
    move = (i == 0) ? 1 : i;
}
/**
 * Prints the collected statistics, flushes the leftover bits as one final zero-padded
 * byte, and writes the first {@code cur} bytes of {@code byteArr} to the file named by
 * {@code lz77encodeOutPath}. The number of bytes written is recorded in {@code avgList}.
 *
 * <p>Failures while opening or writing the file are printed and not rethrown.
 */
private void writeFile() {
    System.out.println();
    System.out.println("pair sum :" + pair);
    System.out.println("small :" + small);
    System.out.println("pairlength :" + pairLength);
    System.out.println("rest length: " + restBuf.length());
    for (int i = 0; i < pairArr.length; i++) {
        System.out.println(i + ":" + pairArr[i]);
    }
    // Pad the leftover bits with zeros up to a full byte. An empty restBuf yields a whole
    // zero byte.
    // NOTE(review): the trailing zero bits look like the decoder's end marker — confirm.
    int tmp = 8 - restBuf.length();
    for (int i = 0; i < tmp; i++) {
        restBuf += "0";
    }
    putToArray(restBuf);
    String fileName = lz77encodeOutPath;
    // FileOutputStream creates the file if it does not exist and replaces its contents
    // otherwise. try-with-resources closes the stream even when write() throws.
    try (DataOutputStream out = new DataOutputStream(new FileOutputStream(fileName))) {
        out.write(byteArr, 0, cur);
        avgList.add(cur);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
- 解码算法
/**
 * Decodes the bit stream token by token into {@code decoded}, then calls
 * {@code writeFile()}.
 *
 * <p>Token layout as read here: a first flag bit of "1" introduces an 8-bit literal;
 * otherwise a second flag bit of "1" introduces a 6-bit offset followed by a 3-bit
 * length; any other second bit ends decoding.
 *
 * <p>NOTE(review): getBits, bit2byte, getEdge, addElement and Out are defined elsewhere;
 * getBits is assumed to consume bits from the input on every call — confirm.
 */
public void decode() {
Out();
int offset = 0;
int length = 0;
while (true) {
// First flag bit: "1" means a literal byte follows.
if (getBits(1).equals("1")) {
String tmp = getBits(8);
System.out.println("out: " + tmp);
decoded[outCur++] = bit2byte(tmp);
addElement(tmp);
// Second flag bit (read only when the first was not "1"): "1" means an offset/length pair.
} else if (getBits(1).equals("1")) {
offset = Integer.valueOf(bit2byte(getBits(6)));
length = Integer.valueOf(bit2byte(getBits(3)));
// A 3-bit field cannot hold 8, so a stored 0 is read back as length 8.
if (length == 0) {
length = 8;
}
System.out.println(offset);
// Turn the stored offset into a byte index into winStr, which holds 8 characters per byte.
offset = getEdge() / 8 - offset - 1;
System.out.println("offset: " + offset);
System.out.println("length: " + length);
String tmp = winStr.substring(offset * 8, offset * 8 + length * 8);
System.out.println("out: " + tmp);
// Copy the matched bytes one at a time from the window text to the output.
for (int i = 0; i < length; i++) {
String ttmp = winStr.substring((offset + i) * 8, (offset + i + 1) * 8);
decoded[outCur++] = bit2byte(ttmp);
}
addElement(tmp);
} else {
// Neither flag bit was "1": treated as the end of the stream.
System.out.println("end of file");
break;
}
}
writeFile();
}