//*******************************controll.h**********************
//*****************压缩命令和解压命令**********
#ifndef CONTROLL_H
#define CONTROLL_H
/// Front end that drives Huffman compression and decompression of whole files.
class Controll
{
public:
    /// Compresses sourceFilename into geneFilename; the code table goes to codeFilename.
    void compress_file(const char *sourceFilename, const char *codeFilename, const char *geneFilename);

    /// Restores backFilename from the compressed geneFilename using the table in codeFilename.
    void uncompress_file(const char *geneFilename, const char *codeFilename, const char *backFilename);

    /// Loads the code table from codeFilename: HC receives the code strings and
    /// value the matching symbols (both filled from index 1), res[0] the total
    /// number of encoded bits and res[1] the number of table entries.
    void freFromFile(const char *codeFilename, char **HC, char *value, int *res);

    /// Returns a raised to the power b using repeated multiplication.
    int powmy(int a, int b);
};
#endif //controll.h end
//*******************************huffman.h**********************
//*****************叶子结点为n的哈夫曼树共有2n-1个结点**********
#ifndef HUFFMAN_H
#define HUFFMAN_H
/// One node of the Huffman tree. Nodes live in a flat array and refer to each
/// other by array index; -1 means "no such node".
class HuffmanNode {
public:
    char info;                   // symbol stored in the node
    double weight;               // weight of the node
    int parent, lchild, rchild;  // indices of the parent, left child and right child

    /// Creates an unlinked node. The symbol and weight are zeroed as well so
    /// that no member is ever read while still uninitialized.
    HuffmanNode() : info('\0'), weight(0.0), parent(-1), lchild(-1), rchild(-1) {}

    /// Creates a node carrying symbol `data` with weight `wt`; the links
    /// default to -1 (unlinked).
    HuffmanNode(const char &data, const double &wt, const int &pa=-1, const int &lch=-1, const int &rch=-1)
        : info(data), weight(wt), parent(pa), lchild(lch), rchild(rch) {}
}; //class HuffmanNode end
/* Code is declared at namespace scope (rather than inside HuffmanTree) so that the Controll class can use it as well. */
/// Owns the character buffer that holds one Huffman code as a string of
/// '0'/'1' characters.
class Code {
public:
    // A Huffman tree over the 256 possible byte values can be up to 255 levels
    // deep, so the buffer holds 255 code characters plus the terminating NUL.
    Code():length(256) { ptr=new char[length](); }  // () zero-fills: ptr is always a valid C string
    ~Code() { delete[] ptr; }
    char *ptr;         // NUL-terminated code string, `length` bytes of storage
    const int length;  // capacity of ptr in bytes, including the terminator
private:
    // Copying would make two objects delete[] the same buffer, so it is
    // disabled (declared but intentionally not defined).
    Code(const Code &);
    Code &operator=(const Code &);
};
class HuffmanTree {
public:
HuffmanTree(const int &s=100) {
maxSize=(s>100?s:100);
arrayTree=new HuffmanNode[maxSize];
currentSize=0;
codeArray=0;
}
~HuffmanTree() {
delete[] arrayTree;
if (codeArray!=0)
delete[] codeArray;
}
void run(const char*, const char*);
int getSumBytes();//定义待压缩文件的总bytes数
int currentSize; //当前数组大小
HuffmanNode *arrayTree; //哈夫曼结点数组
Code *codeArray; //数组大小为currentSize
int sum_bits;//定义压缩后文件的总bit数
private:
int maxSize; //数组最大值
//int sum_bytes;
void insert(const char&, const double&); //插入结点
void createHuffmanTree(); //创建哈夫曼树
void createHuffmanCode(); //创建哈夫曼编码
void writeCodeToFile(const char *);//将Huffman编码写入到词频表文件当中
int findPosition(const char &) const; //返回字符在arrayTree[]中的位置
int isEqual(const char *s) const; //判断s是否存在于编码系统中,若存在则返回s在编码系统中的位置,否则返回-1
void reverse(char arr[]);
}; //class HuffmanTree end
#endif //huffman.h end
//**************************controll.cpp**********************
#include <iostream>
#include <limits>
#include <cstring>
#include<cstdlib>
#include <cstdio>
#include "controll.h"
#include "huffman.h"
using namespace std;
// Integer power: multiplies `a` together `b` times. Any exponent that is
// zero or negative yields 1.
int Controll::powmy(int a,int b)
{
    int product = 1;
    for (int remaining = b; remaining > 0; --remaining)
        product *= a;
    return product;
}
// Parses the decimal digits up to the next '#' (which is consumed) or up to
// end of file, and returns their value. Non-digit characters are skipped.
static int readCountBeforeHash(FILE *fp)
{
    int number = 0;
    int ch;
    while ((ch = fgetc(fp)) != EOF && ch != '#')
    {
        if (ch >= '0' && ch <= '9')
            number = number * 10 + (ch - '0');
    }
    return number;
}

// Loads the code table written by HuffmanTree::writeCodeToFile.
//
// File layout: one "<code> <symbol>\n" line per entry, followed by
// "#<sum_bits>##<currentSize>#".
//
//   codeFilename  table file to read
//   HC            receives one malloc'ed code string per entry, from index 1
//   value         receives the symbol of each entry, from index 1
//   res           res[0] = total number of encoded bits, res[1] = entry count
//
// res is set to {0, 0} if the file cannot be opened or is malformed, so the
// caller never sees indeterminate values.
void Controll::freFromFile(const char *codeFilename,char **HC,char value[],int res[])
{
    // A byte has only 256 distinct values, so a valid table has at most 256
    // entries (indices 1..256 fit the buffers allocated by uncompress_file)
    // and no code is longer than 255 characters.
    const int kMaxSymbols = 256;
    const int kMaxCodeLen = 255;

    res[0] = 0;
    res[1] = 0;

    FILE *fe = fopen(codeFilename,"rb");
    if(fe == NULL)
    {
        printf("打开文件失败!");
        return;
    }

    char code[kMaxCodeLen + 1];  // scratch buffer for the code being read
    int entries = 0;
    for(;;)
    {
        // fgetc returns int so that EOF can be told apart from byte 0xFF.
        int ch = fgetc(fe);
        if(ch == '#') break;  // start of the trailer

        int len = 0;
        while(ch != ' ' && ch != EOF && len < kMaxCodeLen)
        {
            code[len++] = static_cast<char>(ch);
            ch = fgetc(fe);
        }
        code[len] = '\0';

        const int symbol = (ch == ' ') ? fgetc(fe) : EOF;
        if(symbol == EOF || entries >= kMaxSymbols)
        {
            fprintf(stderr,"code table \"%s\" is malformed\n",codeFilename);
            fclose(fe);
            return;
        }

        char *copy = static_cast<char *>(malloc(len + 1));
        if(copy == NULL)
        {
            fprintf(stderr,"out of memory while reading \"%s\"\n",codeFilename);
            fclose(fe);
            return;
        }
        memcpy(copy,code,len + 1);

        ++entries;
        HC[entries] = copy;
        value[entries] = static_cast<char>(symbol);
        fgetc(fe);  // skip the '\n' that ends the line
    }

    // The '#' that opens the first field was consumed by the loop above.
    res[0] = readCountBeforeHash(fe);
    printf("sum_bits=%d ",res[0]);

    fgetc(fe);  // skip the '#' that opens the second field
    res[1] = readCountBeforeHash(fe);
    if(res[1] > entries)
        res[1] = entries;  // never report more entries than were actually read
    printf("currentSize =%d\n",res[1]);

    fclose(fe);
}
// Compresses sourceFilename into geneFilename.
//
// tree.run() scans the source once to build the Huffman tree and writes the
// code table to codeFilename. The source is then read a second time and the
// code of every byte is appended to a bit stream, most significant bit
// first; a final partial byte is padded with zero bits.
void Controll::compress_file(const char *sourceFilename,const char *codeFilename,const char *geneFilename)
{
    HuffmanTree tree(620);
    tree.run(sourceFilename,codeFilename);

    FILE *fo = fopen(sourceFilename,"rb");
    FILE *fw = fopen(geneFilename,"wb");
    if(fo == NULL || fw == NULL)
    {
        printf("文件打开失败!");
        // Do not leak whichever of the two streams did open.
        if(fo != NULL) fclose(fo);
        if(fw != NULL) fclose(fw);
        return;
    }

    const int sum_bytes = tree.getSumBytes();  // source size, used for the statistics
    const double ratio = sum_bytes > 0
        ? static_cast<double>(tree.sum_bits) / 8 / sum_bytes * 100
        : 0.0;  // empty source: avoid dividing by zero
    printf("before compress sumbytes=%d after compress sumytes=%d\n",sum_bytes,tree.sum_bits/8);
    printf("The compress efficiency is %4.2f%%\n",ratio);

    // Direct byte -> code lookup instead of scanning every symbol per byte.
    const char *codeOf[256] = {0};
    for(int i = 0; i < tree.currentSize; i++)
    {
        const unsigned char symbol = static_cast<unsigned char>(tree.arrayTree[i].info);
        if(codeOf[symbol] == 0)
            codeOf[symbol] = tree.codeArray[i].ptr;
    }

    unsigned int pending = 0;  // bits collected for the byte being assembled
    int pendingBits = 0;       // how many bits of `pending` are in use (0..7)
    int processed = 0;         // source bytes consumed so far
    int byte;
    // Testing the fgetc result directly (rather than feof) keeps the EOF
    // sentinel from being encoded as if it were a real 0xFF byte.
    while((byte = fgetc(fo)) != EOF)
    {
        ++processed;
        const char *code = codeOf[byte];
        if(code != 0)
        {
            // Emit a byte every time eight bits are available, so codes
            // longer than eight bits never leave a backlog behind.
            for(const char *bit = code; *bit != '\0'; ++bit)
            {
                pending = (pending << 1) | static_cast<unsigned int>(*bit - '0');
                if(++pendingBits == 8)
                {
                    fputc(static_cast<int>(pending),fw);
                    pending = 0;
                    pendingBits = 0;
                }
            }
        }
        if(sum_bytes > 0 && processed % 1000 == 0)
            printf("\r%4.2f%%",(double)processed/sum_bytes*100.0);
    }
    printf("\r%4.2f%%",100.0);

    // The last few bits may not fill a whole byte: left-align and zero-pad.
    if(pendingBits > 0)
    {
        pending <<= (8 - pendingBits);
        fputc(static_cast<int>(pending),fw);
    }

    fclose(fw);
    fclose(fo);
}
// Restores backFilename from the compressed geneFilename.
//
// The code table (code strings, symbols, total bit count and entry count) is
// loaded from codeFilename via freFromFile. The compressed stream is then
// read bit by bit, most significant bit first; each bit is appended to the
// candidate code in `str`, and as soon as the candidate equals a table entry
// the matching symbol is written out and the candidate is reset. Only the
// first sum_bits bits are decoded; the rest of the last byte is padding.
void Controll::uncompress_file(const char *geneFilename,const char *codeFilename,const char *backFilename)
{
    const int kTableSlots = 260;
    // calloc leaves unused slots NULL, so cleanup can free every slot.
    char **HC = static_cast<char **>(calloc(kTableSlots,sizeof(char*)));
    if(HC == NULL)
    {
        fprintf(stderr,"out of memory\n");
        return;
    }
    char value[270];
    int res[2] = {0, 0};  // stays {0, 0} if the table cannot be loaded
    freFromFile(codeFilename,HC,value,res);
    const int sum_bits = res[0];
    int currentSize = res[1];
    if(currentSize < 0) currentSize = 0;
    if(currentSize >= kTableSlots) currentSize = kTableSlots - 1;

    FILE *fo = fopen(geneFilename,"rb");
    FILE *fw = fopen(backFilename,"wb");
    if(fo != NULL && fw != NULL)
    {
        char str[1000];           // candidate code collected so far
        int pending = 0;          // length of the candidate in str
        int remaining = sum_bits; // encoded bits still to be decoded
        int bytesRead = 0;
        bool corrupt = false;
        int data;
        while(!corrupt && remaining > 0 && (data = fgetc(fo)) != EOF)
        {
            // The final byte may carry fewer than eight meaningful bits.
            const int usable = remaining < 8 ? remaining : 8;
            remaining -= usable;
            ++bytesRead;
            for(int bit = 7; bit > 7 - usable; --bit)
            {
                str[pending++] = ((data >> bit) & 1) ? '1' : '0';
                str[pending] = '\0';
                for(int j = 1; j <= currentSize; j++)
                {
                    if(HC[j] != NULL && strcmp(str,HC[j]) == 0)
                    {
                        fputc(value[j],fw);
                        pending = 0;
                        break;
                    }
                }
                // A candidate that never matches means the input is corrupt;
                // stop before it overruns str.
                if(pending >= static_cast<int>(sizeof(str)) - 1)
                {
                    corrupt = true;
                    break;
                }
            }
            if(bytesRead % 1000 == 0)
                printf("\r%4.2f%%",(double)(sum_bits-remaining)/sum_bits*100.0);
        }
        if(corrupt)
            fprintf(stderr,"\ncompressed data does not match the code table\n");
        else
            printf("\r%4.2f%%",100.0);
    }
    else
    {
        printf("文件打开失败!");
    }

    // Release everything on every path: both streams and the code table.
    if(fw != NULL) fclose(fw);
    if(fo != NULL) fclose(fo);
    for(int j = 0; j < kTableSlots; j++)
        free(HC[j]);  // free(NULL) is a no-op
    free(HC);
}
//**************************huffman.cpp**********************
#include <iostream>
#include <fstream> //for ofstream ifstream
#include <limits> //for numeric_limits<double>::max()
#include <cstdlib> //for exit()
#include <cstring> //for strlen(), strcpy(), strcmp()
#include <cstdio>
#include "huffman.h"
using namespace std;
// Appends a leaf carrying symbol `data` with weight `wt`.
// A tree with n leaves needs 2n-1 array slots, so the leaf is accepted only
// if the tree that results AFTER the insertion still fits into maxSize;
// otherwise the call is silently ignored.
void HuffmanTree::insert(const char &data, const double &wt) {
    const int leavesAfterInsert = currentSize + 1;
    if (2*leavesAfterInsert-1 > maxSize)
        return;
    arrayTree[currentSize].info=data;
    arrayTree[currentSize].weight=wt;
    currentSize++;
}
/* Writes the code table to outFilename.
 * Layout: one "<code> <symbol>\n" line per leaf, followed by
 * "#<sum_bits>##<currentSize>#". Also stores the total number of encoded
 * bits (sum of weight * code length over all leaves) in sum_bits.
 * Terminates the program if the file cannot be created, in the same way
 * run() does for an unreadable input file. */
void HuffmanTree::writeCodeToFile(const char *outFilename)
{
    // Computed before any I/O so sum_bits is valid whatever happens below.
    sum_bits = 0;
    for(int i=0;i<currentSize;i++)
    {
        sum_bits = static_cast<int>(sum_bits + arrayTree[i].weight*strlen(codeArray[i].ptr));
    }

    FILE *fw = fopen(outFilename,"wb");  // code table file
    if(fw == NULL)
    {
        std::cerr<<"\""<<outFilename<<"\" could not open."<<std::endl;
        std::exit(1);
    }
    for(int i=0;i<currentSize;i++)
    {
        fprintf(fw,"%s %c\n",codeArray[i].ptr,arrayTree[i].info);
    }
    fprintf(fw,"#%d#",sum_bits);
    fprintf(fw,"#%d#",currentSize);
    fclose(fw);
}
// Adds up the weights of the first currentSize nodes (the leaves) and
// returns the total as an int.
int HuffmanTree::getSumBytes()
{
    int total = 0;
    int idx = 0;
    while (idx < currentSize)
    {
        total = static_cast<int>(total + arrayTree[idx].weight);
        ++idx;
    }
    return total;
}
// Reverses the NUL-terminated string `arr` in place by swapping characters
// from both ends towards the middle.
void HuffmanTree::reverse(char arr[]) {
    char *front = arr;
    char *back = arr + strlen(arr);  // one past the last character
    while (front < back) {
        --back;
        const char held = *front;
        *front = *back;
        *back = held;
        ++front;
    }
}
// Returns the index of the first of the currentSize leading nodes in
// arrayTree whose symbol equals ch, or -1 when there is none.
int HuffmanTree::findPosition(const char &ch) const {
    int pos = 0;
    while (pos < currentSize && arrayTree[pos].info != ch)
        ++pos;
    return pos < currentSize ? pos : -1;
}
// Looks for the code string s in codeArray and returns the index of the
// first entry equal to it, or -1 when no entry matches.
int HuffmanTree::isEqual(const char *s) const {
    int found = -1;
    for (int idx = 0; found == -1 && idx < currentSize; ++idx) {
        // strcmp alone decides equality; strings of different length never compare equal.
        if (strcmp(s, codeArray[idx].ptr) == 0)
            found = idx;
    }
    return found;
}
void HuffmanTree::createHuffmanTree() { //构造huffmanTree
int i=currentSize;
int k;
double wt1, wt2;
int lnode = 0, rnode = 0;
while (i < 2*currentSize-1) {
wt1=wt2=numeric_limits<double>::max();
k=0;
while (k < i) {
if (arrayTree[k].parent==-1) {
if (arrayTree[k].weight<wt1) {
wt2=wt1;
rnode=lnode;
wt1=arrayTree[k].weight;
lnode=k;
}
else if (arrayTree[k].weight<wt2) {
wt2=arrayTree[k].weight;
rnode=k;
}
}
k++;
}
arrayTree[i].weight = arrayTree[lnode].weight+arrayTree[rnode].weight;
arrayTree[i].lchild=lnode;
arrayTree[i].rchild=rnode;
arrayTree[lnode].parent=arrayTree[rnode].parent=i;
i++;
}
}
void HuffmanTree::createHuffmanCode() { //构造huffmanCode,即哈夫曼编码
codeArray=new Code[currentSize];
int i=0;
int k, n, m;
while (i < currentSize) {
k = arrayTree[i].parent;
n=0;
m=i;
while (k!=-1 && k<currentSize*2-1) {
if (arrayTree[k].lchild==m)
codeArray[i].ptr[n++]='0';
else if (arrayTree[k].rchild==m)
codeArray[i].ptr[n++]='1';
m=k;
k=arrayTree[m].parent;
}
codeArray[i].ptr[n]='\0';
reverse(codeArray[i].ptr); //反转字符串,使之变成正确的编码
i++;
}
}
// Builds the Huffman tree and code table for inFilename and writes the table
// to outFilename. Terminates the program if inFilename cannot be opened.
void HuffmanTree::run(const char *inFilename,const char *outFilename) {
    FILE *fo = fopen(inFilename,"rb");  // file to be compressed
    if(fo==NULL)
    {
        std::cerr<<"\""<<inFilename<<"\" could not open."<<std::endl;
        std::exit(1);
    }
    // Count how often each byte value occurs. The fgetc result is kept as an
    // int and compared with EOF, so the loop ends on end of file and on a
    // read error alike (feof alone never becomes true after a read error).
    int byte;
    while((byte = fgetc(fo)) != EOF)
    {
        const char ch = static_cast<char>(byte);
        const int pos = findPosition(ch);
        if (pos != -1)
            arrayTree[pos].weight++;
        else
            insert(ch, 1);
    }
    fclose(fo);
    std::cout<<std::endl;
    createHuffmanTree();           // build the tree
    createHuffmanCode();           // derive a code for every counted symbol
    writeCodeToFile(outFilename);  // store the code table
}
//huffman.cpp end
//*****************************main.cpp*************************
#include "huffman.h"
#include "controll.h"
#include <cstdio>
#include <cstring>
// Prints the command-line synopsis to stderr.
static void printUsage(const char *program)
{
    std::fprintf(stderr,
                 "usage: %s -c <source> <compressed>\n"
                 "       %s -u <compressed> <restored>\n",
                 program, program);
}

// Entry point: "-c" compresses argv[2] into argv[3], "-u" restores argv[3]
// from the compressed argv[2]. Returns 0 on a recognised command and 1 when
// the arguments are missing or the option is unknown.
int main(int argc,char **argv) {
    const char *codeFileName ="C:\\out1.txt";  // code table file (fixed path)
    const char *program = (argc > 0 && argv[0] != 0) ? argv[0] : "huffman";
    // argv[1..3] are all required; reading them unchecked would be undefined
    // behaviour when fewer arguments are given.
    if(argc < 4)
    {
        printUsage(program);
        return 1;
    }
    Controll controller;
    if(strcmp("-c",argv[1])==0)
    {
        // compress
        controller.compress_file(argv[2],codeFileName,argv[3]);
    }
    else if(strcmp("-u",argv[1])==0)
    {
        // decompress
        controller.uncompress_file(argv[2],codeFileName,argv[3]);
    }
    else
    {
        printUsage(program);
        return 1;
    }
    return 0;
} //main.cpp end