#include <iostream>
#include <fstream>
#include <cstring>
using namespace std;
// Capacity, in bytes, of the input-text buffer; also the number of slots in a NumCount table.
#define MaxSize 1024
// Status value returned by the functions in this file on success.
#define OK 1
// Status value returned by the functions in this file on failure.
#define ERROR 0
// Return type of the functions in this file; holds OK or ERROR.
typedef int Status;
// One entry of the character-frequency table: a character taken from the
// input text together with the number of times it has been seen.
typedef struct wordcnt {
    // The character being counted. Initialised like `cnt` so that an unused
    // slot never holds an indeterminate value.
    char ch = '\0';
    // Number of occurrences recorded so far.
    int cnt = 0;
} Count;
// Character-frequency table: the first `length` entries of `count` are in
// use, one per distinct character.
struct NumCount {
    Count count[MaxSize];  // backing storage, MaxSize slots
    int length{0};         // number of slots currently in use
};
// A Huffman-tree node. Nodes live in an array and refer to each other by
// index; index 0 is used as the "no node" value for the three links.
struct HTree {
    char data;   // symbol stored in the node
    int weight;  // weight of the node
    int parent;  // index of the parent node, 0 if none
    int lchild;  // index of the left child, 0 if none
    int rchild;  // index of the right child, 0 if none
};
// Alias for a single node.
using HTNode = HTree;
// A tree is handled as a pointer to its node array.
using HuffmanTree = HTree*;
// One entry of the code table: a symbol and the C string holding its code.
struct HCode {
    char data;  // the symbol
    char* str;  // NUL-terminated code string for `data`
};
// A code table is handled as a pointer to an array of entries.
using HuffmanCode = HCode*;
// Reads the first line of "in.txt" into `source` and echoes it to stdout.
//
// source: caller-provided buffer of at least MaxSize bytes. Whenever it is
//         non-null it holds a NUL-terminated string on return (empty when
//         nothing could be read).
// Returns OK when the file was opened, ERROR when `source` is null or the
// file cannot be opened.
Status ReadData(char* source) {
    if (source == nullptr) {
        return ERROR;
    }
    // Keep the buffer a valid empty string on every failure path so callers
    // that ignore the status never read uninitialised memory.
    source[0] = '\0';

    std::ifstream infile("in.txt", std::ios::in);
    if (!infile.is_open()) {
        std::cout << "文件打开失败" << std::endl;
        // Stop here: reading from a stream that never opened yields nothing useful.
        return ERROR;
    }
    std::cout << "文件打开成功" << std::endl;
    std::cout << "Reading..." << std::endl;
    std::cout << "the input file is:" << std::endl;

    // Only the first line is read; at most MaxSize - 1 characters are stored.
    infile.getline(source, MaxSize);
    std::cout << source << std::endl;
    std::cout << std::endl;
    return OK;  // the stream closes itself when it goes out of scope
}
// Tallies every character of the NUL-terminated string `data` into `paraCnt`.
// A character already present in the table has its counter incremented; a new
// character is appended at index `paraCnt->length`. No capacity check is made.
// Always returns OK.
Status WordCount(char* data, NumCount* paraCnt) {
    const int total = std::strlen(data);
    int pos = 0;
    while (pos < total) {
        const char symbol = data[pos++];

        // Linear search of the slots currently in use.
        Count* hit = nullptr;
        for (int slot = 0; slot < paraCnt->length && hit == nullptr; ++slot) {
            if (paraCnt->count[slot].ch == symbol) {
                hit = &paraCnt->count[slot];
            }
        }

        // First sighting: claim the next free slot.
        if (hit == nullptr) {
            hit = &paraCnt->count[paraCnt->length];
            hit->ch = symbol;
            ++paraCnt->length;
        }
        ++hit->cnt;
    }
    return OK;
}
// Prints the number of table entries followed by one line per entry giving
// the character and its count, then a blank line. Always returns OK.
Status Show(NumCount* paraCnt) {
    std::cout << "the length is " << paraCnt->length << std::endl;
    int idx = 0;
    while (idx < paraCnt->length) {
        const Count& entry = paraCnt->count[idx];
        std::cout << "The character " << entry.ch << " appears " << entry.cnt << std::endl;
        ++idx;
    }
    std::cout << std::endl;
    return OK;
}
// Finds, among nodes HT[1..top] that have no parent yet, the two with the
// smallest weights.
//
// HT:  node array, indexed from 1.
// top: highest index to examine.
// s1:  receives the index of the lightest parentless node (0 if none).
// s2:  receives the index of the second lightest (0 if fewer than two exist).
// Ties are resolved in favour of the lower index, as in a two-pass scan.
// Returns OK when two nodes were found, ERROR otherwise or on a null argument.
Status select(HuffmanTree HT, int top, int* s1, int* s2)
{
    if (HT == nullptr || s1 == nullptr || s2 == nullptr) {
        return ERROR;
    }

    // Index 0 means "nothing chosen yet". Using indices rather than a
    // sentinel weight means no weight value is excluded from selection and
    // the outputs are always well defined.
    int first = 0;
    int second = 0;
    for (int i = 1; i <= top; ++i) {
        if (HT[i].parent != 0) {
            continue;  // already merged into a subtree
        }
        if (first == 0 || HT[i].weight < HT[first].weight) {
            second = first;  // the previous best becomes the runner-up
            first = i;
        }
        else if (second == 0 || HT[i].weight < HT[second].weight) {
            second = i;
        }
    }

    *s1 = first;
    *s2 = second;
    return (second != 0) ? OK : ERROR;
}
// Builds a Huffman tree from a character-frequency table.
//
// HT:       receives a new[]-allocated array of 2*length nodes (index 0 is
//           unused, leaves are 1..length, the root is 2*length-1). Set to
//           nullptr on failure. The caller releases it with delete[].
// length:   number of distinct symbols to use from `cntarray`.
// cntarray: frequency table; entry i-1 becomes leaf i.
// Returns OK on success, ERROR when `length` is not in (1, cntarray.length]
// or when two mergeable nodes cannot be found.
Status CreateHuffmanTree(HuffmanTree& HT, int length, NumCount cntarray) {
    HT = nullptr;  // give the caller a defined value on every error path
    if (length <= 1 || length > cntarray.length) return ERROR;

    const int m = length * 2 - 1;  // total node count of a full binary tree with `length` leaves
    HT = new HTNode[m + 1];
    for (int i = 0; i <= m; i++) {
        HT[i].data = '\0';
        HT[i].weight = 0;
        HT[i].parent = 0;
        HT[i].lchild = 0;
        HT[i].rchild = 0;
    }

    // Load the leaves. Without this the merge step below would compare
    // uninitialised weights and the leaves would carry no symbol.
    for (int i = 1; i <= length; i++) {
        HT[i].data = cntarray.count[i - 1].ch;
        HT[i].weight = cntarray.count[i - 1].cnt;
    }

    // Repeatedly merge the two lightest parentless nodes into a new node i.
    for (int i = length + 1; i <= m; i++) {
        int s1 = 0;
        int s2 = 0;
        select(HT, i - 1, &s1, &s2);
        if (s1 == 0 || s2 == 0) {
            // Nothing selectable: refuse to link through index 0.
            delete[] HT;
            HT = nullptr;
            return ERROR;
        }
        HT[s1].parent = i;
        HT[s2].parent = i;
        HT[i].lchild = s1;
        HT[i].rchild = s2;
        HT[i].weight = HT[s1].weight + HT[s2].weight;
    }
    return OK;
}
// Derives the code string of every leaf by walking from the leaf up to the
// root: '0' when the node is its parent's left child, '1' otherwise.
//
// HT:     tree whose leaves are nodes 1..length.
// HC:     receives a new[]-allocated table of length+1 entries (entry 0 is
//         unused and has a null `str`); each `str` is new[]-allocated. Set to
//         nullptr on failure. The caller frees every `str` and the table.
// length: number of leaves.
// Returns OK on success, ERROR on a null tree, a non-positive length, or a
// tree deeper than `length - 1` levels.
Status CreateHuffmanCode(HuffmanTree HT, HuffmanCode& HC, int length) {
    HC = nullptr;
    // length < 1 would make the scratch buffer empty and write before its start.
    if (HT == nullptr || length < 1) return ERROR;

    HC = new HCode[length + 1];
    HC[0].data = '\0';
    HC[0].str = nullptr;

    // Scratch buffer filled from the back; a code has at most length-1 symbols.
    char* cd = new char[length];
    cd[length - 1] = '\0';

    for (int i = 1; i <= length; i++) {
        int start = length - 1;
        int c = i;
        int f = HT[c].parent;
        while (f != 0 && start > 0) {
            --start;
            cd[start] = (HT[f].lchild == c) ? '0' : '1';
            c = f;
            f = HT[c].parent;
        }
        if (f != 0) {
            // The walk did not reach the root within the buffer: the links are
            // not those of a tree with `length` leaves. Undo and report.
            for (int k = 1; k < i; k++) delete[] HC[k].str;
            delete[] HC;
            HC = nullptr;
            delete[] cd;
            return ERROR;
        }
        HC[i].str = new char[length - start];  // code symbols plus the terminator
        HC[i].data = HT[i].data;
        std::strcpy(HC[i].str, &cd[start]);
    }
    delete[] cd;
    return OK;
}
// Writes the Huffman encoding of `data` to "code.txt".
//
// data:   NUL-terminated text to encode.
// HC:     code table; entries 1..length are consulted.
// length: number of entries in the table.
// Characters with no table entry are skipped.
// Returns OK on success, ERROR on a null argument or if the file cannot be
// opened.
Status Encode(char* data, HuffmanCode HC, int length) {
    if (data == nullptr || HC == nullptr) return ERROR;

    std::ofstream outfile("code.txt");
    if (!outfile.is_open()) return ERROR;

    // Walk the string once instead of calling strlen on every iteration.
    for (const char* p = data; *p != '\0'; ++p) {
        for (int j = 1; j <= length; j++) {
            if (*p == HC[j].data) {
                outfile << HC[j].str;
                break;  // one entry per symbol is expected; stop at the first match
            }
        }
    }
    outfile.close();
    std::cout << "the code txt has been written" << std::endl;
    std::cout << std::endl;
    return OK;
}
// Decodes the first line of "code.txt" with the given tree and writes the
// recovered text to "out.txt".
//
// HT:     tree whose root is node 2*length-1 and whose leaves have both child
//         links equal to 0.
// length: number of leaves.
// Characters other than '0' and '1' are ignored.
// Returns OK on success, ERROR on a null tree, length <= 1, a file that cannot
// be opened, or a bit sequence that leads to node 0.
Status Decode(HuffmanTree HT, int length) {
    if (HT == nullptr || length <= 1) return ERROR;

    std::ifstream infile("code.txt");
    if (!infile.is_open()) return ERROR;
    std::ofstream outfile("out.txt");
    if (!outfile.is_open()) return ERROR;

    const int root = 2 * length - 1;
    int node = root;
    char bit = '\0';
    // Stream the bits one at a time: nothing to size, allocate or free.
    while (infile.get(bit) && bit != '\n') {
        if (bit == '0') node = HT[node].lchild;
        else if (bit == '1') node = HT[node].rchild;  // '1' goes right; it used to go left as well
        else continue;

        if (node == 0) return ERROR;  // walked through a missing child link
        if (HT[node].lchild == 0 && HT[node].rchild == 0) {
            outfile << HT[node].data;
            node = root;  // restart at the root for the next symbol
        }
    }
    infile.close();
    outfile.close();
    std::cout << "the output txt has been written" << std::endl;
    std::cout << std::endl;
    return OK;
}
int main(char argc, char** argv) {
char data[MaxSize];
NumCount Cntarray;
ReadData(data);
WordCount(data, &Cntarray);
Show(&Cntarray);
HuffmanTree tree;
CreateHuffmanTree(tree, Cntarray.length, Cntarray);
HuffmanCode code;
CreateHuffmanCode(tree, code, Cntarray.length);
Encode(data, code, Cntarray.length);
cout << "Please view the generated TXT file to check result" << endl;
return 0;
}
// 文本: The last leg of a journey marks the halfway point.
// 在创建文本文件时出现了一个小问题:文本文件在创建时就已经带有后缀名,不需要再手动添加一个后缀名。而我忽视了这一点,导致文件一直打不开。