#include <iostream>
#include <fstream>
#include <queue>
#include <unordered_map>
#include <string>
#include <vector>
#include <bitset>
#include <algorithm>
#include <iomanip>
#include <sstream>
#include <stdexcept>
#include <limits>
#include <locale>
#include <codecvt>
#include <windows.h>
#include <cstdio>
#if defined(_MSC_VER) && _MSC_VER >= 1900
// MSVC兼容
#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
#endif
// Custom std::streambuf that wraps a C FILE* (does not rely on GNU extensions).
//
// The object takes ownership of the FILE*: the destructor flushes pending
// output and calls fclose(). Copying is disabled so the handle cannot be
// closed twice.
//
// Limitations:
//  * Positions are handled with std::fseek/std::ftell, so offsets are limited
//    to the range of `long` on the target platform.
//  * The get and put areas are independent; seeking flushes pending output and
//    discards buffered input so both directions agree on the file position.
class FileBuf : public std::streambuf {
public:
    // fp   - open C stream to wrap (may be null; every operation then fails).
    // mode - open mode of the stream; only the in/out bits are consulted.
    FileBuf(FILE* fp, std::ios::openmode mode) : fp_(fp), mode_(mode) {
        // Empty get area: the first read triggers underflow().
        setg(in_buf_, in_buf_, in_buf_);
        resetPutArea();
    }

    FileBuf(const FileBuf&) = delete;
    FileBuf& operator=(const FileBuf&) = delete;

    ~FileBuf() override {
        FileBuf::sync();  // make sure buffered output reaches the file
        if (fp_) fclose(fp_);
    }

protected:
    static const size_t BUF_SIZE = 4096;
    char in_buf_[BUF_SIZE];   // input buffer
    char out_buf_[BUF_SIZE];  // output buffer

    // Refills the get area from the file. Returns the next character without
    // consuming it, or EOF when nothing more can be read.
    int underflow() override {
        if (!fp_ || !(mode_ & std::ios::in)) return EOF;
        if (gptr() >= egptr()) {
            const size_t bytes = fread(in_buf_, 1, BUF_SIZE, fp_);
            if (bytes == 0) return EOF;
            setg(in_buf_, in_buf_, in_buf_ + bytes);
        }
        return static_cast<unsigned char>(*gptr());
    }

    // Called when the put area is full. Stores `c` in the reserved spare slot
    // and flushes. Returns EOF if the flush fails.
    int overflow(int c) override {
        if (!fp_ || !(mode_ & std::ios::out)) return EOF;
        if (c == EOF) {
            return sync() == 0 ? 0 : EOF;
        }
        *pptr() = static_cast<char>(c);
        pbump(1);
        if (pptr() >= epptr() && sync() != 0) {
            return EOF;
        }
        return c;
    }

    // Writes the put area to the file and flushes the C stream.
    // Returns 0 on success (or when the buffer is not writable), -1 on error.
    int sync() override {
        if (!fp_ || !(mode_ & std::ios::out)) return 0;
        // Measure the pending byte count BEFORE resetting the put area;
        // afterwards pptr() == pbase() and the comparison would be meaningless.
        const size_t pending = static_cast<size_t>(pptr() - pbase());
        if (pending > 0) {
            const size_t written = fwrite(pbase(), 1, pending, fp_);
            resetPutArea();
            if (written != pending) return -1;
        }
        return fflush(fp_) == 0 ? 0 : -1;
    }

    // Repositions the underlying file. Needed for tellg()/seekg()/tellp().
    // Returns the new absolute position, or pos_type(-1) on failure.
    pos_type seekoff(off_type off, std::ios_base::seekdir dir,
                     std::ios_base::openmode = std::ios_base::in | std::ios_base::out) override {
        const pos_type failure = pos_type(off_type(-1));
        if (!fp_ || sync() != 0) return failure;

        int whence = SEEK_SET;
        if (dir == std::ios_base::cur) {
            whence = SEEK_CUR;
            // The C stream is ahead of the logical position by the number of
            // bytes that were read into the get area but not yet consumed.
            off -= static_cast<off_type>(egptr() - gptr());
        } else if (dir == std::ios_base::end) {
            whence = SEEK_END;
        }
        setg(in_buf_, in_buf_, in_buf_);  // buffered input is stale after a seek

        // Parenthesised min/max: <windows.h> may define them as macros.
        if (off < static_cast<off_type>((std::numeric_limits<long>::min)()) ||
            off > static_cast<off_type>((std::numeric_limits<long>::max)())) {
            return failure;
        }
        if (std::fseek(fp_, static_cast<long>(off), whence) != 0) return failure;
        const long where = std::ftell(fp_);
        if (where < 0) return failure;
        return pos_type(off_type(where));
    }

    // Absolute seek; forwards to seekoff() relative to the beginning.
    pos_type seekpos(pos_type pos,
                     std::ios_base::openmode which = std::ios_base::in | std::ios_base::out) override {
        return seekoff(off_type(pos), std::ios_base::beg, which);
    }

private:
    // One slot is kept in reserve so overflow() can always store its character.
    void resetPutArea() { setp(out_buf_, out_buf_ + BUF_SIZE - 1); }

    FILE* fp_ = nullptr;
    std::ios::openmode mode_;
};
// Opens a file stream from a UTF-8 encoded path (handles non-ASCII paths and
// paths containing spaces).
//
// StreamType - std::ifstream / std::ofstream / std::fstream.
// utf8Path   - path encoded as UTF-8.
// mode       - open mode, passed straight to StreamType::open(), which adds
//              the stream's own in/out bit.
//
// Returns the stream by value; callers check is_open() for success.
//
// On Windows the path is converted to UTF-16 and opened through the wide
// character overload of open(). This keeps a real file buffer inside the
// stream, so is_open(), tellg() and seekg() behave normally.
// std::ifstream/std::ofstream only expose the zero-argument rdbuf(), so a
// custom buffer cannot be installed with stream.rdbuf(ptr).
// NOTE(review): the wchar_t overload of open() is a library extension (MSVC;
// libstdc++ on Windows when built with _wfopen support) - confirm that the
// toolchain in use provides it.
template <typename StreamType>
StreamType openFile(const std::string& utf8Path, std::ios::openmode mode) {
    StreamType stream;
#ifdef _WIN32
    std::wstring widePath;
    // With cbMultiByte == -1 the reported length includes the terminator.
    // MB_ERR_INVALID_CHARS makes malformed UTF-8 fail instead of being replaced.
    const int wideLen = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
                                            utf8Path.c_str(), -1, nullptr, 0);
    if (wideLen > 1) {
        widePath.assign(static_cast<size_t>(wideLen), L'\0');
        const int converted = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
                                                  utf8Path.c_str(), -1, &widePath[0], wideLen);
        if (converted > 0) {
            widePath.resize(static_cast<size_t>(converted) - 1);  // drop the terminator
        } else {
            widePath.clear();
        }
    }
    if (!widePath.empty()) {
        stream.open(widePath.c_str(), mode);
    }
    if (!stream.is_open()) {
        // Conversion failed or the wide open failed: fall back to the narrow path.
        stream.clear();
        stream.open(utf8Path, mode);
    }
#else
    stream.open(utf8Path, mode);
#endif
    return stream;
}
// Converts a string in the system ANSI code page (CP_ACP) to UTF-8.
// Used to work around the encoding of Chinese paths typed in the terminal
// (the original author's case: GBK input in the VS Code terminal).
//
// gbk_str - text in the ANSI code page, without embedded NULs.
// Returns the UTF-8 text without a trailing NUL. If any conversion step fails
// the input is returned unchanged as a best effort.
std::string gbk_to_utf8(const std::string& gbk_str) {
    if (gbk_str.empty()) {
        return std::string();
    }

    // Step 1: ANSI -> UTF-16. With length -1 the count includes the terminator.
    const int wideLen = MultiByteToWideChar(CP_ACP, 0, gbk_str.c_str(), -1, nullptr, 0);
    if (wideLen <= 0) {
        return gbk_str;
    }
    std::wstring wide(static_cast<size_t>(wideLen), L'\0');
    if (MultiByteToWideChar(CP_ACP, 0, gbk_str.c_str(), -1, &wide[0], wideLen) <= 0) {
        return gbk_str;
    }

    // Step 2: UTF-16 -> UTF-8.
    const int utf8Len = WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, nullptr, 0, nullptr, nullptr);
    if (utf8Len <= 0) {
        return gbk_str;
    }
    std::string utf8(static_cast<size_t>(utf8Len), '\0');
    if (WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), -1, &utf8[0], utf8Len, nullptr, nullptr) <= 0) {
        return gbk_str;
    }

    // The API wrote a terminating NUL into the last slot; it must not be part
    // of the std::string, or it ends up inside paths and error messages.
    utf8.resize(static_cast<size_t>(utf8Len) - 1);
    return utf8;
}
// Chunk size used when streaming large files: 8 MiB (8 * 2^20 bytes).
constexpr size_t CHUNK_SIZE = static_cast<size_t>(8) << 20;
// Node of the Huffman tree. A freshly constructed node has no children.
struct HuffmanNode {
    char data;                     // byte value stored in the node
    unsigned long long freq;       // occurrence count (weight) of the node
    HuffmanNode* left = nullptr;   // left child, or nullptr
    HuffmanNode* right = nullptr;  // right child, or nullptr

    HuffmanNode(char data, unsigned long long freq)
        : data(data), freq(freq) {}
};
// Ordering functor for std::priority_queue: reports l as "lower priority"
// when its frequency is larger, which turns the queue into a min-heap
// (the node with the smallest frequency is on top).
struct Compare {
    bool operator()(HuffmanNode* l, HuffmanNode* r) {
        const unsigned long long lhsWeight = l->freq;
        const unsigned long long rhsWeight = r->freq;
        return rhsWeight < lhsWeight;
    }
};
// Counts how often every byte value occurs in a file, printing progress.
//
// inputFile - UTF-8 path of the file to scan.
// freqMap   - receives the counts; counts are added to any existing entries.
//             Left untouched for an empty file.
//
// Throws std::runtime_error if the file cannot be opened, its size cannot be
// determined, it exceeds the 10 GB limit, or a read error occurs.
void calculateFrequency(const std::string& inputFile, std::unordered_map<char, unsigned long long>& freqMap) {
    std::ifstream file = openFile<std::ifstream>(inputFile, std::ios::binary | std::ios::ate);
    if (!file.is_open()) {
        throw std::runtime_error("无法打开输入文件: " + inputFile);
    }

    // tellg() reports failure as -1; check before converting to unsigned,
    // otherwise the failure shows up as a bogus "file too large" error.
    const std::streamoff endPos = file.tellg();
    if (endPos < 0) {
        throw std::runtime_error("无法获取文件大小: " + inputFile);
    }
    const uint64_t fileSize = static_cast<uint64_t>(endPos);
    if (fileSize == 0) {
        return;
    }
    if (fileSize > 1024ULL * 1024 * 1024 * 10) {
        throw std::runtime_error("文件过大,超过10GB限制");
    }
    file.seekg(0, std::ios::beg);

    // Heap buffer owned by a vector: released automatically on every exit path.
    std::vector<char> buffer(CHUNK_SIZE);
    // Count into a flat table first; one hash lookup per byte is needlessly slow.
    unsigned long long counts[256] = {};
    uint64_t bytesRead = 0;
    uint64_t lastProgress = 0;

    while (file.read(buffer.data(), static_cast<std::streamsize>(buffer.size())) || file.gcount() > 0) {
        const std::streamsize bytes = file.gcount();
        if (bytes <= 0) break;
        for (std::streamsize i = 0; i < bytes; ++i) {
            ++counts[static_cast<unsigned char>(buffer[static_cast<size_t>(i)])];
        }
        bytesRead += static_cast<uint64_t>(bytes);
        const uint64_t progress = (bytesRead * 100) / fileSize;
        if (progress != lastProgress) {
            lastProgress = progress;
            std::cout << "\r正在统计频率: " << progress << "% " << std::flush;
        }
    }
    if (file.bad()) {
        throw std::runtime_error("读取输入文件失败: " + inputFile);
    }
    std::cout << "\r正在统计频率: 100% " << std::endl;

    for (int value = 0; value < 256; ++value) {
        if (counts[value] != 0) {
            freqMap[static_cast<char>(static_cast<unsigned char>(value))] += counts[value];
        }
    }
}
// Builds a Huffman tree from a byte-frequency table and returns its root.
// Returns nullptr for an empty table. The caller owns the returned tree.
HuffmanNode* buildHuffmanTree(const std::unordered_map<char, unsigned long long>& freqMap) {
    std::priority_queue<HuffmanNode*, std::vector<HuffmanNode*>, Compare> pending;
    for (auto it = freqMap.begin(); it != freqMap.end(); ++it) {
        pending.push(new HuffmanNode(it->first, it->second));
    }
    if (pending.empty()) {
        return nullptr;
    }

    // Repeatedly merge the two lightest subtrees until a single root remains.
    while (pending.size() > 1) {
        HuffmanNode* const lighter = pending.top();
        pending.pop();
        HuffmanNode* const heavier = pending.top();
        pending.pop();

        // '$' is the placeholder byte stored in internal nodes.
        HuffmanNode* const parent = new HuffmanNode('$', lighter->freq + heavier->freq);
        parent->left = lighter;
        parent->right = heavier;
        pending.push(parent);
    }
    return pending.top();
}
// Recursively fills the code table: walking left appends '0', walking right
// appends '1'. A tree consisting of a single leaf gets the one-bit code "0".
//
// root        - current subtree (nullptr is ignored).
// str         - code prefix accumulated on the way down to `root`.
// huffmanCode - receives one entry per leaf.
void generateCodes(HuffmanNode* root, const std::string& str, std::unordered_map<char, std::string>& huffmanCode) {
    if (!root) {
        return;
    }
    const bool isLeaf = (root->left == nullptr) && (root->right == nullptr);
    if (isLeaf) {
        if (str.empty()) {
            huffmanCode[root->data] = "0";
        } else {
            huffmanCode[root->data] = str;
        }
        return;  // a leaf has no children to descend into
    }
    generateCodes(root->left, str + "0", huffmanCode);
    generateCodes(root->right, str + "1", huffmanCode);
}
// Writes the tree to `outFile` in pre-order:
//   '#'           - null subtree
//   'L' + byte    - leaf carrying that byte
//   'I' + L + R   - internal node followed by its left and right subtrees
void serializeTree(HuffmanNode* root, std::ofstream& outFile) {
    if (!root) {
        outFile.put('#');
        return;
    }
    const bool hasChild = (root->left != nullptr) || (root->right != nullptr);
    if (hasChild) {
        outFile.put('I');
        serializeTree(root->left, outFile);
        serializeTree(root->right, outFile);
        return;
    }
    outFile.put('L');
    outFile.put(root->data);
}
// 反序列化Huffman树
HuffmanNode* deserializeTree(std::ifstream& inFile) {
char marker;
if (!inFile.get(marker)) return nullptr;
if (marker == '#') return nullptr;
if (marker == 'L') {
char data;
if (!inFile.get(data)) throw std::runtime_error("反序列化失败:叶子节点数据缺失");
return new HuffmanNode(data, 0);
} else if (marker == 'I') {
HuffmanNode* left = deserializeTree(inFile);
HuffmanNode* right = deserializeTree(inFile);
HuffmanNode* node = new HuffmanNode('$', 0);
node->left = left;
node->right = right;
return node;
} else {
throw std::runtime_error("反序列化失败:无效节点标记");
}
}
// Helper: releases every node of a Huffman tree. Accepts nullptr.
void deleteHuffmanTree(HuffmanNode* root) {
    if (!root) {
        return;
    }
    // Remember the children before the node itself goes away.
    HuffmanNode* const leftChild = root->left;
    HuffmanNode* const rightChild = root->right;
    deleteHuffmanTree(leftChild);
    deleteHuffmanTree(rightChild);
    delete root;
}
// Compresses `inputFile` into `outputFile` with Huffman coding.
//
// Output layout:
//   "HUFF" | serialized tree | original size (8 bytes, host byte order) |
//   packed code bits (MSB first) | 1 byte: number of padding bits in the
//   last data byte
//
// Errors are reported on stderr and terminate the process with exit(1).
void compressFile(const std::string& inputFile, const std::string& outputFile) {
    HuffmanNode* root = nullptr;
    try {
        std::cout << "正在压缩文件 " << inputFile << "..." << std::endl;
        std::unordered_map<char, unsigned long long> freqMap;
        calculateFrequency(inputFile, freqMap);

        if (freqMap.empty()) {
            // Empty input: header, null tree, size 0, padding 0.
            std::ofstream outFile = openFile<std::ofstream>(outputFile, std::ios::binary);
            if (!outFile.is_open()) {
                throw std::runtime_error("无法打开输出文件: " + outputFile);
            }
            const char* magic = "HUFF";
            outFile.write(magic, 4);
            serializeTree(nullptr, outFile);
            const uint64_t originalSize = 0;
            outFile.write(reinterpret_cast<const char*>(&originalSize), sizeof(originalSize));
            const uint8_t padding = 0;
            outFile.write(reinterpret_cast<const char*>(&padding), sizeof(padding));
            outFile.close();
            std::cout << "压缩完成(空文件),输出为 " << outputFile << std::endl;
            return;
        }

        root = buildHuffmanTree(freqMap);
        std::unordered_map<char, std::string> huffmanCode;
        generateCodes(root, "", huffmanCode);

        // Flat lookup table indexed by byte value.
        std::vector<std::string> codeTable(256);
        for (const auto& entry : huffmanCode) {
            codeTable[static_cast<unsigned char>(entry.first)] = entry.second;
        }

        std::ifstream inFile = openFile<std::ifstream>(inputFile, std::ios::binary);
        if (!inFile.is_open()) {
            throw std::runtime_error("无法打开输入文件: " + inputFile);
        }
        std::ofstream outFile = openFile<std::ofstream>(outputFile, std::ios::binary);
        if (!outFile.is_open()) {
            throw std::runtime_error("无法打开输出文件: " + outputFile);
        }

        const char* magic = "HUFF";
        outFile.write(magic, 4);
        serializeTree(root, outFile);

        inFile.seekg(0, std::ios::end);
        const std::streamoff endPos = inFile.tellg();
        if (endPos <= 0) {
            throw std::runtime_error("无法获取文件大小: " + inputFile);
        }
        const uint64_t originalFileSize = static_cast<uint64_t>(endPos);
        inFile.seekg(0, std::ios::beg);
        outFile.write(reinterpret_cast<const char*>(&originalFileSize), sizeof(originalFileSize));

        // The 8 MiB chunk lives on the heap; an array this large on the stack
        // would exceed the default thread stack size on Windows.
        std::vector<char> buffer(CHUNK_SIZE);
        unsigned char pendingBits = 0;  // bits collected for the next output byte
        int pendingCount = 0;           // how many bits of pendingBits are valid
        uint64_t bytesReadTotal = 0;
        uint64_t lastProgress = 0;

        while (inFile.read(buffer.data(), static_cast<std::streamsize>(buffer.size())) || inFile.gcount() > 0) {
            const std::streamsize bytesRead = inFile.gcount();
            if (bytesRead <= 0) break;
            bytesReadTotal += static_cast<uint64_t>(bytesRead);

            for (std::streamsize i = 0; i < bytesRead; ++i) {
                const std::string& code = codeTable[static_cast<unsigned char>(buffer[static_cast<size_t>(i)])];
                if (code.empty()) {
                    // A byte that was absent during the frequency pass.
                    throw std::runtime_error("输入文件在压缩过程中发生变化: " + inputFile);
                }
                for (const char bit : code) {
                    pendingBits = static_cast<unsigned char>((pendingBits << 1) | (bit == '1' ? 1 : 0));
                    if (++pendingCount == 8) {
                        outFile.put(static_cast<char>(pendingBits));
                        pendingBits = 0;
                        pendingCount = 0;
                    }
                }
            }

            const uint64_t progress = (bytesReadTotal * 100) / originalFileSize;
            if (progress != lastProgress) {
                lastProgress = progress;
                std::cout << "\r正在压缩: " << progress << "% " << std::flush;
            }
        }

        // Left-align the remaining bits in a final byte, zero-filling the rest.
        uint8_t padding = 0;
        if (pendingCount > 0) {
            padding = static_cast<uint8_t>(8 - pendingCount);
            outFile.put(static_cast<char>(pendingBits << padding));
        }
        outFile.put(static_cast<char>(padding));
        if (!outFile) {
            throw std::runtime_error("写入输出文件失败: " + outputFile);
        }
        std::cout << "\r正在压缩: 100% " << std::endl;

        inFile.close();
        outFile.close();
        deleteHuffmanTree(root);
        root = nullptr;

        std::ifstream compressedFile = openFile<std::ifstream>(outputFile, std::ios::binary | std::ios::ate);
        const uint64_t compressedFileSize = compressedFile.tellg();
        compressedFile.close();

        const double compressionRatio = (1.0 - static_cast<double>(compressedFileSize) / originalFileSize) * 100.0;
        std::cout << std::fixed << std::setprecision(2);
        std::cout << "原始大小:" << originalFileSize << "字节" << std::endl;
        std::cout << "压缩后大小:" << compressedFileSize << "字节" << std::endl;
        std::cout << "压缩比:" << compressionRatio << "%" << std::endl;
        std::cout << "文件压缩完成,输出为 " << outputFile << std::endl;
    }
    catch (const std::exception& e) {
        deleteHuffmanTree(root);
        std::cerr << "压缩失败: " << e.what() << std::endl;
        exit(1);
    }
}
// Decompresses a file produced by compressFile().
//
// Expected layout:
//   "HUFF" | serialized tree | original size (8 bytes, host byte order) |
//   packed code bits (MSB first) | 1 byte: number of padding bits in the
//   last data byte
//
// A zero-length input, or one whose tree is the null marker, yields an empty
// output file. Errors are reported on stderr and terminate the process with
// exit(1); an incomplete decode only prints a warning.
void decompressFile(const std::string& inputFile, const std::string& outputFile) {
    std::ifstream inFile;
    std::ofstream outFile;
    HuffmanNode* root = nullptr;
    try {
        std::cout << "正在解压文件 " << inputFile << "..." << std::endl;
        inFile = openFile<std::ifstream>(inputFile, std::ios::binary | std::ios::ate);
        if (!inFile.is_open()) {
            throw std::runtime_error("无法打开文件 " + inputFile);
        }
        const std::streamoff endPos = inFile.tellg();
        if (endPos < 0) {
            throw std::runtime_error("无法获取文件大小: " + inputFile);
        }
        const uint64_t fileSize = static_cast<uint64_t>(endPos);
        inFile.seekg(0, std::ios::beg);
        if (fileSize == 0) {
            inFile.close();
            outFile = openFile<std::ofstream>(outputFile, std::ios::binary);
            if (outFile.is_open()) outFile.close();
            std::cout << "解压完成(空文件),输出为 " << outputFile << std::endl;
            return;
        }

        char magic[4] = {0, 0, 0, 0};
        if (!inFile.read(magic, 4) || std::string(magic, 4) != "HUFF") {
            throw std::runtime_error("无效的Huffman压缩文件");
        }

        root = deserializeTree(inFile);
        if (!root) {
            inFile.close();
            outFile = openFile<std::ofstream>(outputFile, std::ios::binary);
            if (outFile.is_open()) outFile.close();
            std::cout << "解压完成(空文件或树结构损坏),输出为 " << outputFile << std::endl;
            return;
        }

        uint64_t originalFileSize = 0;
        if (!inFile.read(reinterpret_cast<char*>(&originalFileSize), sizeof(originalFileSize))) {
            throw std::runtime_error("文件损坏:无法读取原始文件大小");
        }

        outFile = openFile<std::ofstream>(outputFile, std::ios::binary);
        if (!outFile.is_open()) {
            throw std::runtime_error("无法打开输出文件 " + outputFile);
        }

        // Everything between the header and the trailing padding byte is data.
        const std::streamoff dataStart = inFile.tellg();
        if (dataStart < 0 || static_cast<uint64_t>(dataStart) + 1 > fileSize) {
            throw std::runtime_error("文件损坏:无法读取填充位数");
        }
        const uint64_t dataSize = fileSize - static_cast<uint64_t>(dataStart) - 1;

        inFile.seekg(-1, std::ios::end);
        uint8_t padding = 0;
        if (!inFile.read(reinterpret_cast<char*>(&padding), 1)) {
            throw std::runtime_error("文件损坏:无法读取填充位数");
        }
        if (padding > 7) {
            throw std::runtime_error("文件损坏:填充位数无效");
        }
        inFile.seekg(dataStart);

        // When the input held a single distinct byte the root itself is a
        // leaf and every stored bit stands for one occurrence of that byte;
        // there is no child to descend into.
        const bool singleSymbol = (root->left == nullptr) && (root->right == nullptr);

        HuffmanNode* currentNode = root;
        uint64_t bytesWritten = 0;
        uint64_t bytesProcessed = 0;
        uint64_t lastProgress = 0;
        bool finished = (originalFileSize == 0);

        while (!finished && bytesProcessed < dataSize) {
            char byte;
            if (!inFile.get(byte)) {
                break;
            }
            ++bytesProcessed;

            // The low `padding` bits of the last data byte are filler.
            const bool isLastByte = (bytesProcessed == dataSize);
            const int lowestBit = isLastByte ? static_cast<int>(padding) : 0;

            for (int i = 7; i >= lowestBit && !finished; --i) {
                if (!singleSymbol) {
                    const bool bit = ((static_cast<unsigned char>(byte) >> i) & 1) != 0;
                    currentNode = bit ? currentNode->right : currentNode->left;
                    if (currentNode == nullptr) {
                        throw std::runtime_error("文件损坏或Huffman树结构错误");
                    }
                    if (currentNode->left || currentNode->right) {
                        continue;  // still inside the tree, need more bits
                    }
                }
                outFile.put(currentNode->data);
                ++bytesWritten;
                finished = (bytesWritten == originalFileSize);
                currentNode = root;
            }

            if (dataSize > 0) {
                const uint64_t progress = (bytesProcessed * 100) / dataSize;
                if (progress != lastProgress) {
                    lastProgress = progress;
                    std::cout << "\r正在解压: " << progress << "% " << std::flush;
                }
            }
        }
        // Terminate the "\r" progress line before printing the result.
        std::cout << "\r正在解压: 100% " << std::endl;

        if (!outFile) {
            throw std::runtime_error("写入输出文件失败: " + outputFile);
        }
        outFile.close();
        inFile.close();
        deleteHuffmanTree(root);
        root = nullptr;

        if (bytesWritten == originalFileSize) {
            std::cout << "文件解压完成,输出为 " << outputFile << std::endl;
        } else {
            std::cerr << "警告:解压可能不完整。写入 " << bytesWritten << " 字节,预期 " << originalFileSize << " 字节。文件可能已损坏。" << std::endl;
        }
    }
    catch (const std::exception& e) {
        if (outFile.is_open()) outFile.close();
        if (inFile.is_open()) inFile.close();
        deleteHuffmanTree(root);
        std::cerr << "解压失败: " << e.what() << std::endl;
        exit(1);
    }
}
// Prints the command-line usage summary to standard output,
// one line per supported invocation.
void showHelp() {
    static const char* const kUsageLines[] = {
        "Huffman 压缩解压工具使用方法:",
        "  压缩文件: huffman -c 输入文件 输出文件",
        "  解压文件: huffman -d 输入文件 输出文件",
        "  显示帮助: huffman -h",
    };
    for (const char* line : kUsageLines) {
        std::cout << line << std::endl;
    }
}
// Entry point.
//
// Command-line mode:
//   huffman -c <input> <output>   compress
//   huffman -d <input> <output>   decompress
//   huffman -h                    show help
// Any other argument combination prints an error plus the help text and
// returns 1.
//
// Without arguments an interactive menu is shown until the user chooses to
// quit or standard input is exhausted.
int main(int argc, char* argv[]) {
    std::ios_base::sync_with_stdio(false);
    std::cin.tie(nullptr);

    // Command-line mode
    if (argc >= 2) {
        const std::string option = argv[1];
        if (option == "-h") {
            showHelp();
            return 0;
        }
        else if (option == "-c" && argc == 4) {
            // Paths given on the command line are converted to UTF-8.
            const std::string input = gbk_to_utf8(argv[2]);
            const std::string output = gbk_to_utf8(argv[3]);
            compressFile(input, output);
            return 0;
        }
        else if (option == "-d" && argc == 4) {
            // Paths given on the command line are converted to UTF-8.
            const std::string input = gbk_to_utf8(argv[2]);
            const std::string output = gbk_to_utf8(argv[3]);
            decompressFile(input, output);
            return 0;
        }
        else {
            std::cerr << "无效的命令参数" << std::endl;
            showHelp();
            return 1;
        }
    }

    // Interactive mode
    while (true) {
        std::cout << "\n==== Huffman 压缩解压工具 ====" << std::endl;
        std::cout << "1. 压缩文件" << std::endl;
        std::cout << "2. 解压文件" << std::endl;
        std::cout << "3. 显示帮助" << std::endl;
        std::cout << "4. 退出程序" << std::endl;
        std::cout << "请选择操作(1/2/3/4): " << std::endl;

        int choice = 0;
        if (!(std::cin >> choice)) {
            // A failed extraction leaves the stream in a fail state; without
            // recovery every later read fails too and the menu spins forever.
            if (std::cin.eof() || std::cin.bad()) {
                return 0;  // no more input to read
            }
            std::cin.clear();
            std::cin.ignore((std::numeric_limits<std::streamsize>::max)(), '\n');
            std::cout << "无效的选择,请重新输入。" << std::endl;
            continue;
        }
        // Drop the rest of the line so getline() below starts on a fresh line.
        // Parenthesised max: <windows.h> may define max as a macro.
        std::cin.ignore((std::numeric_limits<std::streamsize>::max)(), '\n');

        std::string inputFile, outputFile;
        switch (choice) {
        case 1:
            std::cout << "请输入要压缩的文件名:" << std::endl;
            std::getline(std::cin, inputFile);
            std::cout << "请输入压缩后的输出文件名:" << std::endl;
            std::getline(std::cin, outputFile);
            // Paths typed interactively are converted to UTF-8.
            compressFile(gbk_to_utf8(inputFile), gbk_to_utf8(outputFile));
            std::cout << "\n操作完成,返回主菜单。" << std::endl;
            break;
        case 2:
            std::cout << "请输入要解压的文件名:" << std::endl;
            std::getline(std::cin, inputFile);
            std::cout << "请输入解压后的输出文件名:" << std::endl;
            std::getline(std::cin, outputFile);
            // Paths typed interactively are converted to UTF-8.
            decompressFile(gbk_to_utf8(inputFile), gbk_to_utf8(outputFile));
            std::cout << "\n操作完成,返回主菜单。" << std::endl;
            break;
        case 3:
            showHelp();
            break;
        case 4:
            std::cout << "感谢使用,程序即将退出。" << std::endl;
            return 0;
        default:
            std::cout << "无效的选择,请重新输入。" << std::endl;
            break;
        }
    }
}
// Note kept from the original post, which asked for help with the compiler
// error: no matching function for call to
// 'std::basic_ifstream<char>::rdbuf(std::streambuf*)'.
// std::basic_ifstream declares only the zero-argument rdbuf(); the overload
// that installs a buffer belongs to std::basic_ios and is hidden by it.
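// (Stray text captured with the source, apparently page residue: "Latest release". Not part of the program.)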