平台:Windows/Linux
编辑器/IDE:JetBrains CLion
第三方库:Boost
在操作文本文件时经常会遇到中文乱码的问题,尤其是在Windows平台下。本文针对常见的两种编码格式UTF-8和GBK编写了一套代码,实现二者的互转,支持单个字符串(std::string)、单个文件、批量文件以及多级目录的转换,详见代码:
1、头文件
//
// Created by lipin on 2019/12/18
//
#ifndef ENCODINGCONVERT_ENCODINGCONVERT_H
#define ENCODINGCONVERT_ENCODINGCONVERT_H
#include <iostream>
#include <string>
#include <Windows.h>
#include <fstream>
#include <vector>
#include "boost/filesystem.hpp"
namespace boostfs = boost::filesystem;
// Buffer size: 2 * 1024 * 1024 (presumably bytes, i.e. 2 MiB). Per the original
// note, this is the maximum file size the converter allows.
#define BUFFER_SIZE (2 * 1024 * 1024) // allow file max size 2M
// Named as a per-line buffer size (1024). Its use is not visible in this
// header -- TODO confirm against the implementation file.
#define BUFFER_SIZE_ONE_LINE 1024
class EncodingConvert
{
public:
EncodingConvert(const std::string &source_files_path_, const std::string &dest_files_path_):source_files_path(source_files_path_)
,dest_files_path(dest_files_path_)
{
// append \ to dest_files_path
size_t separator = dest_files_path.find_last_of("\\");
if(separator != dest_files_path.length() -1)
{
dest_files_path.append("\\");
}
boostfs::path path(dest_files_path);
if(boostfs::exists(path) == false) // create directory
{
boostfs::create_directories(path);
}
}
void convertToUTF8();
void convertToGBK();
void convert(const std::string &input_file_path_, size_t source_encode_type_, size_t dest_encode_type_);
static std::string convertString(size_t convert_type, const std::string source_string_); // 转换方向 0---GBK to UTF-8 1--- UTF-8 to GBK
private:
std::string source_files_path;
std::string dest_files_path;
std::vector<std::string> vct_file_paths;
bool setFilePathSet(const std::string &file_set_path_);
};
#endif //ENCODINGCONVERT_ENCODINGCONVERT_H
2、源文件
//
// Created by lipin on 2019/12/18
//
#include <stdio.h>
#include "../encodingconvert.h"
bool EncodingConvert::setFilePathSet(const std::string &file_set_path_)
{
std::cout << "scan file" << std::endl;
boostfs::path path(file_set_path_);
if(boostfs::exists(path) == false)
{
std::cout<<"ERROR: invalid path"<<std::endl;
return false;
}
boostfs::directory_iterator end_iter;
for(boostfs::directory_iterator iter(path); iter != end_iter