有些编辑器会对文本做自动换行(硬换行)处理,在原本没有换行符的地方插入了换行符,而有时我们需要把这些多余的换行符去掉。
我大致是这样处理的:
#include <iostream>
#include <sstream>
#include <fstream>
#include <string>
#include <Windows.h>
#define MAX_CHAR_PER_LINE 200 //Maximum number of chars per line; each Chinese character or punctuation mark counts as two chars
using namespace std;
//Name of the input file and name of the output file.
string old_filename,new_filename;
//Holds the processed text before it is written to the output file.
string newstbuffer;
//The file name may be given without an extension or with a ".txt" extension.
//Derives the global new_filename from the global old_filename:
//  - no extension:  ".txt" is appended to old_filename and new_filename
//                   becomes "<name>_modified.txt";
//  - "txt":         new_filename becomes "<name without extension>_modified.txt";
//  - anything else: prints "invalid extension", waits three seconds and fails.
//Returns true on success and false when the extension is not "txt".
bool makeName()
{
    //Only a dot inside the last path component marks an extension, and it is the
    //LAST dot that counts. Searching for the first dot in the whole string would
    //reject names such as "a.b.txt" or ".\dir\file".
    const string::size_type sep = old_filename.find_last_of("/\\");
    const string::size_type idx = old_filename.rfind('.');
    const bool hasExtension =
        idx != string::npos && (sep == string::npos || idx > sep);

    if(!hasExtension)
    {
        new_filename = old_filename + "_modified.txt";
        old_filename += ".txt";
        return true;
    }

    if(old_filename.compare(idx + 1, string::npos, "txt") != 0)
    {
        cout << "invalid extension" << endl;
        Sleep(3000); //give the user time to read the message before the console closes
        return false;
    }

    new_filename = old_filename.substr(0, idx) + "_modified.txt";
    return true;
}
//Returns true when `line` ends with a mark after which a line break is kept:
//full stop, closing quote, exclamation mark, ellipsis or question mark.
//The comparison is byte-wise, so the text file must use the same encoding as
//these literals were compiled with. Half-width "!" and "?" are accepted in
//addition to their full-width forms.
static bool endsWithSentenceMark(const string &line)
{
    static const string marks[] = { "。", "”", "!", "…", "?", "!", "?" };
    for(const string &mark : marks)
    {
        if(line.size() >= mark.size() &&
           line.compare(line.size() - mark.size(), mark.size(), mark) == 0)
        {
            return true;
        }
    }
    return false;
}

//Joins hard-wrapped lines of `text`: a line break is kept only after a line that
//ends with a sentence mark, every other line break is dropped. Empty lines
//(including leading ones) are discarded. Lines may be of any length.
static string joinWrappedLines(const string &text)
{
    string joined;
    stringstream lines(text);
    string line;
    while(getline(lines, line))
    {
        //Tolerate CRLF input that was not translated by the stream.
        if(!line.empty() && line.back() == '\r')
        {
            line.pop_back();
        }
        if(line.empty())
        {
            continue;
        }
        joined += line;
        if(endsWithSentenceMark(line))
        {
            joined += '\n';
        }
    }
    return joined;
}

//Entry point.
//  no argument:   asks for the input file name, output name is derived by makeName()
//  one argument:  input file name, output name is derived by makeName()
//  two arguments: input file name and output file name, both used as given
//Returns 0 on success and 1 on invalid arguments or when a file cannot be opened.
int main(int argc, char **argv)
{
    switch(argc)
    {
    case 1:
        cout << "输入待处理文本文件名"<<endl;
        if(!(cin >> old_filename)) return 1;
        if(!makeName()) return 1;
        break;
    case 2:
        old_filename = argv[1];
        if(!makeName()) return 1;
        break;
    case 3:
        old_filename = argv[1]; //two arguments: the second one names the processed file
        new_filename = argv[2];
        break;
    default:
        cout << "invalid arguments" << endl;
        Sleep(3000);
        return 1;
    }

    ifstream in(old_filename,ios::in);
    if(!in)
    {
        cout << "cannot open input file: " << old_filename << endl;
        Sleep(3000);
        return 1;
    }

    //Read the whole file once and work on the in-memory copy. This avoids
    //relative seeks on a text-mode stream and any fixed-size line buffer.
    stringstream strstm;
    strstm << in.rdbuf();
    in.close();

    newstbuffer = joinWrappedLines(strstm.str());
    cout << newstbuffer << endl;

    ofstream out(new_filename,ios::out); //write the processed text to the other file
    if(!out)
    {
        cout << "cannot open output file: " << new_filename << endl;
        Sleep(3000);
        return 1;
    }
    out << newstbuffer;
    out.close();

    system("pause");
    return 0;
}
(注意:这个程序只适用于纯中文文本(包括标点在内);如果文本中有单字节字符,要先把它们替换成对应的中文全角字符。)