#一个目录下的所有DOC文件写入多个TXT文件
#程序:刘兴
#时间:2010.3.19
#blog:http://deepfuture.iteye.com
#QQ:782322192
use 5.010;
use warnings;
use strict;
use Win32::OLE;
use Win32::OLE::Const 'Microsoft Word';
# Export every Word document in $mydir to a text file (one paragraph per
# line), then rename each text file after the document title, i.e. the
# non-blank lines that follow the "...[yyyy] nn号" document-number line.
my $mydir = 'E:/zc';
my @list  = glob("$mydir/*.doc");

# Start a hidden Word instance. The 'Quit' destructor makes Win32::OLE call
# Word's Quit method when the object is destroyed, so a die() further down
# does not leave an invisible WINWORD.EXE behind.
my $word = Win32::OLE->new('Word.Application', 'Quit')
    or die 'Cannot start Word: ' . Win32::OLE->LastError() . "\n";
$word->{'Visible'} = 0;
$| = 1;    # unbuffered STDOUT so the progress dots show up immediately

foreach my $doc_path (@list) {
    # Swap only the trailing extension; an unanchored /.doc/ would also hit
    # e.g. "mydoc" in the middle of the path.
    (my $txt_path = $doc_path) =~ s/\.doc\z/.txt/i;

    say '';
    say "正在处理:$doc_path";

    my $document = $word->Documents->Open($doc_path);
    if (!$document) {
        warn "Cannot open $doc_path: " . Win32::OLE->LastError() . "\n";
        next;
    }
    my $exported = export_paragraphs($document, $txt_path);
    # 0 == wdDoNotSaveChanges: never let the hidden Word wait on a save prompt.
    $document->Close(0);
    print "\n";
    next unless $exported;

    my $newfn = extract_title($txt_path);
    if ($newfn eq '') {
        # No document number / title found: keep the plain .txt name rather
        # than renaming the file to ".txt".
        warn "No title found in $txt_path; file name left unchanged\n";
        next;
    }
    print "转换:$doc_path==>$newfn.txt\n";
    # Keep the renamed file beside its source document instead of moving it
    # into whatever the current working directory happens to be.
    rename $txt_path, "$mydir/$newfn.txt"
        or warn "Cannot rename $txt_path to $mydir/$newfn.txt: $!\n";
}

# Dropping the last reference runs the 'Quit' destructor registered above.
undef $word;

# Write the text of every paragraph of $document to $txt_path, one paragraph
# per line, printing a progress dot per paragraph.
# Returns 1 on success, 0 if the text file could not be written.
sub export_paragraphs {
    my ($document, $txt_path) = @_;

    my $out;
    if (!open $out, '>', $txt_path) {
        warn "Cannot write $txt_path: $!\n";
        return 0;
    }

    my $count = $document->Paragraphs->Count // 0;    # number of paragraphs
    for my $id (1 .. $count) {
        print '.';
        my $paragraph = $document->Paragraphs($id) or next;
        my $range     = $paragraph->Range          or next;
        my $text      = $range->Text;
        # The default (text-mode) layer is kept on purpose so the file
        # format stays exactly what extract_title() reads back.
        print {$out} "$text\n" if $text;
    }

    if (!close $out) {
        warn "Error while writing $txt_path: $!\n";
        return 0;
    }
    return 1;
}

# Read the exported text file and return the document title: the run of
# non-blank lines that follows the document-number line, with whitespace and
# characters unusable in a Windows file name removed.
# Returns '' when no document number or no title is found.
sub extract_title {
    my ($txt_path) = @_;

    open my $in, '<', $txt_path or do {
        warn "Cannot read $txt_path: $!\n";
        return '';
    };

    my $state = 'seek_number';    # -> 'skip_blank' -> 'in_title'
    my $title = '';
    LINE:
    while (my $line = <$in>) {
        if ($state eq 'seek_number') {
            # The closing bracket alternatives are grouped so that the
            # digits and "号" are required after either of them. The pattern
            # is matched as raw bytes (no "use utf8"), so this script is
            # presumably saved in the same encoding Word's text arrives in
            # -- TODO confirm.
            $state = 'skip_blank' if $line =~ /(?:\]|〕)\s*\d+号/;
            next LINE;
        }
        if ($line =~ /\A\s*\z/) {
            next LINE if $state eq 'skip_blank';    # blank lines before title
            last LINE;                              # blank line ends the title
        }
        $state = 'in_title';
        $title .= $line;
    }
    close $in;

    $title =~ s/\s+//g;    # drop ALL whitespace, including the line endings
    # Remove control characters (Word cell/field marks) and the ASCII
    # characters Windows forbids in file names. Backslash and '|' are left
    # alone: bytes >= 0x40 may be the second byte of a double-byte character
    # if the text is in a DBCS ANSI code page such as cp936 -- TODO confirm.
    $title =~ tr{\x00-\x1f/:*?"<>}{}d;
    return $title;
}

# (Web-page residue, not part of the script: "collapsed comments /
# why were they collapsed?")



