#!/bin/perl
# key_search.pl - find keywords in the "*_sent.txt" transcripts that belong to
# a list of wav recordings and write every hit, with its time span, to a
# result file.
#
# Usage: perl key_search.pl wavlist keywordlist result
#
#   wavlist      one wav path per line, backslash-separated (Windows style)
#   keywordlist  one entry per line; only the text before the first '=' is
#                used as the keyword (GB2312 bytes, matched literally)
#   result       output file, overwritten on every run
#
# Transcripts are discovered by running the Windows "dir" command recursively
# on the directory of the FIRST wav in the list; the raw listing is left in
# "sent.list" in the current directory.  A transcript belongs to a wav when
# its path contains the wav's base name (".wav" and whitespace removed).
#
# Transcript layout as consumed by find_hits():
#
#   <line>                              e.g. 0.0000
#   <word1> <word2> ...                 text line (contains GB2312 bytes)
#   <s1> <e1> <s2> <e2> ...             one start/end pair per word
#   <line>                              e.g. 0.0000
#   ...
#
# Result layout:
#
#   <wav line>                          once per (wav, transcript) match
#   <word> <start> <end> <line>         one per keyword hit; <line> is the
#                                       line that follows the timing line
#
# NOTE(review): this file used to contain the whole script twice, so every
# run did all the work twice and rewrote the same result file.  Only one copy
# is kept here.

use strict;
use warnings;
use Encode;

# Name of the listing file produced by "dir" (kept for compatibility with the
# original script, which left it behind in the current directory).
my $SENT_LIST = 'sent.list';

if (@ARGV < 3) {
    print "usage:perl key_search.pl wavlist keywordlist result\n";
    exit(-1);
}
my ($wavlist, $keywordlist, $keyresult) = @ARGV;

my @wav_lines = read_lines($wavlist, 'wav list');
my @keywords  = load_keywords($keywordlist);

open(my $result_fh, '>', $keyresult)
    or die "cannot open result file '$keyresult' for writing: $!\n";

my @sent_files = list_sent_files($wav_lines[0]);

for my $wav_line (@wav_lines) {
    my $name = wav_basename($wav_line);

    # A blank wav line would otherwise become an empty pattern and "match"
    # unrelated transcripts.
    next if $name eq '';

    for my $sent_file (@sent_files) {
        # Literal substring test: no /g (it would leave pos() set on the
        # list element and break later matches) and no regex metacharacters.
        next unless $sent_file =~ /\Q$name\E/;

        print {$result_fh} "$wav_line\n";

        # Read the transcript once and reuse it for every keyword.
        my @lines = read_lines($sent_file, 'sent file');
        for my $keyword (@keywords) {
            print {$result_fh} "$_\n" for find_hits($keyword, \@lines);
        }
    }
}

close($result_fh)
    or die "error while writing result file '$keyresult': $!\n";

# Read a whole file and return its lines without the trailing newline.
#   $path - file to read
#   $what - short description used in the error message
# Dies with the file name and OS error when the file cannot be opened.
sub read_lines {
    my ($path, $what) = @_;
    open(my $fh, '<', $path) or die "cannot open $what '$path': $!\n";
    my @lines = <$fh>;
    close($fh);
    chomp(@lines);
    return @lines;
}

# Load the keyword list and return the decoded (GB2312 -> characters)
# keywords in file order.  Only the part before the first '=' of each line is
# used.  Empty keywords are skipped: as a regex they would re-use the last
# successful pattern, and as a substring they would match every word.
sub load_keywords {
    my ($path) = @_;
    my @decoded;
    for my $line (read_lines($path, 'keyword list')) {
        my ($raw) = split(/=/, $line);
        next unless defined $raw && length $raw;
        push @decoded, decode('gb2312', $raw);
    }
    return @decoded;
}

# Return the paths of all "*_sent.txt" files below the directory of the given
# wav path, as listed by "dir /b /s /O:N".  The listing is written to
# $SENT_LIST first and then read back.
#   $first_wav - first line of the wav list (may be undef for an empty list,
#                in which case the current directory is searched)
sub list_sent_files {
    my ($first_wav) = @_;

    # Keep everything up to and including the last backslash.  The original
    # removed the base name with an unescaped global regex, which also ate
    # matching text inside the directory names.
    my $prefix = defined $first_wav ? $first_wav : '';
    $prefix =~ s/[^\\]*\z//;

    # The path is quoted so that spaces and shell metacharacters in directory
    # names do not split or alter the command.
    my $status = system(qq{dir /b /s /O:N "${prefix}*_sent.txt" > $SENT_LIST});
    if ($status != 0) {
        # "dir" also fails when nothing matches; the original ignored this
        # and produced an empty result, so only warn here.
        warn "warning: listing '${prefix}*_sent.txt' failed (status $status)\n";
    }
    return read_lines($SENT_LIST, 'sent file listing');
}

# Reduce a wav path to the name used to recognise its transcript: last
# backslash-separated component with every ".wav" and all whitespace removed.
# Returns '' for a blank line.
sub wav_basename {
    my ($wav_line) = @_;
    my @parts = split(/\\/, $wav_line);
    my $name = @parts ? $parts[-1] : '';
    $name =~ s/\.wav//g;
    $name =~ s/\s//g;
    return $name;
}

# Scan the lines of one transcript for a keyword.
#   $keyword - decoded keyword (character string, non-empty)
#   $lines   - array ref of raw transcript lines (bytes, no newline)
# Returns the result lines (without newline) in transcript order:
#   "<word> <start> <end> <line after the timing line>"
# A word is a hit when its decoded form contains the keyword.  The word's
# position N in the text line selects fields 2N and 2N+1 of the timing line;
# missing fields become empty strings.  Hits still waiting for the closing
# line at end of file are returned without that last column.
# All state is local, so nothing leaks from one transcript or keyword into
# the next.
sub find_hits {
    my ($keyword, $lines) = @_;

    my @hits;                # finished result lines
    my @matched;             # [position, raw word] from the latest text line
    my @pending;             # "word start end" waiting for the closing line
    my $expect_times = 0;    # true right after a text line

    for my $line (@$lines) {
        # Same tokenisation as the timing lookup relies on: split on every
        # single whitespace character, so empty fields keep their position.
        my @fields = split(/\s/, $line);

        # A line with any high-bit byte is treated as a text line.
        if ($line =~ /[\x80-\xFF]/) {
            @matched = ();
            my $position = 0;
            for my $word (@fields) {
                # Compare decoded strings so a match cannot start in the
                # middle of a double-byte character.
                if (index(decode('gb2312', $word), $keyword) >= 0) {
                    push @matched, [$position, $word];
                }
                $position++;
            }
            $expect_times = 1;
            next;
        }

        if ($expect_times) {
            # Timing line for the text line just seen.
            for my $match (@matched) {
                my ($position, $word) = @$match;
                my $start = $fields[2 * $position];
                my $end   = $fields[2 * $position + 1];
                $start = '' unless defined $start;
                $end   = '' unless defined $end;
                push @pending, "$word $start $end";
            }
            @matched      = ();
            $expect_times = 0;
            next;
        }

        # Any other line closes the hits collected so far.
        push @hits, map { "$_ $line" } @pending;
        @pending = ();
    }

    # Transcript ended before a closing line was seen.
    push @hits, @pending;
    return @hits;
}