#!/usr/bin/perl
# Gist: https://gist.github.com/2928006
#
# Usage: perl <this script> URL OUTPUT_FILE
#
# 1. Downloads URL and extracts the <div id="enText" ...> block from it.
# 2. Reads template.html (from the current directory) up to and including
#    its first "</html>", replaces every ==TEXT_CONTENT== placeholder with
#    the extracted block, and writes the result to OUTPUT_FILE.
# 3. Follows the page's "download audio" link, dumps the resource page to
#    STDOUT and reports whether it mentions "downloadurl1".
#
# Dies with a message if a download fails, a file cannot be opened, or the
# download-page link cannot be found.
use strict;
use warnings;

use LWP::Simple;

die "Usage: $0 URL OUTPUT_FILE\n" unless @ARGV >= 2;
my ($url, $filename) = @ARGV;

# NOTE(review): the Chinese literals below are matched as the raw bytes of
# this source file (no "use utf8"), exactly as in the original script.
# Whether get() hands back bytes or decoded characters depends on the
# installed LWP and the page's charset -- confirm before relying on the
# download-link match.
my $content = get($url)
    or die "Couldn't get $url";

# The leading greedy ".*" means the LAST matching div on the page wins.
if ($content =~ m{.*(<div id="enText" style="display:block">.*?</div>)}s) {
    my $text = $1;

    # Read the template up to and including the first "</html>".
    open my $template_fh, '<', 'template.html'
        or die "Couldn't open template.html for reading: $!";
    my $page = do {
        local $/ = '</html>';    # record separator restored on scope exit
        <$template_fh>;
    };
    close $template_fh;
    defined $page
        or die "template.html is empty\n";

    # Insert the listening text into the template.
    $page =~ s/==TEXT_CONTENT==/$text/gi;

    # Write the generated HTML file.
    open my $out_fh, '>', $filename
        or die "Couldn't open $filename for writing: $!";
    print {$out_fh} $page;
    close $out_fh
        or die "Couldn't finish writing $filename: $!";

    # Locate the audio download page: strip the trailing "/<name>.html"
    # from the page URL and append the relative link found in the page.
    (my $base_url = $url) =~ s{(.*)(/.*\.html)}{$1}g;

    my ($res_path) =
        $content =~ m{.*<a href="(.*?)" title="进入下载资料页面">下载听力</a>}s
        or die "Couldn't find the download-page link in $url";

    my $reslink = $base_url . '/' . $res_path;
    print "\nreslink:", $reslink, "\n";

    my $respage = get($reslink)
        or die "Couldn't get $reslink";

    # Debug output: dump the whole resource page.
    print $respage;

    # TODO: without a logged-in session the real download link is not
    # present in the page, so only the presence of the button image name is
    # checked. The anchor that would carry the link looks like:
    #   <a href="(.*?)" target="_blank"><img src="/images/downloadurl1.jpg"></a>
    if ($respage =~ /downloadurl1/) {
        print "匹配\n";
    }
    else {
        print "不匹配\n";
    }

    print "\ndownload:" . $reslink . "\n";
}
else {
    print "不匹配\n";
}
# [经验总结]Perl提取网页信息
# 最新推荐文章于 2024-01-12 10:19:06 发布

# 2841





