抓取一例,以
#!/usr/bin/perl
use DBI;
use Encode;
# Connect to the target DB. RaiseError is left off (as before), so the result
# of connect() has to be checked by hand: on failure it returns undef, and we
# stop here instead of carrying on with an undefined handle.
my $dbh = DBI->connect("DBI:mysql:database=news_db;host=127.0.0.1","root","", {'RaiseError' => 0})
    or die "Cannot connect to news_db: $DBI::errstr\n";

# Drop any stale copy of the front page, then download a fresh one.
# unlink/list-form system avoid spawning a shell for fixed commands.
unlink 'fenghuan_tmp';
system('wget', '-O', 'fenghuan_tmp', 'http://news.ifeng.com/', '-o', '.log') == 0
    or warn "wget failed for http://news.ifeng.com/ (status $?); see .log\n";

my %hash;                  # headline text => article URL
my $domain = "ifeng.com";
# Category label ("rolling news"), decoded from UTF-8 bytes into Perl characters.
my $category = decode("utf-8","滚动新闻");
# "Rolling news" section: collect headline => URL pairs from the downloaded
# front page into %hash. Scanning begins after the first
# <h3 class="sysNW fz20"> marker and ends at the "<!--左侧 end-->" comment (or
# at end of file); only links whose URL ends in ".shtml" are kept.
# If the page file cannot be opened, a warning is issued and %hash is left
# untouched.
if (open(my $page_fh, '<', 'fenghuan_tmp')) {
    SCAN: while (my $line = <$page_fh>) {
        # Skip everything before the section header.
        next SCAN unless $line =~ /<h3 class="sysNW fz20">/;

        while ($line = <$page_fh>) {
            # Only lines that start with href="..." are treated as links;
            # $1 is the URL, $2 the link text up to the next tag.
            if ($line =~ m{^href="([^"]+)" [^>]+>([^<]+)}) {
                my ($url, $con) = ($1, $2);
                if ($url =~ /\.shtml$/) {
                    # A repeated headline overwrites the earlier URL.
                    $hash{$con} = $url;
                    # print $url."\t".$con."\n";
                }
            }
            elsif ($line =~ /<!--左侧 end-->/) {
                last;
            }
        }

        # Only the first matching section is processed.
        last SCAN;
    }
    close($page_fh);
}
else {
    warn "Cannot open fenghuan_tmp: $!\n";
}
# Hand every collected headline to get_page (defined outside this section),
# together with its URL, the category label and the source domain.
for my $title (keys %hash) {
    get_page($hash{$title}, $title, $category, $domain);
}

# Empty the map once every entry has been dispatched.
undef %hash;