文件:ijcai.pl
use LWP;
use LWP::UserAgent;
# HTTP client used for the download below. Declared with `my` so that it is
# a file-scoped lexical instead of an implicit package global (the original
# assignment would not compile under `use strict`).
my $ua = LWP::UserAgent->new;

# Identify this script to the server via the User-Agent request header.
$ua->agent("MyApp/0.1");
# save_abstaract -- empty stub; ignores its arguments and returns nothing
# (empty list in list context, undef in scalar context).
#
# Presumably meant as the handler for the commented-out ':content_cb'
# option in the request below -- TODO confirm.
# NOTE(review): the sub is spelled "abstaract" while that commented-out
# option refers to "save_abstract"; settle on one spelling before wiring
# the callback up.
sub save_abstaract {
    return;
}
# Destination file for the downloaded contents page: the first command-line
# argument, or default.html when none (or an empty/false one) is given.
my $out = $ARGV[0] || 'default.html';

# Fetch the IJCAI-09 contents page. The ':content_file' option tells LWP to
# write the response body straight into $out instead of holding it in the
# response object.
my $response = $ua->get(
    'http://ijcai.org/papers09/contents.php',
    # Alternative: hand the body to a callback chunk by chunk.
    #':content_cb' => \&save_abstaract,
    ':content_file' => $out,
);

# Report the HTTP status line. The original had separate success and failure
# branches that printed exactly the same thing, so one statement suffices.
# The terminator is a real newline; the original printed the two literal
# characters "/n".
print $response->status_line, "\n";
文件:parse_abstract.pl
作用:把abstract内容保存在word文档
#parse the content
use strict;
use LWP;
use URI;
# Positional command-line arguments, each with a fallback used when the
# argument is missing or false:
#   1. saved contents page to scan for abstract links
#   2. text file that receives the list of abstract URLs
#   3. document file that receives the downloaded abstract pages
my ($ijcai_file, $out_file, $abstract_file) = @ARGV;
$ijcai_file    ||= 'default.html';
$out_file      ||= 'abstract_list.txt';
$abstract_file ||= 'abstract.doc';

# Address of the contents page; relative links found in it are resolved
# against this base.
my $url_base = 'http://ijcai.org/papers09/contents.php';

# Work variables filled in while scanning the contents page.
my $url;
my @abstract;

# Request headers imitating an old Netscape browser; passed along with every
# abstract request.
my @netscape_like_headers = (
    'User-Agent'      => 'Mozilla/4.76 [en] (Win98; U)',
    'Accept-Language' => 'en-US',
    'Accept-Charset'  => 'iso-8859-1,*,utf-8',
    'Accept-Encoding' => 'gzip',
    'Accept'          => "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, image/png, */*",
);

# HTTP client used to download the individual abstract pages.
my $browser = LWP::UserAgent->new();
# Open the saved contents page for reading and both output files for
# writing. Three-argument open with lexical handles is used so that a file
# name beginning with '<', '>' or '|' cannot be misread as an open mode.
open(my $ijcai_fh, '<', $ijcai_file)
    or die "Can't open content file: $!";
open(my $out_fh, '>', $out_file)
    or die "Can't write abstract file: $!";
open(my $abstract_fh, '>', $abstract_file)
    or die "Can't write abstract doc file: $!";

# Pass 1: collect every link whose text is "Abstract", resolve it against
# the contents page URL, remember it and log it to the URL list file.
#   - [^"]* keeps the capture inside one href attribute; the original lazy
#     (.*?) could start at an earlier <a href="..."> on the same line and
#     swallow everything up to the Abstract link.
#   - /g in a while loop picks up several Abstract links on one line.
while (my $line = <$ijcai_fh>) {
    while ($line =~ m{<a href="([^"]*)">Abstract</a>}gi) {
        my $link = URI->new_abs($1, $url_base);
        push @abstract, $link;
        print {$out_fh} $link, "\n";
    }
}
close($ijcai_fh);
# Buffered write errors only surface at close, so check it.
close($out_fh) or die "Can't close abstract file: $!";

# Pass 2: download each abstract page and append it to the abstract document.
foreach my $abs (@abstract) {
    my $response = $browser->get($abs, @netscape_like_headers);

    # A failed request is reported and skipped; the original went on to write
    # the body of the error response into the abstract document.
    if (!$response->is_success) {
        print "Can't get $abs: ", $response->status_line, "\n";
        next;
    }
    print $abs, "\n";

    # The request headers advertise 'Accept-Encoding: gzip', so the body may
    # arrive compressed. decoded_content undoes the Content-Encoding;
    # charset => 'none' leaves the result as raw bytes, matching what the
    # original wrote for uncompressed responses. Fall back to the raw body if
    # decoding is not possible.
    my $body = $response->decoded_content(charset => 'none');
    $body = $response->content() unless defined $body;

    print {$abstract_fh} $body, "\n\n";
}
close($abstract_fh) or die "Can't close abstract doc file: $!";