#----------------------------- use LWP::Simple; $content = get($URL); #----------------------------- use LWP::Simple; unless (defined ($content = get $URL)) { die "could not get $URL/n"; } #----------------------------- # download the following standalone program #!/usr/bin/perl -w # titlebytes - find the title and size of documents use LWP::UserAgent; use HTTP::Request; use HTTP::Response; use URI::Heuristic; my $raw_url = shift or die "usage: $0 url/n"; my $url = URI::Heuristic::uf_urlstr($raw_url); $| = 1; # to flush next line printf "%s =>/n/t", $url; my $ua = LWP::UserAgent->new(); $ua->agent("Schmozilla/v9.14 Platinum"); # give it time, it'll get there my $req = HTTP::Request->new(GET => $url); $req->referer("http://wizard.yellowbrick.oz"); # perplex the log analysers my $response = $ua->request($req); if ($response->is_error()) { printf " %s/n", $response->status_line; } else { my $count; my $bytes; my $content = $response->content(); $bytes = length $content; $count = ($content =~ tr//n//n/); printf "%s (%d lines, %d bytes)/n", $response->title(), $count, $bytes; }
#----------------------------- use HTML::LinkExtor;
$parser = HTML::LinkExtor->new(undef, $base_url); $parser->parse_file($filename); @links = $parser->links; foreach $linkarray (@links) { my @element = @$linkarray; my $elt_type = shift @element; # element type
# possibly test whether this is an element we're interested in while (@element) { # extract the next attribute and its value my ($attr_name, $attr_value) = splice(@element, 0, 2); # ... do something with them ... } } #----------------------------- <A HREF="http://www.perl.com/">Home page</A> <IMG SRC="images/big.gif" LOWSRC="images/big-lowres.gif"> #----------------------------- [ [ a, href => "http://www.perl.com/" ], [ img, src => "images/big.gif", lowsrc => "images/big-lowres.gif" ] ] #----------------------------- if ($elt_type eq 'a' && $attr_name eq 'href') { print "ANCHOR: $attr_value/n" if $attr_value->scheme =~ /http|ftp/; } if ($elt_type eq 'img' && $attr_name eq 'src') { print "IMAGE: $attr_value/n"; } #----------------------------- # download the following standalone program #!/usr/bin/perl -w # xurl - extract unique, sorted list of links from URL use HTML::LinkExtor; use LWP::Simple;
$base_url = shift; $parser = HTML::LinkExtor->new(undef, $base_url); $parser->parse(get($base_url))->eof; @links = $parser->links; foreach $linkarray (@links) { my @element = @$linkarray; my $elt_type = shift @element; while (@element) { my ($attr_name , $attr_value) = splice(@element, 0, 2); $seen{$attr_value}++; } } for (sort keys %seen) { print $_, "/n" }
#----------------------------- # download the following standalone program #!/usr/bin/perl -w -p00 # text2html - trivial html encoding of normal text # -p means apply this script to each record. # -00 mean that a record is now a paragraph
use HTML::Entities; $_ = encode_entities($_, "/200-/377");
if (/^/s/) { # Paragraphs beginning with whitespace are wrapped in <PRE> s{(.*)$} {<PRE>/n$1</PRE>/n}s; # indented verbatim } else { s{^(>.*)} {$1<BR>}gm; # quoted text s{<URL:(.*?)>} {<A HREF="$1">$1</A>}gs # embedded URL (good) || s{(http:/S+)} {<A HREF="$1">$1</A>}gs; # guessed URL (bad) s{/*(/S+)/*} {<STRONG>$1</STRONG>}g; # this is *bold* here s{/b_(/S+)/_/b} {<EM>$1</EM>}g; # this is _italics_ here s{^} {<P>/n}; # add paragraph tag }
#----------------------------- ($plain_text = $html_text) =~ s/<[^>]*>//gs; #WRONG #----------------------------- use HTML::Parse; use HTML::FormatText; $plain_text = HTML::FormatText->new->format(parse_html($html_text)); #----------------------------- #% perl -pe 's/<[^>]*>//g' file #----------------------------- #<IMG SRC = "foo.gif" # ALT = "Flurp!"> #----------------------------- #% perl -0777 -pe 's/<[^>]*>//gs' file #----------------------------- { local $/; # temporary whole-file input mode $html = <FILE>; $html =~ s/<[^>]*>//gs; } #----------------------------- #<IMG SRC = "foo.gif" ALT = "A > B"> # #<!-- <A comment> --> # #<script>if (a<b && a>c)</script> # #<# Just data #> # #<![INCLUDE CDATA [ >>>>>>>>>>>> ]]> #----------------------------- #<!-- This section commented out. # <B>You can't see me!</B> #--> #----------------------------- package MyParser; use HTML::Parser; use HTML::Entities qw(decode_entities);
@ISA = qw(HTML::Parser);
sub text { my($self, $text) = @_; print decode_entities($text); }
package main; MyParser->new->parse_file(*F); #----------------------------- ($title) = ($html =~ m#<TITLE>/s*(.*?)/s*</TITLE>#is); #----------------------------- # download the following standalone program #!/usr/bin/perl # htitle - get html title from URL
die "usage: $0 url .../n" unless @ARGV; require LWP;
#----------------------------- sub template { my ($filename, $fillings) = @_; my $text; local $/; # slurp mode (undef) local *F; # create local filehandle open(F, "< $filename/0") || return; $text = <F>; # read whole file close(F); # ignore retval # replace quoted words with value in %$fillings hash $text =~ s{ %% ( .*? ) %% } { exists( $fillings->{$1} ) ? $fillings->{$1} : "" }gsex; return $text; } #----------------------------- #<!-- simple.template for internal template() function --> #<HTML><HEAD><TITLE>Report for %%username%%</TITLE></HEAD> #<BODY><H1>Report for %%username%%</H1> #%%username%% logged in %%count%% times, for a total of %%total%% minutes. #----------------------------- #<!-- fancy.template for Text::Template --> #<HTML><HEAD><TITLE>Report for {$user}</TITLE></HEAD> #<BODY><H1>Report for {$user}</H1> #{ lcfirst($user) } logged in {$count} times, for a total of #{ int($total / 60) } minutes. #----------------------------- %fields = ( username => $whats_his_name, count => $login_count, total => $minute_used, );
print template("/home/httpd/templates/simple.template", /%fields); #----------------------------- # download the following standalone program #!/usr/bin/perl -w # userrep1 - report duration of user logins using SQL database
use DBI; use CGI qw(:standard);
# template() defined as in the Solution section above $user = param("username") or die "No username";
$dbh = DBI->connect("dbi:mysql:connections:mysql.domain.com:3306", "connections", "seekritpassword") or die "Couldn't connect/n"; $sth = $dbh->prepare(<<"END_OF_SELECT") or die "Couldn't prepare SQL"; SELECT COUNT(duration),SUM(duration) FROM logins WHERE username='$user' END_OF_SELECT
# this time the duration is assumed to be in seconds if (@row = $sth->fetchrow()) { ($count, $seconds) = @row; } else { ($count, $seconds) = (0,0); }
#----------------------------- You owe: {$total} #----------------------------- The average was {$count ? ($total/$count) : 0}. #----------------------------- # download the following standalone program #!/usr/bin/perl -w # userrep2 - report duration of user logins using SQL database
use Text::Template; use DBI; use CGI qw(:standard);
$tmpl = "/home/httpd/templates/fancy.template"; $template = Text::Template->new(-type => "file", -source => $tmpl); $user = param("username") or die "No username";
$dbh = DBI->connect("dbi:mysql:connections:mysql.domain.com:3306", "connections", "secret passwd") or die "Couldn't db connect/n"; $sth = $dbh->prepare(<<"END_OF_SELECT") or die "Couldn't prepare SQL"; SELECT COUNT(duration),SUM(duration) FROM logins WHERE username='$user' END_OF_SELECT
#----------------------------- #<HTML><HEAD><TITLE>Hi!</TITLE></HEAD><BODY> #<H1>Welcome to Scooby World!</H1> #I have <A HREF="pictures.html">pictures</A> of the crazy dog #himself. Here's one!<P> #<IMG SRC="scooby.jpg" ALT="Good doggy!"><P> #<BLINK>He's my hero!</BLINK> I would like to meet him some day, #and get my picture taken with him.<P> #P.S. I am deathly ill. <A HREF="shergold.html">Please send #cards</A>. #</BODY></HTML> #----------------------------- #% htmlsub picture photo scooby.html #<HTML><HEAD><TITLE>Hi!</TITLE></HEAD><BODY> # #<H1>Welcome to Scooby World!</H1> # #I have <A HREF="pictures.html">photos</A> of the crazy dog # #himself. Here's one!<P> # #<IMG SRC="scooby.jpg" ALT="Good doggy!"><P> # #<BLINK>He's my hero!</BLINK> I would like to meet him some day, # #and get my photo taken with him.<P> # #P.S. I am deathly ill. <A HREF="shergold.html">Please send # #cards</A>. # #</BODY></HTML> #----------------------------- # download the following standalone program #!/usr/bin/perl -w # htmlsub - make substitutions in normal text of HTML files # from Gisle Aas <gisle@aas.no>
sub usage { die "Usage: $0 <from> <to> <file>.../n" }
my $from = shift or usage; my $to = shift or usage; usage unless @ARGV;
# Build the HTML::Filter subclass to do the substituting.
package MyFilter; require HTML::Filter; @ISA=qw(HTML::Filter); use HTML::Entities qw(decode_entities encode_entities);
sub text { my $self = shift; my $text = decode_entities($_[0]); $text =~ s//Q$from/$to/go; # most important line $self->SUPER::text(encode_entities($text)); }
#----------------------------- #% hrefsub shergold.html cards.html scooby.html #<HTML><HEAD><TITLE>Hi!</TITLE></HEAD><BODY> # #<H1>Welcome to Scooby World!</H1> # #I have <A HREF="pictures.html">pictures</A> of the crazy dog # #himself. Here's one!<P> # #<IMG SRC="scooby.jpg" ALT="Good doggy!"><P> # #<BLINK>He's my hero!</BLINK> I would like to meet him some day, # #and get my picture taken with him.<P> # #P.S. I am deathly ill. <a href="cards.html">Please send # #cards</A>. # #</BODY></HTML> #----------------------------- # download the following standalone program #!/usr/bin/perl -w # hrefsub - make substitutions in <A HREF="..."> _fcksavedurl=""...">" fields of HTML files # from Gisle Aas <gisle@aas.no>
sub usage { die "Usage: $0 <from> <to> <file>.../n" }
my $from = shift or usage; my $to = shift or usage; usage unless @ARGV;
# The HTML::Filter subclass to do the substitution.
package MyFilter; require HTML::Filter; @ISA=qw(HTML::Filter); use HTML::Entities qw(encode_entities);
sub start { my($self, $tag, $attr, $attrseq, $orig) = @_; if ($tag eq 'a' && exists $attr->{href}) { if ($attr->{href} =~ s//Q$from/$to/g) { # must reconstruct the start tag based on $tag and $attr. # wish we instead were told the extent of the 'href' value # in $orig. my $tmp = "<$tag"; for (@$attrseq) { my $encoded = encode_entities($attr->{$_}); $tmp .= qq( $_="$encoded "); } $tmp .= ">"; $self->output($tmp); return; } } $self->output($orig); }