<?php // Plug-in 73: Fetch Wiki Page
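/*
* Plug-in description:
* Fetch a wiki page.
* The plug-in accepts the title of a wiki article and returns the text content of that article. If the fetch fails, it returns false.
* It requires the following argument:
* $entry The title of the wiki article
*/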
// This is an executable example with additional code supplied
// To obtain just the plug-ins please click on the Download link
// Demo page: emit a minimal UTF-8 HTML document, render the article
// (or a short notice when nothing usable came back), then close every
// element that was opened so the document is well formed.
echo '<html><head><meta http-equiv="Content-Type" ' .
     'content="text/html; charset=utf-8" /></head><body>';
echo '<font face="Verdana" size="2">';

$article = PIPHP_FetchWikiPage('Climate Change');

if (is_string($article) && $article !== '')
{
    echo $article;
}
else
{
    // A non-string or empty result means there is no article to show
    echo 'Sorry, the article could not be fetched.';
}

echo '</font></body></html>';
function PIPHP_FetchWikiPage($entry)
{
    // Plug-in 73: Fetch Wiki Page
    //
    // This plug-in fetches the Special:Export XML of the English
    // Wikipedia entry for the term $entry, follows #REDIRECT pages,
    // strips most wiki markup and returns the article body as simple
    // HTML followed by a "Source" link.
    // It requires the following argument:
    //
    //    $entry: The entry to fetch (eg: 'bread')
    //
    // Returns the HTML string, or false when the page cannot be
    // fetched or parsed, contains no text, or redirects too often.

    $agent = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB; ' .
             'rv:1.9.1) Gecko/20090624 Firefox/3.5 (.NET CLR ' .
             '3.5.30729)';

    // Cap on chained redirects so a redirect cycle cannot loop forever
    $redirectsLeft = 5;
    $title         = '';
    $text          = '';

    while (true)
    {
        $url  = 'https://en.wikipedia.org/wiki/Special:Export/' .
                rawurlencode($entry);
        $page = PIPHP_CurlGetContents($url, $agent);

        // A failed transfer used to leave $text empty and spin forever
        if (!is_string($page) || $page === '') return false;

        // Parse quietly: malformed XML should yield false, not warnings
        // printed into the middle of the caller's page
        $previous = libxml_use_internal_errors(true);
        $xml      = simplexml_load_string($page);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        // No <page><revision><text> element means there is no article
        if ($xml === false || !isset($xml->page->revision->text))
            return false;

        // Cast explicitly; the SimpleXML nodes are objects, not strings
        $title = (string) $xml->page->title;
        $text  = (string) $xml->page->revision->text;

        if ($text === '') return false;

        // The redirect keyword is matched case-insensitively
        if (strncasecmp($text, '#REDIRECT', 9) !== 0) break;

        if ($redirectsLeft-- <= 0) return false;

        // Take only the target title: stop at ']', a '|label' or a
        // '#section' anchor
        if (!preg_match('/\[\[([^\]|#]+)/', $text, $matches))
            return false;

        $entry = trim($matches[1]);
    }

    // Cut the article at the first trailing boilerplate section
    $sections = array('References', 'See also', 'External links',
                      'Notes', 'Further reading');

    foreach ($sections as $section)
    {
        foreach (array("==$section==", "== $section ==") as $heading)
        {
            $ptr = stripos($text, $heading);

            // Position 0 is deliberately ignored (as before) so the
            // whole article is never discarded
            if ($ptr !== false && $ptr > 0)
                $text = substr($text, 0, $ptr);
        }
    }

    // Wiki markup => HTML rewrite rules, applied in this order
    $rules = array(
        '\[{2}Imag(\[{2})*.*(\]{2})*\]{2}' => '',
        '\[{2}File(\[{2})*.*(\]{2})*\]{2}' => '',
        '\[{2}Cate(\[{2})*.*(\]{2})*\]{2}' => '',
        '\{{2}([^\{\}]+|(?R))*\}{2}'       => '',
        '\'{3}(.*?)\'{3}'                  => '<b>$1</b>',
        '\'{2}(.*?)\'{2}'                  => '<i>$1</i>',
        '\[{2}[^\|\]]+\|([^\]]*)\]{2}'     => '$1',
        '\[{2}(.*?)\]{2}'                  => '$1',
        '\[(http[^\]]+)\]'                 => ' ',
        '\n(\*|#)+'                        => '<br /> ● ',
        '\n:.*?\n'                         => '',
        '\n\{[^\}]+\}'                     => '',
        '\n={7}([^=]+)={7}'                => '<h7>$1</h7>',
        '\n={6}([^=]+)={6}'                => '<h6>$1</h6>',
        '\n={5}([^=]+)={5}'                => '<h5>$1</h5>',
        '\n={4}([^=]+)={4}'                => '<h4>$1</h4>',
        '\n={3}([^=]+)={3}'                => '<h3>$1</h3>',
        '\n={2}([^=]+)={2}'                => '<h2>$1</h2>',
        '\n={1}([^=]+)={1}'                => '<h1>$1</h1>',
        '\n{2}'                            => '<p>',
        '<gallery>([^<]+?)<\/gallery>'     => '',
        '<ref>([^<]+?)<\/ref>'             => '',
        '<ref [^>]+>'                      => '',
    );

    foreach ($rules as $pattern => $replacement)
    {
        $result = preg_replace("/$pattern/", $replacement, $text);

        // preg_replace() returns null on a PCRE error (for example the
        // recursive template rule exceeding the backtrack limit); keep
        // the text as it was rather than wiping the whole article
        if ($result !== null) $text = $result;
    }

    $text = strip_tags($text, '<h1><h2><h3><h4><h5><h6><h7>' .
                              '<p><br><b><i>');

    // Encode the title for the URL and escape it for the link text so
    // quotes or markup characters in a title cannot break the anchor
    $url       = 'https://en.wikipedia.org/wiki/' .
                 rawurlencode(str_replace(' ', '_', $title));
    $safeTitle = htmlspecialchars($title, ENT_QUOTES, 'UTF-8');
    $text     .= "<p>Source: <a href='$url'>Wikipedia ($safeTitle)</a>";

    return trim($text);
}
function PIPHP_CurlGetContents($url, $agent)
{
    // Plug-in 72: Curl Get Contents
    //
    // Retrieves a page through cURL while presenting a browser
    // User Agent, which helps with pages that may otherwise be
    // forbidden when using file_get_contents().
    // It requires the following arguments:
    //
    //    $url:   The URL of the page to fetch
    //    $agent: A typical browser User Agent string
    //
    // Returns whatever curl_exec() returns: with
    // CURLOPT_RETURNTRANSFER set that is the response body on
    // success, or false on failure.

    // Transfer settings, applied one by one in the order listed
    $settings = array(
        CURLOPT_URL            => $url,
        CURLOPT_USERAGENT      => $agent,
        CURLOPT_HEADER         => 0,      // body only, no headers
        CURLOPT_ENCODING       => "gzip", // request gzip encoding
        CURLOPT_RETURNTRANSFER => 1,      // return the body, don't print
        CURLOPT_FOLLOWLOCATION => 1,      // follow HTTP redirects
        CURLOPT_FAILONERROR    => 1,      // HTTP errors count as failure
        CURLOPT_CONNECTTIMEOUT => 8,      // seconds allowed to connect
        CURLOPT_TIMEOUT        => 8,      // seconds for the whole transfer
    );

    $handle = curl_init();

    foreach ($settings as $option => $value)
    {
        curl_setopt($handle, $option, $value);
    }

    $response = curl_exec($handle);
    curl_close($handle);

    return $response;
}
?>