插件73:读取wiki页面

<?php // Plug-in 73: Fetch Wiki Page
/*
 * Plug-in description:
 * Fetch a wiki page.
 * The plug-in takes the title of a wiki article and returns the text
 * content of that article. It is documented to return false when the
 * fetch fails -- NOTE(review): confirm this against the implementation
 * of PIPHP_FetchWikiPage().
 * It requires the following argument:
 * $entry  The title of the wiki article
 */
// This is an executable example with additional code supplied
// To obtain just the plug-ins please click on the Download link

echo '<html><head><meta http-equiv="Content-Type" ' .
     'content="text/html; charset=utf-8" /></head><body>';
echo '<font face="Verdana" size="2">';
echo PIPHP_FetchWikiPage('Climate Change');

// Close the elements opened above so the demo emits a complete document
echo '</font></body></html>';

function PIPHP_FetchWikiPage($entry)
{
   // Plug-in 73: Fetch Wiki Page
   //
   // This plug-in downloads the Special:Export XML of the English
   // Wikipedia entry for the term $entry, follows #REDIRECT pages,
   // cuts the text off at the first trailing section (References,
   // See also, ...), converts common wiki markup into simple HTML
   // and appends a link back to the source article.
   // It requires the following argument:
   //
   //    $entry: The entry to fetch (eg: 'bread')
   //
   // It returns the resulting HTML string, or false if the page
   // cannot be fetched or parsed, contains no text, or sits behind
   // more redirects than $maxhops allows.

   $agent = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB; ' .
            'rv:1.9.1) Gecko/20090624 Firefox/3.5 (.NET CLR ' .
            '3.5.30729)';
   $maxhops  = 5;     // Upper bound on redirects so the loop must end
   $resolved = false; // Becomes true once a non-redirect page is read
   $title    = '';
   $text     = '';

   for ($hop = 0 ; $hop <= $maxhops && !$resolved ; ++$hop)
   {
      // Encode into a separate URL each pass; $entry itself is only
      // ever replaced by a redirect target, never re-encoded
      $url  = 'http://en.wikipedia.org/wiki/Special:Export/' .
              rawurlencode($entry);
      $page = PIPHP_CurlGetContents($url, $agent);

      if (!is_string($page) || $page === '') return false;

      // Collect XML parse errors internally rather than emitting
      // warnings into the output, then restore the previous setting
      $previous = libxml_use_internal_errors(true);
      $xml      = simplexml_load_string($page);
      libxml_clear_errors();
      libxml_use_internal_errors($previous);

      // An export without a page/revision/text element carries no
      // article (for example when the title does not exist)
      if ($xml === false || !isset($xml->page->revision->text))
         return false;

      $title = (string) $xml->page->title;
      $text  = (string) $xml->page->revision->text;

      // Wiki redirects may be written in any letter case
      if (strncasecmp($text, '#REDIRECT', 9) === 0)
      {
         // Take the link target only: stop at ']' (end of link),
         // '|' (label) or '#' (section anchor, which is not part of
         // the page title and cannot be exported)
         if (!preg_match('/\[\[\s*([^\]\|#]+)/', $text, $matches))
            return false;

         $entry = trim($matches[1]);
         if ($entry === '') return false;
      }
      else $resolved = true;
   }

   if (!$resolved || trim($text) === '') return false;

   $sections = array('References', 'See also', 'External links',
      'Notes', 'Further reading');

   foreach ($sections as $section)
   {
      foreach (array("==$section==", "== $section ==") as $heading)
      {
         // A heading that is absent (false) or at offset 0 leaves
         // the text untouched, so the article is never emptied here
         $ptr = stripos($text, $heading);
         if ($ptr) $text = substr($text, 0, $ptr);
      }
   }

   // Flat list of pattern, replacement pairs applied in this order
   $data = array('\[{2}Imag(\[{2})*.*(\]{2})*\]{2}', '',
                 '\[{2}File(\[{2})*.*(\]{2})*\]{2}', '',
                 '\[{2}Cate(\[{2})*.*(\]{2})*\]{2}', '',
                 '\{{2}([^\{\}]+|(?R))*\}{2}',       '',
                 '\'{3}(.*?)\'{3}',         '<b>$1</b>',
                 '\'{2}(.*?)\'{2}',         '<i>$1</i>',
                 '\[{2}[^\|\]]+\|([^\]]*)\]{2}',   '$1',
                 '\[{2}(.*?)\]{2}',                '$1',
                 '\[(http[^\]]+)\]',                ' ',
                 '\n(\*|#)+',   '<br /> ● ',
                 '\n:.*?\n',                         '', 
                 '\n\{[^\}]+\}',                     '',
                 '\n={7}([^=]+)={7}',     '<h7>$1</h7>',
                 '\n={6}([^=]+)={6}',     '<h6>$1</h6>',
                 '\n={5}([^=]+)={5}',     '<h5>$1</h5>',
                 '\n={4}([^=]+)={4}',     '<h4>$1</h4>',
                 '\n={3}([^=]+)={3}',     '<h3>$1</h3>',
                 '\n={2}([^=]+)={2}',     '<h2>$1</h2>',
                 '\n={1}([^=]+)={1}',     '<h1>$1</h1>',
                 '\n{2}',                         '<p>',
                 '<gallery>([^<]+?)<\/gallery>',     '',
                 '<ref>([^<]+?)<\/ref>',             '',
                 '<ref [^>]+>',                      '');

   $count = count($data);

   for ($j = 0 ; $j < $count ; $j += 2)
   {
      // preg_replace() returns null when PCRE gives up (eg: the
      // recursive template rule exceeding its limits on a long
      // article); skip that rule instead of discarding the text
      $result = preg_replace('/' . $data[$j] . '/', $data[$j+1], $text);
      if ($result !== null) $text = $result;
   }

   $text  = strip_tags($text, '<h1><h2><h3><h4><h5><h6><h7>' .
                              '<p><br><b><i>');

   // Titles may contain spaces, apostrophes or markup characters:
   // encode the title for the URL and escape it for the link text
   // so neither can break out of the single-quoted href or the HTML
   $url   = 'http://en.wikipedia.org/wiki/' .
            rawurlencode(str_replace(' ', '_', $title));
   $label = htmlspecialchars($title, ENT_QUOTES, 'UTF-8');
   $text .= "<p>Source: <a href='$url'>Wikipedia ($label)</a>";
   return trim($text);
}

function PIPHP_CurlGetContents($url, $agent)
{
   // Plug-in 72: Curl Get Contents
   //
   // This plug-in retrieves a URL through the cURL extension,
   // presenting the supplied User Agent string, for pages that
   // may refuse a plain file_get_contents() request.
   // It requires the following arguments:
   //
   //    $url:   The URL of the page to fetch
   //    $agent: A typical browser User Agent string
   //
   // It returns whatever curl_exec() hands back for the transfer.

   // Transfer options, applied to the handle in the order listed
   $settings = array(
      CURLOPT_URL            => $url,
      CURLOPT_USERAGENT      => $agent,
      CURLOPT_HEADER         => 0,      // Body only, no headers
      CURLOPT_ENCODING       => "gzip",
      CURLOPT_RETURNTRANSFER => 1,      // Return data, don't print
      CURLOPT_FOLLOWLOCATION => 1,
      CURLOPT_FAILONERROR    => 1,
      CURLOPT_CONNECTTIMEOUT => 8,
      CURLOPT_TIMEOUT        => 8
   );

   $handle = curl_init();

   foreach ($settings as $option => $value)
      curl_setopt($handle, $option, $value);

   $response = curl_exec($handle);
   curl_close($handle);

   return $response;
}

?>

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值