// This is an executable example with additional code supplied
// To obtain just the plug-ins please click on the Download link

// Demo driver: download a live web page and send it back to the
// client as an RSS feed built by PIPHP_HTMLToRSS().
$url = "http://www.mhprofessional.com/";
$title = "RSS version of '$url'";
$description = "The website '$url' converted to an RSS feed";
$webmaster = "nobody@nowhere.com";
$copyright = "Translator Copyright 2009 pluginphp.com";

// file_get_contents() returns false when the page cannot be
// retrieved. Fetch before any header is sent so that a failure can
// be reported as an error instead of as an empty XML feed.
$html = file_get_contents($url);

if ($html === false)
{
    header('HTTP/1.1 502 Bad Gateway');
    header('Content-Type: text/plain');
    exit("Unable to retrieve '$url'");
}

header('Content-Type: text/xml');
echo PIPHP_HTMLToRSS($html, $title, $description, $url,
    $webmaster, $copyright);
function PIPHP_HTMLToRSS($html, $title, $description, $url,
    $webmaster, $copyright)
{
    // Plug-in 48: HTML To RSS
    //
    // This plug-in takes a string containing a complete HTML
    // page and turns it into RSS 2.0 format which is returned.
    // Each heading (<h1> to <h7>) in the page starts a new
    // <item> whose title is the heading text; whatever precedes
    // the first heading goes into an initial item titled $title.
    // The arguments required are:
    //
    //    $html:        HTML to convert to RSS
    //    $title:       Title to use
    //    $description: Description to use
    //    $url:         URL to link to (generally same as the
    //                  HTML source)
    //    $webmaster:   Webmaster contact email address
    //    $copyright:   Copyright details
    //
    // $title, $description, $url, $webmaster and $copyright are
    // inserted into the XML exactly as supplied, so the caller
    // must pass values that are already XML-safe.
    //
    // Links found in the page are made absolute with
    // PIPHP_RelToAbsURL(), which must be available.

    // RSS 2.0 requires RFC 822 dates, which DATE_RSS produces
    $date = date(DATE_RSS);

    // Hide every ampersand behind a token so that href values
    // read back from the DOM still match the raw markup text
    $html = str_replace('&amp;', '&', $html);
    $html = str_replace('&', '!!**1**!!', $html);

    $links = array();

    // DOMDocument::loadHTML() rejects an empty string, so only
    // parse when there is something to parse
    if ($html !== '')
    {
        // Collect parser complaints about sloppy markup internally
        // instead of emitting warnings, then restore the setting
        $previous = libxml_use_internal_errors(true);
        $dom = new DOMDocument();
        $dom->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        $xpath = new DOMXPath($dom);

        foreach ($xpath->evaluate("/html/body//a") as $anchor)
            $links[] = $anchor->getAttribute('href');
    }

    // Longest-first order stops an unquoted href that is a prefix
    // of another one (href=a versus href=ab) from being rewritten
    // inside the longer link
    $links = array_unique($links);
    rsort($links, SORT_STRING);

    $to = array();
    $count = 0;

    // Pass 1: swap each distinct href for a numbered placeholder
    // and remember the absolute URL that belongs to it
    foreach ($links as $link)
    {
        if ($link !== '')
        {
            $temp = str_replace('!!**1**!!', '&', $link);

            // Escaped for use inside an HTML attribute; the URL's
            // path separators are left intact
            $to[$count] = htmlspecialchars(
                PIPHP_RelToAbsURL($url, $temp),
                ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

            $html = str_replace("href=\"$link\"",
                "href=\"!!$count!!\"", $html);
            $html = str_replace("href='$link'",
                "href='!!$count!!'", $html);
            $html = str_replace("href=$link",
                "href=!!$count!!", $html);
            ++$count;
        }
    }

    // Pass 2: replace the placeholders with the absolute URLs
    for ($j = 0 ; $j < $count ; ++$j)
        $html = str_replace("!!$j!!", $to[$j], $html);

    // Bring back the ampersands and collapse runs of whitespace
    $html = str_replace('!!**1**!!', '&', $html);
    $html = preg_replace('/[\s]+/', ' ', $html);

    // strip_tags() removes tags but keeps the text between them,
    // so script and style sections are removed wholesale first
    $html = preg_replace('/<script[^>]*>.*?<\/script>/si', '',
        $html);
    $html = preg_replace('/<style[^>]*>.*?<\/style>/si', '',
        $html);

    // Tags allowed to survive into the feed's descriptions
    $ok  = '<a><i><b><u><s><img><div><span><table><tr><th><td>';
    $ok .= '<br><p><ul><ol><li><h1><h2><h3><h4><h5><h6><h7>';
    $html = strip_tags($html, $ok);

    // Reduce every heading tag to a bare <h1> or </h1> so that
    // both can be located exactly once the markup is escaped
    $html = preg_replace('/<h[1-7][^>]*?>/i', '<h1>', $html);
    $html = preg_replace('/<\/h[1-7][^>]*?>/i', '</h1>', $html);

    // Escape only the XML-special characters: the named entities
    // that htmlentities() would emit are not defined in XML
    $html = htmlspecialchars($html, ENT_QUOTES | ENT_SUBSTITUTE,
        'UTF-8');

    // An opening heading closes the current item and starts the
    // next one; a closing heading ends the new item's title
    $html = str_replace('&lt;h1&gt;',
        "</description></item>\n<item><title>", $html);
    $html = str_replace('&lt;/h1&gt;',
        "</title><link>$url</link><description>", $html);

    return <<<_END
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"><channel>
<generator>Pluginphp.com: plug-in 48</generator>
<title>$title</title>
<link>$url</link>
<description>$description</description>
<language>en</language>
<webMaster>$webmaster</webMaster>
<copyright>$copyright</copyright>
<pubDate>$date</pubDate>
<lastBuildDate>$date</lastBuildDate>
<item><title>$title</title>
<link>$url</link>
<description>$html</description></item></channel></rss>
_END;
}
// The below function is repeated here to ensure that it's
// available to the main function which relies on it
function PIPHP_RelToAbsURL($page, $url)
{
    // Plug-in 21: Relative To Absolute URL
    //
    // This plug-in accepts the absolute URL of a web page
    // and a link featured within that page. The link is then
    // turned into an absolute URL which can be independently
    // accessed. Only http:// and https:// pages can act as the
    // base; for any other page the link is returned unchanged.
    // Arguments are:
    //
    //    $page: The web page containing the URL
    //    $url:  The URL to convert to absolute
    //
    // Returns the absolute URL as a string.

    // A page that is not http(s) gives nothing to resolve against
    if (!preg_match('#^https?://#i', $page)) return $url;

    // A link that carries its own scheme (http:, https:, mailto:,
    // javascript: and so on) is already absolute
    if (preg_match('#^[a-z][a-z0-9+.-]*:#i', $url)) return $url;

    $parse = parse_url($page);

    // parse_url() returns false for seriously malformed URLs
    if (!is_array($parse) ||
        !isset($parse['scheme'], $parse['host'])) return $url;

    // Keep a non-default port as part of the site root
    $root = $parse['scheme'] . "://" . $parse['host'];
    if (isset($parse['port'])) $root .= ":" . $parse['port'];

    // Protocol-relative link: only the page's scheme is borrowed
    if (substr($url, 0, 2) == '//')
        return $parse['scheme'] . ":" . $url;

    // Root-relative link
    if (substr($url, 0, 1) == '/') return $root . $url;

    // Document-relative link: resolve against the directory of
    // the page, taken from its path only so that a query string
    // containing slashes cannot shift the base
    $path  = isset($parse['path']) ? $parse['path'] : '/';
    $slash = strrpos($path, '/');

    if ($slash === false) $base = $root . '/';
    else $base = $root . substr($path, 0, $slash + 1);

    return $base . $url;
}
?>
插件说明:
本插件接受一个HTML文档及其他相关参数，返回一个格式正确的RSS文件。它需要以下参数：
$html 需要转换的HTML文档
$title 作为RSS文件的标题
$description RSS文件说明
$url 该RSS文件链接的URL
$webmaster 网站管理员的Email地址
$copyright 版权信息