html转换rss,插件48:把HTML文件转换为RSS文件

// This is an executable example with additional code supplied

// To obtain just the plug-ins please click on the Download link

// Demo driver: download one web page and emit it as an RSS 2.0 feed.

$url = "http://www.mhprofessional.com/";

$html = file_get_contents($url);

// file_get_contents() returns false when the page cannot be retrieved.
// Report that to the client instead of converting `false` into a feed.
if ($html === false)
{
    header('Content-Type: text/plain', true, 502);
    exit("Unable to retrieve '$url'");
}

$title       = "RSS version of '$url'";
$description = "The website '$url' converted to an RSS feed";
$webmaster   = "nobody@nowhere.com";
$copyright   = "Translator Copyright 2009 pluginphp.com";

header('Content-Type: text/xml');

echo PIPHP_HTMLToRSS($html, $title, $description, $url,
    $webmaster, $copyright);

function PIPHP_HTMLToRSS($html, $title, $description, $url,
    $webmaster, $copyright)
{
    // Plug-in 48: HTML To RSS
    //
    // This plug-in takes a string containing a complete HTML
    // page and turns it into an RSS 2.0 document, which is
    // returned as a string. The arguments required are:
    //
    //    $html:        HTML to convert to RSS
    //    $title:       Title to use
    //    $description: Description to use
    //    $url:         URL to link to (generally same as the
    //                  HTML source)
    //    $webmaster:   Webmaster contact email address
    //    $copyright:   Copyright details
    //
    // Every heading (<h1>..<h6>) in the page starts a new <item>
    // whose <title> is the heading text; the markup that follows
    // it becomes that item's entity-escaped <description>.
    //
    // NOTE(review): the markup inside several string literals of
    // this function had been stripped from the published listing
    // (regex bodies, the allowed-tag list, the RSS template). Those
    // literals are reconstructed here from the surviving structure
    // and the RSS 2.0 element set - confirm against the book source.

    // RSS 2.0 requires RFC 822 dates with a numeric zone offset;
    // the 'e' format character yields a zone name instead.
    $date = date(DATE_RSS);
    $html = (string) $html;

    // Hide every ampersand behind a placeholder so the DOM parser
    // returns href values exactly as they are spelled in the
    // source text, which lets str_replace() find them below.
    $html = str_replace('&amp;', '&', $html);
    $html = str_replace('&', '!!**1**!!', $html);

    $links = array();

    // DOMDocument::loadHTML() rejects an empty string, so only
    // parse when there is something to parse.
    if (trim($html) !== '')
    {
        // Collect parser warnings internally rather than using @.
        $previous = libxml_use_internal_errors(true);
        $dom      = new DOMDocument();
        $dom->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        $xpath = new DOMXPath($dom);
        $hrefs = $xpath->evaluate("/html/body//a");

        if ($hrefs)
        {
            foreach ($hrefs as $anchor)
                $links[] = $anchor->getAttribute('href');
        }
    }

    $links = array_unique($links);
    sort($links);

    $to    = array();
    $count = 0;

    // Pass 1: swap each distinct href for a numbered placeholder
    // so that rewriting one link can never alter another.
    foreach ($links as $link)
    {
        if ($link != "")
        {
            $temp       = str_replace('!!**1**!!', '&', $link);
            $to[$count] = urlencode(PIPHP_RelToAbsURL($url, $temp));

            $html = str_replace("href=\"$link\"",
                "href=\"!!$count!!\"", $html);
            $html = str_replace("href='$link'",
                "href='!!$count!!'", $html);
            $html = str_replace("href=$link",
                "href=!!$count!!", $html);
            ++$count;
        }
    }

    // Pass 2: swap the placeholders for the absolute URLs.
    for ($j = 0 ; $j < $count ; ++$j)
        $html = str_replace("!!$j!!", $to[$j], $html);

    // Undo the encoding of the scheme prefix so links stay usable.
    $html = str_replace(
        array('http%3A%2F%2F', 'https%3A%2F%2F'),
        array('http://', 'https://'),
        $html);
    $html = str_replace('!!**1**!!', '&', $html);

    // Collapse whitespace first: with no newlines left, '.' in the
    // patterns below can span what used to be several lines.
    $html = preg_replace('/[\s]+/', ' ', $html);

    $strip = array(
        '/<script[^>]*>.*?<\/script>/i',
        '/<style[^>]*>.*?<\/style>/i',
    );

    foreach ($strip as $pattern)
    {
        // preg_replace() returns null on a PCRE error (e.g. the
        // backtrack limit); keep the text rather than lose it all.
        $cleaned = preg_replace($pattern, '', $html);
        if ($cleaned !== null) $html = $cleaned;
    }

    // Tags permitted to survive into the feed (reconstructed list).
    $ok  = '<a><i><b><u><s><em><strong><img><div><span><br><p>';
    $ok .= '<table><tr><th><td><ul><ol><li>';
    $ok .= '<h1><h2><h3><h4><h5><h6>';

    $html = strip_tags($html, $ok);

    // Drop attributes from heading tags so the exact-match item
    // split below also sees headings such as <h2 class="x">.
    // NOTE(review): the pattern originally at this step was not
    // recoverable (only "]*?>" survived) - confirm its intent.
    $bare = preg_replace('/<(h[1-6])\b[^>]*>/i', '<$1>', $html);
    if ($bare !== null) $html = $bare;

    // htmlspecialchars() emits only the entities XML predefines;
    // htmlentities() would emit names like &nbsp; that are
    // undefined in XML and make the feed unparseable.
    $flags = ENT_QUOTES | ENT_SUBSTITUTE;
    $html  = htmlspecialchars($html, $flags, 'UTF-8');

    // Channel fields are text nodes too. double_encode is off so
    // callers that already passed escaped text are not affected.
    $title       = htmlspecialchars($title, $flags, 'UTF-8', false);
    $description = htmlspecialchars($description, $flags, 'UTF-8', false);
    $url         = htmlspecialchars($url, $flags, 'UTF-8', false);
    $webmaster   = htmlspecialchars($webmaster, $flags, 'UTF-8', false);
    $copyright   = htmlspecialchars($copyright, $flags, 'UTF-8', false);

    // Each (escaped) heading closes the current item and opens a
    // new one. Plain string replacement is used so that '$' or
    // '\' in $url is never read as a regex back-reference.
    $itemOpen  = "</description></item>\n<item><title>";
    $itemClose = "</title><guid>$url</guid><description>";

    for ($level = 1 ; $level <= 6 ; ++$level)
    {
        $html = str_ireplace("&lt;h$level&gt;", $itemOpen, $html);
        $html = str_ireplace("&lt;/h$level&gt;", $itemClose, $html);
    }

    return <<<_END
<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"><channel>
<generator>Pluginphp.com: plug-in 48</generator>
<title>$title</title><link>$url</link>
<description>$description</description>
<language>en</language>
<webMaster>$webmaster</webMaster>
<copyright>$copyright</copyright>
<pubDate>$date</pubDate>
<lastBuildDate>$date</lastBuildDate>
<item><title>$title</title>
<guid>$url</guid>
<description>$html</description></item></channel></rss>
_END;
}

// The below function is repeated here to ensure that it's

// available to the main function which relies on it

function PIPHP_RelToAbsURL($page, $url)
{
    // Plug-in 21: Relative To Absolute URL
    //
    // This plug-in accepts the absolute URL of a web page
    // and a link featured within that page. The link is then
    // turned into an absolute URL which can be independently
    // accessed. Arguments are:
    //
    //    $page: The web page containing the URL
    //           (http:// or https://)
    //    $url:  The URL to convert to absolute
    //
    // Returns the absolute URL. $url is returned unchanged when
    // $page is not an http(s) URL, or when $url already carries
    // a scheme of its own (http:, https:, mailto:, ...).

    $parse = parse_url($page);

    if ($parse === false || !isset($parse['scheme'])
        || !isset($parse['host']))
        return $url;

    $scheme = strtolower($parse['scheme']);

    if ($scheme !== 'http' && $scheme !== 'https') return $url;

    // A link with its own scheme is already absolute. Testing
    // only for "http://" would glue the base directory onto
    // https:, mailto: and javascript: links.
    if (preg_match('/^[a-z][a-z0-9+.\-]*:/i', $url)) return $url;

    // Protocol-relative link: it inherits only the page's scheme.
    if (substr($url, 0, 2) === '//') return $scheme . ':' . $url;

    // Keep an explicit port, otherwise root-relative links on
    // e.g. http://host:8080/ would resolve to the wrong server.
    $root = $scheme . '://' . $parse['host'];
    if (isset($parse['port'])) $root .= ':' . $parse['port'];

    if (substr($url, 0, 1) === '/') return $root . $url;

    // The base directory comes from the path component alone, so
    // a '/' inside the page's query string cannot distort it.
    $path = isset($parse['path']) ? $parse['path'] : '/';
    if ($path === '' || $path[0] !== '/') $path = '/';

    $base = $root . substr($path, 0, strrpos($path, '/') + 1);

    return $base . $url;
}

?>

插件说明:

本插件接受一个HTML文档及其他相关参数,返回一个格式正确的RSS文件。它需要以下参数:

$html 需要转换的HTML文档

$title 作为RSS文件的标题

$description RSS文件说明

$url 该RSS文件链接的URL

$webmaster 网站管理员的Email地址

$copyright 版权信息

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值