Replacing HTML tags

// Clean up the HTML held in $str, in place, by running it through an ordered
// series of regex rewrites. The whitespace normalisation comes first because
// the later tag patterns expect "<tag" with no gap after '<'.
//
// NOTE(review): this is a deny-list filter. Re-casing keywords (javascript ->
// Javascript, onclick -> Onclick) does not stop a browser from honouring them,
// since scheme and attribute names are matched case-insensitively. Do not rely
// on this block alone for untrusted input; confirm how the result is used.

// Collapse every run of whitespace (line breaks included) into a single space.
$str = preg_replace('/\s+/', ' ', $str);
// Drop spaces directly after '<' so "<  script" is treated like "<script".
$str = preg_replace('/<[ ]+/si', '<', $str);

// Remove comments first, then any remaining "<!...>" declaration (e.g. DOCTYPE).
$str = preg_replace('/<\!--.*?-->/si', '', $str);
$str = preg_replace('/<\!.*?>/si', '', $str);

// Strip these tags (opening and closing) but keep whatever is between them.
// The \b after the tag name keeps longer names such as <header> or <metadata>
// from being removed by the "head" / "meta" patterns.
$str = preg_replace(array(
    '/<\/?html\b.*?>/si',
    '/<\/?head\b.*?>/si',
    '/<\/?meta\b.*?>/si',
    '/<\/?body\b.*?>/si',
    '/<\/?link\b.*?>/si',
    '/<\/?form\b.*?>/si',
), '', $str);

// Upper-case every occurrence of the word "cookie", whatever its original case.
$str = preg_replace('/cookie/si', 'COOKIE', $str);

// Remove these elements together with their content. The second pattern of
// each pair sweeps up any unmatched opening or closing tag left behind.
// Patterns are applied in the order listed.
$str = preg_replace(array(
    '/<applet\b.*?>.*?<\/applet\b.*?>/si',
    '/<\/?applet\b.*?>/si',

    '/<style\b.*?>.*?<\/style\b.*?>/si',
    '/<\/?style\b.*?>/si',

    '/<title\b.*?>.*?<\/title\b.*?>/si',
    '/<\/?title\b.*?>/si',

    '/<object\b.*?>.*?<\/object\b.*?>/si',
    '/<\/?object\b.*?>/si',

    '/<noframes\b.*?>.*?<\/noframes\b.*?>/si',
    '/<\/?noframes\b.*?>/si',

    // Left without \b so that <frameset> keeps matching, as it did before.
    '/<i?frame.*?>.*?<\/i?frame.*?>/si',
    '/<\/?i?frame.*?>/si',

    '/<script\b.*?>.*?<\/script\b.*?>/si',
    '/<\/?script\b.*?>/si',
), '', $str);

// Re-case script-related keywords (see the NOTE(review) at the top).
$str = preg_replace('/javascript/si', 'Javascript', $str);
$str = preg_replace('/vbscript/si', 'Vbscript', $str);
// Rewrite "on<name> =" to "On<name>=". The leading \b limits this to names that
// start with "on", so attributes like font= or controls= are left untouched.
$str = preg_replace('/\bon([a-z]+)\s*=/si', 'On\\1=', $str);
// Escape the ampersand of numeric character references ("&#...") so they are
// shown literally instead of being decoded, e.g. inside an encoded scheme name.
$str = preg_replace('/&#/', '&amp;#', $str);

 

// 修复HTML文档使其符合XML规范:
QString TriopticsParser::preprocessHtmlForXml(const QString & html) {
    QString result = html;
    int pos = 0;
    QRegularExpressionMatch match;
    while ((pos = result.indexOf(tagRegex, pos, &match)) != -1) {
        QString fullTag = match.captured(0).toLower();
        QString slash = match.captured(1).toLower();
        QString tagName = match.captured(2).toLower(); // 转换为小写
        QString attributes = match.captured(3).toLower();
        QString newTag = "<" + slash + tagName + attributes + ">";
        result.replace(pos, fullTag.length(), newTag);
        pos += newTag.length();
    }
    // 移除<html>标签前面的申明<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
    int index = result.indexOf("<html>");
    result = result.remove(0, index);
    // 1. 修复字符集声明
    result.replace("charset=\n=iso-8859-1", "charset=\"iso-8859-1\"");
    // 2. 转义URL中的&符号
    result.replace(QRegularExpression("href=\"([^\"]*?)(?<!&amp;|&lt;|&gt;|&apos;|&quot;)&([^\"]*?)\""), "href=\"\\1&amp;\\2\"");
    // 3. 修复自闭合标签
    QStringList selfClosingTags = { "img", "br", "hr", "meta", "link" };
    for (const QString& tag : selfClosingTags) {
        // 处理有属性的标签
        result.replace(QRegularExpression(QString("<%1\\b([^>]*)>").arg(tag), QRegularExpression::CaseInsensitiveOption), QString("<%1\\1/>").arg(tag));
        // 处理没有属性的标签
        result.replace(QRegularExpression(QString("<%1\\s*>").arg(tag), QRegularExpression::CaseInsensitiveOption), QString("<%1/>").arg(tag));
    }
    // 4. 移除无效的</link>结束标签
    result.replace("</link>", "");
    // 5. 添加XML声明
    if (!result.contains("<?xml")) {
        result.prepend("<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n");
    }
    return result;
}
优化速率
07-18
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值