Replacing HTML tags

// Sanitize the HTML fragment held in $str with an ordered series of regex passes.
// Input and output are both the variable $str; nothing else is read or written.
//
// The order of the passes matters: whitespace is normalised first so that later
// patterns see single spaces, paired elements are removed together with their content
// before the leftover unmatched tags are removed, and the token-level neutralisation
// runs last on whatever survived.
//
// Tag names are matched as prefixes (e.g. the "head" pass also matches "<header ...>",
// and the "frame" pass also matches "<frameset ...>").
//
// NOTE(review): a regex filter like this is not a complete XSS defence. If $str can come
// from untrusted users, prefer a dedicated HTML sanitising library and treat this as a
// best-effort pre-filter only.

// --- Whitespace normalisation ---
$str=preg_replace("/\s+/", " ", $str); // collapse every whitespace run (line breaks included) into one space
$str=preg_replace("/<[ ]+/si","<",$str); // drop spaces right after "<" so "<  tag" is handled like "<tag"

// --- Document-level markup: remove the tags, keep any enclosed content ---
$str=preg_replace("/<\!--.*?-->/si","",$str); // HTML comments
$str=preg_replace("/<(\!.*?)>/si","",$str); // remaining "<!...>" declarations such as DOCTYPE
$str=preg_replace("/<(\/?html.*?)>/si","",$str); // html open/close tags
$str=preg_replace("/<(\/?head.*?)>/si","",$str); // head open/close tags
$str=preg_replace("/<(\/?meta.*?)>/si","",$str); // meta tags
$str=preg_replace("/<(\/?body.*?)>/si","",$str); // body open/close tags
$str=preg_replace("/<(\/?link.*?)>/si","",$str); // link tags
$str=preg_replace("/<(\/?form.*?)>/si","",$str); // form open/close tags
// Upper-case the word "cookie" everywhere. JavaScript property names are case-sensitive,
// so script text that refers to ".cookie" no longer names the real property.
$str=preg_replace("/cookie/si","COOKIE",$str);

// --- Paired elements: first remove the element with its content, then any unmatched tag ---
$str=preg_replace("/<(applet.*?)>(.*?)<(\/applet.*?)>/si","",$str); // applet element and content
$str=preg_replace("/<(\/?applet.*?)>/si","",$str); // stray applet tags

$str=preg_replace("/<(style.*?)>(.*?)<(\/style.*?)>/si","",$str); // style element and content
$str=preg_replace("/<(\/?style.*?)>/si","",$str); // stray style tags

$str=preg_replace("/<(title.*?)>(.*?)<(\/title.*?)>/si","",$str); // title element and content
$str=preg_replace("/<(\/?title.*?)>/si","",$str); // stray title tags

$str=preg_replace("/<(object.*?)>(.*?)<(\/object.*?)>/si","",$str); // object element and content
$str=preg_replace("/<(\/?object.*?)>/si","",$str); // stray object tags (pattern previously misspelled "objec")

$str=preg_replace("/<(noframes.*?)>(.*?)<(\/noframes.*?)>/si","",$str); // noframes element and content
$str=preg_replace("/<(\/?noframes.*?)>/si","",$str); // stray noframes tags

$str=preg_replace("/<(i?frame.*?)>(.*?)<(\/i?frame.*?)>/si","",$str); // frame/iframe element and content
$str=preg_replace("/<(\/?i?frame.*?)>/si","",$str); // stray frame/iframe tags

$str=preg_replace("/<(script.*?)>(.*?)<(\/script.*?)>/si","",$str); // script element and content
$str=preg_replace("/<(\/?script.*?)>/si","",$str); // stray script tags

// --- Token-level neutralisation of script vectors that live inside other tags ---
// URL schemes and HTML attribute names are case-insensitive, so changing only the letter
// case ("Javascript", "Onclick=") leaves them executable. Break the token instead.
$str=preg_replace("/javascript/si","java_script",$str); // "javascript:" URLs stop being a script scheme
$str=preg_replace("/vbscript/si","vb_script",$str); // same for "vbscript:" URLs
$str=preg_replace("/on([a-z]+)\s*=/si","on_\\1=",$str); // onclick=, onerror=, ... become inert "on_click=" etc.
// Escape the "&" of numeric character references so encoded payloads such as
// "&#106;avascript:" are shown literally instead of being decoded by the browser.
// (The previous replacement text was identical to the pattern and changed nothing.)
$str=preg_replace("/&#/si","&amp;#",$str);

 

// 修复HTML文档使其符合XML规范: QString TriopticsParser::preprocessHtmlForXml(const QString & html) { QString result = html; int pos = 0; QRegularExpressionMatch match; while ((pos = result.indexOf(tagRegex, pos, &match)) != -1) { QString fullTag = match.captured(0).toLower(); QString slash = match.captured(1).toLower(); QString tagName = match.captured(2).toLower(); // 转换为小写 QString attributes = match.captured(3).toLower(); QString newTag = "<" + slash + tagName + attributes + ">"; result.replace(pos, fullTag.length(), newTag); pos += newTag.length(); } // 移除<html>标签前面的申明<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> int index = result.indexOf("<html>"); result = result.remove(0, index); // 1. 修复字符集声明 result.replace("charset=\n=iso-8859-1", "charset=\"iso-8859-1\""); // 2. 转义URL中的&符号 result.replace(QRegularExpression("href=\"([^\"]*?)(?<!&|<|>|&apos;|")&([^\"]*?)\""), "href=\"\\1&\\2\""); // 3. 修复自闭合标签 QStringList selfClosingTags = { "img", "br", "hr", "meta", "link" }; for (const QString& tag : selfClosingTags) { // 处理有属性的标签 result.replace(QRegularExpression(QString("<%1\\b([^>]*)>").arg(tag), QRegularExpression::CaseInsensitiveOption), QString("<%1\\1/>").arg(tag)); // 处理没有属性的标签 result.replace(QRegularExpression(QString("<%1\\s*>").arg(tag), QRegularExpression::CaseInsensitiveOption), QString("<%1/>").arg(tag)); } // 4. 移除无效的</link>结束标签 result.replace("</link>", ""); // 5. 添加XML声明 if (!result.contains("<?xml")) { result.prepend("<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n"); } return result; }优化速率
07-18
doDiff(leftText, rightText) { var unicodeCharacter = String.fromCharCode(this.unicodeRangeStart + this.mapLength) this.tagMap[' '] = unicodeCharacter this.tagMap[unicodeCharacter] = ' ' this.mapLength++ var diffableLeft = this.convertHtmlToDiffableString(leftText) var diffableRight = this.convertHtmlToDiffableString(rightText) var diffs = this.dmp.diff_main(diffableLeft, diffableRight) this.dmp.diff_cleanupSemantic(diffs) var diffOutput = '' for (var x = 0; x < diffs.length; x++) { diffs[x][1] = this.insertTagsForOperation(diffs[x][1], diffs[x][0]) diffOutput += this.convertDiffableBackToHtml(diffs[x][1]) } return diffOutput }, convertHtmlToDiffableString(htmlString) { var diffableString = '' if (htmlString === null || htmlString.length === 0) { return diffableString } htmlString = htmlString.replace(/ /g, this.tagMap[' ']) var offset = 0 while (offset < htmlString.length) { var tagStart = htmlString.indexOf('<', offset) if (tagStart < 0) { diffableString += htmlString.substr(offset) break } else { var tagEnd = htmlString.indexOf('>', tagStart) if (tagEnd < 0) { console.log('Invalid HTML. 
String will be truncated.') diffableString += htmlString.substr(offset, tagStart - offset) break } var tagString = htmlString.substr(tagStart, tagEnd + 1 - tagStart) var unicodeCharacter = this.tagMap[tagString] if (unicodeCharacter === undefined) { unicodeCharacter = String.fromCharCode(this.unicodeRangeStart + this.mapLength) this.tagMap[tagString] = unicodeCharacter this.tagMap[unicodeCharacter] = tagString this.mapLength++ } diffableString += htmlString.substr(offset, tagStart - offset) diffableString += unicodeCharacter offset = tagEnd + 1 } } return diffableString }, insertTagsForOperation(diffableString, operation) { var n = -1 do { n++ } while (diffableString.charCodeAt(n) >= this.unicodeRangeStart + 1) if (n >= diffableString.length) { return diffableString } var openTag = '' var closeTag = '' if (operation === 1) { openTag = '<ins style="color: green;">' closeTag = '</ins>' } else if (operation === -1) { openTag = '<del style="color: red;">' closeTag = '</del>' } else { return diffableString } var outputString = openTag var isOpen = true for (var x = 0; x < diffableString.length; x++) { if (diffableString.charCodeAt(x) < this.unicodeRangeStart) { if (!isOpen) { outputString += openTag isOpen = true } outputString += diffableString[x] } else { if (isOpen) { outputString += closeTag isOpen = false } outputString += diffableString[x] } } if (isOpen) outputString += closeTag return outputString }, convertDiffableBackToHtml(diffableString) { var htmlString = '' for (var x = 0; x < diffableString.length; x++) { var charCode = diffableString.charCodeAt(x) if (charCode < this.unicodeRangeStart) { htmlString += diffableString[x] continue } var tagString = this.tagMap[diffableString[x]] if (tagString === undefined) { htmlString += diffableString[x] } else { htmlString += tagString } } return htmlString }非常详细解释下这块代码
09-24
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值