// Utility methods for removing HTML tags from a string.
public static String HtmlRemoveTag(String html) {
if (html == null)
return null
String htmlStr = html
String textStr = ""
java.util.regex.Pattern p_script
java.util.regex.Matcher m_script
java.util.regex.Pattern p_style
java.util.regex.Matcher m_style
java.util.regex.Pattern p_html
java.util.regex.Matcher m_html
try {
String regEx_script = "<[\\s]*?script[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?script[\\s]*?>"
// }
String regEx_style = "<[\\s]*?style[^>]*?>[\\s\\S]*?<[\\s]*?\\/[\\s]*?style[\\s]*?>"
// }
String regEx_html = "<[^>]+>"
p_script = Pattern.compile(regEx_script, Pattern.CASE_INSENSITIVE)
m_script = p_script.matcher(htmlStr)
htmlStr = m_script.replaceAll("")
p_style = Pattern.compile(regEx_style, Pattern.CASE_INSENSITIVE)
m_style = p_style.matcher(htmlStr)
htmlStr = m_style.replaceAll("")
p_html = Pattern.compile(regEx_html, Pattern.CASE_INSENSITIVE)
m_html = p_html.matcher(htmlStr)
htmlStr = m_html.replaceAll("")
textStr = htmlStr
} catch (Exception e) {
// System.err.println("Html2Text: " + e.getMessage())
}
return textStr.replaceAll("\\s*", "")
}
private static final String regEx_script = "<script[^>]*?>[\\s\\S]*?<\\/script>";
private static final String regEx_style = "<style[^>]*?>[\\s\\S]*?<\\/style>";
private static final String regEx_html = "<[^>]+>";
private static final String regEx_space = "\\s*|\t|\r|\n";
private static final String regEx_w = "<w[^>]*?>[\\s\\S]*?<\\/w[^>]*?>";

// Patterns are immutable and thread-safe, so they are compiled once here
// instead of on every call to delHTMLTag.
private static final Pattern W_ELEMENT_PATTERN = Pattern.compile(regEx_w, Pattern.CASE_INSENSITIVE);
private static final Pattern SCRIPT_ELEMENT_PATTERN = Pattern.compile(regEx_script, Pattern.CASE_INSENSITIVE);
private static final Pattern STYLE_ELEMENT_PATTERN = Pattern.compile(regEx_style, Pattern.CASE_INSENSITIVE);
private static final Pattern HTML_TAG_PATTERN = Pattern.compile(regEx_html, Pattern.CASE_INSENSITIVE);
private static final Pattern SPACE_PATTERN = Pattern.compile(regEx_space, Pattern.CASE_INSENSITIVE);

/**
 * Removes HTML markup and whitespace from the given string.
 *
 * <p>Processing order: elements whose tag begins with {@code w} (presumably
 * MS Word markup such as {@code <w:WordDocument>} - confirm against callers),
 * script elements, style elements, every remaining tag, and finally all
 * whitespace. Element matching is case-insensitive and removes the element
 * content together with its tags.
 *
 * @param htmlStr the HTML text to clean; may be {@code null}
 * @return the text with markup and whitespace removed, or {@code null} when
 *         {@code htmlStr} is {@code null}
 * @author LongJin
 */
public static String delHTMLTag(String htmlStr) {
    if (htmlStr == null) {
        // Pattern.matcher(null) would throw NullPointerException.
        return null;
    }
    String text = W_ELEMENT_PATTERN.matcher(htmlStr).replaceAll("");
    text = SCRIPT_ELEMENT_PATTERN.matcher(text).replaceAll("");
    text = STYLE_ELEMENT_PATTERN.matcher(text).replaceAll("");
    text = HTML_TAG_PATTERN.matcher(text).replaceAll("");
    text = SPACE_PATTERN.matcher(text).replaceAll("");
    // NOTE(review): whitespace has already been stripped above, so this is a
    // no-op for an ASCII space; it may originally have targeted "&nbsp;" - confirm.
    text = text.replace(" ", "");
    return text.trim();
}