public static String removeHTML(String htmlString)
{
// Remove HTML tag from java String
String noHTMLString = htmlString.replaceAll("//<.*?//>", "");
// Remove Carriage return from java String
noHTMLString = noHTMLString.replaceAll("/r", "<br/>");
// Remove New line from java string and replace html break
noHTMLString = noHTMLString.replaceAll("/n", " ");
noHTMLString = noHTMLString.replaceAll("/'", "'");
noHTMLString = noHTMLString.replaceAll("/"", """);
return noHTMLString;
}
public static void main(String[] args) {
String strHTML= "<html>"+
"<head>"+
"<title>Convert HTML to Text String</title>"+
"</head>"+
"<body>"+
"This is HTML String of java's source code /"my program/""+
"</body>"+
"</html>";
String stringWithoutHTML=removeHTML(strHTML);
System.out.println(stringWithoutHTML);
}
- public static String regEx_script = "<script[^>]*?>[//s//S]*?<///script>";
- public static String regEx_style = "<style[^>]*?>[//s//S]*?<///style>";
- public static String regEx_html = "<[^>]+>";
- public static Pattern p_style = Pattern.compile(regEx_style, Pattern.CASE_INSENSITIVE);
- public static Pattern p_script = Pattern.compile(regEx_script, Pattern.CASE_INSENSITIVE);
- public static Pattern p_html = Pattern.compile(regEx_html, Pattern.CASE_INSENSITIVE);
- public static String getOptimizedData(String inputString) {
- if (inputString == null) {
- return inputString;
- }
- //stripping script tags whether the tag contains "/n" or "/r" or not.
- Matcher m_script = p_script.matcher(inputString);
- String htmlStr = m_script.replaceAll("");
- //stripping style tags whether the tag contains "/n" or "/r" or not.
- Matcher m_style = p_style.matcher(htmlStr);
- htmlStr = m_style.replaceAll("");
- //stripping html tags but continue to have the "/n" and "/r" in right place.
- Matcher m_html = p_html.matcher(htmlStr);
- htmlStr = m_html.replaceAll("");
- return htmlStr;
- }