使用正则表达式获取网页中 <meta name="description" content="..."> 标签的 content 内容

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;


/**
 * Small demo that extracts the {@code content} value of every
 * {@code name="description" content="..."} attribute pair found in an HTML/JSP
 * file and prints each value on its own line.
 */
public class Test8 {

    // Not referenced anywhere inside this class; kept in case other code in the
    // same package reads it.
    static int tempa = 10;

    /** File that is scanned when no path is supplied on the command line. */
    private static final String DEFAULT_PATH = "D:/2cd99ecc9a3b8f2f233cb6d9c17a86b9.jsp";

    /**
     * Matches {@code name="description" content="VALUE"} and captures VALUE only.
     * The capture stops at the closing double quote, so the quote, trailing
     * whitespace and the tag terminator are never part of the result, and a tag
     * closed with {@code >} instead of {@code />} cannot make the match run on
     * into later tags. {@code \s+} lets the two attributes be separated by any
     * whitespace, including a line break.
     */
    private static final Pattern DESCRIPTION_PATTERN = Pattern.compile(
            "name=\"description\"\\s+content=\"([^\"]*)\"",
            Pattern.CASE_INSENSITIVE);

    /**
     * Reads the input file and prints every description value it contains.
     *
     * @param args optional; {@code args[0]} is the path of the file to scan.
     *             When absent, {@link #DEFAULT_PATH} is used.
     * @throws IOException if the file exists but cannot be read
     */
    public static void main(String[] args) throws IOException {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        String htmlReport = readFileAsString(new File(path));

        for (String description : extractDescriptions(htmlReport)) {
            System.out.println(description);
        }
    }

    /**
     * Collects the {@code content} value of every
     * {@code name="description" content="..."} occurrence in the given markup.
     *
     * @param html markup to search; {@code null} is treated as empty input
     * @return the captured values in document order, never {@code null}
     */
    static List<String> extractDescriptions(String html) {
        List<String> descriptions = new ArrayList<>();
        if (html == null) {
            return descriptions;
        }

        Matcher matcher = DESCRIPTION_PATTERN.matcher(html);
        while (matcher.find()) {
            descriptions.add(matcher.group(1));
        }
        return descriptions;
    }

    /**
     * Reads a whole file as UTF-8 text. Every line is terminated with a single
     * {@code '\n'} regardless of the line separator used in the file.
     *
     * @param file file to read
     * @return the file content, or an empty string when {@code file} is
     *         {@code null} or does not exist
     * @throws IOException if reading fails
     */
    private static String readFileAsString(File file) throws IOException {
        if (file == null || !file.exists()) {
            return "";
        }

        StringBuilder content = new StringBuilder();
        // try-with-resources closes the whole reader chain even when readLine throws.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append('\n');
            }
        }
        return content.toString();
    }

}

<!doctype html> <html lang="zh-cn" xmlns="http://www.w3.org/1999/xhtml"> <head> <meta charset="utf-8"> <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no" /> <meta http-equiv="X-UA-Compatible" content="IE=edge" /> <title>汉的解释|汉的意思|汉典“汉”字的基本解释</title> <meta name="title" content="汉的解释|汉的意思|汉典“汉”字的基本解释"> <meta name="keywords" content="左:氵,右:又。" /><meta property="og:title" content="“汉”字的解释 | 汉典" /> <meta property="og:image" content="//www.zdic.net/images/logo-zh-cn.png" /> <meta property="og:description" content="“汉”字的解释,释义,异体字,音韵方言,部首笔画,康熙字典,说文解字,字源字形" /> <meta name="description" content="“汉”字的解释,释义,异体字,音韵方言,部首笔画,康熙字典,说文解字,字源字形" /> <meta property="og:url" content="//www.zdic.net/hans/汉" /> <link rel="canonical" href="//www.zdic.net/hans/汉" /> <link rel="alternate" hreflang="x-default" href="//www.zdic.net/hans/汉" /> <link rel="alternate" hreflang="zh-Hans" href="//www.zdic.net/hans/汉" /> <link rel="alternate" hreflang="zh-Hant" href="//www.zdic.net/hant/汉" /> <link rel="shortcut icon" type="image/x-icon" href="/favicon.ico" /> <meta name="apple-mobile-web-app-title" content="汉典" /> <meta name="theme-color" content="#8b411c" /> <!-- Global site tag (gtag.js) - Google Analytics --> <script async src="https://www.googletagmanager.com/gtag/js?id=UA-161009-3"></script> <script> window.dataLayer = window.dataLayer || []; function gtag(){dataLayer.push(arguments);} gtag('js', new Date()); gtag('config', 'UA-161009-3'); </script> <script async src="https://pagead2.googlesyndication.com/pagead/js/adsbygoogle.js?client=ca-pub-5789084355642416" crossorigin="anonymous"></script> <script> var _hmt = _hmt || []; (function() { var hm = document.createElement("script"); hm.src = "https://hm.baidu.com/hm.js?3df9beb4da8d56b1d05ad32a8e6cf208"; var s = document.getElementsByTagName("script")[0]; s.parentNode.insertBefore(hm, s); })(); </script> <link type="text/css" rel="stylesheet" media="screen" href="/style.css" /
03-15
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值