Crawler 项目：基于 Lucene 的全文索引与查询

package com.zking.cucu;

import java.io.IOException;
import java.nio.file.Paths;
import java.sql.SQLException;
import java.util.List;
import java.util.Map;

import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import com.zking.dao.BlogDao;
import com.zking.util.PropertiesUtil;

/**
 * 构建lucene
 * @author Administrator
 *
 */
public class IndexStrat {
       private static BlogDao blogDao=new BlogDao();
       public static void main(String[] args) {
    	 //写在分词器中
		 IndexWriterConfig conf=new IndexWriterConfig(new SmartChineseAnalyzer());
	     //保存路径
		 Directory d;
		 //写流
	     IndexWriter indexWriter=null;
		try {
			d = FSDirectory.open(Paths.get(PropertiesUtil.getValue("indexPath")));
			indexWriter=new IndexWriter(d, conf);
			//开始写这些
			List<Map<String, Object>> list = blogDao.list(null, null);
			for (Map<String, Object> map : list) {
				//每个人作为一个单独的数据写在文档里
				Document doc=new Document();
				doc.add(new StringField("id", (String) map.get("id"), Field.Store.YES));
				//用于对一句话进行分词处理
				doc.add(new TextField("title", (String) map.get("title"), Field.Store.YES));
				doc.add(new StringField("url", (String) map.get("url"), Field.Store.YES));
				indexWriter.addDocument(doc);
			}
		} catch (IOException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} catch (InstantiationException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} catch (IllegalAccessException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} catch (SQLException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}finally {
			if(indexWriter!=null) {
				try {
					indexWriter.close();
				} catch (IOException e) {
					// TODO Auto-generated catch block
					e.printStackTrace();
				}
			}
		}
      }
}

然后读取我们刚才写入的索引文件，通过用户输入的关键字进行查询：

// Check whether a search keyword was supplied by the request.
if(StringUtils.isBlank(title)) {
				// No keyword: fall back to a plain database listing.
				List<Map<String, Object>> blogList = this.blogDao.list(title, null);
				request.setAttribute("blogList", blogList);
			}else {
				// Analyzer used both for parsing the query and for highlighting.
				SmartChineseAnalyzer analyzer=new SmartChineseAnalyzer();
				// Open the index directory for reading (analogous to an input stream).
				// NOTE(review): indexReader is never closed in this snippet —
				// potential resource leak; confirm the enclosing method closes it.
				IndexReader indexReader=DirectoryReader.open(FSDirectory.open(Paths.get(PropertiesUtil.getValue("indexPath"))));
				// Searcher that runs queries against the opened index.
				IndexSearcher searcher=new IndexSearcher(indexReader);
				// Parse the keyword against the "title" field; fetch the top 100 hits.
				Query query= new QueryParser("title", analyzer).parse(title);
				TopDocs tsearch = searcher.search(query, 100);
				
				// Highlight the matched keyword fragments in the results.
				QueryScorer queryScorer=new QueryScorer(query);
				// HTML wrapper applied around each matched term.
				Formatter formatter=new SimpleHTMLFormatter("<span style='color:red;'><b>", "</b></span>");
				Highlighter highlight=new Highlighter(formatter, queryScorer);
				
				List<Map<String, Object>> blogList =new ArrayList<Map<String,Object>>();
				Map<String, Object> map=null;
				ScoreDoc[] scoreDocs = tsearch.scoreDocs;
			    for (ScoreDoc scoreDoc : scoreDocs) {
			    	map=new HashMap<String, Object>(); 
			    	// Resolve the hit into a stored document.
			    	Document doc = searcher.doc(scoreDoc.doc);
			    	map.put("id", doc.get("id"));
			    	// Replace the stored title with a highlighted fragment when possible.
			    	String titleHighlighter = doc.get("title");
			    	if(StringUtils.isNotBlank(titleHighlighter)) {
			    		titleHighlighter=highlight.getBestFragment(analyzer, "title", titleHighlighter);
			    	}
			    	map.put("title",titleHighlighter );
			    	map.put("url", doc.get("url"));
			    	blogList.add(map);
			    }
			    request.setAttribute("blogList", blogList);
			}

上面是两段核心代码；下面是用到的工具类。

帮助类（读取配置文件）：

package com.zking.util;

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

/**
 * properties工具类
 * @author user
 *
 */
/**
 * Utility for reading values from the classpath resource {@code /lucene.properties}.
 *
 * @author user
 */
public class PropertiesUtil {

	/**
	 * Looks up the value for the given key in {@code /lucene.properties}.
	 *
	 * @param key the property name to look up
	 * @return the property value, or {@code null} when the key is absent or the
	 *         resource is missing/unreadable
	 */
	public static String getValue(String key){
		Properties prop=new Properties();
		// Load via the class literal instead of instantiating the utility class;
		// try-with-resources closes the stream (the original version leaked it).
		try (InputStream in = PropertiesUtil.class.getResourceAsStream("/lucene.properties")) {
			if (in == null) {
				// Resource not on the classpath: behave as "key not found"
				// instead of throwing an uncaught NullPointerException.
				return null;
			}
			prop.load(in);
		} catch (IOException e) {
			// TODO: replace with proper logging.
			e.printStackTrace();
		}
		return prop.getProperty(key);
	}
}

配置文件 lucene.properties（放在 classpath 根目录下），按自己的环境填写，示例如下：

url=jdbc:mysql://localhost:3306/hibernate?useUnicode=true&characterEncoding=utf8&serverTimezone=GMT&useSSL=false
user=root
pwd=123
driver=com.mysql.jdbc.Driver
ehcacheXmlPath=C://blogCrawler/ehcache.xml
blogImages=C://blogCrawler/blogImages/
indexPath=C://blogCrawler/lucene

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值