以下索引程序是否正确?共有5个Java文件。
LuceneIndex.java代码为:
package com.boe.cim.teacher.luence;
import java.nio.file.Paths;
import java.util.List;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import com.boe.cim.teacher.pojo.TeacherInfo;
/**
 * Builds (or refreshes) the Lucene index for teacher records.
 *
 * <p>Each {@link TeacherInfo} becomes one document keyed by its {@code id}.
 * The {@code id}, {@code teacher}, {@code school} and {@code department} values are
 * indexed as {@link StringField}s (indexed but not tokenized), while
 * {@code researchdirector}, {@code instruments} and {@code achievements} are indexed as
 * {@link TextField}s and analyzed by {@link SmartChineseAnalyzer}. All fields are stored.
 *
 * <p>The class keeps no state between calls.
 *
 * @author hyh
 */
public class LuceneIndex {

    /**
     * Creates an {@link IndexWriter} on the given directory using the Chinese analyzer.
     *
     * @param directory the index directory to write to
     * @return a new writer; the caller is responsible for closing it
     * @throws Exception if the writer cannot be opened
     */
    private IndexWriter getWriter(Directory directory) throws Exception {
        // Chinese word segmentation for the analyzed (TextField) fields.
        SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        return new IndexWriter(directory, iwc);
    }

    /**
     * Adds a stored, non-tokenized field unless the value is {@code null}.
     * Lucene field constructors reject {@code null} values, so a missing column
     * is simply left out of the document instead of aborting the whole run.
     */
    private static void addKeyword(Document doc, String name, String value) {
        if (value != null) {
            doc.add(new StringField(name, value, Field.Store.YES));
        }
    }

    /**
     * Adds a stored, analyzed field unless the value is {@code null}.
     */
    private static void addText(Document doc, String name, String value) {
        if (value != null) {
            doc.add(new TextField(name, value, Field.Store.YES));
        }
    }

    /**
     * Writes every teacher in {@code listTeacher} into the index at {@code indexDir}.
     *
     * <p>Documents are written with {@code updateDocument} on the {@code id} term, so
     * re-indexing the same teacher replaces the earlier document rather than duplicating it.
     *
     * @param indexDir    file-system path of the index directory
     * @param listTeacher teachers to index
     * @throws Exception if the index cannot be opened or written
     */
    public void index(String indexDir, List<TeacherInfo> listTeacher) throws Exception {
        // try-with-resources closes the writer (and then the directory) even when a
        // document fails, so the index write lock is never left behind for a retry.
        try (Directory directory = FSDirectory.open(Paths.get(indexDir));
             IndexWriter writer = getWriter(directory)) {
            for (TeacherInfo teacher : listTeacher) {
                String id = String.valueOf(teacher.getId());
                Document doc = new Document();
                // StringField: indexed as a single term, not tokenized.
                doc.add(new StringField("id", id, Field.Store.YES));
                addKeyword(doc, "teacher", teacher.getTeacher());
                addKeyword(doc, "school", teacher.getSchool());
                addKeyword(doc, "department", teacher.getDepartment());
                // TextField: analyzed for full-text search.
                addText(doc, "researchdirector", teacher.getResearchdirector());
                addText(doc, "instruments", teacher.getInstruments());
                addText(doc, "achievements", teacher.getAchievements());
                writer.updateDocument(new Term("id", id), doc);
            }
        }
    }
}
LuceneIndexRequirement.java代码为:
package com.boe.cim.teacher.luence;
import java.nio.file.Paths;
import java.util.List;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import com.boe.cim.teacher.pojo.RequirementInfo;
/**
 * Builds (or refreshes) the Lucene index for requirement records.
 *
 * <p>Each {@link RequirementInfo} becomes one document keyed by its {@code id}.
 * The {@code id}, {@code requirement}, {@code department}, {@code liaisonman} and
 * {@code requirementtype} values are indexed as {@link StringField}s (indexed but not
 * tokenized), while {@code requirementbackground} and {@code requirementcontents} are
 * indexed as {@link TextField}s and analyzed by {@link SmartChineseAnalyzer}.
 * All fields are stored.
 *
 * <p>The class keeps no state between calls.
 *
 * @author hyh
 */
public class LuceneIndexRequirement {

    /**
     * Creates an {@link IndexWriter} on the given directory using the Chinese analyzer.
     *
     * @param directory the index directory to write to
     * @return a new writer; the caller is responsible for closing it
     * @throws Exception if the writer cannot be opened
     */
    private IndexWriter getWriter(Directory directory) throws Exception {
        // Chinese word segmentation for the analyzed (TextField) fields.
        SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        return new IndexWriter(directory, iwc);
    }

    /**
     * Adds a stored, non-tokenized field unless the value is {@code null}.
     * Lucene field constructors reject {@code null} values, so a missing column
     * is simply left out of the document instead of aborting the whole run.
     */
    private static void addKeyword(Document doc, String name, String value) {
        if (value != null) {
            doc.add(new StringField(name, value, Field.Store.YES));
        }
    }

    /**
     * Adds a stored, analyzed field unless the value is {@code null}.
     */
    private static void addText(Document doc, String name, String value) {
        if (value != null) {
            doc.add(new TextField(name, value, Field.Store.YES));
        }
    }

    /**
     * Writes every requirement in {@code listRequirement} into the index at {@code indexreDir}.
     *
     * <p>Documents are written with {@code updateDocument} on the {@code id} term, so
     * re-indexing the same requirement replaces the earlier document rather than duplicating it.
     *
     * @param indexreDir      file-system path of the index directory
     * @param listRequirement requirements to index
     * @throws Exception if the index cannot be opened or written
     */
    public void index(String indexreDir, List<RequirementInfo> listRequirement) throws Exception {
        // try-with-resources closes the writer (and then the directory) even when a
        // document fails, so the index write lock is never left behind for a retry.
        try (Directory directory = FSDirectory.open(Paths.get(indexreDir));
             IndexWriter writer = getWriter(directory)) {
            for (RequirementInfo requirement : listRequirement) {
                String id = String.valueOf(requirement.getId());
                Document doc = new Document();
                // StringField: indexed as a single term, not tokenized.
                doc.add(new StringField("id", id, Field.Store.YES));
                addKeyword(doc, "requirement", requirement.getRequirement());
                addKeyword(doc, "department", requirement.getDepartment());
                addKeyword(doc, "liaisonman", requirement.getLiaisonman());
                doc.add(new StringField("requirementtype",
                        Integer.toString(requirement.getRequirementtype()), Field.Store.YES));
                // TextField: analyzed for full-text search.
                addText(doc, "requirementbackground", requirement.getRequirementbackground());
                addText(doc, "requirementcontents", requirement.getRequirementcontents());
                writer.updateDocument(new Term("id", id), doc);
            }
        }
    }
}
LuceneSearch.java代码为:
package com.boe.cim.teacher.luence;
import java.io.StringReader;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import com.boe.cim.teacher.pojo.TeacherInfo;
/**
 * Searches the teacher index and returns hits with the matched field highlighted.
 *
 * <p>Each returned {@link TeacherInfo} carries only its {@code id} and the field that
 * was searched; the field value has the matched text wrapped in
 * {@code <b><font color='red'>…</font></b>}.
 */
public class LuceneSearch {

    /** Maximum number of hits requested from the searcher. */
    private static final int MAX_HITS = 104;

    /** Opening markup placed before highlighted text. */
    private static final String HIGHLIGHT_PRE = "<b><font color='red'>";

    /** Closing markup placed after highlighted text. */
    private static final String HIGHLIGHT_POST = "</font></b>";

    /**
     * Runs a search on one field of the teacher index.
     *
     * <p>{@code teacher}, {@code school} and {@code department} are matched as substrings
     * through a wildcard query ({@code *queryMsg*}); every other field is parsed with
     * {@link QueryParser} using {@link SmartChineseAnalyzer}.
     *
     * @param indexDir   file-system path of the index directory
     * @param queryField name of the indexed field to search
     * @param queryMsg   text to search for
     * @return matching teachers (at most {@value #MAX_HITS}), never {@code null}
     * @throws Exception if the index cannot be read or the query cannot be parsed
     */
    public List<TeacherInfo> search(String indexDir, String queryField, String queryMsg) throws Exception {
        boolean substringField = isSubstringField(queryField);
        // Directory, reader and analyzer are all released even if the search fails.
        try (Directory dir = FSDirectory.open(Paths.get(indexDir));
             IndexReader reader = DirectoryReader.open(dir);
             SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer()) {
            IndexSearcher searcher = new IndexSearcher(reader);

            Query query;
            if (substringField) {
                // These fields are not tokenized at index time, so match by substring.
                // NOTE(review): '*', '?' and '\' inside queryMsg act as wildcard syntax here.
                query = new WildcardQuery(new Term(queryField, "*" + queryMsg + "*"));
            } else {
                QueryParser parser = new QueryParser(queryField, analyzer);
                query = parser.parse(queryMsg);
            }

            long start = System.currentTimeMillis();
            TopDocs topDocs = searcher.search(query, MAX_HITS);
            long end = System.currentTimeMillis();
            System.out.println("匹配:["+queryField+"]," + queryMsg + ",总共花费了" + (end - start) + "毫秒,共查到" + topDocs.totalHits + "条记录。");

            // Highlighter used for the analyzed fields.
            QueryScorer scorer = new QueryScorer(query);
            Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
            Highlighter highlighter =
                    new Highlighter(new SimpleHTMLFormatter(HIGHLIGHT_PRE, HIGHLIGHT_POST), scorer);
            highlighter.setTextFragmenter(fragmenter);

            List<TeacherInfo> listinfo = new ArrayList<>();
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = searcher.doc(scoreDoc.doc);
                String id = document.get("id");
                if (id == null) {
                    // Without an id the hit cannot be mapped back to a teacher; skip it.
                    continue;
                }
                TeacherInfo teacherinfo = new TeacherInfo();
                teacherinfo.setId(Integer.parseInt(id));

                String stored = document.get(queryField);
                String highlighted = stored;
                if (stored != null) {
                    if (substringField) {
                        highlighted = highlightLiteral(stored, queryMsg);
                    } else {
                        // The token stream is only opened when the highlighter will consume it.
                        TokenStream tokenStream =
                                analyzer.tokenStream(queryField, new StringReader(stored));
                        String fragment = highlighter.getBestFragment(tokenStream, stored);
                        // getBestFragment yields null when nothing scores; keep the raw text then.
                        highlighted = (fragment != null) ? fragment : stored;
                    }
                }
                // NOTE(review): stored text is placed inside HTML markup without escaping —
                // confirm the caller sanitizes it before rendering.
                applyField(teacherinfo, queryField, highlighted);
                listinfo.add(teacherinfo);
            }
            return listinfo;
        }
    }

    /** Tells whether the field is searched by substring instead of through the analyzer. */
    private static boolean isSubstringField(String queryField) {
        return queryField.equals("teacher")
                || queryField.equals("school")
                || queryField.equals("department");
    }

    /**
     * Wraps every literal occurrence of {@code keyword} in {@code text} with the highlight
     * markup. The keyword is treated as plain text, never as a regular expression.
     */
    private static String highlightLiteral(String text, String keyword) {
        if (keyword == null || keyword.isEmpty()) {
            return text;
        }
        return text.replace(keyword, HIGHLIGHT_PRE + keyword + HIGHLIGHT_POST);
    }

    /** Stores the highlighted value on the property that corresponds to the searched field. */
    private static void applyField(TeacherInfo teacherinfo, String queryField, String value) {
        switch (queryField) {
            case "researchdirector": // research direction
                teacherinfo.setResearchdirector(value);
                break;
            case "instruments": // lab equipment
                teacherinfo.setInstruments(value);
                break;
            case "achievements": // research achievements
                teacherinfo.setAchievements(value);
                break;
            case "teacher": // teacher name
                teacherinfo.setTeacher(value);
                break;
            case "school": // school
                teacherinfo.setSchool(value);
                break;
            case "department": // research institute / department
                teacherinfo.setDepartment(value);
                break;
            default:
                // Unknown field: only the id is returned.
                break;
        }
    }
}
LuceneSearchRequirement.java代码为:
package com.boe.cim.teacher.luence;
import java.io.StringReader;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import com.boe.cim.teacher.pojo.RequirementInfo;
/**
 * Searches the requirement index and returns hits with the matched field highlighted.
 *
 * <p>Each returned {@link RequirementInfo} carries only its {@code id} and the field that
 * was searched; textual field values have the matched text wrapped in
 * {@code <b><font color='red'>…</font></b>}.
 */
public class LuceneSearchRequirement {

    /** Maximum number of hits requested from the searcher. */
    private static final int MAX_HITS = 104;

    /** Opening markup placed before highlighted text. */
    private static final String HIGHLIGHT_PRE = "<b><font color='red'>";

    /** Closing markup placed after highlighted text. */
    private static final String HIGHLIGHT_POST = "</font></b>";

    /**
     * Runs a search on one field of the requirement index.
     *
     * <p>{@code requirement}, {@code department}, {@code liaisonman} and
     * {@code requirementtype} are matched as substrings through a wildcard query
     * ({@code *queryMsg*}); every other field is parsed with {@link QueryParser} using
     * {@link SmartChineseAnalyzer}.
     *
     * @param indexreDir file-system path of the index directory
     * @param queryField name of the indexed field to search
     * @param queryMsg   text to search for
     * @return matching requirements (at most {@value #MAX_HITS}), never {@code null}
     * @throws Exception if the index cannot be read or the query cannot be parsed
     */
    public List<RequirementInfo> search(String indexreDir, String queryField, String queryMsg) throws Exception {
        boolean substringField = isSubstringField(queryField);
        // Directory, reader and analyzer are all released even if the search fails.
        try (Directory dir = FSDirectory.open(Paths.get(indexreDir));
             IndexReader reader = DirectoryReader.open(dir);
             SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer()) {
            IndexSearcher searcher = new IndexSearcher(reader);

            Query query;
            if (substringField) {
                // These fields are not tokenized at index time, so match by substring.
                // NOTE(review): '*', '?' and '\' inside queryMsg act as wildcard syntax here,
                // and a requirementtype of "1" also matches "10", "21", … — confirm intended.
                query = new WildcardQuery(new Term(queryField, "*" + queryMsg + "*"));
            } else {
                QueryParser parser = new QueryParser(queryField, analyzer);
                query = parser.parse(queryMsg);
            }

            long start = System.currentTimeMillis();
            TopDocs topDocs = searcher.search(query, MAX_HITS);
            long end = System.currentTimeMillis();
            System.out.println("匹配:["+queryField+"]," + queryMsg + ",总共花费了" + (end - start) + "毫秒,共查到" + topDocs.totalHits + "条记录。");

            // Highlighter used for the analyzed fields.
            QueryScorer scorer = new QueryScorer(query);
            Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
            Highlighter highlighter =
                    new Highlighter(new SimpleHTMLFormatter(HIGHLIGHT_PRE, HIGHLIGHT_POST), scorer);
            highlighter.setTextFragmenter(fragmenter);

            List<RequirementInfo> listinfo = new ArrayList<>();
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document document = searcher.doc(scoreDoc.doc);
                String id = document.get("id");
                if (id == null) {
                    // Without an id the hit cannot be mapped back to a requirement; skip it.
                    continue;
                }
                RequirementInfo requirementinfo = new RequirementInfo();
                requirementinfo.setId(Integer.parseInt(id));

                String stored = document.get(queryField);
                if (queryField.equals("requirementtype")) {
                    // The type is numeric: it must be parsed from the raw stored value.
                    // Passing it through the highlighter produced HTML (or null), which
                    // Integer.parseInt cannot read.
                    if (stored != null) {
                        requirementinfo.setRequirementtype(Integer.parseInt(stored));
                    }
                } else {
                    String highlighted = stored;
                    if (stored != null) {
                        if (substringField) {
                            highlighted = highlightLiteral(stored, queryMsg);
                        } else {
                            // The token stream is only opened when the highlighter will consume it.
                            TokenStream tokenStream =
                                    analyzer.tokenStream(queryField, new StringReader(stored));
                            String fragment = highlighter.getBestFragment(tokenStream, stored);
                            // getBestFragment yields null when nothing scores; keep the raw text then.
                            highlighted = (fragment != null) ? fragment : stored;
                        }
                    }
                    // NOTE(review): stored text is placed inside HTML markup without escaping —
                    // confirm the caller sanitizes it before rendering.
                    applyTextField(requirementinfo, queryField, highlighted);
                }
                listinfo.add(requirementinfo);
            }
            return listinfo;
        }
    }

    /** Tells whether the field is searched by substring instead of through the analyzer. */
    private static boolean isSubstringField(String queryField) {
        return queryField.equals("requirement")
                || queryField.equals("department")
                || queryField.equals("liaisonman")
                || queryField.equals("requirementtype");
    }

    /**
     * Wraps every literal occurrence of {@code keyword} in {@code text} with the highlight
     * markup. The keyword is treated as plain text, never as a regular expression.
     */
    private static String highlightLiteral(String text, String keyword) {
        if (keyword == null || keyword.isEmpty()) {
            return text;
        }
        return text.replace(keyword, HIGHLIGHT_PRE + keyword + HIGHLIGHT_POST);
    }

    /** Stores the highlighted value on the textual property matching the searched field. */
    private static void applyTextField(RequirementInfo requirementinfo, String queryField, String value) {
        switch (queryField) {
            case "requirement": // requirement name
                requirementinfo.setRequirement(value);
                break;
            case "department": // requesting organization
                requirementinfo.setDepartment(value);
                break;
            case "liaisonman": // contact person
                requirementinfo.setLiaisonman(value);
                break;
            case "requirementcontents": // requirement contents
                requirementinfo.setRequirementcontents(value);
                break;
            case "requirementbackground": // requirement background
                requirementinfo.setRequirementbackground(value);
                break;
            default:
                // Unknown field: only the id is returned.
                break;
        }
    }
}
StartApplicationWithLuceneIndex.java代码为:
package com.boe.cim.teacher.luence;
import java.util.List;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.boot.ApplicationArguments;
import org.springframework.boot.ApplicationRunner;
import org.springframework.core.annotation.Order;
import org.springframework.retry.annotation.Backoff;
import org.springframework.retry.annotation.Recover;
import org.springframework.retry.annotation.Retryable;
import org.springframework.stereotype.Component;
import com.boe.cim.teacher.dao.TeacherInfoMapper;
import com.boe.cim.teacher.pojo.TeacherInfo;
import com.boe.cim.teacher.dao.RequirementInfoMapper;
import com.boe.cim.teacher.pojo.RequirementInfo;
import com.boe.cim.teacher.utils.FileOperationFunction;
import lombok.extern.slf4j.Slf4j;
@Slf4j
@Component
@Order(value=5)
public class StartApplicationWithLuceneIndex implements ApplicationRunner{
@Value("${lucene.index.dir}")
private String IndexDir;
@Autowired
private TeacherInfoMapper teacherInfoMapper;
@Value("${lucenere.indexre.dir}")
private String IndexreDir;
@Autowired
private RequirementInfoMapper requirementInfoMapper;
@Override
@Retryable(value= Exception.class,backoff=@Backoff(delay=2000))
//重试机制,默认重试三次,失败程序启动失败,Main函数run,异常直接捕获不了,因为被aop捕获了
public void run(ApplicationArguments args) {
// TODO Auto-generated method stub
FileOperationFunction.deleteFileFromPath(IndexDir); //有个文件权限太高,删不掉
log.info("生成教师索引文件ing...");
List<TeacherInfo> listTeacher = teacherInfoMapper.userTeacherInfoCreateLuceneIndex();
try {
new LuceneIndex().index(IndexDir,listTeacher);
} catch (Exception e) {
// TODO Auto-generated catch block
}
log.info("生成教师索引文件成功,End");
// TODO Auto-generated method stub
FileOperationFunction.deleteFileFromPath(IndexreDir); //有个文件权限太高,删不掉
log.info("生成需求索引文件ing...");
List<RequirementInfo> listRequirement = requirementInfoMapper.RequirementInfoCreateLuceneIndex();
try {
new LuceneIndexRequirement().index(IndexreDir,listRequirement);
} catch (Exception e) {
// TODO Auto-generated catch block
}
log.info("生成教师索引文件成功,End");
}
@Recover//重试多次皆失败回调方法
public void retryRecover(Exception e) {
log.error("重试三次生成信息索引文件失败...");
log.error(e.getMessage());
}
}