pom.xml
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Coordinates of this demo project. -->
    <groupId>cn.et</groupId>
    <artifactId>LuceneScoreSearch</artifactId>
    <version>0.0.1-SNAPSHOT</version>

    <!-- Spring Boot parent POM; dependencies declared below without a version inherit it from here. -->
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>1.5.9.RELEASE</version>
    </parent>

    <dependencies>
        <!-- Spring MVC starter used by the REST controller. -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <!-- IK Analyzer: the Chinese word-segmentation analyzer used for indexing and query parsing. -->
        <dependency>
            <groupId>com.janeluo</groupId>
            <artifactId>ikanalyzer</artifactId>
            <version>2012_u6</version>
        </dependency>
        <!-- Lucene highlighter module (groupId previously carried a stray leading space). -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-highlighter</artifactId>
            <version>4.7.2</version>
        </dependency>
        <!-- JUnit: unit-testing framework for Java. -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Compile for Java 7 and read sources as UTF-8 (the sources contain Chinese string literals). -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.7</source>
                    <target>1.7</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Coordinates of this demo project. -->
    <groupId>cn.et</groupId>
    <artifactId>LuceneScoreSearch</artifactId>
    <version>0.0.1-SNAPSHOT</version>

    <!-- Spring Boot parent POM; dependencies declared below without a version inherit it from here. -->
    <parent>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-parent</artifactId>
        <version>1.5.9.RELEASE</version>
    </parent>

    <dependencies>
        <!-- Spring MVC starter used by the REST controller. -->
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
        </dependency>
        <!-- IK Analyzer: the Chinese word-segmentation analyzer used for indexing and query parsing. -->
        <dependency>
            <groupId>com.janeluo</groupId>
            <artifactId>ikanalyzer</artifactId>
            <version>2012_u6</version>
        </dependency>
        <!-- Lucene highlighter module (groupId previously carried a stray leading space). -->
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-highlighter</artifactId>
            <version>4.7.2</version>
        </dependency>
        <!-- JUnit, test scope only. Raised from 4.10 to 4.12 so it agrees with the other copy of this POM. -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Compile for Java 7 and read sources as UTF-8 (the sources contain Chinese string literals). -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.7</source>
                    <target>1.7</target>
                    <encoding>UTF-8</encoding>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
package cn.et;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.highlight.TokenSources;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Demo REST controller that builds a small Lucene index of five sample "person"
 * documents (fields {@code AGE}, {@code NAME}, {@code BRIEF}) and exposes two
 * endpoints that search the {@code BRIEF} field for a fixed keyword: one that
 * reports raw relevance scores and one that returns highlighted snippets.
 */
@RestController
public class LueneTesting {
    /** Filesystem directory that holds the index; shared by the writer and both search endpoints. */
    private static final String INDEX_PATH = "H:/Lucene/index";
    /** Fixed query text used by both search endpoints. */
    private static final String QUERY_TEXT = "中国";
    /** Maximum number of hits requested from the searcher. */
    private static final int MAX_HITS = 10;

    // IK Analyzer (word segmentation) shared by indexing and query parsing.
    static Analyzer analyzer = new IKAnalyzer();

    /**
     * Creates the sample documents and writes them to the index at {@link #INDEX_PATH}.
     *
     * <p>The directory and the writer are closed even when adding a document fails.
     * NOTE(review): no open mode is configured on the writer, so calling this more than
     * once presumably appends the same five documents again - confirm against the
     * Lucene default before relying on it.
     *
     * @throws Exception if the index directory cannot be opened or written
     */
    public static void write() throws Exception {
        try (Directory directory = FSDirectory.open(new File(INDEX_PATH));
             IndexWriter iwriter = new IndexWriter(directory,
                     new IndexWriterConfig(Version.LUCENE_47, analyzer))) {
            // Each document plays the role of one database record.
            // The descriptions are purely fictional and carry no ill intent.
            iwriter.addDocument(newPerson("20", "路橙",
                    "来自中国湖南永州,是一名初级Java开发工程师,中国互联网技术博客:http://blog.youkuaiyun.com/phone13144830339"));
            iwriter.addDocument(newPerson("21", "谢飞",
                    "来自中国湖北武汉,是一名语文老师,中国教育网成员,2010年评选为中国10大优秀教师"));
            iwriter.addDocument(newPerson("22", "邓娟",
                    "来自中国四川绵阳,是一名幼儿园老师"));
            iwriter.addDocument(newPerson("23", "曹焰斌",
                    "来自中国广东广州,是一名建筑工人"));
            iwriter.addDocument(newPerson("24", "SMISI",
                    "来自美国底特律,是一名外资企业经理"));
            iwriter.commit();
        }
    }

    /** Builds one document with stored, analyzed AGE / NAME / BRIEF text fields. */
    private static Document newPerson(String age, String name, String brief) {
        Document doc = new Document();
        doc.add(new Field("AGE", age, TextField.TYPE_STORED));
        doc.add(new Field("NAME", name, TextField.TYPE_STORED));
        doc.add(new Field("BRIEF", brief, TextField.TYPE_STORED));
        return doc;
    }

    /**
     * Searches the {@code BRIEF} field for the fixed keyword and renders every hit,
     * together with its relevance score, as an HTML fragment.
     *
     * @return the concatenated HTML for all hits in the order returned by the searcher;
     *         an empty string when nothing matches
     * @throws Exception if the index cannot be opened or the query cannot be parsed
     */
    @RequestMapping("/simpleSearchScore")
    public static String simpleSearch() throws Exception {
        // Reader and directory hold file handles; release them on every request.
        try (Directory directory = FSDirectory.open(new File(INDEX_PATH));
             DirectoryReader ireader = DirectoryReader.open(directory)) {
            IndexSearcher isearcher = new IndexSearcher(ireader);
            // Query the BRIEF field, parsing the text with the same analyzer used for indexing.
            QueryParser parser = new QueryParser(Version.LUCENE_47, "BRIEF", analyzer);
            Query query = parser.parse(QUERY_TEXT);
            TopDocs docs = isearcher.search(query, MAX_HITS);

            StringBuilder resultStr = new StringBuilder();
            for (ScoreDoc hit : docs.scoreDocs) {
                // Load the stored fields once per hit instead of once per field.
                Document stored = isearcher.doc(hit.doc);
                resultStr.append("文档ID: ").append(hit.doc)
                        .append("<br/>BRIEF:").append(stored.get("BRIEF"))
                        .append("<br/>NAME:").append(stored.get("NAME"))
                        .append("<br/>AGE:").append(stored.get("AGE"))
                        .append("<br/>得分情况: ").append(hit.score)
                        .append("<hr border='5px' color='red'/>");
            }
            return resultStr.toString();
        }
    }

    /**
     * Searches the {@code BRIEF} field for the fixed keyword and returns one map per hit
     * with the keys {@code name}, {@code age} and, when a fragment scored above zero,
     * {@code brief} holding the text with matches wrapped in a red {@code <font>} tag.
     *
     * @return one map per hit, in the order returned by the searcher
     * @throws Exception if the index cannot be opened, the query cannot be parsed,
     *                   or highlighting fails
     */
    @RequestMapping("/highlighterSearch")
    public List<Map<String,String>> highlighterTesting() throws Exception{
        // Reader and directory hold file handles; release them on every request.
        try (Directory directory = FSDirectory.open(new File(INDEX_PATH));
             DirectoryReader ireader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(ireader);
            QueryParser parser = new QueryParser(Version.LUCENE_47, "BRIEF", analyzer);
            Query query = parser.parse(QUERY_TEXT);
            TopDocs hits = searcher.search(query, MAX_HITS);

            SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("<font color=red>","</font>");
            Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
            // Limit how many characters of each text the highlighter processes.
            highlighter.setMaxDocCharsToAnalyze(20);

            List<Map<String,String>> list = new ArrayList<Map<String,String>>();
            int item = hits.scoreDocs.length;
            System.out.println(item); // debug trace: number of hits
            for (int i = 0; i < item; i++) {
                System.out.println(i); // debug trace: hit index
                int id = hits.scoreDocs[i].doc;
                Document doc = searcher.doc(id);
                Map<String,String> map = new HashMap<String,String>();
                map.put("name", doc.get("NAME"));

                String text = doc.get("BRIEF");
                TokenStream tokenStream =
                        TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "BRIEF", analyzer);
                TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);
                System.out.println(frag.length); // debug trace: fragment count
                for (int j = 0; j < frag.length; j++) {
                    System.out.println(frag[j]);
                    // Later scoring fragments overwrite earlier ones, so the last one wins.
                    if ((frag[j] != null) && (frag[j].getScore() > 0)) {
                        String str = frag[j].toString();
                        System.out.println(str);
                        map.put("brief", str);
                    }
                }

                map.put("age", doc.get("AGE"));
                list.add(map);
            }
            return list;
        }
    }
}
package cn.et;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
/**
 * Entry point of the demo: starts Spring Boot with this class as the
 * configuration source.
 */
@SpringBootApplication
public class SpringBootMain {

    /**
     * Launches the Spring application.
     *
     * @param args command-line arguments, handed to Spring Boot unchanged
     */
    public static void main(String[] args) {
        SpringApplication application = new SpringApplication(SpringBootMain.class);
        application.run(args);
    }
}