全文检索保存在文件中的数据——简单实例

本文介绍了一个基于Lucene的索引管理示例:为指定目录下的HTML文档创建索引,并通过Servlet提供搜索入口。示例使用StandardAnalyzer对按行读入的文件原文进行分词(代码中并未真正解析HTML标签),并将内容存储到索引中,方便后续进行全文检索。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

1、创建索引

package com.dream.index;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.demo.html.HTMLParser;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.htmlparser.Parser;


/**
 * Index manager: builds a Lucene index for the resources in a data directory.
 *
 * <p>Every regular file directly under {@code dataDir} is read as text and added to the
 * index located at {@code indexDir} as one document whose entire content is stored in
 * the {@code "title"} field.
 */
public class IndexMgr {

    /** Directory that holds the resources to be indexed. */
    private String dataDir = "";
    /** Directory where the index files are written. */
    private String indexDir = "";

    /** Creates a manager with empty paths; set them through the setters before use. */
    public IndexMgr() {
    }

    /**
     * Creates a manager for the given directories.
     *
     * @param dataDir directory that holds the resources to be indexed
     * @param indexDir directory where the index files are written
     */
    public IndexMgr(String dataDir, String indexDir) {
        this.dataDir = dataDir;
        this.indexDir = indexDir;
    }

    /**
     * Builds the index from the files found directly under {@code dataDir}.
     *
     * <p>The writer is opened with the create flag set, so the index is rebuilt on every
     * call rather than reused.
     *
     * @return {@code true} once the index has been written; {@code false} when
     *         {@code dataDir} does not exist or cannot be listed as a directory
     * @throws IOException if the index writer fails
     * @throws InterruptedException declared as part of the existing signature
     */
    public boolean createIndex() throws IOException, InterruptedException {
        // listFiles() returns null for a missing path AND for a path that is not a
        // directory; checking the result covers both instead of failing later with a
        // NullPointerException.
        File[] htmls = new File(dataDir).listFiles();
        if (htmls == null) {
            return false;
        }

        Analyzer analyzer = new StandardAnalyzer();
        // Writes the documents into the index files.
        IndexWriter indexWriter = new IndexWriter(this.indexDir, analyzer, true);
        try {
            for (int i = 0; i < htmls.length; i++) {
                // Sub-directories cannot be read as text; skip them so they do not end
                // up in the index as empty documents.
                if (!htmls[i].isFile()) {
                    continue;
                }
                addDocument(htmls[i].getAbsolutePath(), indexWriter);
            }
            indexWriter.optimize();
        } finally {
            // Always release the writer, even when indexing fails part-way.
            indexWriter.close();
        }
        return true;
    }

    /**
     * Reads one file and adds it to the index as a single document.
     *
     * @param filePath path of the file to index
     * @param indexWriter open writer that receives the document
     * @throws IOException if the writer fails to add the document
     * @throws InterruptedException declared as part of the existing signature
     */
    public void addDocument(String filePath, IndexWriter indexWriter)
            throws IOException, InterruptedException {
        String content = readFileByLines(filePath);
        Document document = new Document();
        // The whole file text is stored and tokenized under the field name "title".
        document.add(new Field("title", content, Field.Store.YES, Field.Index.TOKENIZED));
        indexWriter.addDocument(document);
    }

    /**
     * Reads a text file line by line and returns its content with every line terminated
     * by {@code "\n"}.
     *
     * <p>Read errors are reported on standard error and do not propagate; whatever was
     * read before the error (possibly an empty string) is returned.
     *
     * @param fileName path of the file to read
     * @return the text that could be read from the file
     */
    public String readFileByLines(String fileName) {
        StringBuffer retString = new StringBuffer();
        BufferedReader reader = null;
        try {
            // NOTE(review): FileReader decodes with the platform default charset;
            // confirm that this matches the encoding of the indexed files.
            reader = new BufferedReader(new FileReader(new File(fileName)));
            String tempString;
            // Read one line at a time; readLine() returns null at end of file.
            while ((tempString = reader.readLine()) != null) {
                retString.append(tempString);
                retString.append("\n");
            }
        } catch (IOException e) {
            // Best effort: keep going with the partial content.
            e.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
        return retString.toString();
    }

    /**
     * Tells whether the index directory already contains any entries.
     *
     * @return {@code true} if {@code indexDir} can be listed and is not empty;
     *         {@code false} if it is empty, missing, or not a directory
     */
    public boolean isIndexExist() {
        File[] entries = new File(indexDir).listFiles();
        return entries != null && entries.length > 0;
    }

    public String getDataDir() {
        return dataDir;
    }

    public void setDataDir(String dataDir) {
        this.dataDir = dataDir;
    }

    public String getIndexDir() {
        return indexDir;
    }

    public void setIndexDir(String indexDir) {
        this.indexDir = indexDir;
    }
}


2、执行搜索(注:原文此处贴出的代码仍是第 1 节的 IndexMgr 类,执行搜索的 SearchMgr 实现并未给出)

package com.dream.index;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.demo.html.HTMLParser;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.htmlparser.Parser;


/**
 * Index manager: builds a Lucene index for the resources in a data directory.
 *
 * <p>Every regular file directly under {@code dataDir} is read as text and added to the
 * index located at {@code indexDir} as one document whose entire content is stored in
 * the {@code "title"} field.
 */
public class IndexMgr {

    /** Directory that holds the resources to be indexed. */
    private String dataDir = "";
    /** Directory where the index files are written. */
    private String indexDir = "";

    /** Creates a manager with empty paths; set them through the setters before use. */
    public IndexMgr() {
    }

    /**
     * Creates a manager for the given directories.
     *
     * @param dataDir directory that holds the resources to be indexed
     * @param indexDir directory where the index files are written
     */
    public IndexMgr(String dataDir, String indexDir) {
        this.dataDir = dataDir;
        this.indexDir = indexDir;
    }

    /**
     * Builds the index from the files found directly under {@code dataDir}.
     *
     * <p>The writer is opened with the create flag set, so the index is rebuilt on every
     * call rather than reused.
     *
     * @return {@code true} once the index has been written; {@code false} when
     *         {@code dataDir} does not exist or cannot be listed as a directory
     * @throws IOException if the index writer fails
     * @throws InterruptedException declared as part of the existing signature
     */
    public boolean createIndex() throws IOException, InterruptedException {
        // listFiles() returns null for a missing path AND for a path that is not a
        // directory; checking the result covers both instead of failing later with a
        // NullPointerException.
        File[] htmls = new File(dataDir).listFiles();
        if (htmls == null) {
            return false;
        }

        Analyzer analyzer = new StandardAnalyzer();
        // Writes the documents into the index files.
        IndexWriter indexWriter = new IndexWriter(this.indexDir, analyzer, true);
        try {
            for (int i = 0; i < htmls.length; i++) {
                // Sub-directories cannot be read as text; skip them so they do not end
                // up in the index as empty documents.
                if (!htmls[i].isFile()) {
                    continue;
                }
                addDocument(htmls[i].getAbsolutePath(), indexWriter);
            }
            indexWriter.optimize();
        } finally {
            // Always release the writer, even when indexing fails part-way.
            indexWriter.close();
        }
        return true;
    }

    /**
     * Reads one file and adds it to the index as a single document.
     *
     * @param filePath path of the file to index
     * @param indexWriter open writer that receives the document
     * @throws IOException if the writer fails to add the document
     * @throws InterruptedException declared as part of the existing signature
     */
    public void addDocument(String filePath, IndexWriter indexWriter)
            throws IOException, InterruptedException {
        String content = readFileByLines(filePath);
        Document document = new Document();
        // The whole file text is stored and tokenized under the field name "title".
        document.add(new Field("title", content, Field.Store.YES, Field.Index.TOKENIZED));
        indexWriter.addDocument(document);
    }

    /**
     * Reads a text file line by line and returns its content with every line terminated
     * by {@code "\n"}.
     *
     * <p>Read errors are reported on standard error and do not propagate; whatever was
     * read before the error (possibly an empty string) is returned.
     *
     * @param fileName path of the file to read
     * @return the text that could be read from the file
     */
    public String readFileByLines(String fileName) {
        StringBuffer retString = new StringBuffer();
        BufferedReader reader = null;
        try {
            // NOTE(review): FileReader decodes with the platform default charset;
            // confirm that this matches the encoding of the indexed files.
            reader = new BufferedReader(new FileReader(new File(fileName)));
            String tempString;
            // Read one line at a time; readLine() returns null at end of file.
            while ((tempString = reader.readLine()) != null) {
                retString.append(tempString);
                retString.append("\n");
            }
        } catch (IOException e) {
            // Best effort: keep going with the partial content.
            e.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done if closing fails.
                }
            }
        }
        return retString.toString();
    }

    /**
     * Tells whether the index directory already contains any entries.
     *
     * @return {@code true} if {@code indexDir} can be listed and is not empty;
     *         {@code false} if it is empty, missing, or not a directory
     */
    public boolean isIndexExist() {
        File[] entries = new File(indexDir).listFiles();
        return entries != null && entries.length > 0;
    }

    public String getDataDir() {
        return dataDir;
    }

    public void setDataDir(String dataDir) {
        this.dataDir = dataDir;
    }

    public String getIndexDir() {
        return indexDir;
    }

    public void setIndexDir(String indexDir) {
        this.indexDir = indexDir;
    }
}


3、控制器

package com.dream.servlet;

import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;

import javax.servlet.RequestDispatcher;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import sun.rmi.server.Dispatcher;

import com.dream.index.IndexMgr;
import com.dream.search.SearchMgr;

/**
 * Servlet that runs a keyword search and forwards the result list to {@code index.jsp}.
 *
 * <p>The keyword is taken from the {@code keyword} request parameter and the results are
 * exposed to the view as the {@code searchResult} request attribute. GET and POST are
 * handled identically.
 */
public class ToSearch extends HttpServlet {

    /** Directory that holds the resources to be searched. */
    private static final String DATA_DIR = "D:\\dataDir";
    /** Directory where the index is stored. */
    private static final String INDEX_DIR = "D:\\indexDir";

    public ToSearch() {
        super();
    }

    public void destroy() {
        super.destroy();
    }

    /** Handles GET exactly like POST. */
    public void doGet(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        doPost(request, response);
    }

    /**
     * Searches for the requested keyword and forwards to {@code index.jsp}.
     *
     * <p>When the {@code keyword} parameter is missing or blank no search is run and an
     * empty result list is forwarded. SearchMgr is not visible here, so its handling of
     * a null keyword is unknown; the guard avoids depending on it.
     *
     * @param request the current request; read for {@code keyword}
     * @param response the current response, passed on to the forwarded view
     * @throws ServletException if forwarding fails
     * @throws IOException if forwarding fails
     */
    public void doPost(HttpServletRequest request, HttpServletResponse response)
            throws ServletException, IOException {
        String keyword = request.getParameter("keyword");

        List searchResult;
        if (keyword == null || keyword.trim().length() == 0) {
            // e.g. the page is opened with a plain GET before any search is submitted.
            searchResult = new ArrayList();
        } else {
            SearchMgr searchMgr = new SearchMgr(keyword);
            IndexMgr indexMgr = new IndexMgr();
            indexMgr.setDataDir(DATA_DIR);
            indexMgr.setIndexDir(INDEX_DIR);
            searchMgr.setIndexMgr(indexMgr);
            searchResult = searchMgr.search();
        }

        request.setAttribute("searchResult", searchResult);
        RequestDispatcher rdp = request.getRequestDispatcher("index.jsp");
        rdp.forward(request, response);
    }

    public void init() throws ServletException {
    }

}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值