lucene.net配合盘古分词实现中文站内搜索,首先配置好盘古分词的Dict目录,将其放在项目下,文件夹下的所有文件输出到bin文件。
lucene.net下使用的代码:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Threading;
using Lucene.Net.Store;
using Lucene.Net.Index;
using System.IO;
using log4net;
using Lucene.Net.Analysis.PanGu;
using RPSite.BLL;
using Lucene.Net.Documents;
namespace RPSite.Search
{
public class IndexManager
{
private static ILog logger = LogManager.GetLogger(typeof(IndexManager));
private static IndexManager instance = new IndexManager();
//所有的地方要对索引库进行修改都通过IndexManager,所以要单例
//因为同时只能有一个在写索引库,所以由“消费者”来进行写
//别的地方想写索引库要请求“消费者”来进行写AddArticle
private IndexManager()
{
}
/// <summary>
/// 启动消费者线程
/// </summary>
public void Start()
{
Thread threadIndex = new Thread(Index);
threadIndex.IsBackground = true;
threadIndex.Start();
}
private void Index()
{
while (true)
{
//防止空转造成cpu占用率过高
if (jobs.Count <= 0)
{
logger.Debug("没有任务,再睡会!");
Thread.Sleep(5 * 1000);
continue;
}
//为什么每次循环都要打开、关闭索引库。因为关闭索引库以后才会把写入的数据提交到索引库中。也可以每次操作都“提交”(参考Lucene.net文档)
string indexPath = "c:/cmsindex";
FSDirectory directory = FSDirectory.Open(new DirectoryInfo(indexPath), new NativeFSLockFactory());
bool isUpdate = IndexReader.IndexExists(directory);
logger.Debug("索引库存在状态" + isUpdate);
if (isUpdate)
{
//如果索引目录被锁定(比如索引过程中程序异常退出),则首先解锁
if (IndexWriter.IsLocked(directory))
{
logger.Debug("开始解锁索引库");
IndexWriter.Unlock(directory);
logger.Debug("解锁索引库完成");
}
}
IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isUpdate, Lucene.Net.Index.IndexWriter.MaxFieldLength.UNLIMITED);
ProcessJobs(writer);
writer.Close();
directory.Close();//不要忘了Close,否则索引结果搜不到
logger.Debug("全部索引完毕");
}
}
private void ProcessJobs(IndexWriter writer)
{
foreach (var job in jobs.ToArray())
{
//todo:异常处理
jobs.Remove(job);// 消费掉
//因为是自己的网站,所以直接读取数据库,不用webclient了
//为避免重复索引,所以先删除number=i的记录,再重新添加
writer.DeleteDocuments(new Term("number", job.Id.ToString()));
//如果“添加文章”任务再添加,
if (job.JobType == JobType.Add)
{
RP_ArticleBLL artBll = new RP_ArticleBLL();
if (artBll == null)//有可能刚添加就被删除了
{
continue;
}
var art = artBll.GetById(job.Id);
string title = art.Title;
string body = art.Msg;//去掉标签
Document document = new Document();
//只有对需要全文检索的字段才ANALYZED
document.Add(new Field("number", job.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));
document.Add(new Field("title", title, Field.Store.YES, Field.Index.NOT_ANALYZED));
document.Add(new Field("body", body, Field.Store.YES, Field.Index.ANALYZED, Lucene.Net.Documents.Field.TermVector.WITH_POSITIONS_OFFSETS));
writer.AddDocument(document);
logger.Debug("索引" + job.Id + "完毕");
}
}
}
public static IndexManager GetInstance()
{
//Queue<string> q;
//q.ad
return instance;
}
//private Queue<
private List<IndexJob> jobs = new List<IndexJob>();
public void AddArticle(int artId)
{
IndexJob job = new IndexJob();
job.Id = artId;
job.JobType = JobType.Add;
logger.Debug(artId+"加入任务列表");
jobs.Add(job);//把任务加入商品库
}
public void RemoveArticle(int artId)
{
IndexJob job = new IndexJob();
job.JobType = JobType.Remove;
job.Id = artId;
logger.Debug(artId + "加入删除任务列表");
jobs.Add(job);//把任务加入商品库
}
}
class IndexJob
{
public int Id { get; set; }
public JobType JobType { get; set; }
}
enum JobType { Add,Remove}
}