#region Copyright
//======================================================================
//
//        Copyright (C) 2001-2008 Gaokao Resource Network (ks5u.com)
//        All rights reserved
//
//        filename    : Default3
//        description :
//        created by webabc at 2008-5-22 2:58:47
//        http://www.ks5u.com
//======================================================================
#endregion

using System;
using System.Collections;
using System.Configuration;
using System.Data;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.HtmlControls;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Collections.Generic;
using System.IO;
using System.Text;
using Lucene.Net.Analysis;
using Lucene.Net.Search;
using Lucene.Net.QueryParsers;
using WawaSoft.Search.Common.Analyzer;
using Lucene.Net.Documents;
using System.Text.RegularExpressions;
using Lucene.Net.Highlight;
using KTDictSeg;
using System.Diagnostics;
using Lucene.Net.Analysis.KTDictSeg;

namespace Jebsearch
{
    /// <summary>
    /// Code-behind of the search page: builds a Lucene query from the request,
    /// runs it against the index in the "_index" folder and fills
    /// <see cref="Results"/> with one row per hit on the current page.
    /// </summary>
    /// <remarks>
    /// NOTE(review): this is one part of a partial class. Members such as WQ, RN, Query,
    /// Mst, ED, CL, RSP, Date, Free0..Free3, pageIndex, pageCount, lastPageStartsAt,
    /// getSubjectName, getClass, liter_msg and AspNetPager1 are referenced here but are
    /// declared elsewhere; confirm their contracts before changing how they are used.
    /// </remarks>
    public partial class search : System.Web.UI.Page
    {
        /// <summary>
        /// HTML shown when the search produced no rows.
        /// </summary>
        private const string NoResultsHtml =
            "<div style=\"padding:5px;line-height:25px;\"><b>非常抱歉,没有符合条件的结果!<br>建议您重新设置搜索条件再搜索一次!如果问题不能解决,请您联系我们。<br>客服部 电话:010-51438333 / 010-52219241 <br>技术部 电话:010-58425259 邮箱:ks5u@163.com</b><br>您可以写上意见或建议给我们以帮助我们改进。(如果需要我们与您联系,请写上您的电子邮件,谢谢!)</div>";

        /// <summary>
        /// Guards the one-time creation of <see cref="m_SimpleDictSeg"/>; the field is
        /// static and pages are served concurrently.
        /// </summary>
        private static readonly object s_dictSegLock = new object();

        /// <summary>
        /// Shared dictionary-based word segmenter, created on first use.
        /// </summary>
        public static CSimpleDictSeg m_SimpleDictSeg;

        protected string _indexDirectory;
        protected IndexSearcher _searcher = null;
        private MultiFieldQueryParser _mfqp;

        /// <summary>
        /// Search results.
        /// </summary>
        protected DataTable Results = new DataTable();

        protected DataTable History = new DataTable();

        /// <summary>
        /// First item on page (index format).
        /// </summary>
        private int startAt;

        /// <summary>
        /// First item on page (user format).
        /// </summary>
        private int fromItem;

        /// <summary>
        /// Last item on page (user format).
        /// </summary>
        private int toItem;

        /// <summary>
        /// Total items returned by search.
        /// </summary>
        public int total;

        /// <summary>
        /// Wall-clock time spent inside <see cref="do_search"/>.
        /// </summary>
        public TimeSpan duration;

        /// <summary>
        /// How many items can be shown on one page.
        /// </summary>
        private int maxResults = 10;

        /// <summary>
        /// Exclusive upper hit index of the current page.
        /// </summary>
        public int resultsCount;

        string dictPath;
        Highlighter highlighter;
        Lucene.Net.Analysis.Standard.StandardAnalyzer highanalyzer;
        Query q;
        WawaSimpleAnalyzer simpleAnalyzer;
        Query highquery;

        /// <summary>
        /// Runs the search on the first (non-postback) request and binds the results,
        /// or shows the "no results" message when nothing was found.
        /// </summary>
        protected void Page_Load(object sender, EventArgs e)
        {
            liter_msg.Text = NoResultsHtml;

            if (IsPostBack)
            {
                return;
            }

            if (this.WQ == null)
            {
                return;
            }

            History = initdb_soso();

            // Number of rows shown per page.
            maxResults = this.RN;
            do_search();

            if (this.Query != null)
            {
                // Record the keyword together with its hit count.
                key_history(this.Query.Trim(), total);
            }

            if (Results.Rows.Count == 0)
            {
                // Nothing found.
                liter_msg.Text = NoResultsHtml;
            }
            else
            {
                liter_msg.Text = "";
                DataBind();
            }
        }

        #region Web Form Designer generated code
        override protected void OnInit(EventArgs e)
        {
            //
            // CODEGEN: This call is required by the ASP.NET Web Form Designer.
            //
            InitializeComponent();
            base.OnInit(e);
        }

        /// <summary>
        /// Required method for Designer support - do not modify
        /// the contents of this method with the code editor.
        /// </summary>
        private void InitializeComponent()
        {
        }
        #endregion

        #region Search execution
        /// <summary>
        /// Executes the search: builds the boolean query from the keyword and the
        /// selected filters, runs it (optionally restricted to a date range) and fills
        /// <see cref="Results"/> with the hits of the requested page.
        /// </summary>
        protected void do_search()
        {
            DateTime start = DateTime.Now;

            _indexDirectory = Server.MapPath("_index"); // index location
            dictPath = Server.MapPath("_Data");         // dictionary location

            _searcher = new IndexSearcher(_indexDirectory);
            try
            {
                // NOTE(review): this analyzer is never used below. It is kept because it
                // cannot be told from here whether its constructor has side effects
                // (e.g. loading the dictionary) that other code relies on.
                Analyzer KTDanalyzer = new KTDictSegAnalyzer(dictPath);

                // The standard analyzer is the wrapper's default and is also used for
                // highlighting, so it must exist before the wrapper is created.
                highanalyzer = new Lucene.Net.Analysis.Standard.StandardAnalyzer();
                PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(highanalyzer);

                // Analyzer that tokenizes on separators; used for both searched fields.
                simpleAnalyzer = new WawaSimpleAnalyzer();
                wrapper.AddAnalyzer("title", simpleAnalyzer);
                wrapper.AddAnalyzer("content", simpleAnalyzer);

                string[] fields = new string[] { "title", "content" };
                if (this.Mst == 1)
                {
                    fields = new string[] { "title" };
                }

                _mfqp = new MultiFieldQueryParser(fields, wrapper);
                BooleanQuery m_BooleanQuery = new BooleanQuery();

                if (this.Query != null)
                {
                    // A keyword made only of punctuation segments to nothing; skip it
                    // instead of handing an empty string to the query parser.
                    string segmentedQuery = reqs(this.Query);
                    if (segmentedQuery.Length > 0)
                    {
                        q = _mfqp.Parse(segmentedQuery);
                        m_BooleanQuery.Add(q, BooleanClause.Occur.MUST);
                    }
                }

                // Restrict by subject when one is selected.
                if (this.WQ != "0")
                {
                    AddRequiredTerm(m_BooleanQuery, "subject", this.WQ);
                }

                // Restrict by edition when one is selected.
                if (this.ED != "1")
                {
                    AddRequiredTerm(m_BooleanQuery, "edition", this.ED);
                }

                // Restrict by class when one is selected.
                if (this.CL != "0")
                {
                    AddRequiredTerm(m_BooleanQuery, "class", this.CL);
                }

                // Restrict by resource type when one is selected.
                if (this.RSP != "0")
                {
                    AddRequiredTerm(m_BooleanQuery, "type", this.RSP);
                }

                // Resources that cost no points.
                if (this.Free0)
                {
                    AddRequiredTerm(m_BooleanQuery, "point", "0");
                }

                // Recommended resources.
                if (this.Free1)
                {
                    AddRequiredTerm(m_BooleanQuery, "host", "true");
                }

                // Premium resources.
                if (this.Free2)
                {
                    AddRequiredTerm(m_BooleanQuery, "best", "true");
                }

                // Featured resources.
                if (this.Free3)
                {
                    AddRequiredTerm(m_BooleanQuery, "top", "0");
                }

                Hits reshits;
                if (this.Date == "all")
                {
                    reshits = _searcher.Search(m_BooleanQuery);
                }
                else
                {
                    // Limit hits to a "time" range ending today; the default window is
                    // one day, widened for "week", "month" and "year".
                    DateTime now = DateTime.Now;
                    string upper = now.ToString("yyyyMMdd");
                    string lower = now.AddDays(-1).ToString("yyyyMMdd");
                    if (this.Date == "week")
                    {
                        lower = now.AddDays(-7).ToString("yyyyMMdd");
                    }
                    else if (this.Date == "month")
                    {
                        lower = now.AddDays(-30).ToString("yyyyMMdd");
                    }
                    else if (this.Date == "year")
                    {
                        lower = now.AddDays(-365).ToString("yyyyMMdd");
                    }

                    RangeFilter filter = new RangeFilter("time", lower, upper, true, true);
                    reshits = _searcher.Search(m_BooleanQuery, filter);
                }

                total = reshits.Length();

                // Create the result DataTable.
                this.Results.Columns.Add("title", typeof(string));
                this.Results.Columns.Add("sample", typeof(string));
                this.Results.Columns.Add("path", typeof(string));
                this.Results.Columns.Add("time", typeof(string));
                this.Results.Columns.Add("filetype", typeof(string));
                this.Results.Columns.Add("size", typeof(string));
                this.Results.Columns.Add("author", typeof(string));
                this.Results.Columns.Add("subjectname", typeof(string));
                this.Results.Columns.Add("class", typeof(string));

                startAt = initStartAt();
                // Assign the public field (the original declared a shadowing local, so
                // the field always stayed 0).
                this.resultsCount = smallerOf(total, this.maxResults + this.startAt);

                // Query used only for highlighting matched text.
                MultiFieldQueryParser parser = new MultiFieldQueryParser(new string[] { "title", "content" }, highanalyzer);
                if (this.Query != null)
                {
                    string highlightText = reqsn(this.Query);
                    if (highlightText.Length > 0)
                    {
                        highquery = parser.Parse(highlightText);
                    }
                }

                // Pager state does not depend on the individual hit, so set it once
                // (this also covers the zero-hit case).
                AspNetPager1.RecordCount = total;
                AspNetPager1.PageSize = maxResults;

                for (int i = startAt; i < this.resultsCount; i++)
                {
                    Document doc = reshits.Doc(i);
                    this.Results.Rows.Add(BuildResultRow(doc));
                }
            }
            finally
            {
                // Release the index files held by the searcher; every document needed
                // for this page has already been read at this point.
                _searcher.Close();
            }

            this.duration = DateTime.Now - start;
            this.fromItem = startAt + 1;
            this.toItem = smallerOf(startAt + maxResults, total);
        }

        /// <summary>
        /// Adds an exact-term clause that every hit must satisfy.
        /// </summary>
        /// <param name="target">Boolean query the clause is added to.</param>
        /// <param name="field">Index field name.</param>
        /// <param name="value">Exact term value to match.</param>
        private static void AddRequiredTerm(BooleanQuery target, string field, string value)
        {
            Query termQuery = new TermQuery(new Lucene.Net.Index.Term(field, value));
            target.Add(termQuery, BooleanClause.Occur.MUST);
        }

        /// <summary>
        /// Converts one index document into a row of <see cref="Results"/>, highlighting
        /// the matched text in the title and in the content sample when a highlight
        /// query is available.
        /// </summary>
        /// <param name="doc">Document returned by the searcher.</param>
        /// <returns>A new, not yet added, row of the results table.</returns>
        private DataRow BuildResultRow(Document doc)
        {
            string plainText = parseHtml(doc.Get("content"));
            string title = parseHtml(doc.Get("title"));

            // NOTE(review): "formt" looks like a misspelling of "format", but it must match
            // the field name used when the index was built - confirm before changing it.
            string filetype = doc.Get("formt");

            string result;
            string titleresult;

            if (highquery != null)
            {
                highlighter = new Highlighter(new SimpleHTMLFormatter("<font color=\"red\">", "</font>"), new QueryScorer(highquery));
                Lucene.Net.Analysis.TokenStream tokenStream = highanalyzer.TokenStream("content", new System.IO.StringReader(plainText));
                Lucene.Net.Analysis.TokenStream titletokenStream = highanalyzer.TokenStream("title", new System.IO.StringReader(title));
                result = highlighter.GetBestFragments(tokenStream, plainText, 0, "...");
                titleresult = highlighter.GetBestFragments(titletokenStream, title, 0, "...");
            }
            else
            {
                titleresult = "";
                result = plainText;
            }

            // Fall back to the plain title when nothing was highlighted in it.
            if (string.IsNullOrEmpty(titleresult))
            {
                titleresult = title;
            }

            // Fall back to the first 100 characters of the content.
            if (string.IsNullOrEmpty(result))
            {
                if (plainText.Length > 100)
                {
                    result = plainText.Substring(0, 100);
                }
                else
                {
                    result = plainText;
                }
            }

            if (result.Length < plainText.Length)
            {
                result = result + "...";
            }

            DataRow row = this.Results.NewRow();
            // A document without the file-type field is reported as "unknown".
            row["filetype"] = string.IsNullOrEmpty(filetype) ? "unknown" : filetype;
            row["title"] = titleresult;
            row["path"] = doc.Get("sourceurl");
            row["sample"] = result;
            row["time"] = doc.Get("time");
            row["size"] = doc.Get("size");
            row["author"] = doc.Get("author");
            row["subjectname"] = getSubjectName(int.Parse(doc.Get("subject")));
            row["class"] = getClass(int.Parse(doc.Get("class")));
            return row;
        }
        #endregion

        #region Keyword history
        /// <summary>
        /// Records a searched keyword. The body is empty in this source.
        /// </summary>
        /// <param name="keyword">Trimmed keyword the user searched for.</param>
        /// <param name="recototle">Number of hits the keyword produced.</param>
        private void key_history(string keyword, int recototle)
        {
        }
        #endregion

        #region Most frequently searched keywords
        /// <summary>
        /// Returns the keyword history table. This source returns an empty table.
        /// </summary>
        private DataTable initdb_soso()
        {
            return new DataTable();
        }
        #endregion

        #region HTML replacement
        /// <summary>
        /// Very simple, inefficient, and memory consuming HTML parser. Take a look at
        /// Demo/HtmlParser in DotLucene package for a better HTML parser.
        /// </summary>
        /// <param name="html">Stored field text that may contain HTML; may be null.</param>
        /// <returns>The text with the handled tags removed; empty for null input.</returns>
        private string parseHtml(string html)
        {
            // A document may lack the field entirely; treat that as empty text.
            if (html == null)
            {
                return "";
            }

            // A leading image is replaced by a placeholder character.
            html = Regex.Replace(html, @"^<img\s+[^>]*>", "略", RegexOptions.IgnoreCase);
            // Style blocks are removed together with their content.
            html = Regex.Replace(html, @"<style[^>]*?>.*?</style>", "", RegexOptions.IgnoreCase);
            html = Regex.Replace(html, @"<p.*?>", "", RegexOptions.IgnoreCase);
            html = Regex.Replace(html, @"<span.*?>", "", RegexOptions.IgnoreCase);
            html = Regex.Replace(html, @"<b.*?>", "", RegexOptions.IgnoreCase);
            html = Regex.Replace(html, @"<strong.*?>", "", RegexOptions.IgnoreCase);
            html = Regex.Replace(html, @"<font.*?>", "", RegexOptions.IgnoreCase);
            html = Regex.Replace(html, @"<img.*?>", "", RegexOptions.IgnoreCase);
            return html.Replace("&nbsp;", " ");
        }

        /// <summary>
        /// Prefixes every src attribute that does not start with "http" with
        /// <paramref name="absoluteUrl"/> and appends a fixed inline size style to the tag.
        /// </summary>
        /// <param name="text">HTML text to rewrite.</param>
        /// <param name="absoluteUrl">Base URL placed in front of relative paths.</param>
        /// <returns>The rewritten HTML.</returns>
        public String ConvertRelativePathsToAbsolute(String text, String absoluteUrl)
        {
            String value = Regex.Replace(
                text,
                "<(.*?)(src)=\"(?!http)(.*?)\"(.*?)>",
                "<$1$2=\"" + absoluteUrl + "$3\"$4 style='width:500px;height:300px;'>",
                RegexOptions.IgnoreCase | RegexOptions.Multiline);

            // Now just make sure that there isn't a // because if
            // the original relative path started with a / then the
            // replacement above would create a //.
            return value.Replace(absoluteUrl + "/", absoluteUrl);
        }
        #endregion

        /// <summary>
        /// Returns the smaller value of parameters.
        /// </summary>
        /// <param name="first">First value.</param>
        /// <param name="second">Second value.</param>
        /// <returns>The smaller of the two values.</returns>
        private int smallerOf(int first, int second)
        {
            return first < second ? first : second;
        }

        #region Page selector
        /// <summary>
        /// Builds the option elements of the page selector, one per page, with the
        /// current page marked as selected.
        /// </summary>
        /// <returns>HTML markup containing the option elements.</returns>
        protected string pagingItemHtml()
        {
            if (this.pageCount <= 1)
            {
                return "<option value='1'>第1页</option>";
            }

            StringBuilder options = new StringBuilder();
            for (int i = 1; i <= this.pageCount; i++)
            {
                options.Append("<option value='").Append(i).Append("'");
                if (this.pageIndex == i)
                {
                    options.Append(" selected=true");
                }
                options.Append(">第").Append(i).Append("页</option>");
            }
            return options.ToString();
        }
        #endregion

        #region Helpers
        /// <summary>
        /// Initializes startAt value. Checks for bad values.
        /// </summary>
        /// <returns>Zero-based index of the first hit of the requested page.</returns>
        private int initStartAt()
        {
            try
            {
                // too small starting item, return first page
                if (this.pageIndex <= 1)
                {
                    return 0;
                }

                int requestedStart = (this.pageIndex - 1) * maxResults;

                // too big starting item, return last page. The requested page is past
                // the end only when its first item does not exist.
                if (requestedStart >= total)
                {
                    return lastPageStartsAt;
                }

                return requestedStart;
            }
            catch
            {
                // Best effort: any failure while reading the paging values falls back
                // to the first page.
                return 0;
            }
        }

        /// <summary>
        /// Normalizes the keyword for highlighting: removes every non-word character and
        /// keeps only the first occurrence of each remaining character.
        /// </summary>
        /// <param name="svs">Raw keyword.</param>
        /// <returns>The distinct word characters of the keyword, in original order.</returns>
        public string reqsn(string svs)
        {
            svs = Regex.Replace(svs, @"\W", "");
            StringBuilder distinct = new StringBuilder();
            foreach (char c in svs)
            {
                if (distinct.ToString().IndexOf(c) == -1)
                {
                    distinct.Append(c);
                }
            }
            return distinct.ToString();
        }
        #endregion

        #region Word segmentation
        /// <summary>
        /// Segments the keyword into query terms. The whole keyword comes first with a
        /// boost of 10; keywords longer than two characters are additionally split by the
        /// dictionary segmenter, and every new word is appended.
        /// </summary>
        /// <param name="svs">Raw keyword.</param>
        /// <returns>
        /// The list of query terms; empty when the keyword contains no word characters.
        /// </returns>
        private ArrayList reqs_arr(string svs)
        {
            svs = Regex.Replace(svs, @"\W", "");
            ArrayList list = new ArrayList();

            // Nothing left to search for; a bare "^10" would not be a valid query term.
            if (svs.Length == 0)
            {
                return list;
            }

            list.Add(svs + "^10");

            if (svs.Length <= 2)
            {
                return list;
            }

            EnsureSegmenterLoaded();

            m_SimpleDictSeg.FilterStopWords = true;
            m_SimpleDictSeg.MatchName = true;

            ArrayList words = m_SimpleDictSeg.Segment(svs.Replace("\n", "-"));
            for (int i = 0; i < words.Count; i++)
            {
                if (!list.Contains(words[i]))
                {
                    list.Add(words[i]);
                }
            }

            return list;
        }

        /// <summary>
        /// Creates the shared segmenter and loads its dictionary on first use. When
        /// loading fails the field is left null so that a later request retries.
        /// </summary>
        private static void EnsureSegmenterLoaded()
        {
            if (m_SimpleDictSeg != null)
            {
                return;
            }

            lock (s_dictSegLock)
            {
                if (m_SimpleDictSeg != null)
                {
                    return;
                }

                // Publish the instance only after the dictionary has been loaded.
                CSimpleDictSeg segmenter = new CSimpleDictSeg();
                segmenter.DictPath = System.Web.HttpContext.Current.Server.MapPath("_Data") + Path.DirectorySeparatorChar;
                segmenter.LoadDict();
                m_SimpleDictSeg = segmenter;
            }
        }

        /// <summary>
        /// Joins the segmented terms into the text handed to the query parser, one term
        /// per line.
        /// </summary>
        /// <param name="svs">Raw keyword.</param>
        /// <returns>The query text; empty when the keyword produced no terms.</returns>
        private string reqs(string svs)
        {
            ArrayList list = reqs_arr(svs);
            StringBuilder wordsString = new StringBuilder();
            foreach (String str in list)
            {
                wordsString.AppendFormat("{0}\n", str);
            }
            return wordsString.ToString();
        }
        #endregion
    }
}
// Full code download address: download (link text left over from the page this file was copied from).