lucene 全文检索数据库

lucene 全文检索数据库
以前搜索数据库内容时,我们通常使用带 like '%关键词%' 的sql语句;
当数据量大而且需要多表查询时,这种查询会非常慢;
用lucene2建立全文索引就可以解决速度问题。
本例用lucene2搜索photo表的title,user_name,tag_name,descr这几个字段的内容;
用一个例题来说明更直观;
此例题能搜索中文分词;
(需要mysql5的jdbc包和lucene2的包):
1、数据库我用mysql5;建一个photo表;数据库名是test。

photo表有以下几个字段:
-- Photo metadata table. Photo.loadPhotos selects every column of every row,
-- and IndexerFile turns each row into one Lucene document.
CREATE TABLE `photo` (
`photo_id` int(11) NOT NULL auto_increment, -- primary key; stored in the index as "photoId" (not searchable)
`title` varchar(11) default NULL, -- searchable index field "title"
`address` varchar(50) default NULL, -- stored in the index as "address" (not searchable)
`descr` text, -- searchable index field "description"
`user_id` int(11) default NULL, -- stored in the index as "userId" (not searchable)
`user_name` varchar(11) default NULL, -- searchable index field "userName"
`upload_time` date default NULL, -- stored in the index as text under "uploadTime"
`tag_name` varchar(11) default NULL, -- searchable index field "tag"
PRIMARY KEY (`photo_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=REDUNDANT;
2、java文件有4个:
文件Photo.java是数据库的photo表的操作文件(其它文件通过 com.upolestar.kmpm.po.Photo 引用它,所以该文件开头需要加上 package com.upolestar.kmpm.po; 声明);
内容如下:

import java.sql.Connection;
import java.util.ArrayList;
import java.util.Date;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

/**
 * One row of the {@code photo} table, together with a loader that reads the
 * whole table over JDBC.
 */
public class Photo {
 /** Value of the {@code photo_id} column. */
 private long photoId;
 /** Value of the {@code title} column; may be null. */
 private String title;
 /** Value of the {@code descr} column; may be null. */
 private String description;
 /** Value of the {@code address} column; may be null. */
 private String address;
 /** Value of the {@code user_name} column; may be null. */
 private String userName;
 /** Value of the {@code user_id} column. */
 private long userId;
 /** Value of the {@code tag_name} column; may be null. */
 private String tag;
 /** Value of the {@code upload_time} column; may be null. */
 private Date date;

 public String getAddress() {
  return address;
 }

 public void setAddress(String address) {
  this.address = address;
 }

 public String getDescription() {
  return description;
 }

 public void setDescription(String description) {
  this.description = description;
 }

 public long getPhotoId() {
  return photoId;
 }

 public void setPhotoId(long photoId) {
  this.photoId = photoId;
 }

 public String getTag() {
  return tag;
 }

 public void setTag(String tag) {
  this.tag = tag;
 }

 public String getTitle() {
  return title;
 }

 public void setTitle(String title) {
  this.title = title;
 }

 public long getUserId() {
  return userId;
 }

 public void setUserId(long userId) {
  this.userId = userId;
 }

 public String getUserName() {
  return userName;
 }

 public void setUserName(String userName) {
  this.userName = userName;
 }

 /**
  * Reads every row of the {@code photo} table.
  *
  * <p>A failing query is reported to the caller instead of being printed and
  * turned into a partial or empty result, so an empty array always means the
  * table really had no rows.
  *
  * @param con open JDBC connection; it is used but not closed by this method
  * @return one {@code Photo} per row, in result-set order; never null
  * @throws IllegalArgumentException if {@code con} is null
  * @throws Exception the {@link SQLException} raised by the driver when the
  *         statement cannot be prepared, executed, read or closed
  */
 public static Photo[] loadPhotos(Connection con) throws Exception {
  if (con == null) {
   throw new IllegalArgumentException("con must not be null");
  }
  String sql = "select photo_id,title,address,descr,user_id,user_name,upload_time,tag_name from photo";
  ArrayList<Photo> list = new ArrayList<Photo>();
  PreparedStatement pstm = con.prepareStatement(sql);
  try {
   ResultSet rs = pstm.executeQuery();
   try {
    while (rs.next()) {
     // Column indexes follow the order of the select list above.
     Photo photo = new Photo();
     photo.setPhotoId(rs.getLong(1));
     photo.setTitle(rs.getString(2));
     photo.setAddress(rs.getString(3));
     photo.setDescription(rs.getString(4));
     photo.setUserId(rs.getLong(5));
     photo.setUserName(rs.getString(6));
     photo.setDate(rs.getTimestamp(7));
     photo.setTag(rs.getString(8));
     list.add(photo);
    }
    System.out.println("com.upolestar.kmpm.po.Photo.java  ========"+list.size());
   } finally {
    rs.close();
   }
  } finally {
   // Nested finally: the statement is released even if closing the result set fails.
   pstm.close();
  }
  return list.toArray(new Photo[list.size()]);
 }

 public Date getDate() {
  return date;
 }

 public void setDate(Date date) {
  this.date = date;
 }
}

文件IndexerFile.java是把数据库的内容备份成索引文件到磁盘中去;
内容如下:
package com.upolestar.kmpm.service;

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

import com.upolestar.kmpm.po.Photo;


public class IndexerFile {
 public static int indexFile(String indexDir, Photo[] list)
   throws IOException {
  IndexWriter writer = new IndexWriter(indexDir, new StandardAnalyzer(),
    true);
  writer.setUseCompoundFile(false);
  for (int i = 0; i < list.length; i++) {
   Document doc = new Document();
   doc.add(new Field("photoId", String.valueOf(list[i].getPhotoId()),
     Field.Store.YES, Field.Index.NO));
   if (list[i].getTitle() != null)
    doc.add(new Field("title", list[i].getTitle(), Field.Store.YES,
      Field.Index.TOKENIZED));
   if (list[i].getDescription() != null)
    doc.add(new Field("description", list[i].getDescription(),
      Field.Store.YES, Field.Index.TOKENIZED));
   doc.add(new Field("address", list[i].getAddress(), Field.Store.YES,
     Field.Index.NO));
   doc.add(new Field("userName", list[i].getUserName(),
     Field.Store.YES, Field.Index.TOKENIZED));
   doc.add(new Field("userId", String.valueOf(list[i].getUserId()),
     Field.Store.YES, Field.Index.NO));
   if (list[i].getTag().length() > 0)
    doc.add(new Field("tag", list[i].getTag(), Field.Store.YES,
      Field.Index.TOKENIZED));
   doc.add(new Field("uploadTime", list[i].getDate().toLocaleString(), Field.Store.YES,
     Field.Index.TOKENIZED));
   writer.addDocument(doc);
  }

  int numIndexed = writer.docCount();
  writer.optimize();
  writer.close();
  return numIndexed;
 }
}

文件SearcherFile.java是搜索磁盘索引文件内容的;
内容如下:
package com.upolestar.kmpm.service;

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;

public class SearcherFile {
 public static void search(Searcher searcher, String[] q)
   throws IOException, ParseException {
  Analyzer analyzer = new StandardAnalyzer();
  String[] fields = { "title", "description", "tag", "userName" };
  Query query = MultiFieldQueryParser.parse(q, fields, analyzer);
  Hits hits = searcher.search(query);
  System.out.println("SearcherFile======"+hits.length());
  for (int i = 0; i < hits.length(); i++) {
   Document doc = hits.doc(i);
   System.out.println(doc.get("photoId") + "==="
     + doc.get("uploadTime")+ "==="
     + doc.get("title")+ "==="
     + doc.get("description")+ "==="
     + doc.get("tag")+ "==="
     + doc.get("userName"));
  }
 }
}

文件Test.java是操作的主文件(文件名必须与public类名Test一致);
内容如下:
package com.upolestar.kmpm.test;

import java.io.IOException;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.Date;

import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Searcher;

import com.upolestar.kmpm.po.Photo;
import com.upolestar.kmpm.service.IndexerFile;
import com.upolestar.kmpm.service.SearcherFile;

public class Test {
 public final static String indexDir = "D:\\TestLucene";

 private static Connection getConnection() {
  Connection conn = null;
  String url = "jdbc:mysql://localhost:3306/opencms";
  String userName = "root";
  String password = "1111";
  try {
   Class.forName("com.mysql.jdbc.Driver");
   conn = java.sql.DriverManager
     .getConnection(url, userName, password);
  } catch (Exception e) {
   e.printStackTrace();
   System.out.println("Error Trace in getConnection() : "
     + e.getMessage());
  }
  return conn;
 }

 public static void main(String[] args) throws IOException, ParseException,
   SQLException {
  index();// 做索引
  Searcher searcher = null;
  try {
   searcher = new IndexSearcher(indexDir);
   search(searcher);// 搜索
  } catch (Exception e) {
   e.printStackTrace();
  } finally {
   if (searcher != null)
    searcher.close();
  }
 }

 public static void search(Searcher searcher) throws IOException,
   ParseException {
  // 以下是搜索的关键词
  String[] q = { "SVN", "捱三", "null", "null" };
  long start = new Date().getTime();
  SearcherFile.search(searcher, q);
  long end = new Date().getTime();
  System.out.println("花费时间:" + (double) (end - start) / 1000 + "秒");
 }

 public static void index() throws SQLException {
  Connection conn = null;
  try {
   conn = getConnection();
   Photo[] list = Photo.loadPhotos(conn);
   IndexerFile.indexFile(indexDir, list);
  } catch (Exception e) {
   e.printStackTrace();
  } finally {
   if (conn != null) {
    conn.close();
   }
  }
 }
}

 

已经测试过!!

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值