The Chinese tokenizer I wrote a while ago is finished, so the next step is to use it to build a Lucene index. The full code is below.

package org.iceshirley.index;

import java.sql.*;
import java.io.*;
import org.apache.lucene.index.*;
import org.apache.lucene.store.*;
import org.apache.lucene.document.*;
import org.iceshirley.MMChineseAnalyzer.*;

public class Index {

    protected LinkToDb ltb = null;
    private MMChineseAnalyzer analyzer = null;

    public Index() {
        analyzer = new MMChineseAnalyzer();
    }

    // Open the MySQL connection that holds the crawled pages.
    public void createConnection() throws SQLException {
        String conurl = "jdbc:mysql://localhost:3306/searchdb?user=root&password=821210&useUnicode=true&characterEncoding=GBK";
        ltb = new LinkToDb("com.mysql.jdbc.Driver", conurl);
        System.out.println("connection");
    }

    // Number of rows waiting to be indexed.
    public int getTableNum() {
        int count = ltb.GetTableNum();
        return count;
    }

    public void close() throws SQLException {
        ltb.close();
    }

    // Read every row from the database and add it to the index as a Document.
    public void createIndex(String index, int count) throws IOException {
        Directory dir = FSDirectory.getDirectory(index, true);
        // Use the custom MMChineseAnalyzer instead of the bundled StandardAnalyzer.
        IndexWriter writer = new IndexWriter(dir, analyzer, true);
        writer.setMergeFactor(100);
        writer.setUseCompoundFile(true);
        ResultSet rs = ltb.GetResult();

        try {
            while (rs.next()) {
                Document doc = new Document();
                String url = rs.getString("url");
                String title = rs.getString("title");
                String text = rs.getString("text");
                String date = rs.getString("date");
                String encode = rs.getString("encode");
                // url/date/encode are stored but not indexed; title and text are tokenized,
                // and text additionally keeps term positions and offsets.
                doc.add(new Field("url", url, Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
                doc.add(new Field("title", title, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
                doc.add(new Field("text", text, Field.Store.COMPRESS, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                doc.add(new Field("date", date, Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
                doc.add(new Field("encode", encode, Field.Store.YES, Field.Index.NO, Field.TermVector.NO));

                //----------------test--------------//
                /*
                java.util.Enumeration fields = doc.fields();
                while (fields.hasMoreElements()) {
                    Field field = (Field) fields.nextElement();
                    Reader reader = null;
                    if (field.readerValue() != null) {
                        reader = field.readerValue();
                    } else if (field.stringValue() != null) {
                        reader = new StringReader(field.stringValue());
                    }
                    org.apache.lucene.analysis.TokenStream ts = analyzer.tokenStream("test", reader);
                    org.apache.lucene.analysis.Token token = null;
                    while ((token = ts.next()) != null) {
                        System.out.println(token.termText());
                    }
                }
                System.out.println("----------------------------------------");
                */
                //--------------------------------//

                try {
                    writer.addDocument(doc);
                    //System.out.println(doc.getField("url")+" has been saved to index");
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
            writer.optimize();
            writer.close();
            System.out.println("completed");
        } catch (SQLException e) {
            System.out.println("error " + e.getMessage());
        }
    }

    public static void main(String[] args) throws IOException, SQLException {
        String indexdir = "c:/indexdir";
        int count = 0;
        Index ci = new Index();

        try {
            ci.createConnection();
            count = ci.getTableNum();

            if (count < 1) {
                System.out.println("no record in the database");
            } else {
                ci.createIndex(indexdir, count);
            }
        } catch (SQLException e) {
            System.out.println(e.getMessage());
        }
        ci.close();
    }
}

// Helper class that wraps the database connection.
class LinkToDb {

    protected Connection con;
    protected PreparedStatement preCount;
    protected PreparedStatement preSelect;

    LinkToDb(String driver, String sqlurl) {
        try {
            Class.forName(driver);
            con = DriverManager.getConnection(sqlurl);
            preCount = con.prepareStatement("SELECT count(*) as qty FROM complete_queue;");
            preSelect = con.prepareStatement("SELECT * FROM complete_queue;");
        } catch (Exception e) {
            System.out.println(e.getMessage());
        }
    }

    // Returns the number of rows in complete_queue.
    public int GetTableNum() {
        int count = 0;
        try {
            ResultSet rs = preCount.executeQuery();
            rs.next();
            count = rs.getInt("qty");
        } catch (Exception e) {
            System.out.println(e.getMessage());
        }
        return count;
    }

    // Returns a ResultSet over all rows in complete_queue.
    public ResultSet GetResult() {
        ResultSet rs = null;
        try {
            rs = preSelect.executeQuery();
        } catch (Exception e) {
            System.out.println(e.getMessage());
        }
        return rs;
    }

    public void close() throws SQLException {
        if (con != null) {
            con.close();
        }
    }
}

In Lucene 2.0 the Field API changed considerably: the old static factory methods such as Field.Text no longer exist, and fields are now created directly through Field's constructors. There are five of them:

Field(String name, byte[] value, Field.Store store)
Field(String name, Reader reader)
Field(String name, Reader reader, Field.TermVector termVector)
Field(String name, String value, Field.Store store, Field.Index index)
Field(String name, String value, Field.Store store, Field.Index index, Field.TermVector termVector)
Field has three inner classes, Field.Store, Field.Index and Field.TermVector, which these constructors take as parameters. Term vectors were added in Lucene 1.4; they store per-field term information that supports vector-style similarity (fuzzy) matching, and they are not needed very often. Different combinations of these three settings play different roles in full-text search, so choose the constructor and the combination that fits what each field is for.
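
To make the combinations concrete, here is a minimal sketch against the same Lucene 2.0 API used above. The class name FieldDemo and the sample field names and values are invented purely for illustration; they are not part of the project code.

import java.io.StringReader;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

public class FieldDemo {
    public static Document buildDoc() {
        Document doc = new Document();

        // Stored but not indexed: can be read back from a hit, cannot be searched.
        doc.add(new Field("url", "http://example.com/1.html",
                Field.Store.YES, Field.Index.NO));

        // Indexed as a single exact term (not run through the analyzer), and stored.
        doc.add(new Field("date", "2007-06-01",
                Field.Store.YES, Field.Index.UN_TOKENIZED));

        // Analyzed into terms, stored compressed, with term vectors
        // (positions and offsets) for the vector features mentioned above.
        doc.add(new Field("text", "要建立索引的正文内容",
                Field.Store.COMPRESS, Field.Index.TOKENIZED,
                Field.TermVector.WITH_POSITIONS_OFFSETS));

        // Built from a Reader: always analyzed, never stored.
        doc.add(new Field("summary", new StringReader("摘要内容")));

        return doc;
    }
}

Only the TOKENIZED fields are passed through the analyzer (here MMChineseAnalyzer) when the document is added with IndexWriter.addDocument; Index.NO fields are simply carried along so they can be shown with the search results.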