Lucene 与Digester结合_与digester具有相似功能-CSDN博客

Lucene 与Digester结合

1.Digester简介：

解析XML有两个基本方法：一个是DOM(Document Object Model)解析，这个方法把XML文件看做一棵树，把整个文档读入内存来解析；另一个方法是SAX(Simple API for XML)解析，这种解析是依据事件来解析XML文件的。Digester使用SAX来解析XML文档，但是直接使用SAX比较复杂，所以Digester提供了一些高级API来处理SAX事件，从而为开发人员减轻负担。

2.Digester引入了三个重要的概念：元素匹配模式、处理规则和对象栈。

元素匹配模式使XML元素和规则处理相关联，举一个例子：

<datasources> 'datasources'
<datasource> 'datasources/datasource'

对象栈用来存放处理规则所要操作的对象。对象既可以手动入栈（push）、出栈（pop），也可以由处理规则自动入栈、出栈。

3.处理规则：

首先，我们需要实例化一个Digester对象，把需要的对象添加到对象栈中，然后添加一系列处理规则，最后解析这个文件。

4.具体实现：

Digester的解析类:AddressBookParser.java

import java.io.File; import java.io.IOException; import org.apache.commons.digester.Digester; import org.xml.sax.SAXException; public class AddressBookParser { /** * Prints the contact information to standard output. * * @param contact the <code>Contact</code> to print out */ public void addContact(Contact contact) { System.out.println("TYPE: " + contact.getType()); System.out.println("NAME: " + contact.getName()); System.out.println(" ADDRESS: " + contact.getAddress()); System.out.println(" CITY: " + contact.getCity()); System.out.println(" PROVINCE: " + contact.getProvince()); System.out.println(" POSTALCODE: " + contact.getPostalcode()); System.out.println(" COUNTRY: " + contact.getCountry()); System.out.println(" TELEPHONE: " + contact.getTelephone()); } /** * Configures Digester rules and actions, parses the XML file specified * as the first argument. * * @param args command line arguments */ public static void main(String[] args) throws IOException, SAXException { // instantiate Digester and disable XML validation System.out.println("start parse"); Digester digester = new Digester(); digester.setValidating(false); // instantiate AddressBookParser class digester.addObjectCreate("address-book", AddressBookParser.class ); // instantiate Contact class digester.addObjectCreate("address-book/contact", Contact.class ); // set type property of Contact instance when 'type' attribute is found digester.addSetProperties("address-book/contact", "type", "type" ); // set different properties of Contact instance using specified methods digester.addCallMethod("address-book/contact/name", "setName", 0); digester.addCallMethod("address-book/contact/address", "setAddress", 0); digester.addCallMethod("address-book/contact/city", "setCity", 0); digester.addCallMethod("address-book/contact/province", "setProvince", 0); digester.addCallMethod("address-book/contact/postalcode", "setPostalcode", 0); digester.addCallMethod("address-book/contact/country", "setCountry", 0); 
digester.addCallMethod("address-book/contact/telephone", "setTelephone", 0); // call 'addContact' method when the next 'address-book/contact' pattern is seen digester.addSetNext("address-book/contact", "addContact" ); // now that rules and actions are configured, start the parsing process File file=new File("NewFile.xml"); AddressBookParser abp = (AddressBookParser) digester.parse(file); //AddressBookParser abp = (AddressBookParser) digester.parse(file); } /** * JavaBean class that holds properties of each Contact entry. * It is important that this class be public and static, in order for * Digester to be able to instantiate it. */ public static class Contact { private String type; private String name; private String address; private String city; private String province; private String postalcode; private String country; private String telephone; public void setType(String newType) { type = newType; } public String getType() { return type; } public void setName(String newName) { name = newName; } public String getName() { return name; } public void setAddress(String newAddress) { address = newAddress; } public String getAddress() { return address; } public void setCity(String newCity) { city = newCity; } public String getCity() { return city; } public void setProvince(String newProvince) { province = newProvince; } public String getProvince() { return province; } public void setPostalcode(String newPostalcode) { postalcode = newPostalcode; } public String getPostalcode() { return postalcode; } public void setCountry(String newCountry) { country = newCountry; } public String getCountry() { return country; } public void setTelephone(String newTelephone) { telephone = newTelephone; } public String getTelephone() { return telephone; } } }
Lucene的搜索测试类:AddressBookSearcher.java（用于检索下文DigesterMarriesLucene.java建立的索引）

import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopScoreDocCollector; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.Term; import org.apache.lucene.store.FSDirectory; import com.sun.org.apache.bcel.internal.generic.NEW; import java.io.File; import java.io.IOException; /** * <code>AddressBookSearcher</code> class provides a simple * example of searching with Lucene. It looks for an entry whose * 'name' field contains keyword 'Zane'. The index being searched * is called "address-book", located in a temporary directory. */ public class AddressBookSearcher { public static void main(String[] args) throws IOException { String indexDir = System.getProperty("java.io.tmpdir", "tmp") + System.getProperty("file.separator") + "address-book"; String indexDir1="d:/";//注意indexDir1的路径，此时没有/的话会一直到系统的根目录下 IndexSearcher searcher = new IndexSearcher(FSDirectory.open(new File(indexDir1))); System.out.println(new File(indexDir1).getAbsolutePath()); Query query = new TermQuery(new Term("name", "Zane")); TopScoreDocCollector collector = TopScoreDocCollector.create(100, true); /* * QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "text", * new StandardAnalyzer(Version.LUCENE_CURRENT)); Query * query=parser.parse(phrase); TopScoreDocCollector collector = * TopScoreDocCollector.create(100,false); //改变之处 */// 查找 // searcher.search(query,collector); // ScoreDoc这个对象还不清楚，但是有多少结果，就有多少个这个对象 searcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; int num = hits.length; for (int i = 0; i < num; i++) { Document doc = searcher.doc(hits[i].doc);// 改变之处 if (doc == null) { } Field field = doc.getField("name"); String filename = field.stringValue(); System.out.println(filename); } searcher.close(); } }

5.Digester与Lucene的结合

DigesterMarriesLucene.java，测试依然可用AddressBookSearcher.java类进行

import org.apache.commons.digester.Digester; import org.xml.sax.SAXException; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import java.io.File; import java.io.IOException; /** * Parses the contents of address-book XML file and indexes all * contact entries found in it. The name of the file to parse must be * specified as the first command line argument. */ public class DigesterMarriesLucene { private static IndexWriter writer; /** * Adds the contact to the index. * * @param contact the <code>Contact</code> to add to the index */ public void addContact(Contact contact) throws IOException { System.out.println("Adding " + contact.getName()); Document contactDocument = new Document(); Field filed = new Field("type", contact.getType(), Field.Store.YES, Field.Index.ANALYZED); contactDocument.add(filed); filed = new Field("name", contact.getName(), Field.Store.YES, Field.Index.ANALYZED); contactDocument.add(filed); filed = new Field("address", contact.getAddress(), Field.Store.YES, Field.Index.ANALYZED); contactDocument.add(filed); filed = new Field("city", contact.getCity(), Field.Store.YES, Field.Index.ANALYZED); contactDocument.add(filed); filed = new Field("province", contact.getProvince(), Field.Store.YES, Field.Index.ANALYZED); contactDocument.add(filed); filed = new Field("postalcode", contact.getPostalcode(), Field.Store.YES, Field.Index.ANALYZED); contactDocument.add(filed); filed = new Field("country", contact.getCountry(), Field.Store.YES, Field.Index.ANALYZED); contactDocument.add(filed); filed = new Field("telephone", contact.getTelephone(), Field.Store.YES, Field.Index.ANALYZED); contactDocument.add(filed); writer.addDocument(contactDocument); } /** * Created an index to add contacts to, 
configures Digester rules and * actions, parses the XML file specified as the first argument. * * @param args command line arguments */ public static void main(String[] args) throws IOException, SAXException { String indexDir = System.getProperty("java.io.tmpdir", "tmp") + System.getProperty("file.separator") + "address-book"; String indexDir1="d:/"; Analyzer analyzer = new WhitespaceAnalyzer(); boolean createFlag = true; // IndexWriter to use for adding contacts to the index // writer = new IndexWriter(indexDir, analyzer, createFlag); writer = new IndexWriter(FSDirectory.open(new File(indexDir1)), analyzer, createFlag,IndexWriter.MaxFieldLength.LIMITED); // instantiate Digester and disable XML validation Digester digester = new Digester(); digester.setValidating(false); // instantiate DigesterMarriesLucene class digester.addObjectCreate("address-book", DigesterMarriesLucene.class ); // instantiate Contact class digester.addObjectCreate("address-book/contact", Contact.class ); // set type property of Contact instance when 'type' attribute is found digester.addSetProperties("address-book/contact", "type", "type" ); // set different properties of Contact instance using specified methods digester.addCallMethod("address-book/contact/name", "setName", 0); digester.addCallMethod("address-book/contact/address", "setAddress", 0); digester.addCallMethod("address-book/contact/city", "setCity", 0); digester.addCallMethod("address-book/contact/province", "setProvince", 0); digester.addCallMethod("address-book/contact/postalcode", "setPostalcode", 0); digester.addCallMethod("address-book/contact/country", "setCountry", 0); digester.addCallMethod("address-book/contact/telephone", "setTelephone", 0); // call 'addContact' method when the next 'address-book/contact' pattern is seen digester.addSetNext("address-book/contact", "addContact" ); // now that rules and actions are configured, start the parsing process File file=new File("NewFile.xml"); digester.parse(file); // optimize 
and close the index writer.optimize(); writer.close(); } /** * JavaBean class that holds properties of each Contact entry. * It is important that this class be public and static, in order for * Digester to be able to instantiate it. */ public static class Contact { private String type; private String name; private String address; private String city; private String province; private String postalcode; private String country; private String telephone; public void setType(String newType) { type = newType; } public String getType() { return type; } public void setName(String newName) { name = newName; } public String getName() { return name; } public void setAddress(String newAddress) { address = newAddress; } public String getAddress() { return address; } public void setCity(String newCity) { city = newCity; } public String getCity() { return city; } public void setProvince(String newProvince) { province = newProvince; } public String getProvince() { return province; } public void setPostalcode(String newPostalcode) { postalcode = newPostalcode; } public String getPostalcode() { return postalcode; } public void setCountry(String newCountry) { country = newCountry; } public String getCountry() { return country; } public void setTelephone(String newTelephone) { telephone = newTelephone; } public String getTelephone() { return telephone; } } }