- 经过各种测试,解决N多BUG终于把增量索引配置成功了。
- 一定要查看solr_home/server/logs/solr.log这个日志文件,这里记录了完整的输出信息。以前用的4.7版本,日志都输出在cmd窗口;到了5.5,cmd窗口就只有启动时输出的少量信息。如果发现哪里功能不正常,就到这个log文件里查看是否有异常信息输出。
- 配置定时任务时用到的solr-dataimportscheduler-1.0.jar和其他人修改过的solr-dataimportscheduler-1.1.jar在5.5都会报空指针异常。
- 可以用的jar包为http://download.csdn.net/detail/ljsososo/9486023#comment 修改过的mydataimportscheduler.jar。
- 两个主要的配置文件代码如下:
- managed-schema文件(或者schema.xml)
<?xml version="1.0" encoding="UTF-8" ?>
<!--
  Solr schema for the "dam" search documents.
  Chinese text fields are tokenized with mmseg4j in one of three modes
  (complex, max-word, simple); see the fieldType definitions below.
-->
<schema name="example" version="1.6">
  <fields>
    <!-- Multi-valued text field that is indexed but not stored. -->
    <field name="text" type="text_mmseg4j_complex" indexed="true" stored="false" multiValued="true"/>
    <!-- Solr's internal document version; the Solr reference schema declares it as a long. -->
    <field name="_version_" type="long" indexed="true" stored="true"/>
    <!-- Unique key of every document (see uniqueKey below). -->
    <field name="id" type="string" required="true"/>
    <field name="doc_type" type="string"/>
    <field name="name" type="text_mmseg4j_maxword"/>
    <field name="address" type="text_mmseg4j_complex"/>
    <field name="abstract" type="text_mmseg4j_complex"/>
    <!-- Latitude -->
    <field name="lat" type="tdouble" indexed="true" stored="true"/>
    <!-- Longitude -->
    <field name="lon" type="tdouble" indexed="true" stored="true"/>
    <field name="actiontime" type="tdate"/>
    <field name="type" type="text_mmseg4j_maxword"/>
    <field name="city" type="text_mmseg4j_complex"/>
    <field name="dam_id" type="int"/>
    <!--
      mmseg4j sample fields, one per tokenizer mode. They were previously
      declared inside the types section, where Solr does not look for field
      definitions, so they have been moved here.
    -->
    <field name="mmseg4j_complex_name" type="text_mmseg4j_complex" indexed="true" stored="true"/>
    <field name="mmseg4j_maxword_name" type="text_mmseg4j_maxword" indexed="true" stored="true"/>
    <field name="mmseg4j_simple_name" type="text_mmseg4j_simple" indexed="true" stored="true"/>
  </fields>
  <uniqueKey>id</uniqueKey>
  <types>
    <!-- Primitive types -->
    <fieldType name="string" class="solr.StrField" sortMissingLast="true"/>
    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" positionIncrementGap="0"/>
    <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" positionIncrementGap="0"/>
    <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" omitNorms="true" positionIncrementGap="0"/>
    <fieldType name="tdate" class="solr.TrieDateField" precisionStep="6" positionIncrementGap="0"/>
    <!--
      mmseg4j text types. All three use the same relative dictionary path
      "dic"; two of them previously used "/dic", which is an absolute
      filesystem path and inconsistent with the first.
      NOTE(review): confirm the dictionary directory really lives under Solr home.
    -->
    <fieldType name="text_mmseg4j_complex" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="com.chenlb.mmseg4j.solr.MMSegTokenizerFactory" mode="complex" dicPath="dic"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
      </analyzer>
    </fieldType>
    <fieldType name="text_mmseg4j_maxword" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="com.chenlb.mmseg4j.solr.MMSegTokenizerFactory" mode="max-word" dicPath="dic"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
      </analyzer>
    </fieldType>
    <fieldType name="text_mmseg4j_simple" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="com.chenlb.mmseg4j.solr.MMSegTokenizerFactory" mode="simple" dicPath="dic"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
      </analyzer>
    </fieldType>
  </types>
</schema>
data-config.xml文件:
<!--
  DataImportHandler configuration for the "dam" entity.
  query            : full import, joins tbDam with tbAffiliate and tbCity.
  deltaQuery       : returns the dam_id of every tbDam row modified since the
                     last index time.
  deltaImportQuery : re-reads one row per dam_id returned by deltaQuery.
-->
<dataConfig>
  <!-- NOTE(review): database credentials are hard-coded here; consider a
       dedicated low-privilege account instead of root. -->
  <dataSource type="JdbcDataSource"
              driver="com.mysql.jdbc.Driver"
              url="jdbc:mysql://192.168.1.124/isfs_sims"
              user="root"
              password="1234"/>
  <document>
    <!-- dtLastModified is qualified with the tbDam alias so the join cannot
         become ambiguous, and the comparison operator in deltaQuery is
         written as an entity so the attribute value stays portable. -->
    <entity name="dam" pk="dam_id" transformer="TemplateTransformer"
            query="select a.intId dam_id,b.varName type,a.varName name,c.varName city ,a.varAddress address,a.fltLongitude lon,a.fltLatitude lat,a.varAbstract abstract,a.dtLastModified actiontime from tbDam a,tbAffiliate b,tbCity c where a.varType=b.intId and a.intCityId=c.intCityId"
            deltaImportQuery="select a.intId dam_id,b.varName type,a.varName name,c.varName city ,a.varAddress address,a.fltLongitude lon,a.fltLatitude lat,a.varAbstract abstract,a.dtLastModified actiontime from tbDam a,tbAffiliate b,tbCity c where a.intId='${dataimporter.delta.dam_id}' and a.varType=b.intId and a.intCityId=c.intCityId"
            deltaQuery="select intId dam_id from tbDam where dtLastModified &gt; '${dataimporter.last_index_time}'">
      <!-- Document id is built from a fixed prefix plus the entity's dam_id. -->
      <field column="id" template="Dam_${dam.dam_id}"/>
      <field column="dam_id"/>
      <!-- Constant value written to doc_type for every row of this entity. -->
      <field column="doc_type" template="dam"/>
      <field column="name"/>
      <field column="type"/>
      <field column="city"/>
      <field column="lat"/>
      <field column="lon"/>
      <field column="address"/>
      <field column="abstract"/>
      <field column="actiontime"/>
    </entity>
  </document>
</dataConfig>
这里涉及到多表的联合查询,所以代码较长。但是这里主要关注的应该是dam_id:它是entity的主键,因此需要一个field来保存这个值,否则作为唯一值的id就无法通过template生成。进一步地,当存在多个entity时,创建或修改索引就会乱七八糟,因为相同的id会直接覆盖。
别忘了在启动solr的容器中修改web.xml,在servlet标签之前加上:
<!-- Servlet context listener for the dataimport scheduler jar. -->
<listener>
  <listener-class>org.apache.solr.handler.dataimport.scheduler.ApplicationListener</listener-class>
</listener>