成品下载
下载kettle源码
pentaho-kettle-8.2.0.0-R.tar.gz
导入eclipse

修改相关pom
1.修改工程根的pom.xml文件,在其中添加以下仓库配置(解决编译时部分包找不到的问题)
<repository>
<id>pentaho-public1</id>
<name>/</name>
<url>http://oss.sonatype.org/content/groups/public/</url>
</repository>
<repository>
<id>pentaho-public2</id>
<name>/</name>
<url>https://nexus.pentaho.org/repository/proxy-public-release/</url>
</repository>
<repository>
<id>pentaho-public3</id>
<name>/</name>
<url>https://nexus.pentaho.org/repository/proxy-public-snapshot/</url>
</repository>
2.plugins目录,修改pom.xml文件内容
<modules>
<module>elasticsearch-bulk-insert</module>
</modules>
3.plugins\elasticsearch-bulk-insert\core目录,修改pom.xml文件内容
<?xml version="1.0" encoding="UTF-8"?>
<!-- POM for the elasticsearch-bulk-insert core module. Dependency versions are
     pinned for the migration to the Elasticsearch 7.3.0 REST high-level client;
     Kettle artifacts stay "provided" because the PDI runtime supplies them. -->
<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.pentaho.di.plugins</groupId>
<artifactId>elasticsearch-bulk-insert</artifactId>
<version>8.2.0.0-342</version>
</parent>
<artifactId>elasticsearch-bulk-insert-core</artifactId>
<version>8.2.0.0-342</version>
<name>PDI Elasticsearch Bulk Insert Plugin Core</name>
<description>Elasticsearch Bulk Insert Plugin</description>
<properties>
<pdi.version>8.2.0.0-342</pdi.version>
<build.revision>${project.version}</build.revision>
<timestamp>${maven.build.timestamp}</timestamp>
<build.description>${project.description}</build.description>
<!-- NOTE(review): "hh" is the 12-hour clock; "HH" (24-hour) may be intended - confirm. -->
<maven.build.timestamp.format>yyyy/MM/dd hh:mm</maven.build.timestamp.format>
<elasticsearch.version>7.3.0</elasticsearch.version>
</properties>
<dependencies>
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId>
<version>${elasticsearch.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<!-- NOTE(review): hard-coded 7.3.0 duplicates ${elasticsearch.version}; keep in sync. -->
<version>7.3.0</version>
</dependency>
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>transport</artifactId>
<version>${elasticsearch.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>pentaho-kettle</groupId>
<artifactId>kettle-engine</artifactId>
<version>${pdi.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>pentaho-kettle</groupId>
<artifactId>kettle-core</artifactId>
<version>${pdi.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>pentaho-kettle</groupId>
<artifactId>kettle-ui-swt</artifactId>
<version>${pdi.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.eclipse.swt</groupId>
<artifactId>org.eclipse.swt.gtk.linux.x86_64</artifactId>
<version>4.6</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.eclipse</groupId>
<artifactId>jface</artifactId>
<version>3.3.0-I20070606-0010</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-all</artifactId>
<version>1.9.5</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>pentaho-kettle</groupId>
<artifactId>kettle-engine</artifactId>
<classifier>tests</classifier>
<version>${pdi.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>pentaho-kettle</groupId>
<artifactId>kettle-core</artifactId>
<version>${pdi.version}</version>
<classifier>tests</classifier>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<resources>
<resource>
<!-- Filtering substitutes ${...} properties (version, timestamp) into plugin resources. -->
<filtering>true</filtering>
<directory>src/main/resources</directory>
</resource>
</resources>
</build>
</project>
修改源码
1.修改ElasticSearchBulk.java
package org.pentaho.di.trans.steps.elasticsearchbulk;
import java.io.IOException;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import org.apache.commons.lang.StringUtils;
import org.apache.http.HttpHost;
import org.elasticsearch.action.DocWriteRequest.OpType;
import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkRequest;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.client.transport.NoNodeAvailableException;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.pentaho.di.core.exception.KettleException;
import org.pentaho.di.core.exception.KettleStepException;
import org.pentaho.di.core.row.RowDataUtil;
import org.pentaho.di.core.row.RowMetaInterface;
import org.pentaho.di.core.row.ValueMetaInterface;
import org.pentaho.di.i18n.BaseMessages;
import org.pentaho.di.trans.Trans;
import org.pentaho.di.trans.TransMeta;
import org.pentaho.di.trans.step.BaseStep;
import org.pentaho.di.trans.step.StepDataInterface;
import org.pentaho.di.trans.step.StepInterface;
import org.pentaho.di.trans.step.StepMeta;
import org.pentaho.di.trans.step.StepMetaInterface;
import org.pentaho.di.trans.steps.elasticsearchbulk.ElasticSearchBulkMeta.Server;
/**
* Does bulk insert of data into ElasticSearch
*
* @author webdetails
* @since 16-02-2011
*/
public class ElasticSearchBulk extends BaseStep implements StepInterface {

  /** Error code attached to rejected rows; null means no specific code is set. */
  private static final String INSERT_ERROR_CODE = null;
  /** Message bundle anchor class for i18n lookups. */
  private static Class<?> PKG = ElasticSearchBulkMeta.class; // for i18n

  private ElasticSearchBulkMeta meta;
  private ElasticSearchBulkData data;

  /** REST high-level client; replaces the pre-7.x TransportClient. */
  private RestHighLevelClient client;

  private String index;
  private String type;

  /** Bulk request currently being filled; flushed once batchSize actions accumulate. */
  BulkRequest currentRequest = new BulkRequest();

  private int batchSize = 2;

  private boolean isJsonInsert = false;
  private int jsonFieldIdx = 0;

  private String idOutFieldName = null;
  private Integer idFieldIndex = null;

  private Long timeout = null;
  private TimeUnit timeoutUnit = TimeUnit.MILLISECONDS;

  private int numberOfErrors = 0;

  /** Requests belonging to the current batch, kept alongside currentRequest. */
  private List<IndexRequest> requestsBuffer;

  private boolean stopOnError = true;
  private boolean useOutput = true;

  private Map<String, String> columnsToJson;
  private boolean hasFields;

  // Fix: IndexRequest.OpType does not exist in the 7.x client; use the
  // DocWriteRequest.OpType type that this file already imports.
  private OpType opType = OpType.CREATE;
/**
 * Standard Kettle step constructor; delegates straight to {@link BaseStep}.
 *
 * @param stepMeta          step definition within the transformation
 * @param stepDataInterface runtime data holder for this step copy
 * @param copyNr            copy number of this step instance
 * @param transMeta         owning transformation metadata
 * @param trans             running transformation
 */
public ElasticSearchBulk( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,
Trans trans ) {
super( stepMeta, stepDataInterface, copyNr, transMeta, trans );
}
/**
 * Kettle step entry point, called once per incoming row.
 * On the first row it initializes buffers and field indexes; on every row it
 * buffers the row and hands it to indexRow(); on end-of-stream (null row) it
 * flushes any partially filled batch and signals completion.
 *
 * @param smi step meta interface (unused here; fields set up in setupData)
 * @param sdi step data interface
 * @return true while more rows are expected, false when the stream is done
 * @throws KettleException when indexing a row fails and errors are not tolerated
 */
public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
Object[] rowData = getRow();
// End of input: flush whatever is still buffered, then finish the step.
if ( rowData == null ) {
if ( currentRequest != null && currentRequest.numberOfActions() > 0 ) {
processBatch( false );
}
setOutputDone();
return false;
}
// One-time initialization, deferred until the first row so that
// getInputRowMeta() is available inside setupData().
if ( first ) {
first = false;
setupData();
// currentRequest = client.prepareBulk();
// requestsBuffer = new ArrayList<IndexRequestBuilder>( this.batchSize );
// try {
// client.bulk(currentRequest, RequestOptions.DEFAULT);
// } catch (IOException e1) {
// rejectAllRows( e1.getLocalizedMessage() );
// String msg = BaseMessages.getString( PKG, "ElasticSearchBulk.Log.Exception", e1.getLocalizedMessage() );
// logError( msg );
// throw new KettleStepException( msg, e1 );
// }
requestsBuffer = new ArrayList<IndexRequest>( this.batchSize );
initFieldIndexes();
}
try {
// Keep the raw row so a failed batch can reject/report the original rows.
data.inputRowBuffer[data.nextBufferRowIdx++] = rowData;
// When stopOnError is false, an indexing failure still returns true
// so the transformation keeps running.
return indexRow( data.inputRowMeta, rowData ) || !stopOnError;
} catch ( KettleStepException e ) {
throw e;
} catch ( Exception e ) {
rejectAllRows( e.getLocalizedMessage() );
String msg = BaseMessages.getString( PKG, "ElasticSearchBulk.Log.Exception", e.getLocalizedMessage() );
logError( msg );
throw new KettleStepException( msg, e );
}
}
/**
 * Initialize <code>this.data</code> from the first incoming row: clone the
 * input row metadata (only available after the first getRow()), allocate the
 * row buffer used for error reporting, and derive the output row metadata
 * from the step configuration.
 *
 * @throws KettleStepException when resolving the output fields fails
 */
private void setupData() throws KettleStepException {
data.nextBufferRowIdx = 0;
data.inputRowMeta = getInputRowMeta().clone(); // only available after first getRow();
data.inputRowBuffer = new Object[batchSize][];
data.outputRowMeta = data.inputRowMeta.clone();
meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore );
}
/**
 * Resolve configured field names against the input row metadata:
 * the JSON source field (mandatory in JSON-insert mode), the optional
 * output id field name, and the optional incoming id field index.
 * Field names may contain Kettle variables, hence environmentSubstitute().
 *
 * @throws KettleStepException when a configured field is absent from the input row
 */
private void initFieldIndexes() throws KettleStepException {
// JSON mode requires the configured JSON field to exist in the input row.
if ( isJsonInsert ) {
Integer idx = getFieldIdx( data.inputRowMeta, environmentSubstitute( meta.getJsonField() ) );
if ( idx != null ) {
jsonFieldIdx = idx.intValue();
} else {
throw new KettleStepException( BaseMessages.getString( PKG, "ElasticSearchBulk.Error.NoJsonField" ) );
}
}
idOutFieldName = environmentSubstitute( meta.getIdOutField() );
// Incoming id field is optional; when configured it must resolve to an index.
if ( StringUtils.isNotBlank( meta.getIdInField() ) ) {
idFieldIndex = getFieldIdx( data.inputRowMeta, environmentSubstitute( meta.getIdInField() ) );
if ( idFieldIndex == null ) {
throw new KettleStepException( BaseMessages.getString( PKG, "ElasticSearchBulk.Error.InvalidIdField" ) );
}
} else {
idFieldIndex = null;
}
}
/**
 * Look up the position of a named field in the given row metadata.
 *
 * @param rowMeta   row metadata to search
 * @param fieldName field name to look for (may be null)
 * @return the zero-based field index, or null when fieldName is null or no
 *         field with that exact name exists
 */
private static Integer getFieldIdx( RowMetaInterface rowMeta, String fieldName ) {
  if ( fieldName != null ) {
    int fieldCount = rowMeta.size();
    for ( int pos = 0; pos < fieldCount; pos++ ) {
      ValueMetaInterface valueMeta = rowMeta.getValueMeta( pos );
      if ( fieldName.equals( valueMeta.getName() ) ) {
        return Integer.valueOf( pos );
      }
    }
  }
  return null;
}
/**
 * Build an {@link IndexRequest} for a single row and add it to the current
 * bulk request; once batchSize actions have accumulated the batch is flushed
 * via processBatch(true).
 *
 * @param rowMeta metadata for the row to be indexed
 * @param row     data for the row to be indexed
 * @return true when the row was buffered successfully (or the flushed batch succeeded)
 * @throws KettleStepException when no Elasticsearch node is reachable or any
 *                             other error occurs while building or submitting the request
 */
private boolean indexRow( RowMetaInterface rowMeta, Object[] row ) throws KettleStepException {
  try {
    // REST high-level client replacement for the old TransportClient
    // client.prepareIndex(index, type).setOpType(...) builder chain.
    IndexRequest indexRequest = new IndexRequest( index );
    indexRequest.type( type );
    indexRequest.opType( this.opType );
    if ( idFieldIndex != null ) {
      // "" + ... just in case the id field isn't a string
      indexRequest.id( "" + row[idFieldIndex] );
    }
    if ( isJsonInsert ) {
      // Whole document comes from a single JSON-typed input field.
      addSourceFromJsonString( row, indexRequest );
    } else {
      // Document is assembled field-by-field from the row values.
      addSourceFromRowFields( indexRequest, rowMeta, row );
    }
    currentRequest.add( indexRequest );
    requestsBuffer.add( indexRequest );
    // Flush as soon as the configured batch size is reached.
    if ( currentRequest.numberOfActions() >= batchSize ) {
      return processBatch( true );
    }
    return true;
  } catch ( KettleStepException e ) {
    throw e;
  } catch ( NoNodeAvailableException e ) {
    // Fix: preserve the original exception as the cause (previously dropped).
    throw new KettleStepException( BaseMessages.getString( PKG, "ElasticSearchBulkDialog.Error.NoNodesFound" ), e );
  } catch ( Exception e ) {
    throw new KettleStepException( BaseMessages.getString( PKG, "ElasticSearchBulk.Log.Exception", e
      .getLocalizedMessage() ), e );
  }
}
// /**
// * @param row
// * @param requestBuilder
// */
// private void addSourceFromJsonString( Object[] row, IndexRequestBuilder requestBuilder ) throws KettleStepException {

本文档详细介绍了如何下载并导入Pentaho Kettle的Elasticsearch Bulk插件源码,修改相关POM配置以解决依赖问题,并展示了如何修改源码以适应特定需求。主要涉及步骤包括下载源码、在Eclipse中导入项目、调整Maven仓库设置、修改ElasticsearchBulk.java和ElasticSearchBulkDialog.java两个关键类,以及打包和运行过程。
最低0.47元/天 解锁文章
1081

被折叠的 条评论
为什么被折叠?



