kettle 8.2升级elasticsearch为7.x

本文档详细介绍了如何下载并导入Pentaho Kettle的Elasticsearch Bulk插件源码,修改相关POM配置以解决依赖问题,并展示了如何修改源码以适应特定需求。主要涉及步骤包括下载源码、在Eclipse中导入项目、调整Maven仓库设置、修改ElasticSearchBulk.java和ElasticSearchBulkDialog.java两个关键类,以及打包和运行过程。

成品下载

下载kettle源码

pentaho-kettle-8.2.0.0-R.tar.gz

导入eclipse

导入工程

修改相关pom

1.修改工程根的pom.xml(红色部分)文件(解决编译时部分包找不到的问题)

<repository>
			<id>pentaho-public1</id>
			<name>/</name>
			<url>https://oss.sonatype.org/content/groups/public/</url>
		</repository>
		<repository>
			<id>pentaho-public2</id>
			<name>/</name>
			<url>https://nexus.pentaho.org/repository/proxy-public-release/</url>
		</repository>
		<repository>
			<id>pentaho-public3</id>
			<name>/</name>
			<url>https://nexus.pentaho.org/repository/proxy-public-snapshot/</url>
		</repository>

2.plugins目录,修改pom.xml文件内容

  <modules>
    <module>elasticsearch-bulk-insert</module>
  </modules>

3.plugins\elasticsearch-bulk-insert\core目录,修改pom.xml文件内容

<?xml version="1.0" encoding="UTF-8"?>
<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <modelVersion>4.0.0</modelVersion>

  <parent>
    <groupId>org.pentaho.di.plugins</groupId>
    <artifactId>elasticsearch-bulk-insert</artifactId>
    <version>8.2.0.0-342</version>
  </parent>

  <artifactId>elasticsearch-bulk-insert-core</artifactId>
  <version>8.2.0.0-342</version>

  <name>PDI Elasticsearch Bulk Insert Plugin Core</name>
  <description>Elasticsearch Bulk Insert Plugin</description>

  <properties>
    <pdi.version>8.2.0.0-342</pdi.version>
    <build.revision>${project.version}</build.revision>
    <timestamp>${maven.build.timestamp}</timestamp>
    <build.description>${project.description}</build.description>
    <maven.build.timestamp.format>yyyy/MM/dd hh:mm</maven.build.timestamp.format>
    <elasticsearch.version>7.3.0</elasticsearch.version>
  </properties>

  <dependencies>
    <dependency>
	    <groupId>org.elasticsearch</groupId>
	    <artifactId>elasticsearch</artifactId>
	    <version>${elasticsearch.version}</version>
	    <scope>compile</scope>
    </dependency>
    
    <dependency>
      <groupId>org.elasticsearch.client</groupId>
      <artifactId>elasticsearch-rest-high-level-client</artifactId>
      <version>${elasticsearch.version}</version>
    </dependency>

     <dependency>
        <groupId>org.elasticsearch.client</groupId>
        <artifactId>transport</artifactId>
        <version>${elasticsearch.version}</version>
        <scope>compile</scope>
    </dependency>
    <dependency>
      <groupId>pentaho-kettle</groupId>
      <artifactId>kettle-engine</artifactId>
      <version>${pdi.version}</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>pentaho-kettle</groupId>
      <artifactId>kettle-core</artifactId>
      <version>${pdi.version}</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>pentaho-kettle</groupId>
      <artifactId>kettle-ui-swt</artifactId>
      <version>${pdi.version}</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.eclipse.swt</groupId>
      <artifactId>org.eclipse.swt.gtk.linux.x86_64</artifactId>
      <version>4.6</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.eclipse</groupId>
      <artifactId>jface</artifactId>
      <version>3.3.0-I20070606-0010</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.mockito</groupId>
      <artifactId>mockito-all</artifactId>
      <version>1.9.5</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>pentaho-kettle</groupId>
      <artifactId>kettle-engine</artifactId>
      <classifier>tests</classifier>
      <version>${pdi.version}</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>pentaho-kettle</groupId>
      <artifactId>kettle-core</artifactId>
      <version>${pdi.version}</version>
      <classifier>tests</classifier>
      <scope>test</scope>
    </dependency>  
  </dependencies>

  <build>
    <resources>
      <resource>
        <filtering>true</filtering>
        <directory>src/main/resources</directory>
      </resource>
    </resources>
  </build>
</project>

修改源码

1.修改ElasticSearchBulk.java

package org.pentaho.di.trans.steps.elasticsearchbulk;

import java.io.IOException;

import java.net.UnknownHostException;

import java.util.ArrayList;

import java.util.Date;

import java.util.List;

import java.util.Map;

import java.util.concurrent.TimeUnit;

import org.apache.commons.lang.StringUtils;

import org.apache.http.HttpHost;

import org.elasticsearch.action.DocWriteRequest.OpType;

import org.elasticsearch.action.bulk.BulkItemResponse;

import org.elasticsearch.action.bulk.BulkRequest;

import org.elasticsearch.action.bulk.BulkResponse;

import org.elasticsearch.action.index.IndexRequest;

import org.elasticsearch.client.RequestOptions;

import org.elasticsearch.client.RestClient;

import org.elasticsearch.client.RestHighLevelClient;

import org.elasticsearch.client.transport.NoNodeAvailableException;

import org.elasticsearch.common.settings.Settings;

import org.elasticsearch.common.xcontent.XContentBuilder;

import org.elasticsearch.common.xcontent.XContentFactory;

import org.elasticsearch.common.xcontent.XContentType;

import org.pentaho.di.core.exception.KettleException;

import org.pentaho.di.core.exception.KettleStepException;

import org.pentaho.di.core.row.RowDataUtil;

import org.pentaho.di.core.row.RowMetaInterface;

import org.pentaho.di.core.row.ValueMetaInterface;

import org.pentaho.di.i18n.BaseMessages;

import org.pentaho.di.trans.Trans;

import org.pentaho.di.trans.TransMeta;

import org.pentaho.di.trans.step.BaseStep;

import org.pentaho.di.trans.step.StepDataInterface;

import org.pentaho.di.trans.step.StepInterface;

import org.pentaho.di.trans.step.StepMeta;

import org.pentaho.di.trans.step.StepMetaInterface;

import org.pentaho.di.trans.steps.elasticsearchbulk.ElasticSearchBulkMeta.Server;

/**

 * Does bulk insert of data into ElasticSearch

 *

 * @author webdetails

 * @since 16-02-2011

 */

public class ElasticSearchBulk extends BaseStep implements StepInterface {
   
   
  private static final String INSERT_ERROR_CODE = null;

  private static Class<?> PKG = ElasticSearchBulkMeta.class; // for i18n

  private ElasticSearchBulkMeta meta;

  private ElasticSearchBulkData data;

//  private Client client;

  private RestHighLevelClient client;

  private String index;

  private String type;

//  BulkRequestBuilder currentRequest;

  BulkRequest currentRequest = new BulkRequest();

  private int batchSize = 2;

  private boolean isJsonInsert = false;

  private int jsonFieldIdx = 0;

  private String idOutFieldName = null;

  private Integer idFieldIndex = null;

  private Long timeout = null;

  private TimeUnit timeoutUnit = TimeUnit.MILLISECONDS;

  private int numberOfErrors = 0;

//  private List<IndexRequestBuilder> requestsBuffer;

  private List<IndexRequest> requestsBuffer;

  private boolean stopOnError = true;

  private boolean useOutput = true;

  private Map<String, String> columnsToJson;

  private boolean hasFields;

  private IndexRequest.OpType opType = org.elasticsearch.action.DocWriteRequest.OpType.CREATE;

  public ElasticSearchBulk( StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta,

                            Trans trans ) {
   
   
    super( stepMeta, stepDataInterface, copyNr, transMeta, trans );

  }

  public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException {
   
   
    Object[] rowData = getRow();

    if ( rowData == null ) {
   
   
      if ( currentRequest != null && currentRequest.numberOfActions() > 0 ) {
   
   
        processBatch( false );

      }

      setOutputDone();

      return false;

    }

    if ( first ) {
   
   
      first = false;

      setupData();

//      currentRequest = client.prepareBulk();

//      requestsBuffer = new ArrayList<IndexRequestBuilder>( this.batchSize );

    //   try {
   
   
    //            client.bulk(currentRequest, RequestOptions.DEFAULT);

               // } catch (IOException e1) {
   
   
               //     rejectAllRows( e1.getLocalizedMessage() );

               //       String msg = BaseMessages.getString( PKG, "ElasticSearchBulk.Log.Exception", e1.getLocalizedMessage() );

               //       logError( msg );

               //       throw new KettleStepException( msg, e1 );

               // }

      requestsBuffer = new ArrayList<IndexRequest>( this.batchSize );

      initFieldIndexes();

    }

    try {
   
   
      data.inputRowBuffer[data.nextBufferRowIdx++] = rowData;

      return indexRow( data.inputRowMeta, rowData ) || !stopOnError;

    } catch ( KettleStepException e ) {
   
   
      throw e;

    } catch ( Exception e ) {
   
   
      rejectAllRows( e.getLocalizedMessage() );

      String msg = BaseMessages.getString( PKG, "ElasticSearchBulk.Log.Exception", e.getLocalizedMessage() );

      logError( msg );

      throw new KettleStepException( msg, e );

    }

  }

  /**

   * Initialize <code>this.data</code>

   *

   * @throws KettleStepException

   */

  private void setupData() throws KettleStepException {
   
   
    data.nextBufferRowIdx = 0;

    data.inputRowMeta = getInputRowMeta().clone(); // only available after first getRow();

    data.inputRowBuffer = new Object[batchSize][];

    data.outputRowMeta = data.inputRowMeta.clone();

    meta.getFields( data.outputRowMeta, getStepname(), null, null, this, repository, metaStore );

  }

  private void initFieldIndexes() throws KettleStepException {
   
   
    if ( isJsonInsert ) {
   
   
      Integer idx = getFieldIdx( data.inputRowMeta, environmentSubstitute( meta.getJsonField() ) );

      if ( idx != null ) {
   
   
        jsonFieldIdx = idx.intValue();

      } else {
   
   
        throw new KettleStepException( BaseMessages.getString( PKG, "ElasticSearchBulk.Error.NoJsonField" ) );

      }

    }

    idOutFieldName = environmentSubstitute( meta.getIdOutField() );

    if ( StringUtils.isNotBlank( meta.getIdInField() ) ) {
   
   
      idFieldIndex = getFieldIdx( data.inputRowMeta, environmentSubstitute( meta.getIdInField() ) );

      if ( idFieldIndex == null ) {
   
   
        throw new KettleStepException( BaseMessages.getString( PKG, "ElasticSearchBulk.Error.InvalidIdField" ) );

      }

    } else {
   
   
      idFieldIndex = null;

    }

  }

  private static Integer getFieldIdx( RowMetaInterface rowMeta, String fieldName ) {
   
   
    if ( fieldName == null ) {
   
   
      return null;

    }

    for ( int i = 0; i < rowMeta.size(); i++ ) {
   
   
      String name = rowMeta.getValueMeta( i ).getName();

      if ( fieldName.equals( name ) ) {
   
   
        return i;

      }

    }

    return null;

  }

  /**

   * @param rowMeta The metadata for the row to be indexed

   * @param row     The data for the row to be indexed

   */

  private boolean indexRow( RowMetaInterface rowMeta, Object[] row ) throws KettleStepException {
   
   
    try {
   
   
//      IndexRequestBuilder requestBuilder = client.prepareIndex( index, type );

//      requestBuilder.setOpType( this.opType );

      IndexRequest indexRequest = new IndexRequest(index);

      indexRequest.type(type);

      indexRequest.opType(this.opType);

      if ( idFieldIndex != null ) {
   
   
//        requestBuilder.setId( "" + row[idFieldIndex] ); // "" just in case field isn't string

        indexRequest.id("" + row[idFieldIndex]);

      }

      if ( isJsonInsert ) {
   
   
//        addSourceFromJsonString( row, requestBuilder );

        addSourceFromJsonString( row, indexRequest );

      } else {
   
   
//        addSourceFromRowFields( requestBuilder, rowMeta, row );

        addSourceFromRowFields( indexRequest, rowMeta, row );

      }

// currentRequest = new BulkRequest();

//      currentRequest.add( requestBuilder );

//      requestsBuffer.add( requestBuilder );

      currentRequest.add( indexRequest );

      requestsBuffer.add( indexRequest );

      if ( currentRequest.numberOfActions() >= batchSize ) {
   
   
        return processBatch( true );

      } else {
   
   
        return true;

      }

    } catch ( KettleStepException e ) {
   
   
      throw e;

    } catch ( NoNodeAvailableException e ) {
   
   
      throw new KettleStepException( BaseMessages.getString( PKG, "ElasticSearchBulkDialog.Error.NoNodesFound" ) );

    } catch ( Exception e ) {
   
   
      throw new KettleStepException( BaseMessages.getString( PKG, "ElasticSearchBulk.Log.Exception", e

              .getLocalizedMessage() ), e );

    }

  }

//  /**

//   * @param row

//   * @param requestBuilder

//   */

//  private void addSourceFromJsonString( Object[] row, IndexRequestBuilder requestBuilder ) throws KettleStepException {
   
   
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

梁晓山(ben)

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值