1. Problem Description
The following query returns NULL for part of its result values:
select order_amt,deal_amt,order_amt/deal_amt*(order_amt/deal_amt) from bigdata.test_decimal_tb where id = 1;
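The table definition was only given as a screenshot that did not survive in this copy. For reference, here is a hypothetical schema and row that reproduce the NULL (the decimal(24,8) column types and the values 100 / 3 are assumptions, not from the original post):

create table if not exists bigdata.test_decimal_tb (
  id        int,
  order_amt decimal(24,8),
  deal_amt  decimal(24,8)
);
insert into table bigdata.test_decimal_tb values (1, 100.00000000, 3.00000000);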
Result screenshot (not preserved in this copy): order_amt and deal_amt come back normally, while the computed expression order_amt/deal_amt*(order_amt/deal_amt) is NULL.
2. Locating the Problem
2.1 GenericUDFOPDivide
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.udf.generic;

import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColDivideLongColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.LongColDivideLongScalar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.LongScalarDivideLongColumn;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

/**
 * Note that in SQL, the return type of divide is not necessarily the same
 * as the parameters. For example, 3 / 2 = 1.5, not 1. To follow SQL, we always
 * return a decimal for divide.
 */
@Description(name = "/", value = "a _FUNC_ b - Divide a by b", extended = "Example:\n"
    + " > SELECT 3 _FUNC_ 2 FROM src LIMIT 1;\n" + " 1.5")
@VectorizedExpressions({LongColDivideLongColumn.class, LongColDivideDoubleColumn.class,
  DoubleColDivideLongColumn.class, DoubleColDivideDoubleColumn.class,
  LongColDivideLongScalar.class, LongColDivideDoubleScalar.class,
  DoubleColDivideLongScalar.class, DoubleColDivideDoubleScalar.class,
  LongScalarDivideLongColumn.class, LongScalarDivideDoubleColumn.class,
  DoubleScalarDivideLongColumn.class, DoubleScalarDivideDoubleColumn.class,
  DecimalColDivideDecimalColumn.class, DecimalColDivideDecimalScalar.class,
  DecimalScalarDivideDecimalColumn.class})
public class GenericUDFOPDivide extends GenericUDFBaseNumeric {

  public GenericUDFOPDivide() {
    super();
    this.opDisplayName = "/";
  }

  @Override
  protected PrimitiveTypeInfo deriveResultExactTypeInfo() {
    if (ansiSqlArithmetic) {
      return deriveResultExactTypeInfoAnsiSql();
    }
    return deriveResultExactTypeInfoBackwardsCompat();
  }

  protected PrimitiveTypeInfo deriveResultExactTypeInfoAnsiSql() {
    // No type promotion. Everything goes to decimal.
    return deriveResultDecimalTypeInfo();
  }

  protected PrimitiveTypeInfo deriveResultExactTypeInfoBackwardsCompat() {
    // Preserve existing return type behavior for division:
    // Non-decimal division should return double
    if (leftOI.getPrimitiveCategory() != PrimitiveCategory.DECIMAL
        && rightOI.getPrimitiveCategory() != PrimitiveCategory.DECIMAL) {
      return TypeInfoFactory.doubleTypeInfo;
    }
    return deriveResultDecimalTypeInfo();
  }

  @Override
  protected PrimitiveTypeInfo deriveResultApproxTypeInfo() {
    // Hive 0.12 behavior where double / decimal -> decimal is gone.
    return TypeInfoFactory.doubleTypeInfo;
  }

  @Override
  protected DoubleWritable evaluate(DoubleWritable left, DoubleWritable right) {
    if (right.get() == 0.0) {
      return null;
    }
    doubleWritable.set(left.get() / right.get());
    return doubleWritable;
  }

  @Override
  protected HiveDecimalWritable evaluate(HiveDecimal left, HiveDecimal right) {
    if (right.compareTo(HiveDecimal.ZERO) == 0) {
      return null;
    }
    HiveDecimal dec = left.divide(right);
    if (dec == null) {
      return null;
    }
    decimalWritable.set(dec);
    return decimalWritable;
  }

  /**
   * A balanced way to determine the precision/scale of the decimal division result. Integer digits
   * and decimal digits are computed independently. However, when the precision from above reaches
   * above HiveDecimal.MAX_PRECISION, integer digits and decimal digits are shrunk equally to fit.
   */
  @Override
  protected DecimalTypeInfo deriveResultDecimalTypeInfo(int prec1, int scale1, int prec2, int scale2) {
    int intDig = Math.min(HiveDecimal.MAX_SCALE, prec1 - scale1 + scale2);
    int decDig = Math.min(HiveDecimal.MAX_SCALE, Math.max(6, scale1 + prec2 + 1));
    int diff = intDig + decDig - HiveDecimal.MAX_SCALE;
    if (diff > 0) {
      decDig -= diff / 2 + 1; // Slight negative bias.
      intDig = HiveDecimal.MAX_SCALE - decDig;
    }
    return TypeInfoFactory.getDecimalTypeInfo(intDig + decDig, decDig);
  }
}
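To make the derived type concrete, here is a minimal standalone sketch of the deriveResultDecimalTypeInfo arithmetic above, assuming (hypothetically, as in the schema sketched in section 1) that both operands are decimal(24,8):

// Standalone sketch of GenericUDFOPDivide.deriveResultDecimalTypeInfo.
// The decimal(24,8) operand types are an assumption, not from the original post.
public class DivideTypeDemo {
  // In this Hive version, HiveDecimal.MAX_PRECISION == HiveDecimal.MAX_SCALE == 38.
  private static final int MAX = 38;

  public static void main(String[] args) {
    int prec1 = 24, scale1 = 8, prec2 = 24, scale2 = 8;
    int intDig = Math.min(MAX, prec1 - scale1 + scale2);         // 24
    int decDig = Math.min(MAX, Math.max(6, scale1 + prec2 + 1)); // 33
    int diff = intDig + decDig - MAX;                            // 19
    if (diff > 0) {
      decDig -= diff / 2 + 1;  // 33 - 10 = 23
      intDig = MAX - decDig;   // 15
    }
    // Prints decimal(38,23): 15 integer digits and 23 digits of scale.
    System.out.println("decimal(" + (intDig + decDig) + "," + decDig + ")");
  }
}

So each order_amt/deal_amt sub-expression is typed decimal(38,23) before it ever reaches the multiplication.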
2.2 GenericUDFOPMultiply
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.udf.generic;

import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
import org.apache.hadoop.hive.ql.exec.vector.expressions.gen.*;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;

@Description(name = "*", value = "a _FUNC_ b - Multiplies a by b")
@VectorizedExpressions({LongColMultiplyLongColumn.class, LongColMultiplyDoubleColumn.class,
  DoubleColMultiplyLongColumn.class, DoubleColMultiplyDoubleColumn.class,
  LongColMultiplyLongScalar.class, LongColMultiplyDoubleScalar.class,
  DoubleColMultiplyLongScalar.class, DoubleColMultiplyDoubleScalar.class,
  LongScalarMultiplyLongColumn.class, LongScalarMultiplyDoubleColumn.class,
  DoubleScalarMultiplyLongColumn.class, DoubleScalarMultiplyDoubleColumn.class,
  DecimalColMultiplyDecimalColumn.class, DecimalColMultiplyDecimalScalar.class,
  DecimalScalarMultiplyDecimalColumn.class})
public class GenericUDFOPMultiply extends GenericUDFBaseNumeric {

  public GenericUDFOPMultiply() {
    super();
    this.opDisplayName = "*";
  }

  @Override
  protected ByteWritable evaluate(ByteWritable left, ByteWritable right) {
    byteWritable.set((byte) (left.get() * right.get()));
    return byteWritable;
  }

  @Override
  protected ShortWritable evaluate(ShortWritable left, ShortWritable right) {
    shortWritable.set((short) (left.get() * right.get()));
    return shortWritable;
  }

  @Override
  protected IntWritable evaluate(IntWritable left, IntWritable right) {
    intWritable.set(left.get() * right.get());
    return intWritable;
  }

  @Override
  protected LongWritable evaluate(LongWritable left, LongWritable right) {
    longWritable.set(left.get() * right.get());
    return longWritable;
  }

  @Override
  protected FloatWritable evaluate(FloatWritable left, FloatWritable right) {
    floatWritable.set(left.get() * right.get());
    return floatWritable;
  }

  @Override
  protected DoubleWritable evaluate(DoubleWritable left, DoubleWritable right) {
    doubleWritable.set(left.get() * right.get());
    return doubleWritable;
  }

  @Override
  protected HiveDecimalWritable evaluate(HiveDecimal left, HiveDecimal right) {
    HiveDecimal dec = left.multiply(right);
    if (dec == null) {
      return null;
    }
    decimalWritable.set(dec);
    return decimalWritable;
  }

  @Override
  protected DecimalTypeInfo deriveResultDecimalTypeInfo(int prec1, int scale1, int prec2, int scale2) {
    int scale = Math.min(HiveDecimal.MAX_SCALE, scale1 + scale2);
    int prec = Math.min(HiveDecimal.MAX_PRECISION, prec1 + prec2 + 1);
    return TypeInfoFactory.getDecimalTypeInfo(prec, scale);
  }
}
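Feeding the two decimal(38,23) quotients derived in section 2.1 into this deriveResultDecimalTypeInfo gives the declared type of the outer multiplication. Again a standalone sketch, not part of the original post:

// Sketch of GenericUDFOPMultiply.deriveResultDecimalTypeInfo applied to the
// two decimal(38,23) division results derived above.
public class MultiplyTypeDemo {
  public static void main(String[] args) {
    int prec1 = 38, scale1 = 23, prec2 = 38, scale2 = 23;
    int scale = Math.min(38, scale1 + scale2);  // min(38, 46) = 38
    int prec = Math.min(38, prec1 + prec2 + 1); // min(38, 77) = 38
    // Prints decimal(38,38): the declared type has zero integer digits left,
    // and the raw product (scale 46) must be rounded to fit at all.
    System.out.println("decimal(" + prec + "," + scale + ")");
  }
}

Note that evaluate() returns null as soon as left.multiply(right) returns null; the next section shows why that happens.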
2.3 HiveDecimal
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.common.type;

import java.math.BigDecimal;
import java.math.BigInteger;
import java.math.RoundingMode;

/**
 *
 * HiveDecimal. Simple wrapper for BigDecimal. Adds fixed max precision and non scientific string
 * representation
 *
 */
public class HiveDecimal implements Comparable<HiveDecimal> {
  public static final int MAX_PRECISION = 38;
  public static final int MAX_SCALE = 38;

  /**
   * Default precision/scale when user doesn't specify in the column metadata, such as
   * decimal and decimal(8).
   */
  public static final int USER_DEFAULT_PRECISION = 10;
  public static final int USER_DEFAULT_SCALE = 0;

  /**
   * Default precision/scale when system is not able to determine them, such as in case
   * of a non-generic udf.
   */
  public static final int SYSTEM_DEFAULT_PRECISION = 38;
  public static final int SYSTEM_DEFAULT_SCALE = 18;

  public static final HiveDecimal ZERO = new HiveDecimal(BigDecimal.ZERO);
  public static final HiveDecimal ONE = new HiveDecimal(BigDecimal.ONE);

  public static final int ROUND_FLOOR = BigDecimal.ROUND_FLOOR;
  public static final int ROUND_CEILING = BigDecimal.ROUND_CEILING;
  public static final int ROUND_HALF_UP = BigDecimal.ROUND_HALF_UP;

  private BigDecimal bd = BigDecimal.ZERO;

  private HiveDecimal(BigDecimal bd) {
    this.bd = bd;
  }

  public static HiveDecimal create(BigDecimal b) {
    return create(b, true);
  }

  public static HiveDecimal create(BigDecimal b, boolean allowRounding) {
    BigDecimal bd = normalize(b, allowRounding);
    return bd == null ? null : new HiveDecimal(bd);
  }

  public static HiveDecimal create(BigInteger unscaled, int scale) {
    BigDecimal bd = normalize(new BigDecimal(unscaled, scale), true);
    return bd == null ? null : new HiveDecimal(bd);
  }

  public static HiveDecimal create(String dec) {
    BigDecimal bd;
    try {
      bd = new BigDecimal(dec);
    } catch (NumberFormatException ex) {
      return null;
    }
    bd = normalize(bd, true);
    return bd == null ? null : new HiveDecimal(bd);
  }

  public static HiveDecimal create(BigInteger bi) {
    BigDecimal bd = normalize(new BigDecimal(bi), true);
    return bd == null ? null : new HiveDecimal(bd);
  }

  public static HiveDecimal create(int i) {
    return new HiveDecimal(new BigDecimal(i));
  }

  public static HiveDecimal create(long l) {
    return new HiveDecimal(new BigDecimal(l));
  }

  @Override
  public String toString() {
    return bd.toPlainString();
  }

  public HiveDecimal setScale(int i) {
    return new HiveDecimal(bd.setScale(i, RoundingMode.HALF_UP));
  }

  @Override
  public int compareTo(HiveDecimal dec) {
    return bd.compareTo(dec.bd);
  }

  @Override
  public int hashCode() {
    return bd.hashCode();
  }

  @Override
  public boolean equals(Object obj) {
    if (obj == null || obj.getClass() != getClass()) {
      return false;
    }
    return bd.equals(((HiveDecimal) obj).bd);
  }

  public int scale() {
    return bd.scale();
  }

  /**
   * Returns the number of digits (integer and fractional) in the number, which is equivalent
   * to SQL decimal precision. Note that this is different from BigDecimal.precision(),
   * which returns the precision of the unscaled value (BigDecimal.valueOf(0.01).precision() = 1,
   * whereas HiveDecimal.create("0.01").precision() = 2).
   * If you want the BigDecimal precision, use HiveDecimal.bigDecimalValue().precision()
   * @return
   */
  public int precision() {
    int bdPrecision = bd.precision();
    int bdScale = bd.scale();
    if (bdPrecision < bdScale) {
      // This can happen for numbers less than 0.1
      // For 0.001234: bdPrecision=4, bdScale=6
      // In this case, we'll set the type to have the same precision as the scale.
      return bdScale;
    }
    return bdPrecision;
  }

  public int intValue() {
    return bd.intValue();
  }

  public double doubleValue() {
    return bd.doubleValue();
  }

  public long longValue() {
    return bd.longValue();
  }

  public short shortValue() {
    return bd.shortValue();
  }

  public float floatValue() {
    return bd.floatValue();
  }

  public BigDecimal bigDecimalValue() {
    return bd;
  }

  public byte byteValue() {
    return bd.byteValue();
  }

  public HiveDecimal setScale(int adjustedScale, int rm) {
    return create(bd.setScale(adjustedScale, rm));
  }

  public HiveDecimal subtract(HiveDecimal dec) {
    return create(bd.subtract(dec.bd));
  }

  public HiveDecimal multiply(HiveDecimal dec) {
    // allowRounding == false: if the exact product needs more than MAX_PRECISION
    // digits, normalize() returns null instead of rounding, and so does multiply().
    return create(bd.multiply(dec.bd), false);
  }

  public BigInteger unscaledValue() {
    return bd.unscaledValue();
  }

  public HiveDecimal scaleByPowerOfTen(int n) {
    return create(bd.scaleByPowerOfTen(n));
  }

  public HiveDecimal abs() {
    return create(bd.abs());
  }

  public HiveDecimal negate() {
    return create(bd.negate());
  }

  public HiveDecimal add(HiveDecimal dec) {
    return create(bd.add(dec.bd));
  }

  public HiveDecimal pow(int n) {
    BigDecimal result = normalize(bd.pow(n), false);
    return result == null ? null : new HiveDecimal(result);
  }

  public HiveDecimal remainder(HiveDecimal dec) {
    return create(bd.remainder(dec.bd));
  }

  public HiveDecimal divide(HiveDecimal dec) {
    // Debug output added by the author while tracing the NULL (not in upstream Hive).
    System.out.println(create(bd.divide(dec.bd, MAX_SCALE, RoundingMode.HALF_UP), true));
    return create(bd.divide(dec.bd, MAX_SCALE, RoundingMode.HALF_UP), true);
  }

  /**
   * Get the sign of the underlying decimal.
   * @return 0 if the decimal is equal to 0, -1 if less than zero, and 1 if greater than 0
   */
  public int signum() {
    return bd.signum();
  }

  private static BigDecimal trim(BigDecimal d) {
    if (d.compareTo(BigDecimal.ZERO) == 0) {
      // Special case for 0, because java doesn't strip zeros correctly on that number.
      d = BigDecimal.ZERO;
    } else {
      d = d.stripTrailingZeros();
      if (d.scale() < 0) {
        // no negative scale decimals
        d = d.setScale(0);
      }
    }
    return d;
  }

  private static BigDecimal normalize(BigDecimal bd, boolean allowRounding) {
    if (bd == null) {
      return null;
    }
    bd = trim(bd);
    int intDigits = bd.precision() - bd.scale();
    if (intDigits > MAX_PRECISION) {
      return null;
    }
    int maxScale = Math.min(MAX_SCALE, Math.min(MAX_PRECISION - intDigits, bd.scale()));
    if (bd.scale() > maxScale) {
      if (allowRounding) {
        bd = bd.setScale(maxScale, RoundingMode.HALF_UP);
        // Trimming is again necessary, because rounding may introduce new trailing 0's.
        bd = trim(bd);
      } else {
        bd = null;
      }
    }
    return bd;
  }

  public static BigDecimal enforcePrecisionScale(BigDecimal bd, int maxPrecision, int maxScale) {
    if (bd == null) {
      return null;
    }
    bd = trim(bd);
    if (bd.scale() > maxScale) {
      bd = bd.setScale(maxScale, RoundingMode.HALF_UP);
    }
    int maxIntDigits = maxPrecision - maxScale;
    int intDigits = bd.precision() - bd.scale();
    if (intDigits > maxIntDigits) {
      return null;
    }
    return bd;
  }

  public static HiveDecimal enforcePrecisionScale(HiveDecimal dec, int maxPrecision, int maxScale) {
    if (dec == null) {
      return null;
    }
    BigDecimal bd = enforcePrecisionScale(dec.bd, maxPrecision, maxScale);
    if (bd == null) {
      return null;
    }
    return HiveDecimal.create(bd);
  }
}
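The NULL originates here: multiply() calls create(bd.multiply(dec.bd), false), so normalize() runs with allowRounding = false and returns null whenever the raw product's scale would have to shrink to fit into 38 digits. The following self-contained sketch replays that logic on two scale-23 quotients (the concrete value is illustrative, not taken from the author's table):

import java.math.BigDecimal;
import java.math.RoundingMode;

// Replays HiveDecimal.normalize() on the raw product of two scale-23 quotients.
public class MultiplyNullDemo {
  private static final int MAX_PRECISION = 38;
  private static final int MAX_SCALE = 38;

  static BigDecimal normalize(BigDecimal bd, boolean allowRounding) {
    bd = bd.compareTo(BigDecimal.ZERO) == 0 ? BigDecimal.ZERO : bd.stripTrailingZeros();
    if (bd.scale() < 0) {
      bd = bd.setScale(0); // no negative-scale decimals
    }
    int intDigits = bd.precision() - bd.scale();
    if (intDigits > MAX_PRECISION) {
      return null;
    }
    int maxScale = Math.min(MAX_SCALE, Math.min(MAX_PRECISION - intDigits, bd.scale()));
    if (bd.scale() > maxScale) {
      if (!allowRounding) {
        return null; // <-- the branch multiply() takes when the product overflows
      }
      bd = bd.setScale(maxScale, RoundingMode.HALF_UP); // (the real code trims again here)
    }
    return bd;
  }

  public static void main(String[] args) {
    // A quotient with 2 integer digits and scale 23, as a decimal(38,23) would hold it.
    BigDecimal q = new BigDecimal("33.33333333333333333333333");
    BigDecimal product = q.multiply(q); // 4 integer digits, scale 46, precision 50
    System.out.println(normalize(product, true));  // rounded to scale 34 (38 - 4): succeeds
    System.out.println(normalize(product, false)); // null: what multiply() actually does
  }
}

With rounding allowed the product would simply lose its excess fractional digits; with rounding forbidden the whole result is discarded and the query prints NULL.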
2.4 BigDecimal
/**
 * Returns a {@code BigDecimal} whose value is <tt>(this ×
 * multiplicand)</tt>, and whose scale is {@code (this.scale() +
 * multiplicand.scale())}.
 *
 * @param multiplicand value to be multiplied by this {@code BigDecimal}.
 * @return {@code this * multiplicand}
 */
public BigDecimal multiply(BigDecimal multiplicand) {
    int productScale = checkScale((long) scale + multiplicand.scale);
    if (this.intCompact != INFLATED) {
        if ((multiplicand.intCompact != INFLATED)) {
            return multiply(this.intCompact, multiplicand.intCompact, productScale);
        } else {
            return multiply(this.intCompact, multiplicand.intVal, productScale);
        }
    } else {
        if ((multiplicand.intCompact != INFLATED)) {
            return multiply(multiplicand.intCompact, this.intVal, productScale);
        } else {
            return multiply(this.intVal, multiplicand.intVal, productScale);
        }
    }
}
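The key point in this JDK method is that the product's scale is always the sum of the operands' scales, so multiplying two scale-23 quotients yields a raw scale of 46. A quick check:

import java.math.BigDecimal;

public class ScaleSumDemo {
  public static void main(String[] args) {
    BigDecimal a = new BigDecimal("1.23");     // scale 2
    BigDecimal b = new BigDecimal("4.567");    // scale 3
    System.out.println(a.multiply(b));         // 5.61741
    System.out.println(a.multiply(b).scale()); // 5 = 2 + 3
  }
}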
2.5 Call Chain
GenericUDFOPMultiply.evaluate() --> HiveDecimal.multiply() --> BigDecimal.multiply() --> HiveDecimal.normalize()
HiveDecimal.multiply() invokes create(bd.multiply(dec.bd), false), i.e. normalize() runs with allowRounding = false; whenever the raw product cannot fit into 38 digits without rounding, normalize() returns null, evaluate() passes the null through, and the query outputs NULL.
2.5.1 Data Query
select order_amt,deal_amt,order_amt/deal_amt*(order_amt/deal_amt) from bigdata.test_decimal_tb where id = 1;
Table column description (screenshot not preserved; see the hypothetical decimal(24,8) schema sketched in section 1).
Run result (screenshot not preserved). With data such as order_amt = 100 and deal_amt = 3, the quotient is held as decimal(38,23) = 33.33333333333333333333333; squaring it produces a raw BigDecimal with 4 integer digits and scale 46, and since 4 + 46 > 38 and multiply() forbids rounding, the expression column is NULL.
2.5.2 Debug Code
The debugging consisted of adding the System.out.println(...) shown in HiveDecimal.divide() in section 2.3 (it is not in upstream Hive) and re-running:
select order_amt/deal_amt*(order_amt/deal_amt) from bigdata.test_decimal_tb where id = 1;
2.5.3 Debug Screenshots
Screenshots not preserved. With the hypothetical data above, the println would show the quotient rounded to scale 36 (= 38 minus its two integer digits) before it is enforced down to the declared decimal(38,23); the NULL itself only appears later, inside multiply().
3. Recommendation
When multiplying decimal values, keep the precision and scale of the operands under explicit control. As shown above, HiveDecimal.multiply() calls create(..., allowRounding = false), so once the operands' scales sum beyond what fits into 38 digits, the product silently becomes NULL. Casting the intermediate division results down to a modest scale before multiplying avoids this, as in the sketch below.
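A sketch of the workaround (the decimal(18,6) target type is illustrative, not from the original post; with at most 12 integer digits and scale 6 per operand, the raw product needs at most 24 + 12 = 36 digits, which always fits within 38):

select order_amt,
       deal_amt,
       cast(order_amt / deal_amt as decimal(18,6))
         * cast(order_amt / deal_amt as decimal(18,6))
from bigdata.test_decimal_tb
where id = 1;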
