ObjectInspector及其子类分析

本文介绍了Hive中ObjectInspector的概念及其在不同类型中的应用。ObjectInspector作为元数据接口,用于探查复杂对象结构,支持Primitive、List、Map及Struct等多种类型。文章详细讲解了不同类型的ObjectInspector实现原理,并展示了如何通过ObjectInspectorFactory创建实例。

ObjectInspector

ObjectInspector 帮助我们探查复杂对象的内部结构。一个 ObjectInspector 实例代表了一种具体的类型,以及该类型的数据在内存中的一种特定存储方式。
应当使用 ObjectInspectorFactory 创建 ObjectInspector 对象,因为这样可以保证相同的 ObjectInspector 仅有一个实例。

/**
 * Common root of all object inspectors: exposes the name and the category
 * of the data type being inspected.
 */
public interface ObjectInspector {

  /** The kinds of data type an ObjectInspector can describe. */
  enum Category {
    PRIMITIVE,
    LIST,
    MAP,
    STRUCT
  }

  /**
   * Returns the name of the data type inspected by this ObjectInspector.
   * The name is intended for showing type information to the user.
   *
   * Primitive types use a standardized name. Any other type may report
   * something like "list<int>", "map<int,string>", a java class name, or a
   * user-defined type name similar to a typedef.
   */
  String getTypeName();

  /**
   * Returns the category of the inspected type. The category determines which
   * sub-interface the implementation must inherit from:
   * PRIMITIVE:  PrimitiveObjectInspector
   * LIST:       ListObjectInspector
   * MAP:        MapObjectInspector
   * STRUCT:     StructObjectInspector
   */
  Category getCategory();
}

PrimitiveObjectInspector

/**
 * ObjectInspector for types of the PRIMITIVE category.
 */
public interface PrimitiveObjectInspector extends ObjectInspector {

  /** Returns the {@code Class} that identifies the inspected primitive type. */
  Class<?> getPrimitiveClass();
}
StandardPrimitiveObjectInspector

StandardPrimitiveObjectInspector 提供了一种通用的处理各种 PrimitiveObjectInspector 的方法。它的构造方法需要传入 primitiveClass 参数。通过 ObjectInspectorUtils,为各种 PrimitiveObjectInspector 提供统一接口。

/**
 * General-purpose PrimitiveObjectInspector that simply reports the primitive
 * class it was constructed with.
 */
class StandardPrimitiveObjectInspector implements PrimitiveObjectInspector {

  Class<?> primitiveClass;

  /**
   * Not meant to be called directly: use
   * ObjectInspectorFactory.getStandardPrimitiveObjectInspector instead.
   *
   * @param primitiveClass the class this inspector reports
   */
  protected StandardPrimitiveObjectInspector(Class<?> primitiveClass) {
    this.primitiveClass = primitiveClass;
  }

  /** Always reports the PRIMITIVE category. */
  public final Category getCategory() {
    return Category.PRIMITIVE;
  }

  /** Returns the current value of the {@code primitiveClass} field. */
  public Class<?> getPrimitiveClass() {
    return this.primitiveClass;
  }

  /**
   * Returns the type name, computed by
   * ObjectInspectorUtils.getClassShortName from the primitive class.
   */
  public String getTypeName() {
    return ObjectInspectorUtils.getClassShortName(this.primitiveClass);
  }
}

ListObjectInspector

/**
 * ObjectInspector for types of the LIST category: describes the element type
 * and reads elements out of a list data object.
 */
public interface ListObjectInspector extends ObjectInspector {

  // ---- Methods that do not need a data object ----

  /** Returns the ObjectInspector describing the elements of the list. */
  ObjectInspector getListElementObjectInspector();

  // ---- Methods that need a data object ----

  /**
   * Returns the element at {@code index}; returns null for a null list or an
   * out-of-range index.
   */
  Object getListElement(Object data, int index);

  /** Returns the length of the list; returns -1 when {@code data} is null. */
  int getListLength(Object data);

  /** Returns the data as a List; returns null when {@code data} is null. */
  List<?> getList(Object data);
}
StandardListObjectInspector

StandardListObjectInspector 是 ListObjectInspector 的通用实现。数据可以存储为 List 或者 Array 对象。构建对象,需要提供元素的 ObjectInspector。

/**
 * General-purpose ListObjectInspector. The data object may be a
 * {@code java.util.List} or an array (of objects or of primitives); the
 * element type is described by the ObjectInspector given at construction.
 */
class StandardListObjectInspector implements ListObjectInspector {

  ObjectInspector listElementObjectInspector;

  /**
   * Call ObjectInspectorFactory.getStandardListObjectInspector instead.
   *
   * @param listElementObjectInspector inspector describing the list elements
   */
  protected StandardListObjectInspector(ObjectInspector listElementObjectInspector) {
    this.listElementObjectInspector = listElementObjectInspector;
  }

  /** Always reports the LIST category. */
  public final Category getCategory() {
    return Category.LIST;
  }

  // without data

  /** Returns the inspector describing the list elements. */
  public ObjectInspector getListElementObjectInspector() {
    return listElementObjectInspector;
  }

  // with data

  /**
   * Returns the element at {@code index}, or null when {@code data} is null
   * or {@code index} is outside the list bounds.
   */
  public Object getListElement(Object data, int index) {
    List<?> list = getList(data);
    if (list == null || index < 0 || index >= list.size()) {
      return null;
    }
    return list.get(index);
  }

  /** Returns the number of elements, or -1 when {@code data} is null. */
  public int getListLength(Object data) {
    List<?> list = getList(data);
    if (list == null) {
      return -1;
    }
    return list.size();
  }

  /**
   * Converts the data object to a List.
   *
   * @param data a List, an object array, a primitive array, or null
   * @return null for null data; a fixed-size view for an object array; a
   *         boxed copy for a primitive array; otherwise {@code data} itself
   * @throws ClassCastException if {@code data} is neither an array nor a List
   */
  public List<?> getList(Object data) {
    if (data == null) {
      return null;
    }
    if (data instanceof Object[]) {
      return java.util.Arrays.asList((Object[]) data);
    }
    if (data.getClass().isArray()) {
      // Primitive arrays (int[], long[], ...) are arrays but cannot be cast to
      // Object[]; box each element through reflection instead of failing with
      // a ClassCastException.
      int length = java.lang.reflect.Array.getLength(data);
      List<Object> boxed = new java.util.ArrayList<Object>(length);
      for (int i = 0; i < length; i++) {
        boxed.add(java.lang.reflect.Array.get(data, i));
      }
      return boxed;
    }
    return (List<?>) data;
  }

  /** Returns the list type name followed by the element type name in angle brackets. */
  public String getTypeName() {
    return org.apache.hadoop.hive.serde.Constants.LIST_TYPE_NAME
        + "<" + listElementObjectInspector.getTypeName() + ">";
  }

}

MapObjectInspector

/**
 * ObjectInspector for types of the MAP category: describes the key and value
 * types and reads entries out of a map data object.
 */
public interface MapObjectInspector extends ObjectInspector {

  // ---- Methods that do not need a data object ----

  /** Returns the ObjectInspector describing the map keys. */
  ObjectInspector getMapKeyObjectInspector();

  /** Returns the ObjectInspector describing the map values. */
  ObjectInspector getMapValueObjectInspector();

  // ---- Methods that need a data object ----

  /**
   * Looks up the value stored under {@code key}.
   *
   * The key has to be of the same structure as the Map expects. In most cases
   * the key is a primitive type, so this is fine. In the rare case that the
   * key is not primitive, the user is responsible for defining the hashCode()
   * and equals() methods of the key class.
   */
  Object getMapValueElement(Object data, Object key);

  /** Returns the data as a Map; returns null when {@code data} is null. */
  Map<?, ?> getMap(Object data);
}
StandardMapObjectInspector

StandardMapObjectInspector 需要提供 key 和 value 的 ObjectInspector。

/**
 * MapObjectInspector for data stored as a {@code java.util.Map}; the key and
 * value types are described by the two inspectors given at construction.
 */
class StandardMapObjectInspector implements MapObjectInspector {

  ObjectInspector mapKeyObjectInspector;
  ObjectInspector mapValueObjectInspector;

  /**
   * Call ObjectInspectorFactory.getStandardMapObjectInspector instead.
   *
   * @param mapKeyObjectInspector inspector describing the keys
   * @param mapValueObjectInspector inspector describing the values
   */
  protected StandardMapObjectInspector(ObjectInspector mapKeyObjectInspector, ObjectInspector mapValueObjectInspector) {
    this.mapKeyObjectInspector = mapKeyObjectInspector;
    this.mapValueObjectInspector = mapValueObjectInspector;
  }

  /** Always reports the MAP category. */
  public final Category getCategory() {
    return Category.MAP;
  }

  /** Returns the map type name followed by "&lt;keyType,valueType&gt;". */
  public String getTypeName() {
    return org.apache.hadoop.hive.serde.Constants.MAP_TYPE_NAME
        + "<" + this.mapKeyObjectInspector.getTypeName()
        + "," + this.mapValueObjectInspector.getTypeName() + ">";
  }

  // ---- Type-level accessors (no data object needed) ----

  /** Returns the inspector describing the keys. */
  public ObjectInspector getMapKeyObjectInspector() {
    return this.mapKeyObjectInspector;
  }

  /** Returns the inspector describing the values. */
  public ObjectInspector getMapValueObjectInspector() {
    return this.mapValueObjectInspector;
  }

  // ---- Data-level accessors ----

  /**
   * Returns the value mapped to {@code key}, or null when either argument is
   * null.
   *
   * TODO: Now we assume the key Object supports hashCode and equals functions.
   */
  public Object getMapValueElement(Object data, Object key) {
    if (key == null || data == null) {
      return null;
    }
    return ((Map<?, ?>) data).get(key);
  }

  /** Returns the number of entries, or -1 when {@code data} is null. */
  int getMapSize(Object data) {
    return data == null ? -1 : ((Map<?, ?>) data).size();
  }

  /** Returns {@code data} cast to a Map, or null when {@code data} is null. */
  public Map<?, ?> getMap(Object data) {
    if (data != null) {
      return (Map<?, ?>) data;
    }
    return null;
  }
}

StructObjectInspector

/**
 * ObjectInspector for types of the STRUCT category: lists the fields of the
 * struct and reads field values out of a struct data object.
 */
public interface StructObjectInspector extends ObjectInspector {

  // ---- Methods that do not need a data object ----

  /** Returns all the fields. */
  List<? extends StructField> getAllStructFieldRefs();

  /** Looks up a field by name. */
  StructField getStructFieldRef(String fieldName);

  // ---- Methods that need a data object ----

  /** Returns the data of the given field; returns null when {@code data} is null. */
  Object getStructFieldData(Object data, StructField fieldRef);

  /** Returns the data of all fields as a list; returns null when {@code data} is null. */
  List<Object> getStructFieldsDataAsList(Object data);
}
  • StructField
    StructField 提供字段名和该字段的 ObjectInspector。
/**
 * StructField describes a single field of a struct: it exposes the field's
 * name and the ObjectInspector for the field.
 * 
 * Classes implementing this interface are considered to represent 
 * a field of a struct for this serde package.
 */
public interface StructField {
   
   

  /**
   * Get the name of the field.  The name should be always in lower-case.
   */
  String getFieldName();
  
  /**
   * Get the ObjectInspector for the field.
   */
  ObjectInspector getFieldObjectInspector();
  
}

StructObjectInspector 有四个子类,这里以 LazySimpleStructObjectInspector 为例:

LazySimpleStructObjectInspector
public class LazySimpleStructObjectInspector implements StructObjectInspector {
   
   

  public static final Log LOG = LogFactory.getLog(LazySimpleStructObjectInspector.class.getName());
  
  /**
   * StructField implementation holding a numeric field ID, the lower-cased
   * field name, and the ObjectInspector for the field.
   */
  protected static class MyField implements StructField {
    protected int fieldID;
    protected String fieldName;
    protected ObjectInspector fieldObjectInspector;

    /**
     * @param fieldID numeric ID of the field
     * @param fieldName name of the field; stored in lower case
     * @param fieldObjectInspector inspector for the field
     * @throws NullPointerException if {@code fieldName} is null
     */
    public MyField(int fieldID, String fieldName, ObjectInspector fieldObjectInspector) {
      this.fieldID = fieldID;
      // Lower-case with a fixed locale: the default-locale variant maps "I" to
      // a dotless "ı" under e.g. the Turkish locale, which would make field
      // names depend on the JVM's locale.
      this.fieldName = fieldName.toLowerCase(java.util.Locale.ROOT);
      this.fieldObjectInspector = fieldObjectInspector;
    }

    /** Returns the numeric ID of the field. */
    public int getFieldID() {
      return fieldID;
    }

    /** Returns the lower-cased field name. */
    public String getFieldName() {
      return fieldName;
    }

    /** Returns the ObjectInspector for the field. */
    public ObjectInspector getFieldObjectInspector() {
      return fieldObjectInspector;
    }

    /** Returns "fieldID:fieldName". */
    @Override
    public String toString() {
      return "" + fieldID + ":" + fieldName;
    }
  }
  
  protected List<MyField> fields;
  
  /**
   * Returns the type name of this inspector, as produced by
   * ObjectInspectorUtils.getStandardStructTypeName for this instance.
   */
  public String getTypeName() {
   
   
    return ObjectInspectorUtils.getStandardStructTypeName(this);
  }
  
  /** Call ObjectInspectorFactory.getLazySimpleStructObjectInspector instead.
   */
  protected LazySimpleStructObjectInspector(List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors) {
   
   
    init(structFieldNames, structFieldObjectInspectors
评论
成就一亿技术人!
拼手气红包6.0元
还能输入1000个字符
 
红包 添加红包
表情包 插入表情
 条评论被折叠 查看
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值