ObjectInspector
ObjectInspector 帮助我们探查复杂对象的内部结构。一个 ObjectInspector 实例代表了一个具体的类型,和存储这种类型的数据在内存中的一个特定方法。
应当使用 ObjectInspectorFactory 来创建 ObjectInspector 对象,因为这样可以保证相同的 ObjectInspector 仅有一个实例。
/**
 * Describes one concrete data type together with one particular way of
 * keeping values of that type in memory.
 */
public interface ObjectInspector {

  /** The categories of data an ObjectInspector can describe. */
  enum Category {
    PRIMITIVE,
    LIST,
    MAP,
    STRUCT
  }

  /**
   * Gets the name of the data type inspected by this ObjectInspector; the
   * name is what gets shown to the user as type information.
   *
   * Primitive types use a standardized name. Any other type may use a name
   * such as {@code list<int>} or {@code map<int,string>}, a java class name,
   * or a user-defined name that works like a typedef.
   */
  String getTypeName();

  /**
   * Gets the category of the inspected type. The returned value dictates
   * which interface the implementing ObjectInspector must inherit from:
   *   PRIMITIVE: PrimitiveObjectInspector
   *   LIST:      ListObjectInspector
   *   MAP:       MapObjectInspector
   *   STRUCT:    StructObjectInspector
   */
  Category getCategory();
}
PrimitiveObjectInspector
/**
 * ObjectInspector specialization that additionally exposes a Class object
 * for the inspected primitive type.
 */
public interface PrimitiveObjectInspector extends ObjectInspector {

  /**
   * Gets the class associated with the primitive type being inspected.
   */
  Class<?> getPrimitiveClass();
}
StandardPrimitiveObjectInspector
StandardPrimitiveObjectInspector 提供了一种通用的处理各种 PrimitiveObjectInspector 的方法。它的构造方法需要传入 primitiveClass 参数。它借助 ObjectInspectorUtils,为各种 PrimitiveObjectInspector 提供统一接口。
/**
 * General-purpose PrimitiveObjectInspector that is backed by a single
 * Class object supplied at construction time.
 */
class StandardPrimitiveObjectInspector implements PrimitiveObjectInspector {

  Class<?> primitiveClass;

  /**
   * Not intended to be called directly; use
   * ObjectInspectorFactory.getStandardPrimitiveObjectInspector instead.
   *
   * @param primitiveClass the class later reported by getPrimitiveClass()
   */
  protected StandardPrimitiveObjectInspector(Class<?> primitiveClass) {
    this.primitiveClass = primitiveClass;
  }

  /** Always reports the PRIMITIVE category. */
  public final Category getCategory() {
    return Category.PRIMITIVE;
  }

  /**
   * Gets the type name, computed by passing the stored class to
   * ObjectInspectorUtils.getClassShortName.
   */
  public String getTypeName() {
    String shortName = ObjectInspectorUtils.getClassShortName(this.primitiveClass);
    return shortName;
  }

  /** Gets the class that was handed to the constructor. */
  public Class<?> getPrimitiveClass() {
    return this.primitiveClass;
  }
}
ListObjectInspector
/**
 * ObjectInspector for list-typed data.
 */
public interface ListObjectInspector extends ObjectInspector {

  // ** Methods that do not need a data object **

  /**
   * Gets the ObjectInspector of the list elements.
   */
  ObjectInspector getListElementObjectInspector();

  // ** Methods that need a data object **

  /**
   * Gets the element at the given position.
   * Returns null for a null list or an out-of-the-range index.
   */
  Object getListElement(Object data, int index);

  /**
   * Gets the number of elements in the list.
   * Returns -1 for data = null.
   */
  int getListLength(Object data);

  /**
   * Gets the data as a List.
   * Returns null for data = null.
   */
  List<?> getList(Object data);
}
StandardListObjectInspector
StandardListObjectInspector 是 ListObjectInspector 的通用实现。数据可以存储为 List 或者 Array 对象。构建对象,需要提供元素的 ObjectInspector。
/**
 * General-purpose ListObjectInspector. The inspected data may be a
 * java.util.List or an array (of objects or of primitives).
 */
class StandardListObjectInspector implements ListObjectInspector {

  ObjectInspector listElementObjectInspector;

  /**
   * Call ObjectInspectorFactory.getStandardListObjectInspector instead.
   *
   * @param listElementObjectInspector the ObjectInspector of the list elements
   */
  protected StandardListObjectInspector(ObjectInspector listElementObjectInspector) {
    this.listElementObjectInspector = listElementObjectInspector;
  }

  /** Always reports the LIST category. */
  public final Category getCategory() {
    return Category.LIST;
  }

  // without data

  /** Gets the element ObjectInspector that was handed to the constructor. */
  public ObjectInspector getListElementObjectInspector() {
    return listElementObjectInspector;
  }

  // with data

  /**
   * Gets the element at the given position.
   *
   * @param data  a List, an array, or null
   * @param index zero-based position of the wanted element
   * @return the element, or null when data is null or index is out of range
   */
  public Object getListElement(Object data, int index) {
    List<?> list = getList(data);
    if (list == null || index < 0 || index >= list.size()) {
      return null;
    }
    return list.get(index);
  }

  /**
   * Gets the number of elements.
   *
   * @param data a List, an array, or null
   * @return the element count, or -1 when data is null
   */
  public int getListLength(Object data) {
    List<?> list = getList(data);
    if (list == null) {
      return -1;
    }
    return list.size();
  }

  /**
   * Gets the data as a List.
   *
   * @param data a List, an array, or null
   * @return null when data is null; a fixed-size view over the array when
   *         data is an Object[]; a new list of boxed values when data is a
   *         primitive array; otherwise data itself cast to List
   */
  public List<?> getList(Object data) {
    if (data == null) {
      return null;
    }
    if (data instanceof Object[]) {
      return java.util.Arrays.asList((Object[]) data);
    }
    if (data.getClass().isArray()) {
      // A primitive array (int[], double[], ...) is an array but not an
      // Object[]; casting it would throw ClassCastException, so box each
      // element through reflection instead.
      int length = java.lang.reflect.Array.getLength(data);
      List<Object> boxed = new java.util.ArrayList<Object>(length);
      for (int i = 0; i < length; i++) {
        boxed.add(java.lang.reflect.Array.get(data, i));
      }
      return boxed;
    }
    return (List<?>) data;
  }

  /**
   * Gets the type name: the list type-name constant followed by the element
   * type name in angle brackets.
   */
  public String getTypeName() {
    return org.apache.hadoop.hive.serde.Constants.LIST_TYPE_NAME
        + "<" + listElementObjectInspector.getTypeName() + ">";
  }
}
MapObjectInspector
/**
 * ObjectInspector for map-typed data.
 */
public interface MapObjectInspector extends ObjectInspector {

  // ** Methods that do not need a data object **

  // Map Type

  /** Gets the ObjectInspector of the map keys. */
  ObjectInspector getMapKeyObjectInspector();

  /** Gets the ObjectInspector of the map values. */
  ObjectInspector getMapValueObjectInspector();

  // ** Methods that need a data object **

  /**
   * Looks up the value stored under the given key.
   *
   * The key has to be of the same structure as the Map expects. In most
   * cases the key is a primitive type, so that is OK. In the rare cases
   * where the key is not primitive, the user is responsible for defining
   * the hashCode() and equals() methods of the key class.
   */
  Object getMapValueElement(Object data, Object key);

  /**
   * Gets the data as a Map.
   * Returns null for data = null.
   */
  Map<?, ?> getMap(Object data);
}
StandardMapObjectInspector
StandardMapObjectInspector 需要提供 key 和 value 的 ObjectInspector。
/**
 * MapObjectInspector for data held as a java.util.Map, built from the
 * ObjectInspectors of its keys and values.
 */
class StandardMapObjectInspector implements MapObjectInspector {

  ObjectInspector mapKeyObjectInspector;
  ObjectInspector mapValueObjectInspector;

  /**
   * Call ObjectInspectorFactory.getStandardMapObjectInspector instead.
   *
   * @param mapKeyObjectInspector   the ObjectInspector of the map keys
   * @param mapValueObjectInspector the ObjectInspector of the map values
   */
  protected StandardMapObjectInspector(ObjectInspector mapKeyObjectInspector, ObjectInspector mapValueObjectInspector) {
    this.mapKeyObjectInspector = mapKeyObjectInspector;
    this.mapValueObjectInspector = mapValueObjectInspector;
  }

  /** Always reports the MAP category. */
  public final Category getCategory() {
    return Category.MAP;
  }

  /**
   * Gets the type name: the map type-name constant followed by the key and
   * value type names, comma-separated, in angle brackets.
   */
  public String getTypeName() {
    String keyTypeName = mapKeyObjectInspector.getTypeName();
    String valueTypeName = mapValueObjectInspector.getTypeName();
    return org.apache.hadoop.hive.serde.Constants.MAP_TYPE_NAME
        + "<" + keyTypeName + ","
        + valueTypeName + ">";
  }

  // without data

  /** Gets the key ObjectInspector that was handed to the constructor. */
  public ObjectInspector getMapKeyObjectInspector() {
    return this.mapKeyObjectInspector;
  }

  /** Gets the value ObjectInspector that was handed to the constructor. */
  public ObjectInspector getMapValueObjectInspector() {
    return this.mapValueObjectInspector;
  }

  // with data

  /**
   * Looks up the value stored under key; returns null when either data or
   * key is null.
   */
  // TODO: Now we assume the key Object supports hashCode and equals functions.
  public Object getMapValueElement(Object data, Object key) {
    boolean missingInput = (data == null) || (key == null);
    if (missingInput) {
      return null;
    }
    return ((Map<?, ?>) data).get(key);
  }

  /** Gets the number of entries in the map, or -1 for data = null. */
  int getMapSize(Object data) {
    return (data == null) ? -1 : ((Map<?, ?>) data).size();
  }

  /** Gets data cast to a Map, or null for data = null. */
  public Map<?, ?> getMap(Object data) {
    return (data == null) ? null : (Map<?, ?>) data;
  }
}
StructObjectInspector
/**
 * ObjectInspector for struct-typed data, i.e. data made of named fields.
 */
public interface StructObjectInspector extends ObjectInspector {

  // ** Methods that do not need a data object **

  /**
   * Gets all the fields.
   */
  List<? extends StructField> getAllStructFieldRefs();

  /**
   * Looks up a single field by its name.
   */
  StructField getStructFieldRef(String fieldName);

  // ** Methods that need a data object **

  /**
   * Gets the data of one field.
   * Returns null for data = null.
   */
  Object getStructFieldData(Object data, StructField fieldRef);

  /**
   * Gets the data of the fields as a list.
   * Returns null for data = null.
   */
  List<Object> getStructFieldsDataAsList(Object data);
}
- StructField
StructField 提供字段名和该字段的 ObjectInspector。
/**
 * StructField describes a single field of a struct: it exposes the field's
 * name and the ObjectInspector for the field.
 *
 * Classes implementing this interface are considered to represent
 * a field of a struct for this serde package.
 */
public interface StructField {
/**
 * Get the name of the field. The name should always be in lower-case.
 */
String getFieldName();
/**
 * Get the ObjectInspector for the field.
 */
ObjectInspector getFieldObjectInspector();
}
StructObjectInspector 有四个子类,下面介绍其中的 LazySimpleStructObjectInspector。
LazySimpleStructObjectInspector
public class LazySimpleStructObjectInspector implements StructObjectInspector {
public static final Log LOG = LogFactory.getLog(LazySimpleStructObjectInspector.class.getName());
/**
 * StructField implementation holding a numeric field ID, the field name in
 * lower case, and the ObjectInspector for the field.
 */
protected static class MyField implements StructField {
  protected int fieldID;
  protected String fieldName;
  protected ObjectInspector fieldObjectInspector;

  /**
   * @param fieldID              numeric ID of the field
   * @param fieldName            name of the field; stored lower-cased
   * @param fieldObjectInspector ObjectInspector for the field
   */
  public MyField(int fieldID, String fieldName, ObjectInspector fieldObjectInspector) {
    this.fieldID = fieldID;
    // Lower-case with a fixed locale: the no-argument toLowerCase() uses the
    // JVM default locale, so the same field name could be stored differently
    // on different machines (e.g. "ID" becomes "\u0131d" under Turkish).
    this.fieldName = fieldName.toLowerCase(java.util.Locale.ROOT);
    this.fieldObjectInspector = fieldObjectInspector;
  }

  /** Gets the numeric ID given to the constructor. */
  public int getFieldID() {
    return fieldID;
  }

  /** Gets the lower-cased field name. */
  public String getFieldName() {
    return fieldName;
  }

  /** Gets the ObjectInspector given to the constructor. */
  public ObjectInspector getFieldObjectInspector() {
    return fieldObjectInspector;
  }

  /** Formats the field as "fieldID:fieldName". */
  public String toString() {
    return fieldID + ":" + fieldName;
  }
}
protected List<MyField> fields;
/**
 * Gets the type name of this inspector, as computed by
 * ObjectInspectorUtils.getStandardStructTypeName for this instance.
 */
public String getTypeName() {
  final String structTypeName = ObjectInspectorUtils.getStandardStructTypeName(this);
  return structTypeName;
}
/** Call ObjectInspectorFactory.getLazySimpleStructObjectInspector instead.
*/
protected LazySimpleStructObjectInspector(List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors) {
init(structFieldNames, structFieldObjectInspectors

本文介绍了Hive中ObjectInspector的概念及其在不同类型中的应用。ObjectInspector作为元数据接口,用于探查复杂对象结构,支持Primitive、List、Map及Struct等多种类型。文章详细讲解了不同类型的ObjectInspector实现原理,并展示了如何通过ObjectInspectorFactory创建实例。
最低0.47元/天 解锁文章
189

被折叠的 条评论
为什么被折叠?



