Hive FieldSchema的核心属性。
/**
 * Core attributes of Hive's {@code FieldSchema}: one metastore column definition.
 * The column's data type is kept as a plain string (e.g. "varchar(20)") and is
 * parsed/converted later by {@code HiveTypeUtil.toFlinkType}.
 */
public class FieldSchema {
private String name; // required
private String type; // required — raw Hive type string, not a parsed type object
private String comment; // required — NOTE(review): Hive's thrift IDL marks comment optional; confirm
}
HiveCatalog 通过 getTable 方法从 Metastore 获取表的元数据。
/**
 * Looks up a table in the Hive metastore and converts it to Flink's catalog model.
 *
 * @param tablePath database/table path of the table to load; must not be null
 * @return the Flink {@code CatalogBaseTable} built from the Hive table definition
 * @throws TableNotExistException if no such table exists in the metastore
 * @throws CatalogException on other catalog access failures
 */
public CatalogBaseTable getTable(ObjectPath tablePath)
throws TableNotExistException, CatalogException {
checkNotNull(tablePath, "tablePath cannot be null");
// The metastore Table stores column types as raw strings; conversion to
// Flink's typed schema happens inside instantiateCatalogTable.
return instantiateCatalogTable(getHiveTable(tablePath), hiveConf);
}
instantiateCatalogTable 将HiveTable转化为Flink CatalogBaseTable
// Converts a Hive metastore Table into a Flink CatalogBaseTable.
// NOTE(review): abridged excerpt — `tableSchema`, `notNullColumns` and `primaryKey`
// are declared/populated in code omitted here, and the return statement is not shown.
private CatalogBaseTable instantiateCatalogTable(Table hiveTable, HiveConf hiveConf) {
// Non-partition columns only; partition keys are appended separately below.
List<FieldSchema> fields = getNonPartitionFields(hiveConf, hiveTable);
// Builds the Flink schema from the string-typed Hive columns plus constraints.
tableSchema =
HiveTableUtil.createTableSchema(
fields,
hiveTable.getPartitionKeys(),
notNullColumns,
primaryKey.orElse(null));
}
HiveTableUtil
/**
 * Builds a Flink {@code TableSchema} from Hive column definitions.
 *
 * <p>Regular columns come first, followed by partition columns. Each column's
 * Hive type string (e.g. "varchar(20)") is parsed and mapped to a Flink
 * {@code DataType}; NOT NULL and primary-key constraints are then applied.
 *
 * @param cols non-partition columns
 * @param partitionKeys partition columns, appended after {@code cols}
 * @param notNullColumns names of columns carrying a NOT NULL constraint
 * @param primaryKey primary-key constraint, or null if the table has none
 * @return the assembled Flink table schema
 */
public static TableSchema createTableSchema(
List<FieldSchema> cols,
List<FieldSchema> partitionKeys,
Set<String> notNullColumns,
UniqueConstraint primaryKey) {
List<FieldSchema> allCols = new ArrayList<>(cols);
allCols.addAll(partitionKeys);

String[] names = new String[allCols.size()];
DataType[] types = new DataType[allCols.size()];
int idx = 0;
for (FieldSchema field : allCols) {
names[idx] = field.getName();
// Parse the raw Hive type string into a TypeInfo, then map it to a Flink DataType.
DataType flinkType =
HiveTypeUtil.toFlinkType(TypeInfoUtils.getTypeInfoFromTypeString(field.getType()));
// Mark the column non-nullable when a NOT NULL constraint exists for it.
types[idx] = notNullColumns.contains(names[idx]) ? flinkType.notNull() : flinkType;
idx++;
}

TableSchema.Builder builder = TableSchema.builder().fields(names, types);
if (primaryKey != null) {
builder.primaryKey(primaryKey.getName(), primaryKey.getColumns().toArray(new String[0]));
}
return builder.build();
}
toFlinkType 是将 Hive 的 TypeInfo（由类型名称字符串解析而来）实际转化为 Flink DataType 的工具方法。
/**
 * Converts a Hive data type into the corresponding Flink data type.
 *
 * <p>Complex types (LIST, MAP, STRUCT) are converted recursively; primitives are
 * delegated to {@code toFlinkPrimitiveType}. Categories not listed here (e.g.
 * UNION) are rejected.
 *
 * @param hiveType a Hive data type; must not be null
 * @return the corresponding Flink data type
 * @throws UnsupportedOperationException for Hive type categories Flink does not support
 */
public static DataType toFlinkType(TypeInfo hiveType) {
checkNotNull(hiveType, "hiveType cannot be null");
switch (hiveType.getCategory()) {
case PRIMITIVE:
return toFlinkPrimitiveType((PrimitiveTypeInfo) hiveType);
case LIST: {
// ARRAY of the recursively converted element type.
TypeInfo elementInfo = ((ListTypeInfo) hiveType).getListElementTypeInfo();
return DataTypes.ARRAY(toFlinkType(elementInfo));
}
case MAP: {
MapTypeInfo mapInfo = (MapTypeInfo) hiveType;
DataType keyType = toFlinkType(mapInfo.getMapKeyTypeInfo());
DataType valueType = toFlinkType(mapInfo.getMapValueTypeInfo());
return DataTypes.MAP(keyType, valueType);
}
case STRUCT: {
// A Hive struct becomes a Flink ROW with one FIELD per struct member.
StructTypeInfo structInfo = (StructTypeInfo) hiveType;
List<String> fieldNames = structInfo.getAllStructFieldNames();
List<TypeInfo> fieldTypes = structInfo.getAllStructFieldTypeInfos();
DataTypes.Field[] rowFields = new DataTypes.Field[fieldNames.size()];
for (int i = 0; i < rowFields.length; i++) {
rowFields[i] = DataTypes.FIELD(fieldNames.get(i), toFlinkType(fieldTypes.get(i)));
}
return DataTypes.ROW(rowFields);
}
default:
throw new UnsupportedOperationException(
String.format("Flink doesn't support Hive data type %s yet.", hiveType));
}
}
toFlinkPrimitiveType
简单看一下就是 switch，且并没有对 DataType 进行 bridgedTo 操作，使用的是默认内置类型。
/**
 * Maps a Hive primitive type to its Flink counterpart.
 *
 * <p>Length/precision parameters (CHAR, VARCHAR, DECIMAL) are carried over from the
 * Hive type. No {@code bridgedTo} conversion class is attached — the default Flink
 * conversion classes are used.
 *
 * @param hiveType a Hive primitive type; must not be null
 * @return the corresponding Flink data type
 * @throws UnsupportedOperationException for primitive categories Flink does not support
 */
private static DataType toFlinkPrimitiveType(PrimitiveTypeInfo hiveType) {
checkNotNull(hiveType, "hiveType cannot be null");
final DataType result;
switch (hiveType.getPrimitiveCategory()) {
case CHAR:
result = DataTypes.CHAR(((CharTypeInfo) hiveType).getLength());
break;
case VARCHAR:
result = DataTypes.VARCHAR(((VarcharTypeInfo) hiveType).getLength());
break;
case STRING:
result = DataTypes.STRING();
break;
case BOOLEAN:
result = DataTypes.BOOLEAN();
break;
case BYTE:
result = DataTypes.TINYINT();
break;
case SHORT:
result = DataTypes.SMALLINT();
break;
case INT:
result = DataTypes.INT();
break;
case LONG:
result = DataTypes.BIGINT();
break;
case FLOAT:
result = DataTypes.FLOAT();
break;
case DOUBLE:
result = DataTypes.DOUBLE();
break;
case DATE:
result = DataTypes.DATE();
break;
case TIMESTAMP:
// Precision 9 — presumably to cover Hive's nanosecond timestamps; confirm if changing.
result = DataTypes.TIMESTAMP(9);
break;
case BINARY:
result = DataTypes.BYTES();
break;
case DECIMAL:
// Precision and scale are preserved from the Hive decimal definition.
DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) hiveType;
result = DataTypes.DECIMAL(decimalTypeInfo.getPrecision(), decimalTypeInfo.getScale());
break;
default:
throw new UnsupportedOperationException(
String.format(
"Flink doesn't support Hive primitive type %s yet", hiveType));
}
return result;
}