背景:
在Hive中编写UDF时,我们可能需要加载外部的资源文件:可以将资源打包到Jar中,也可以将资源放在HDFS上,使用UDF时通过参数传入文件名即可加载。
代码
/**
 * Hive generic UDF that translates a code value through a lookup table stored on HDFS.
 *
 * <p>Call shape: {@code TransformCodeToCode(value, tableName)}. The table is read from
 * {@code /data/resource/<tableName>.txt}. Every line is split on the literal delimiter
 * {@code |^|}; the first field is the source code and the third field is the target code.
 * A value without a mapping is returned unchanged.
 *
 * <p>Each table is read once per UDF instance and then served from memory.
 */
public class TransformCodeToCode extends GenericUDF {

    /** HDFS directory that holds the lookup tables. */
    private static final String RESOURCE_DIR = "/data/resource/";

    /** File extension appended to the table name passed as the second argument. */
    private static final String RESOURCE_SUFFIX = ".txt";

    /** Regex matching the literal field delimiter {@code |^|}. */
    private static final String FIELD_DELIMITER_REGEX = "\\|\\^\\|";

    private MapredContext context;
    private Configuration conf;

    /** Loaded tables keyed by the table name, so a different second argument never reuses the wrong table. */
    private Map<String, Map<String, String>> mappingsByFile = new HashMap<>();

    /**
     * Stores the task context so the job configuration can be used later.
     * Per the original author's note, this is how values passed with {@code -hiveconf} become reachable.
     *
     * @param context context handed over by Hive
     */
    @Override
    public void configure(MapredContext context) {
        this.context = context;
    }

    /**
     * Validates the call signature: exactly two arguments, both of category PRIMITIVE
     * (the other ObjectInspector categories are List, Map, Struct and Union).
     *
     * @param objectInspectors inspectors of the call arguments
     * @return the Java string inspector, because the UDF always yields a string
     * @throws UDFArgumentException when the argument count or an argument category is wrong
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] objectInspectors) throws UDFArgumentException {
        if (objectInspectors == null || objectInspectors.length != 2) {
            // Use the declared Hive exception type instead of IllegalArgumentException so the
            // failure is reported the same way as the category checks below.
            throw new UDFArgumentException("The function transform accepts 2 parameters.");
        }
        for (ObjectInspector inspector : objectInspectors) {
            if (!ObjectInspector.Category.PRIMITIVE.equals(inspector.getCategory())) {
                throw new UDFArgumentException(
                        "Desired parameter type:PRIMITIVE, actual parameter type:" + inspector.getTypeName());
            }
        }
        return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    }

    /**
     * Looks the first argument up in the table named by the second argument.
     *
     * @param deferredObjects {@code [0]} the value to translate, {@code [1]} the table name
     * @return the mapped code, the input value (as a string) when no mapping exists,
     *         or {@code null} when the input value is NULL
     * @throws HiveException when the table name is NULL or the table cannot be read
     */
    @Override
    public Object evaluate(DeferredObject[] deferredObjects) throws HiveException {
        Object rawValue = deferredObjects[0].get();
        if (rawValue == null) {
            // Keep SQL NULL as NULL; String.valueOf(null) would turn it into the text "null".
            return null;
        }
        Object rawName = deferredObjects[1].get();
        if (rawName == null) {
            throw new HiveException("reader hdfs file Exception: resource file name is NULL");
        }
        String fieldValue = String.valueOf(rawValue);
        String filename = String.valueOf(rawName);

        Map<String, String> mapping = mappingsByFile.get(filename);
        if (mapping == null) {
            try {
                mapping = loadMapping(filename);
            } catch (IOException | RuntimeException e) {
                // Same message as before, but the cause is kept so the stack trace is not lost.
                throw new HiveException("reader hdfs file Exception" + e.getMessage(), e);
            }
            // Cache only after a complete, successful read so a failed load leaves no partial table behind.
            mappingsByFile.put(filename, mapping);
        }

        String mapped = mapping.get(fieldValue);
        return mapped != null ? mapped : fieldValue;
    }

    /**
     * Clears the cached tables.
     *
     * <p>The FileSystem is intentionally not closed here: {@code FileSystem.get(conf)} hands out a
     * cached instance shared with other code in the same JVM, and closing it would break those users.
     */
    @Override
    public void close() throws IOException {
        mappingsByFile.clear();
    }

    @Override
    public String getDisplayString(String[] strings) {
        return "TransformCodeToCode(" + strings[0] + "," + strings[1] + ")";
    }

    /** Reads {@code /data/resource/<filename>.txt} and builds the source-code to target-code table. */
    private Map<String, String> loadMapping(String filename) throws IOException {
        Path filePath = new Path(RESOURCE_DIR + filename + RESOURCE_SUFFIX);
        FileSystem fileSystem = FileSystem.get(resolveConf());
        Map<String, String> mapping = new HashMap<>();
        // Explicit UTF-8 keeps decoding independent of each node's default charset.
        // NOTE(review): assumes the resource files are UTF-8 encoded - confirm.
        try (FSDataInputStream in = fileSystem.open(filePath);
             BufferedReader reader = new BufferedReader(
                     new InputStreamReader(in, java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split(FIELD_DELIMITER_REGEX);
                if (fields.length < 1) {
                    // A line made only of delimiters splits into an empty array.
                    continue;
                }
                // Decide the target per line; a line without a third field maps to "" instead of
                // inheriting the previous line's target.
                String target = fields.length >= 3 ? fields[2] : "";
                mapping.put(fields[0], target);
            }
        }
        return mapping;
    }

    /** Returns the configuration, taken from the task context when present, otherwise from the session. */
    private Configuration resolveConf() {
        if (conf == null) {
            if (context != null) {
                conf = context.getJobConf();
            } else {
                SessionState session = SessionState.get();
                if (session == null) {
                    throw new IllegalStateException(
                            "No MapredContext or SessionState available to obtain a Configuration");
                }
                conf = session.getConf();
            }
        }
        return conf;
    }
}