Error details:
Exception in thread "main" java.lang.NoSuchMethodError: org.apache.hadoop.hive.ql.exec.Utilities.copyTableJobPropertiesToConf(Lorg/apache/hadoop/hive/ql/plan/TableDesc;Lorg/apache/hadoop/conf/Configuration;)V
at org.apache.spark.sql.hive.HadoopTableReader$.initializeLocalJobConfFunc(TableReader.scala:349)
at org.apache.spark.sql.hive.HadoopTableReader$$anonfun$12.apply(TableReader.scala:300)
at org.apache.spark.sql.hive.HadoopTableReader$$anonfun$12.apply(TableReader.scala:300)
at org.apache.spark.rdd.HadoopRDD$$anonfun$getJobConf$5$$anonfun$apply$3.apply(HadoopRDD.scala:180)
at org.apache.spark.rdd.HadoopRDD$$anonfun$getJobConf$5$$anonfun$apply$3.apply(HadoopRDD.scala:180)
at scala.Option.foreach(Option.scala:257)
at org.apache.spark.rdd.HadoopRDD$$anonfun$getJobConf$5.apply(HadoopRDD.scala:180)
at org.apache.spark.rdd.HadoopRDD$$anonfun$getJobConf$5.apply(HadoopRDD.scala:177)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.rdd.HadoopRDD.getJobConf(HadoopRDD.scala:171)
at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:200)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:251)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:251)
at scala.Option.getOrElse(Option.scala:121)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:251)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:253)
Cause: the Spark and Hive versions do not match.
Analysis:
Following the stack trace to the code involved, the Spark side (HadoopTableReader$.initializeLocalJobConfFunc, TableReader.scala:349) is:
def initializeLocalJobConfFunc(path: String, tableDesc: TableDesc)(jobConf: JobConf) {
  FileInputFormat.setInputPaths(jobConf, Seq[Path](new Path(path)): _*)
  if (tableDesc != null) {
    HiveTableUtil.configureJobPropertiesForStorageHandler(tableDesc, jobConf, true)
    Utilities.copyTableJobPropertiesToConf(tableDesc, jobConf)
  }
  val bufferSize = System.getProperty("spark.buffer.size", "65536")
  jobConf.set("io.file.buffer.size", bufferSize)
}
And the Utilities.copyTableJobPropertiesToConf it calls (note that the second parameter is declared as JobConf):

public static void copyTableJobPropertiesToConf(TableDesc tbl, JobConf job) throws HiveException {
    Properties tblProperties = tbl.getProperties();
    Iterator var3 = tblProperties.stringPropertyNames().iterator();

    while(var3.hasNext()) {
        String name = (String)var3.next();
        if (job.get(name) == null) {
            String val = (String)tblProperties.get(name);
            if (val != null) {
                job.set(name, StringEscapeUtils.escapeJava(val));
            }
        }
    }

    Map<String, String> jobProperties = tbl.getJobProperties();
    if (jobProperties != null) {
        Iterator var9 = jobProperties.entrySet().iterator();

        while(var9.hasNext()) {
            Entry<String, String> entry = (Entry)var9.next();
            job.set((String)entry.getKey(), (String)entry.getValue());
        }
    }

    try {
        Map<String, String> jobSecrets = tbl.getJobSecrets();
        if (jobSecrets != null) {
            Iterator var12 = jobSecrets.entrySet().iterator();

            while(var12.hasNext()) {
                Entry<String, String> entry = (Entry)var12.next();
                job.getCredentials().addSecretKey(new Text((String)entry.getKey()), ((String)entry.getValue()).getBytes());
                UserGroupInformation.getCurrentUser().getCredentials().addSecretKey(new Text((String)entry.getKey()), ((String)entry.getValue()).getBytes());
            }
        }
    } catch (IOException var7) {
        throw new HiveException(var7);
    }
}
The error message says the second parameter is of type Configuration, yet the source above plainly declares it as JobConf, so how did it become Configuration? JobConf is a subclass of Configuration, so the parameter type has evidently been widened to the parent class, and the resulting signature mismatch is what throws the exception. The source itself performs no such conversion and passes a JobConf, which means the widening happened in the compiled code, so decompile the class file (for example, disassemble the HadoopTableReader$ class from the stack trace with javap -c) and check how the call was recorded. The underlying mechanism: overload resolution happens at compile time, so the compiler binds the call to whatever signature the Hive jar on the compile classpath declares and writes that exact descriptor into the class file; at run time the JVM resolves the call by that exact descriptor, with no substitution of subclasses or superclasses, and throws NoSuchMethodError when no such method exists.
The root cause is that this Spark jar was built against a lower Hive version, so the compiled class file records the parameter type as the parent class rather than JobConf. The method in that lower Hive version is as follows:
public static void copyTableJobPropertiesToConf(TableDesc tbl, Configuration job) {
    Properties tblProperties = tbl.getProperties();
    for (String name : tblProperties.stringPropertyNames()) {
        if (job.get(name) == null) {
            String val = (String) tblProperties.get(name);
            if (val != null) {
                job.set(name, StringEscapeUtils.escapeJava(val));
            }
        }
    }
    Map<String, String> jobProperties = tbl.getJobProperties();
    if (jobProperties == null) {
        return;
    }
    for (Map.Entry<String, String> entry : jobProperties.entrySet()) {
        job.set(entry.getKey(), entry.getValue());
    }
}
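Putting the two signatures side by side explains the error completely, and the mechanism can be reproduced in miniature without Spark or Hive. The sketch below uses hypothetical stand-in classes (Parent, Child, Lib and Main are made up for illustration; Parent and Child play the roles of Configuration and JobConf): the caller is compiled against a version of Lib whose method takes the parent type, then run against a version whose method takes the subclass.

// Parent.java and Child.java -- shared by both library versions
public class Parent { }
public class Child extends Parent { }

// Lib.java, version 1 -- analogous to the low-version Hive that Spark was compiled against
public class Lib {
    public static void copy(Parent p) { System.out.println("copy(Parent), v1"); }
}

// Lib.java, version 2 -- analogous to the Hive actually present at run time
public class Lib {
    public static void copy(Child c) { System.out.println("copy(Child), v2"); }
}

// Main.java -- analogous to the Spark call site in HadoopTableReader
public class Main {
    public static void main(String[] args) {
        Child c = new Child();
        // Compiled against Lib v1, this call binds to copy(Parent), and the descriptor
        // (LParent;)V is written into Main.class even though the argument is a Child.
        Lib.copy(c);
    }
}

Compile Parent, Child, Lib version 1 and Main together, then put Lib version 2's class file on the classpath and run Main again: the JVM resolves the call by the exact descriptor recorded at compile time, finds no copy(Parent) in version 2, and throws java.lang.NoSuchMethodError for Lib.copy(LParent;)V. This is the same mechanism as the Spark error above, with Configuration and JobConf in place of Parent and Child.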