对接hive
(1)将Hudi编译生成的hudi-hadoop-mr-bundle-0.9.0.jar复制到hive的lib目录下,使hive支持hudi,复制后需要重启hiveserver2服务;也可以不复制到lib目录,而是在执行hive sql时通过add jar命令临时加载该jar包
先复制分发jar包到hiveserver2节点
[xxx@xxx target]# rsync -rvl hudi-hadoop-mr-bundle-0.9.0.jar xxx@xxx:/data/software/
如下图已经放置在hive下
编写测试数据
形成member.log日志文件
import com.alibaba.fastjson.JSONObject;
import java.io.FileNotFoundException;
import java.io.PrintStream;
import java.util.Properties;
import java.util.Random;
/**
 * Generates the {@code member.log} sample data set used by the Hudi/Hive walkthrough.
 *
 * <p>Each output line is one JSON object with the keys {@code uid}, {@code fullname},
 * {@code ad_id}, {@code iconurl}, {@code dt}, {@code dn} and {@code uuid}.
 *
 * <p>Records are assembled with plain string concatenation: every value is either an
 * {@code int} or a fixed literal that needs no JSON escaping, so the generator runs with
 * the standard library alone.
 */
public class TestJson {

    /** Name of the file created in the current working directory. */
    private static final String OUTPUT_FILE = "member.log";

    /** Number of records written by {@link #main(String[])}. */
    private static final int RECORD_COUNT = 1000000;

    /** Exclusive upper bound of the random {@code ad_id} value (ids are 0..8). */
    private static final int AD_ID_BOUND = 9;

    /**
     * Writes {@code RECORD_COUNT} JSON records to {@code member.log}.
     *
     * @param args unused
     * @throws java.io.IOException if the file cannot be created or a write fails
     */
    public static void main(String[] args) throws java.io.IOException {
        // Explicit UTF-8: on Java 8 the platform default charset could mangle the CJK
        // surname, and downstream JSON readers expect UTF-8.
        try (PrintStream out = new PrintStream(OUTPUT_FILE, "UTF-8")) {
            writeMembers(out, RECORD_COUNT, new Random());
            // PrintStream swallows I/O errors, so surface them instead of leaving a
            // silently truncated file behind.
            if (out.checkError()) {
                throw new java.io.IOException("Failed to write " + OUTPUT_FILE);
            }
        }
    }

    /**
     * Writes {@code count} records, with uid running from 0 to {@code count - 1}, one per line.
     *
     * @param out    destination stream; it is not closed by this method
     * @param count  number of records to write; zero or a negative value writes nothing
     * @param random source of the {@code ad_id} values
     */
    static void writeMembers(PrintStream out, int count, Random random) {
        for (int uid = 0; uid < count; uid++) {
            out.println(toJsonLine(uid, random.nextInt(AD_ID_BOUND)));
        }
    }

    /**
     * Builds the JSON text of a single member record.
     *
     * @param uid  member id; also used as the {@code uuid} value and as the name suffix
     * @param adId advertisement id stored under {@code ad_id}
     * @return a single-line JSON object
     */
    static String toJsonLine(int uid, int adId) {
        // U+738B is the CJK surname character that prefixes every generated full name;
        // it is written as an escape so the source compiles under any file encoding.
        return "{\"uid\":" + uid
                + ",\"fullname\":\"\u738b" + uid + "\""
                + ",\"ad_id\":" + adId
                + ",\"iconurl\":\"-\""
                + ",\"dt\":\"20200918\""
                + ",\"dn\":\"WebA\""
                + ",\"uuid\":" + uid
                + "}";
    }
}
将上面生成的member.log上传到HDFS,存放路径为 /tmp/ods/member.log
pom.xml文件如下,用于引入所需依赖,并将部分依赖打入最终的jar包中
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.dxt</groupId>
<artifactId>sparkDataFrame</artifactId>
<version>1.0-SNAPSHOT</version>
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-client</artifactId>
<scope>provided</scope>
<version>0.9.0</version>
<type>pom</type>
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-spark-bundle_2.11</artifactId>
<scope>provided</scope>
<version>0.9.0</version>
</dependency>
<!-- Hudi input formats for Hive/MapReduce; provided by the cluster at runtime. -->
<dependency>
<groupId>org.apache.hudi</groupId>
<artifactId>hudi-hadoop-mr-bundle</artifactId>
<version>0.9.0</version>
<scope>provided</scope>
<!-- Exclude the transitive Jackson artifacts. The groupId is com.fasterxml.jackson.core
     and the artifactId is the module name; an exclusion with the two swapped matches
     nothing and is silently ignored by Maven. -->
<exclusions>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Spark的依赖引入 -->
<dependency>
<groupId>org.apache.spark</groupId>
<artifactId>spark-core_2.11</artifactId>
<scope>provided</scope>
<version>2.4.4</version>
</dependency>
<!-- <dependency>-->
<!-- <groupId>org.scala-lang</groupId>-->
<!-- <artifactId>scala-library</artifactId>-