自定义实现函数
java编码要求
pom.xml 和类的代码都在最下面
(1)继承于UDF类
(2)方法规定
-a、Implement one or more methods named evaluate
实现至少一个evaluate方法
-b、evaluate should never be a void method.
However it can return null if needed.
evaluate方法不能是void(无返回值)方法;如果确实没有结果可返回,可以返回null
-c、方法的参数和返回值的类型:Java类型或者Hadoop类型。
推荐使用Hadoop类型。
(3)打成jar包
mvn package
linux
(1)打成jar包在hive命令行下使用下列命令
add jar /opt/cdh5.7.6/hive-1.1.0-cdh5.7.6/hadoop-1.0-SNAPSHOT.jar
(2)创建函数(指定数据库,函数只在该数据库中有效)
CREATE FUNCTION db_hive.removeQuato AS 'com.huadian.hive.udf.RemoveQuato'
create function 数据库名.函数名 as '类的全限定名(包名.类名)'
(3)测试
show functions
创建函数(步骤2)时即使没写数据库名,函数名前也会自动加上数据库名
用法
SELECT
removeQuato(ip) AS ip
FROM
tb_ip ;
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.huadian.bigdata</groupId>
<artifactId>hadoop</artifactId>
<version>1.0-SNAPSHOT</version>
<repositories>
<repository>
<id>aliyun</id>
<url>http://maven.aliyun.com/nexus/content/groups/public/</url>
</repository>
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
<repository>
<id>jboss</id>
<url>http://repository.jboss.com/nexus/content/groups/public</url>
</repository>
</repositories>
<properties>
<!-- Source encoding and Java source/target level for the build -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>1.7</maven.compiler.source>
<maven.compiler.target>1.7</maven.compiler.target>
<!-- Versions substituted into the hadoop-client and hive-* dependencies.
     NOTE(review): the usage notes load the jar into a hive-1.1.0-cdh5.7.6 install,
     and a cloudera repository is declared, yet these are plain Apache versions.
     Confirm whether CDH-versioned artifacts were intended. -->
<hadoop.version>2.7.3</hadoop.version>
<hive.version>1.2.1</hive.version>
</properties>
<dependencies>
<!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
<!-- Hive Client -->
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-service</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>${hive.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-jdbc</artifactId>
<version>${hive.version}</version>
</dependency>
</dependencies>
<build>
<pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
<plugins>
<plugin>
<artifactId>maven-clean-plugin</artifactId>
<version>3.0.0</version>
</plugin>
<!-- see http://maven.apache.org/ref/current/maven-core/default-bindings.html#Plugin_bindings_for_jar_packaging -->
<plugin>
<artifactId>maven-resources-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.7.0</version>
</plugin>
<plugin>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.20.1</version>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<version>3.0.2</version>
</plugin>
<plugin>
<artifactId>maven-install-plugin</artifactId>
<version>2.5.2</version>
</plugin>
<plugin>
<artifactId>maven-deploy-plugin</artifactId>
<version>2.8.2</version>
</plugin>
</plugins>
</pluginManagement>
</build>
</project>
RemoveQuato类
package com.huadian.hive.udf;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
/**
 * Hive UDF that trims a string value and removes every double-quote character from it.
 *
 * <p>Extends {@link UDF}; Hive locates the {@code evaluate} method by name.
 */
public class RemoveQuato extends UDF {

    /**
     * Strips surrounding whitespace and all {@code "} characters from the input.
     *
     * <p>Trimming happens before quote removal, so whitespace that sits inside the
     * quotes (e.g. {@code "\" a \""}) is kept.
     *
     * @param votetools the column value as a Hadoop {@link Text}; may be {@code null}
     *                  when the column value is SQL NULL
     * @return {@code null} for a {@code null} input, an empty {@link Text} for a blank
     *         input, otherwise the trimmed value with double quotes removed
     */
    public Text evaluate(Text votetools) {
        // A NULL column value arrives as a null reference; propagate it instead of
        // failing the whole query with a NullPointerException.
        if (votetools == null) {
            return null;
        }

        String value = votetools.toString();

        // Blank (empty or whitespace-only) input maps to an empty string.
        if (StringUtils.isBlank(value)) {
            return new Text("");
        }

        // Trim whitespace at both ends, then drop every double-quote character.
        String replace = value.trim().replace("\"", "");
        return new Text(replace);
    }
}