Teradata SQL programming

最新推荐文章于 2021-02-15 11:42:08 发布

light50000

最新推荐文章于 2021-02-15 11:42:08 发布

阅读量1k

点赞数 1

Teradata的SQL设计和Oracle真不是一个水平, 一点美感的没有.
上个世纪它靠着MPP一招鲜吃变天, 居然做了十多年数据仓库的老大, 时过境迁, 现在有不少SQL On Hadoop 产品已经出来了, 考虑到scale out的成本和能力, Teradata 数据仓库优势荡然全无. 将来必将会被SQL on Hadoop/Spark替代.

毕竟在Teradata上做了几年, 也该写点总结. 下面是我常用的一些编程知识

–字符串函数
SELECT ‘FirstName’ || ’ ’ || ‘LastName’ as Full_Name;
CHAR2HEXINT (‘A’)
–would result in the value ‘0041’.
LOWER()
substr()
TRIM()
UPPER()
CHARACTERS() 或 character_length() 得到字符串的长度
SELECT position (‘a’ in ‘Name’)
oracle version replace(), no way. write your code

—type() 可返回字段的类型,
SELECT
CAST(100.00 AS DECIMAL(15,2)) * CAST(100.00 AS DECIMAL(15,2)) AS C1
, TYPE(C1)

–获取字段类型
select type(‘abc’)

–get current date
select current_date
select current_time
select current_timestamp(0) --不带毫秒
select current_timestamp --带6位毫秒, 最高精度

–日期加减
select current_date+1 --得到明天
select add_months(current_timestamp,1) --得到下一个月
select add_months(current_date ,1)-current_date --两个日期相差多少天
select add_months(current_timestamp,1) -current_timestamp day(4) to second --两个时间戳相减, 仍是时间戳

–使用INTERVAL进行时间日期的增减
Select current_date + interval ‘1’ year
Select current_date - interval ‘1’ year
Select current_date + interval ‘1’ month
Select current_date + interval ‘1’ day
Select current_timestamp + interval ‘1’ hour
Select current_timestamp + interval ‘1’ minute
Select current_timestamp + interval ‘1’ second

–两个 timestamp 相减, 结果仍是一个timestamp, 而日期相减, 结果为相差的天数
select cast (endtime as timestamp(0) format ‘yyyy-mm-ddbhh:mi:ss’) -
cast (starttime as timestamp(0) format ‘yyyy-mm-ddbhh:mi:ss’) DAY(4) TO SECOND ,
a. * from PETL.ETL_JOB_STATUS A
where 1=1
and jobstatus=‘Done’;
The DAY(4) specifies four digits of precision, and allows for a maximum of 9999 days, or
approximately 27 years.

– timestamp 相减, 前3个写法, 要求时间跨度不能太大, 会溢出的
select ((current_timestamp + interval ‘1’ hour ) -current_timestamp) day(4) --得到的差多少天
select ((current_timestamp + interval ‘1’ hour ) -current_timestamp) hour(4) --得到的差多少小时
select ((current_timestamp + interval ‘25’ hour ) -current_timestamp) minute(4) --得到的差多少分
select ((current_timestamp + interval ‘1’ hour ) -current_timestamp) day(4) to second --得到的是timestamp

–统计执行总时长
select txdate
,sum(extract(DAY from duration)) * 24.000
+sum(extract(HOUR from duration)) *1.000
+sum(extract(MINUTE from duration)) /60.000
+sum(extract(SECOND from duration)) /3600.000
as duration_hours from
(
select cast (endtime as timestamp(0) format ‘yyyy-mm-ddbhh:mi:ss’) -
cast (starttime as timestamp(0) format ‘yyyy-mm-ddbhh:mi:ss’) day(4) TO SECOND duration,
txdate from PETL.ETL_JOB_STATUS A
where 1=1
and jobstatus=‘Done’
and A.txdate>=date’2012-06-01’
and A.txdate<date’2012-07-19’
) xx
group by xx.txdate
order by xx.txdate
;

–从日期中提取年月日
select EXTRACT(YEAR FROM current_date)
YEAR
MONTH
DAY
HOUR
MINUTE
SECOND
TIMEZONE_HOUR
TIMEZONE_MINUTE

–几个有用的查询
select user
select session
select role
select * from sys_calendar.calendar

–字符转日期
select date’2012-05-12’
select CAST(‘20120512’ AS DATE FORMAT ‘YYYYMMDD’)

–字符转 timestamp
select cast (‘20120512 231056’ as timestamp(0) FORMAT ‘YYYYMMDDBHHMISS’ )

–日期转字符串
–recommended usage, statement 1
SELECT CAST(cast(current_date AS FORMAT ‘yyyymmdd’) as varchar(8))
select current_date (format ‘YYYYMMDD’) (varchar(8))

–timestamp 转字符串
–recommended usage, statement 1
SELECT CAST(CAST(TIMESTAMP’2010-03-12 14:32:45’ AS FORMAT ‘yyyymmddbhh:mi:ssbt’) AS varchar(20));
select current_timestamp (format ‘YYYYMMDDBHHMISS’) (varchar(15))
SELECT CAST(TIMESTAMP’2010-03-12 14:32:45’ AS FORMAT ‘yyyymmddbhh:mi:ssbt’) (varchar(20));

–error when runing
select current_date (format ‘YYYYMMDD’) (varchar(8))||‘abc’
–how to achieve
select CAST(cast(current_date AS FORMAT ‘yyyymmdd’) as varchar(8))||‘abc’

–合并日期和时间为 timestamp
SELECT CAST(CAST(CURRENT_DATE AS FORMAT ‘YYYY-MM-DD’) || ’ ’ || CAST(CAST(CURRENT_TIME AS FORMAT ‘HH:MI:SS’) AS CHAR(8)) AS TIMESTAMP(0));

–searched case 语句
case
when AA=v1 then r1
when AA=v2 then r2
else null
end
–value case 语句
case AA
when v1 then r1
when v2 then r2
else null
end

–case 变种 NULLIF
NULLIF returns NULL if its arguments are equal. Otherwise, it returns its first argument,
scalar_expression_1.
–case 变种 COALESCE
COALESCE returns NULL if all its arguments evaluate to null. Otherwise, it returns the value
of the first non-null argument in the scalar_expression list.
oracle:nvl(f1,f2…),teradata:coalesce(f1,f2…)

=====================================
建表

–创建 MULTISET 表
MULTISET：默认为 SET；
NO LOG：默认为 LOG，LOG指示维护交易日志，NO LOG 的性能更好；

create MULTISET table t1
(f1 integer, f2 integer) PRIMARY INDEX ( f1 ) ;
;

–创建临时表
CREATE MULTISET TABLE PDATA.EQP_PERF_HIS_SS1_CUR_I AS PDATA.EQP_PERF_HIS
WITH NO DATA
PRIMARY index (fab_code)
;
CREATE MULTISET TABLE PDATA.EQP_PERF_HIS_SS1_CUR_I AS (select * from PDATA.EQP_PERF_HIS)
WITH NO DATA
–WITH DATA
PRIMARY index (fab_code)
;
真正的临时表 volatile , session 结束后自动drop, 也可以手动删除
CREATE VOLATILE MULTISET TABLE PDATA.EQP_PERF_HIS_SS1_CUR_I AS (select * from PDATA.EQP_PERF_HIS)
WITH NO DATA
–WITH DATA
PRIMARY index (fab_code)

还有一种 GLOBAL TEMPORARY TABLE, 不常用.

大小写敏感
–default, 大小写不敏感
select f from (select ‘a’ f ) dual_a where 1=1 and f = ‘A’
select f from (select ‘a’ f ) dual_a where 1=1 and f like ‘%A%’
–大小写敏感的写法
select f from (select ‘a’ f ) dual_a where 1=1 and f(casespecific) like ‘%A%’

select * from scott.emp where ename(CASESPECIFIC) = ‘FAN’;–使用关键字CASESPECIFIC将区分大小写

转义 _ 字符, 下例是使用\取转义_
select * from like_escape_test where some_text like ‘%_%’ escape ‘’;

top n 语句
select top 10 * from table_a order by field_a;

UPDATE Specifying a Correlation Name for the Updated Table in the FROM Clause
需要说明的是, SET 子句中的目标字段不能加表名alias.
UPDATE e
FROM employee AS e, department AS d
SET salary = salary * 1.05
WHERE e.emp_no = d.emp_no
AND d.name LIKE ‘%Support%’

update join 语句
UPDATE employee
SET salary_amount=salary_amount * 1.10
WHERE employee.dep_no = department.dep_no
AND department.name LIKE ‘%Support%’
;

Note: In an update, you can’t use the ON clause,
so the join condition is specified in the WHERE clause.

在SP中, 可以使用变量, 但在Macro中, 是不能使用变量. 声明变量必须放在SP的开头部分. 语法为:
DECLARE vcount INTEGER DEFAULT 0;
DECLARE temp1, par1 VARCHAR(40) DEFAULT NULL;

Teradata没有 oracle的打印功能, 下面的语句并不能输出
PRINT ‘EmpNo:’, vcount;

使用游标 cursor
FOR loopvar AS cur1 CURSOR FOR
SELECT employee_number, department_number FROM employee
DO
PRINT ‘EmpNo:’, loopvar.employee_number;
END FOR;
上面的例子中示范了游标的规则：
1.声明游标，需要使用 FOR 语句。
2.要赋予游标一个名字，例子中的名字为 cur1
3.要给循环赋一个名字 loopvar

定义SP
REPLACE PROCEDURE PDATA.SP_WAT_PARAMETERS()
BEGIN
END;

定义Macro
REPLACE MACRO MARTVIEW_KPI.EIS_INDEX_RESULT_MACRO(SQL_Date VARCHAR(20))
AS
(
);

– 动态执行sql语句
–===========================
SET Sql_text = ‘DELETE FROM temp_Table’ ;
CALL DBC.SYSEXECSQL(:Sql_text) ;

=====================================
– bteq 命令行工具

bteq工具的调用方法是:
bteq <sql_file.btq >log.txt

下面是sql_file.btq文件的内容
.LOGON ip/userid,pwd

drop table SMCTA.SS1SDBSDB_TB_INFO_EQP4687 ;
;
.IF ERRORCODE <> 0 THEN .GOTO QUITWITHERROR;

SELECT 1 ;

.IF ACTIVITYCOUNT = 0 THEN .GOTO QUITWITHNOERROR;
.GOTO QUITWITHNOERROR;

.LABEL QUITWITHERROR
.LOGOFF;
.QUIT 12;

.LABEL QUITWITHNOERROR
.LOGOFF;
.QUIT 0;

.LOGOFF
上面是sql_file.btq文件的内容

=====================================
– jdbc访问

JDBC connection string:
url=“jdbc:teradata://edwprd/TMODE=TERA,CHARSET=ASCII,CLIENT_CHARSET=cp936,DATABASE=TESTDB,lob_support=off”
edwprd为IP, 考虑到Teradata是多节点一体机, 最好是在hosts中, 定义一个域名解析规则, 这样就有了多节点冗余的功能.

Hosts:
153.65.129.189 edwprd dbccop1
153.65.129.190 edwprd dbccop2
153.65.129.191 edwprd dbccop3
153.65.129.192 edwprd dbccop4