背景
- MySQL数据同步到HIVE时,要写HIVE的建表语句
数据类型全写STRING不够好,对此写个自动转换程序
转换后不一定是最终的,仍可能要微调
- 开发语言:Python3
MySQL列数据类型转成HIVE的
from re import fullmatch
def column_type_mysql2hive(mysql_column_type):
"""MySQL列数据类型转成HIVE的"""
if fullmatch('^tinyint.+unsigned', mysql_column_type):
return 'SMALLINT'
elif fullmatch('^tinyint.*', mysql_column_type):
return 'TINYINT'
elif fullmatch('^smallint.+unsigned', mysql_column_type):
return 'INT'
elif fullmatch('^smallint.*', mysql_column_type):
return 'SMALLINT'
elif fullmatch('^mediumint.*', mysql_column_type):
return 'INT'
elif fullmatch('^int.+unsigned', mysql_column_type):
return 'BIGINT'
elif fullmatch('^int.*', mysql_column_type):
return 'INT'
elif fullmatch('^bigint.+unsigned', mysql_column_type):
return 'BIGINT'
elif fullmatch('^bigint.*', mysql_column_type):
return 'BIGINT'
elif fullmatch('^double.*', mysql_column_type):
return 'DOUBLE'
elif fullmatch('^float.*', mysql_column_type):
return 'FLOAT'
elif fullmatch(r'^decimal.*', mysql_column_type):
return mysql_column_type.replace(' unsigned', '').upper()
else:
return 'STRING'
| MySQL |
HIVE |
| tinyint |
TINYINT |
| tinyint unsigned |
SMALLINT |
| smallint |
SMALLINT |
| smallint unsigned |
INT |
| mediumint 和 mediumint unsigned |
INT |
| int |
INT |
| int unsigned |
BIGINT |
| bigint |
BIGINT |
| bigint unsigned |
BIGINT 或 STRING |
| decimal |
DECIMAL |
| float |
FLOAT |
| double |
DOUBLE |
- 按上述映射关系转,不在上述表格的数据类型统一转
STRING
- 注意:
1、无符号BIGINT转HIVE的BIGINT可能会越界;如果越界,就考虑转STRING或DECIMAL
2、建议bit类型数据,用bin函数转成字符串,再导入到HDFS
自动生成MySQL表对应HIVE建表语句完整代码
from re import fullmatch
from pymysql import Connection
SQL_COLUMNS = '''
SELECT
`COLUMN_NAME` -- 列名
,`COLUMN_TYPE` -- 类型
,`COLUMN_COMMENT` -- 列注释
FROM `information_schema`.`COLUMNS`
WHERE `TABLE_SCHEMA`='{TABLE_SCHEMA}'
AND `TABLE_NAME`='{TABLE_NAME}'
ORDER BY `ORDINAL_POSITION`;
'''.strip().format
SQL_COMMENT = '''
SELECT `TABLE_COMMENT`
FROM `information_schema`.`TABLES`
WHERE `TABLE_SCHEMA`='{TABLE_SCHEMA}'
AND `TABLE_NAME`='{TABLE_NAME}';
'''.strip().format
HIVE_PREFIX = 'ods_'
HIVE_DDL = '''
CREATE EXTERNAL TABLE `{table}`(
{columns}
) COMMENT '{table_comment}'
PARTITIONED BY (`ymd` STRING COMMENT '年月日');
'''.strip().format
MYSQL_DDL = "SHOW CREATE TABLE `{TABLE_SCHEMA}`.`{TABLE_NAME}`".format
def column_type_mysql2hive(mysql_column_type):
"""MySQL列数据类型转成HIVE的"""
if fullmatch('^tinyint.+unsigned', mysql_column_type):
return 'SMALLINT'
elif fullmatch('^tinyint.*', mysql_column_type):
return 'TINYINT'
elif fullmatch('^smallint.+unsigned', mysql_column_type):
return 'INT'
elif fullmatch('^smallint.*', mysql_column_type)