为了对历史数据进行分区,创建 ODS2 层的 Hive 外部表。
建表时生成分区字段(inc_day),后续会根据该分区字段向 HDFS 中写入数据。
#!/usr/bin/python
# -*- coding:utf-8 -*-
import os
import re
import subprocess
import sys

import pymssql
import pymysql
# Connection settings for the source database; get_table_info() passes them
# to pymssql.connect().
# NOTE(review): credentials are hard-coded in the source file.
hosts = "2.3.4.5."
username = "ronex"
# Must be spelled `password`: get_table_info() reads this exact name when it
# calls pymssql.connect(password=password).
password = "ronex"
database = "xx"

# Pieces of the Hive table location: /<env>/<schema>/<source>/ods_2_<table>.
env = 'pro'
# Hive database name; also the default value of get_table_info(schema=...).
schema = 'pro_ods'
source = 'nc_pro'

# Alternative table list, kept for reference only (not processed):
# table_list = ['purchase_invoice_header','purchase_invoice_line','purchase_order_header','purchase_order_line',
# 'purchase_request_header','purchase_request_line','purchasein_bill_header','purchasein_bill_line',
# 'receive_order_header','receive_order_line','saleout_bill_header','saleout_bill_line',
# 'sales_invoice_header','sales_invoice_line','sales_order_header','sales_order_line',
# 'delivery_order_header','delivery_order_line','payment_order_header','payment_order_line']

# Source tables for which a Hive DDL statement is generated and executed.
table_list = ['ic_saleout_b','ic_saleout_h']
# To take the tables from the command line instead (comma separated):
# table_list = sys.argv[1].split(",")
def _hive_column_type(data_type):
    """Map a source column's ``data_type`` string to the Hive type used in the DDL."""
    # Every integer flavour checked by this script (tinyint, smallint,
    # mediumint, int, bigint) contains the substring 'int'; all become bigint.
    if 'int' in data_type:
        return "bigint"
    if 'double' in data_type or 'float' in data_type or 'decimal' in data_type:
        return "decimal"
    # Anything else (text, dates, ...) is stored as a string.
    return "string"


def get_table_info(table,schema = schema,ispartition = True):
    """Build the Hive ``create external table`` statement for one source table.

    Column names and data types are read from ``information_schema.columns``
    of the source database (through pymssql) and mapped to Hive types.

    Args:
        table: Source table name. The Hive table is named ``ods_2_<table>``.
        schema: Hive database name; defaults to the module-level ``schema``.
        ispartition: When true (the default) the table is declared as
            ``partitioned by(inc_day string)``.

    Returns:
        A tuple ``(cols, create_str)``: the list of column names and the
        complete DDL statement text.
    """
    hive_table = 'ods_2_' + table
    create_head = '\ncreate external table if not exists {0}.{1}('.format(schema, hive_table)

    # The tail is assembled line by line; the leading '' makes it start with a
    # newline, exactly like the statement head.
    tail_lines = ['']
    if ispartition:
        tail_lines.append('partitioned by(inc_day string)')
    # Raw string: the backslash must reach Hive literally as '\001'.
    tail_lines.append(r"row format delimited fields terminated by '\001'")
    tail_lines.append("location '/{0}/{1}/{2}/{3}';".format(env, schema, source, hive_table))
    create_tail = '\n'.join(tail_lines)

    cols = []
    column_defs = []
    connection = pymssql.connect(host=hosts,
                                 user=username,
                                 password=password,
                                 database=database,
                                 charset='utf8'
                                 )
    try:
        with connection.cursor() as cursor:
            # The table name is bound as a parameter instead of being pasted
            # into the SQL text, and the rows are ordered so the DDL columns
            # follow the source table's column order.
            # NOTE(review): there is no table_schema filter, so a table name
            # that exists in several schemas returns the columns of all of
            # them - confirm this cannot happen in the source database.
            sql = ('select column_name,data_type from information_schema.columns '
                   'where table_name = %s order by ordinal_position')
            cursor.execute(sql, (table,))
            try:
                for row in cursor:
                    cols.append(row[0])
                    column_defs.append(row[0] + '\t' + _hive_column_type(row[1]))
            except Exception as e:
                # Best effort: report the problem and keep whatever columns
                # were collected so far.
                print('程序异常!')
                print(e)
    finally:
        connection.close()

    # Joining (instead of slicing off a trailing ',\n') keeps the statement
    # head intact even when no column was returned.
    create_str = create_head + ',\n'.join(column_defs) + '\n' + ')' + create_tail
    return cols, create_str  # column list and the create-table statement
# Generate the DDL for every configured table, print it, and run it in Hive.
for table_name in table_list:
    cols, create_str = get_table_info(table_name)
    print(create_str)
    # The statement is handed to hive as a single argv entry, so quotes, '$'
    # or backticks inside it are never interpreted by a shell.
    try:
        status = subprocess.call(['hive', '-e', create_str])
    except OSError as exc:
        # hive could not be started at all; report it and go on with the
        # next table instead of aborting the whole run.
        sys.stderr.write('could not run hive for table {0}: {1}\n'.format(table_name, exc))
        continue
    if status != 0:
        sys.stderr.write('hive exited with status {0} for table {1}\n'.format(status, table_name))