#coding=utf-8
__author__ = 'Administrator'
import pymysql
import sys
reload(sys)
sys.setdefaultencoding("utf-8")
class GetTableinfo:
#得到表列表
def getTableList(self,database):
conn = pymysql.connect(host='127.0.0.1', port=3306, user='pig', passwd='123456', db='hive', charset='utf8')
import time
time.sleep(0.01)
courser=conn.cursor()
#我们所有的表都是内部表
courser.execute("select * from tbls where db_id=%s and tbl_type='MANAGED_TABLE' ",database)
#courser.execute("select * from tbls where db_id=%s and tbl_type='EXTERNAL_TABLE' ",database)
#得到所有内部部
value=courser.fetchall()
courser.close()
conn.close()
return value
#得到是否是分区表
def ispartitions(self,tbl_id):
conn = pymysql.connect(host='127.0.0.1', port=3306, user='pig', passwd='123456', db='hive', charset='utf8')
import time
time.sleep(0.01)
courser=conn.cursor()
courser.execute("select * from partition_keys where tbl_id='%s ' ORDER BY INTEGER_IDX ",tbl_id)
value=courser.fetchall()
courser.close()
conn.close()
return value
def getClouminf(self,DB_ID,SD_ID):
#select * from columns_v2 where CD_ID =(select CD_ID from sds a where a.SD_ID='4808262') ;
conn = pymysql.connect(host='127.0.0.1', port=3306, user='pig', passwd='123456', db='hive', charset='utf8')
import time
time.sleep(0.01)
courser=conn.cursor()
sql= "select DISTINCT b.TBL_ID,b.TBL_NAME,a.* from columns_v2 a,tbls b,sds c where b.DB_ID =%s and b.SD_ID = c.SD_ID and c.CD_ID=a.CD_ID and b.TBL_ID=%s ORDER by a.INTEGER_IDX"%(DB_ID,SD_ID)
courser.execute(sql)
value=courser.fetchall()
courser.close()
conn.close()
return value
def getSEDInof(self,DB_ID,table_ID):
sql="select DISTINCT SLIB,b.tbl_name from SERDES a,tbls b,sds c where b.SD_ID=c.SD_ID and a.SERDE_ID = c.SERDE_ID and b.DB_ID=%s and b.TBL_ID=%s"%(DB_ID,table_ID)
import time
time.sleep(0.01)
conn = pymysql.connect(host='127.0.0.1', port=3306, user='pig', passwd='123456', db='hive', charset='utf8')
courser=conn.cursor()
courser.execute(sql)
value=courser.fetchall()
courser.close()
conn.close()
return value
def getFilesTermINATED(self,DB_ID,table_id):
long_sql="""
SELECT DISTINCT PARAM_VALUE,d.TBL_NAME
FROM serde_params a, serdes b, sds c,tbls d
WHERE (a.PARAM_KEY = 'field.delim' or a.PARAM_KEY ='serialization.format')
AND a.SERDE_ID = b.SERDE_ID
AND c.SERDE_ID = b.SERDE_ID
AND d.SD_ID = c.SD_ID
AND d.DB_ID = %s
AND d.TBL_ID =%s;"""
sql=long_sql%(DB_ID,table_id)
import time
time.sleep(0.02)
conn = pymysql.connect(host='127.0.0.1', port=3306, user='pig', passwd='123456', db='hive', charset='utf8')
courser=conn.cursor()
courser.execute(sql)
value=courser.fetchall()
courser.close()
conn.close()
return value
def getInputFormat(self,DB_ID,table_id):
long_sql="select b.TBL_NAME, a.INPUT_FORMAT,a.OUTPUT_FORMAT,a.LOCATION from sds a ,tbls b where a.SD_ID=b.SD_ID and b.DB_ID=%s and b.TBL_ID=%s "
sql=long_sql%(DB_ID,table_id)
import time
time.sleep(0.01)
conn = pymysql.connect(host='127.0.0.1', port=3306, user='pig', passwd='123456', db='hive', charset='utf8')
courser=conn.cursor()
courser.execute(sql)
value=courser.fetchall()
courser.close()
conn.close()
return value
def delpartitions(self,sds_id):
self.getClouminf(self,sds_id)
return None
def delnopartitions(self,sds_id):
return None
def get_create_table(self,table_name):
LONGSQL="""DROP TABLE IF EXISTS %s;
CREATE TABLE %s ("""%(table_name,table_name)
return LONGSQL
def get_create_cloume(self,columns_list):
sql=""
len_col=len(columns_list)
tmp_item=1
for column in columns_list:
if tmp_item == len_col:
if column[3] == None:
sql=sql+" `"+column[4]+"` " + column[5]+")"
else:
sql=sql+" `"+column[4]+"` " + column[5]+" COMMENT '"+ column[3]+"')"
else:
if column[3] == None:
sql=sql+" `"+column[4]+"` " + column[5]+",\n"
else:
sql=sql+" `"+column[4]+"` " + column[5]+" COMMENT '"+ column[3]+"',\n"
tmp_item=tmp_item+1
return sql
def get_partion_item(self,partitionlist):
sql="PARTITIONED BY (\n"
len_col=len(partitionlist)
tmp_item=1
#print partitionlist
for column in partitionlist:
if tmp_item == len_col:
if column[1] == None:
sql=sql+" `"+column[2]+"` " + column[3]+")"
else:
sql=sql+" `"+column[2]+"` " + column[3]+" COMMENT '"+ column[1]+"')"
else:
if column[1] == None:
sql=sql+" `"+column[2]+"` " + column[3]+",\n"
else:
sql=sql+" `"+column[2]+"` " + column[3]+" COMMENT '"+ column[1]+"',\n"
tmp_item=tmp_item+1
return sql
def get_filerow_file_data(self,serintf,clome_file,inoutFormat,table_name):
st1r='\t'
last_sql=""
if serintf[0][0] == 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe':
if str(clome_file[0][0]) == str(1):
last_sql="""ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
STORED AS INPUTFORMAT
'%s'
OUTPUTFORMAT
'%s';
"""%(inoutFormat[0][1],inoutFormat[0][2])
return last_sql
else:
if clome_file[0][0] == u'\t':
st1r=r'\t'
elif clome_file[0][0] == u',':
st1r=","
elif clome_file[0][0] == u'$':
st1r=r"$"
elif clome_file[0][0] == u'\005':
st1r=r'\\005'
elif clome_file[0][0] == r'|':
st1r="|"
last_sql="""ROW FORMAT DELIMITED
FIELDS TREMINATED BY '%s'
LINES TREMINATED BY '\\n'
STORED AS INPUTFORMAT
'%s'
OUTPUTFORMAT
'%s';
"""%(st1r,inoutFormat[0][1],inoutFormat[0][2])
return last_sql
else:
last_sql="""ROW FORMAT DELIMITED
FIELDS TREMINATED BY '\t'
LINES TERMINATED BY '\\n'
STORED AS INPUTFORMAT
'%s'
OUTPUTFORMAT
'%s';
"""%(inoutFormat[0][1],inoutFormat[0][2])
sql_alter="alter table %s SET SERDE 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'; "%(table_name)
last_sql=last_sql+"\n"+sql_alter
return last_sql
if __name__ == "__main__":
dbs_id="351"
table_list = GetTableinfo().getTableList(dbs_id)
print table_list
for table in table_list:
table_name = table[7]
table_tbl_id= table[0]
sds_id=table[6]
##得到表的所有字段
columns=GetTableinfo().getClouminf(dbs_id,table_tbl_id)
#得到是否是分区表
partitionsvalue=GetTableinfo().ispartitions(table_tbl_id)
if len(partitionsvalue) != 0 :
#不是分区表
columns_len = len(columns)
partitionsvalue_len= len(partitionsvalue)
get_len=columns_len-partitionsvalue_len
#表字段
columns_list=columns[0:get_len+1]
#分区字段
partitionlist=partitionsvalue
#ser字段
serintf = GetTableinfo().getSEDInof(dbs_id,table_tbl_id)
#得到行文分隔符,和列分隔符
hive_line='\n'
#列分隔符
clome_file= GetTableinfo().getFilesTermINATED(dbs_id,table_tbl_id)
#输入,输出格式
inoutFormat= GetTableinfo().getInputFormat(dbs_id,table_tbl_id)
create_table_sql=GetTableinfo().get_create_table(table_name)
create_table_colume=GetTableinfo().get_create_cloume(columns_list)
create_talbe_partion=GetTableinfo().get_partion_item(partitionlist)
#print create_table_sql+' \n'+create_table_colume+'\n'+create_talbe_partion
last_sql= GetTableinfo().get_filerow_file_data(serintf,clome_file,inoutFormat,table_name)
myfilesql=""
try:
myfilesql= create_table_sql+' \n'+create_table_colume+'\n'+create_talbe_partion+'\n'+last_sql
with open("c:\\base.txt","a+") as myfile:
myfile.write(str(myfilesql).decode("utf-8"))
finally:
pass
else:
#print columns_list
#表字段
columns_list=columns
#ser字段
serintf=GetTableinfo().getSEDInof(dbs_id,table_tbl_id)
print serintf
if serintf == ():
continue
#得到行文分隔符,和列分隔符
hive_line='\n'
#列分隔符
clome_file= GetTableinfo().getFilesTermINATED(dbs_id,table_tbl_id)
#输入,输出格式
inoutFormat= GetTableinfo().getInputFormat(dbs_id,table_tbl_id)
create_table_sql=GetTableinfo().get_create_table(table_name)
create_table_colume=GetTableinfo().get_create_cloume(columns_list)
last_sql=GetTableinfo().get_filerow_file_data(serintf,clome_file,inoutFormat,table_name)
myfilesql=""
try:
myfilesql= "\n"+create_table_sql+'\n'+create_table_colume+'\n'+last_sql
with open("c:\\base.txt","a+") as myfile:
myfile.write(myfilesql.decode("utf-8"))
#myfilesql= create_table_sql+' \n'+create_table_colume+'\n'+create_talbe_partion+'\n'+last_sql
finally:
pass
hive迁移
最新推荐文章于 2024-01-12 23:19:04 发布