练手
1、python脚本连接hive并保存查询结果
#!/usr/bin/env
import subprocess
#直接运行hql命令
cmd = "hive -e 'set hive.exec.dynamic.partition=true;\
set hive.auto.convert.join=false;\
select * from tmp.tmp_owneruid; \
' > uid.txt "
r = subprocess.Popen(cmd,shell=True)
r.wait()
#运行含参数的hql文件
cmd = "hive -hiveconf day1=2019-01-23 -f ./sql/country_usernum.sql > usernum.txt "
r = subprocess.Popen(cmd,shell=True)
r.wait()
其中,
hive通过-f调用:https://blog.youkuaiyun.com/kwu_ganymede/article/details/50291993
2、linux命令:保存python脚本的输出日志
python test.py > ./log/1.log 2>&1
其中,
- “python”表示执行python代码
- “test.py”表示python的源代码文件
- “1.log”表示输出的日志文件
- “>”表示将打印信息重定向到日志文件
- “2>&1”表示将标准错误输出转变化标准输出,可以将错误信息也输出到日志文件中(0-> stdin, 1->stdout, 2->stderr)
关于输入输出重定向:http://blog.sina.com.cn/s/blog_6babbcb801016tti.html
实战
1、建立文件夹,其中包括:
~~~~
hql.py - 提取hql查询结果并保存
~~~~
mail.py - 整理查询结果并发送邮件
~~~~
mail_address.txt - 目标邮件地址
~~~~
table_head.txt - 邮件排版(表头和标题)
~~~~
sql - 存放hql语句文件夹 每个表格查询语句一个文件
~~~~
result - 存放查询结果的文件夹
~~~~
log - 存放日志的文件夹
2、TODO: python脚本连接hive 保存结果
#!/usr/bin/env
import os
import re
import time
import datetime
import subprocess
#确定查询日期
datedif = int(sys.argv[1]) #外部输入
today = datetime.datetime.now()
delta = datetime.timedelta(days=datedif)
delta_date = today - delta
sql_date = delta_date.strftime('%Y-%m-%d')
#查询hql并保存结果
sql_file = {} #sql文件和结果文件名的dict
for file in os.listdir('./sql'):
sql_result = re.match(r'(\w*)\.sql',file).group(1)
sql_file[file] = sql_result+'_'+sql_date+'.txt'
print(sql_file)
for key,value in sql_file.items():
cmd = "hive -hiveconf day1=%s -f ./sql/%s > %s " % (sql_date,key,value)
r = subprocess.Popen(cmd,shell=True)
r.wait()
- TODO: 整理成邮件并发送邮件
import os,sys,re,datetime
import smtplib
from email.mime.text import MIMEText #构造邮件
from email.mime.multipart import MIMEMultipart
from email.header import Header
#-----------------------整理成邮件html----------------------
def th_line_html(table_head): #输入['国家','系统','用户数']
result = []
for i in table_head:
result.append('<th>' + i + '</th>')
result = ''.join(result)
result = '<tr>\n'+ result + '\n</tr>'
return result
def td_line_html(table_content):
result = []
for i in table_content:
result.append('<td>' + i + '</td>')
result = ''.join(result)
result = '<tr>\n'+ result + '\n</tr>'
return result
def table_html(table_head,result_file_name): #输入表头['国家','系统','用户数']和表格内容xx.txt
html1 = th_line_html(table_head)
html2 = []
with open('./result/'+result_file_name,'r') as jg:
for line in jg.readlines():
line = line.replace('\n','')
line = line.split('\t')
html2.append(td_line_html(line))
result = '<table class=long_table>\n'+html1+'\n'.join(html2)+'\n</table>'
return result
#确定查询日期
# datedif = int(sys.argv[1]) #外部输入
datedif = 2
today = datetime.datetime.now()
delta = datetime.timedelta(days=datedif)
delta_date = today - delta
sql_date = delta_date.strftime('%Y-%m-%d')
#制作邮件html
head_dict = {}
html = ''
with open('table_head.txt','r') as th:
for line in th.readlines():
if ':' in line:
table_name = line.split(':')[0]
table_head = line.split(':')[1].replace('\n','').split(',')
head_dict[table_name] = table_head
table_txt_name = table_name+'_'+sql_date+'.txt'
html += table_html(table_head,table_txt_name)
else:
html += '\n'
html += '\n<h4>' + line.replace('\n','') + '</h4>\n'
style = '''
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<style>
.long_table table {
font-family: Arial, "Microsoft YaHei";
width:100%;
margin:15px 0
}
.long_table th {
background-color:#708090;
color:#F5F5F5
}
.long_table,.long_table th,.long_table td
{
font-size:0.95em;
text-align:center;
padding:4px;
border:1px solid #dddddd;
border-collapse:collapse
}
.long_table tr:nth-child(odd){
background-color:#DCDCDC;
}
.long_table tr:nth-child(even){
background-color:#fdfdfd;
}
</style>'''
html = style + html
#---------------------------------发送邮件-----------------------
#发html邮件(无附件)
def send_mail(to_address):
smtpObj = smtplib.SMTP_SSL('mail.xx.tv') #如果SMTP服务器不支持TLS,则用SSL
smtpObj.login('data@xx.tv','data')
msg = MIMEText(html,'html','utf-8') #发送HTML邮件
msg['From'] = 'datagroup <%s>' % 'data@xx.tv'
msg['To'] = Header(to_address,'utf-8')
msg['Subject'] = Header('Daily Report_'+sql_date,'utf-8')
sendmailStatus = smtpObj.sendmail('data@xx.tv',to_address,msg.as_string()) #返回失败字典,为空表示发送成功
smtpObj.quit()
return(sendmailStatus)
#发html邮件(有附件)
def send_mail_attachment(to_address,attach_file_path,attach_file_name):
smtpObj = smtplib.SMTP_SSL('mail.xx.tv') #如果SMTP服务器不支持TLS,则用SSL
smtpObj.login('data@xx.tv','data')
msg = MIMEMultipart()
msg['From'] = 'datagroup <%s>' % 'data@xx.tv'
msg['To'] = Header(to_address,'utf-8')
msg['Subject'] = Header('Daily Report_'+sql_date,'utf-8')
#邮件正文内容
msg.attach(MIMEText(html,'html','utf-8'))
#邮件附件
att = MIMEText(open(attach_file_path,'rb').read(),'base64','utf-8')
att["Content-Type"] = 'application/octet-stream'
att["Content-Disposition"] = 'attachment; filename=%s' % (attach_file_name)
msg.attach(att)
#发送
sendmailStatus = smtpObj.sendmail('data@xx.tv',to_address,msg.as_string()) #返回失败字典,为空表示发送成功
smtpObj.quit()
return(sendmailStatus)
#发html邮件(可选有无附件)
def send_mail_2(to_address,attach_file_path=False,attach_file_name=False):
smtpObj = smtplib.SMTP_SSL('mail.xx.tv') #如果SMTP服务器不支持TLS,则用SSL
smtpObj.login('data@xx.tv','data')
if attach_file_path:
msg = MIMEMultipart()
#邮件正文内容
msg.attach(MIMEText(html,'html','utf-8'))
#邮件附件
att = MIMEText(open(attach_file_path,'rb').read(),'base64','utf-8')
att["Content-Type"] = 'application/octet-stream'
att["Content-Disposition"] = 'attachment; filename=%s' % (attach_file_name)
msg.attach(att)
else:
msg = MIMEText(html,'html','utf-8')
#发送
msg['From'] = 'datagroup <%s>' % 'data@xx.tv'
msg['To'] = Header(to_address,'utf-8')
msg['Subject'] = Header('Daily Report_'+sql_date,'utf-8')
sendmailStatus = smtpObj.sendmail('data@xx.tv',to_address,msg.as_string()) #返回失败字典,为空表示发送成功
smtpObj.quit()
return(sendmailStatus)
#测试发html邮件
with open('mail_address.txt','r') as ad:
for address in ad.readlines():
sendmailStatus = send_mail_2(address)
if sendmailStatus != {}:
print('There was a problem sending mail: %s:' % sendmailStatus)
# #测试发附件邮件
# attach_file_path = './result/country_usernum_2019-01-23.txt'
# attach_file_name = 'country_usernum_2019-01-23.txt'
# with open('mail_address.txt','r') as ad:
# for address in ad.readlines():
# sendmailStatus = send_mail_2(address,attach_file_path=attach_file_path,attach_file_name=attach_file_name)
# if sendmailStatus != {}:
# print('There was a problem sending mail: %s:' % sendmailStatus)
- Linux设置定时任务 - 运行任务清除结果
crontab -l #查看linux定时任务
crontab -e #编辑linux定时任务
30 09 * * * python /home/xx/auto_mail/hql.py 1 > /home/xx/auto_mail/log/hql.log 2>&1
30 11 * * * python /home/xx/auto_mail/mail.py 1 > /home/xx/auto_mail/log/mail.log 2>&1
00 19 * * * rm /home/xx/auto_mail/result/*.txt