##python 读取pdf文本内容
#!/usr/bin/env python3
#-*- coding:utf-8 -*-
# pip3 install pdfminer3k
from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import LAParams, LTTextBoxHorizontal
from pdfminer.pdfparser import PDFParser, PDFDocument
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter, PDFTextExtractionNotAllowed
from pdfminer.pdfdevice import PDFDevice
def read_pdf(pdf_name, result_name):
    """Extract the text of every page of a PDF into a UTF-8 text file.

    Each page is introduced by a banner line carrying its 1-based page number,
    followed by the text of every horizontal text box found on that page.
    The banner and the text are also echoed to stdout.

    Args:
        pdf_name: Path of the PDF file to read.
        result_name: Path of the text file to write (overwritten if present).

    Raises:
        PDFTextExtractionNotAllowed: If the document reports that it is not
            extractable.
    """
    # Open in binary mode. The handle is held for the whole extraction and is
    # closed on exit, including when an exception is raised.
    with open(pdf_name, 'rb') as fp:
        # Create a PDF parser on top of the file object.
        parser = PDFParser(fp)
        # Create the PDF document object.
        doc = PDFDocument()
        # Link the parser and the document to each other.
        parser.set_document(doc)
        doc.set_parser(parser)
        # Supply the initial password; an empty string when there is none.
        doc.initialize('')
        # Refuse to continue when the document does not allow text extraction.
        if not doc.is_extractable:
            raise PDFTextExtractionNotAllowed
        # Resource manager that holds resources shared between pages.
        rsrcmgr = PDFResourceManager()
        # Page aggregator device with default layout-analysis parameters.
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        # Interpreter that renders pages onto the device.
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        with open(result_name, "w", encoding="u8") as fd_out:
            # Handle one page per iteration; page numbers start at 1.
            for i, page in enumerate(doc.get_pages(), 1):
                index = "===========《第{}页》===========".format(i)
                print(index)
                fd_out.write(index + "\n")
                interpreter.process_page(page)
                # Fetch the layout object produced for this page.
                layout = device.get_result()
                # The layout holds the objects parsed from the page; only the
                # horizontal text boxes are used, the rest are skipped.
                for x in layout:
                    if isinstance(x, LTTextBoxHorizontal):
                        results = x.get_text()
                        print(results)
                        fd_out.write(results)
if __name__ == '__main__':
    # Script entry point: extract the text of test.pdf into test.txt.
    pdf_name, result = 'test.pdf', 'test.txt'
    read_pdf(pdf_name, result)
##python 生成excel
# !/usr/bin/env python
# -*- coding: utf-8 -*-
import MySQLdb
from datetime import datetime
import cx_Oracle
import os
import xlwt
import sys
# Python 2 idiom: sys is reloaded before the setdefaultencoding() call below.
reload(sys)
import time
# Make UTF-8 the interpreter-wide default string encoding.
sys.setdefaultencoding('utf-8')
# NLS_LANG is set for the Oracle client library; presumably so that cx_Oracle
# exchanges text as UTF-8 -- TODO confirm against the database character set.
os.environ['NLS_LANG'] = 'SIMPLIFIED CHINESE_CHINA.UTF8'
# Import the datetime module. This rebinds the name `datetime`, which the
# earlier `from datetime import datetime` had bound to the class.
import datetime # import the date/time module
today = datetime.date.today() # today's date
# The day before today; used as the date part of the saved .xls file name.
yesterday = today - datetime.timedelta(days=1)
# Module-level value; the functions shown in this file assign their own local
# `i`, so nothing visible here reads it.
i = 0
def write_data_to_excel(name, result):
    """Write *result* into Sheet1 of a new .xls workbook and save it.

    Row 0 receives the five column titles. Every element of ``result`` is then
    written as one row below it, one cell per item. The workbook is saved as
    ``name + str(yesterday) + '.xls'``, where ``yesterday`` is the module-level
    date. The input and every cell value are echoed to stdout.

    Args:
        name: File-name prefix of the workbook to save.
        result: Sequence of rows, each row a sequence of cell values.
    """
    print(result)
    # Create the workbook (the Excel file) object.
    wbk = xlwt.Workbook(encoding='utf-8')
    # cell_overwrite_ok=True allows the same cell to be written more than once.
    sheet = wbk.add_sheet('Sheet1', cell_overwrite_ok=True)
    titlelist = ['系统标识', '交易失败笔数', '交易成功笔数', '交易总笔数', '交易成功率']
    # The header is written unconditionally, so an empty result still yields a
    # sheet that carries its column titles.
    for col, heading in enumerate(titlelist):
        sheet.write(0, col, heading)
    # Data rows start at sheet row 1, directly below the header.
    for row_no, row in enumerate(result, 1):
        for col, value in enumerate(row):
            print(value)
            sheet.write(row_no, col, value)
    # Save under the given name followed by yesterday's date.
    wbk.save(name + str(yesterday) + '.xls')
def get_data():
    """Query today's transaction counts for each logical system.

    For every system in the fixed list (NCBS, LOAN, IBPS, IBANK) one query is
    run against ``esb2_trans_log``. Progress, the SQL text and the fetched row
    are echoed to stdout.

    Returns:
        A list with one entry per system, in list order, each of the form
        ``[system, failed_count, success_count, total_count, success_rate]``.
    """
    applist = ['NCBS', 'LOAN', 'IBPS', 'IBANK']
    # The template is loop-invariant; both %s slots take the system name.
    # NOTE(review): the success-rate expression divides by cnt2. A system with
    # no matching rows today would presumably make Oracle raise a
    # divide-by-zero error -- confirm and guard if that can happen.
    sql_template = (
        "select /*+parallel(t 8)*/ cnt1 as 失败笔数,(cnt2 - cnt1) as 成功笔数,cnt2 as 总笔数 ,(cnt2 - cnt1)/cnt2 as 成功率 from (select (select count(*) "
        "from esb2_trans_log t "
        "where t.trans_date >= "
        "trunc(sysdate) "
        "and t.trans_date <= "
        "trunc(sysdate+1) "
        "and (t.respmsg like '%%Read timed out%%' or t.respmsg like '%%异常%%' or "
        "t.respmsg like '%%超时%%' or t.respmsg like '%%通讯%%失败%%') "
        "and t.logicsystem='%s' "
        "and t.esbserviceflowno<>'000000' "
        "and t.flowstepid in ('4','E') ) cnt1 , "
        "( select /*+parallel(t 8)*/ count(*) "
        "from esb2_trans_log t "
        "where t.trans_date >= "
        "trunc(sysdate) "
        "and t.trans_date <= "
        "trunc(sysdate+1) "
        "and t.logicsystem='%s' "
        "and t.flowstepid in ('4','E') "
        ") cnt2 from dual)"
    )
    rows = [[] for _ in applist]
    print(rows)
    # NOTE(review): user, password and host are hard-coded in this string.
    conn = cx_Oracle.connect('esbdata/esbdata@1.1.1.1/esbdb')
    try:
        for i, x in enumerate(applist):
            print(i)
            print(x)
            mysql = sql_template % (x, x)
            print(mysql)
            cur = conn.cursor()
            try:
                cur.execute(mysql)
                arr01 = cur.fetchone()
            finally:
                cur.close()
            print(arr01)
            # System name first, then the four columns of the fetched row.
            rows[i].extend((x, arr01[0], arr01[1], arr01[2], arr01[3]))
    finally:
        # Actually call close(); a bare `conn.close` leaves the connection open.
        conn.close()
    return rows
# Report title, transcoded from UTF-8 to GBK bytes; it is passed on as the
# file-name prefix of the saved workbook.
title = '交易系统成功率'.decode('utf-8').encode('gbk')
# Fetch the per-system rows and export them under that title.
arr02 = get_data()
db_dict = {title: arr02}
for k, v in db_dict.items():
    write_data_to_excel(k, v)