读写csv文件:
读csv文件:
import csv
def readCsv(path):
    """Read a CSV file and return all of its records.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A list with one entry per CSV record; each entry is the list of
        string fields of that record. An empty file yields an empty list.
    """
    # newline="" leaves line-ending handling to the csv module, so quoted
    # fields that contain embedded newlines are parsed correctly.
    with open(path, "r", newline="") as f:
        # Fixed: the original called the non-existent ``csv.reder``.
        allFileInfo = csv.reader(f)
        # Kept from the original: prints the reader object itself.
        print(allFileInfo)
        # Rows must be materialized while the file is still open.
        return list(allFileInfo)
# Example usage of readCsv.
# NOTE(review): "路径" (Chinese for "path") looks like a placeholder — replace
# it with the path of a real CSV file before running.
path = r"路径"
info = readCsv(path)
# Second call with the same literal path; its return value is discarded.
readCsv ("路径")
写csv文件:
import csv
def writeCsv(path, data):
    """Write rows to a CSV file, replacing any existing content.

    Args:
        path: Path of the CSV file to create or overwrite.
        data: Iterable of rows; each row is an iterable of field values.
    """
    # newline="" stops the text layer from translating the "\r\n" that the
    # csv writer emits, which would otherwise produce blank lines between
    # rows on Windows.
    with open(path, "w", newline="") as f:
        # Fixed: the original called the non-existent ``csv.write`` and
        # looped with a mismatched variable name (rouData vs rowData).
        writer = csv.writer(f)
        writer.writerows(data)
# Example usage of writeCsv: write three rows of three string fields each.
# NOTE(review): "路径" (Chinese for "path") looks like a placeholder — replace
# it with a real output path before running.
path = r"路径"
writeCsv(path, [["1", "2", "3"],["4", "5", "6"],["7", "8", "9"]])
读取pdf文件:
import sys
import importlib
importlib.reload(sys)
from pdfminer.pdfparser import PDFParser, PDFDocument
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import LTTextBoxHorizontal, LAParams
from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
def readPDF(path, toPath):
    """Extract the horizontal text boxes of a PDF and append them to a file.

    Every extracted text block is also printed to stdout.

    Args:
        path: Path of the PDF file to read.
        toPath: Path of the text file that receives the extracted text. It
            is opened in append mode with UTF-8 encoding, so existing
            content is kept. It is only opened for pages that yield text.

    Raises:
        PDFTextExtractionNotAllowed: If the document reports that it is not
            extractable.
    """
    # The parser is handed this file object, so it stays open while pages
    # are processed and is closed afterwards, even on error (the original
    # never closed it).
    with open(path, "rb") as pdf:
        # Create the parser.
        parser = PDFParser(pdf)
        # Create the PDF document object.
        pdfFile = PDFDocument()
        # Link parser and document to each other.
        parser.set_document(pdfFile)
        pdfFile.set_parser(parser)
        # Initialize the document (no password is supplied).
        pdfFile.initialize()

        # Check whether the document allows text extraction.
        if not pdfFile.is_extractable:
            raise PDFTextExtractionNotAllowed

        # Resource manager shared by the device and the interpreter.
        manager = PDFResourceManager()
        # Layout-aggregating device created with default layout parameters.
        laparams = LAParams()
        device = PDFPageAggregator(manager, laparams=laparams)
        # Interpreter that renders pages onto the device.
        interpreter = PDFPageInterpreter(manager, device)

        # Process the document one page at a time.
        for page in pdfFile.get_pages():
            interpreter.process_page(page)
            # Layout of the page that was just processed.
            layout = device.get_result()
            texts = [
                box.get_text()
                for box in layout
                if isinstance(box, LTTextBoxHorizontal)
            ]
            if not texts:
                continue
            # Open the output once per page instead of once per text box.
            with open(toPath, "a", encoding="utf-8") as out:
                for text in texts:
                    print(text)
                    out.write(text)
# Example usage of readPDF: extract the text of the PDF at `path` into the
# text file at `toPath`.
# NOTE(review): `path` is a raw string, so the "\\" before SQUID1.pdf is two
# literal backslashes — confirm that is intended.
path = r"G:\mysql以及高级服务所有\\SQUID1.pdf"
toPath = r"G:\mysql以及高级服务所有\1.txt"
readPDF(path, toPath)