一、对excel的写操作实例:
将一个列表的数据写入excel, 第一行是标题,下面行数具体的数据
import xlwt
#只能写不能读
stus = [['姓名', '年龄', '性别', '分数'],
['mary', 20, '女', 89.9],
['mary', 20, '女', 89.9],
['mary', 20, '女', 89.9],
['mary', 20, '女', 89.9]
]
book = xlwt.Workbook()#新建一个excel
sheet = book.add_sheet('case1_sheet')#添加一个sheet页
row = 0#控制行
for stu in stus:
col = 0#控制列
for s in stu:#再循环里面list的值,每一列
sheet.write(row,col,s)
col+=1
row+=1
book.save('stu_1.xls')#保存到当前目录下
二、对excel 的读操作:
import xlrd
#只能读不能写
book = xlrd.open_workbook('stu.xls')#打开一个excel
sheet = book.sheet_by_index(0)#根据顺序获取sheet
sheet2 = book.sheet_by_name('case1_sheet')#根据sheet页名字获取sheet
print(sheet.cell(0,0).value)#指定行和列获取数据
print(sheet.cell(0,1).value)
print(sheet.cell(0,2).value)
print(sheet.cell(0,3).value)
print(sheet.ncols)#获取excel里面有多少列
print(sheet.nrows)#获取excel里面有多少行
print(sheet.get_rows())#
for i in sheet.get_rows():
print(i)#获取每一行的数据
print(sheet.row_values(0))#获取第一行
for i in range(sheet.nrows):#0 1 2 3 4 5
print(sheet.row_values(i))#获取第几行的数据
print(sheet.col_values(1))#取第一列的数据
for i in range(sheet.ncols):
print(sheet.col_values(i))#获取第几列的数据
三、对excel的修改操作:
将excel中的某个值修改并重新保存
from xlutils.copy import copy
import xlrd
#xlutils:修改excel
book1 = xlrd.open_workbook('stu.xls')
book2 = copy(book1)#拷贝一份原来的excel
# print(dir(book2))
sheet = book2.get_sheet(0)#获取第几个sheet页,book2现在的是xlutils里的方法,不是xlrd的
sheet.write(1,3,0)
sheet.write(1,0,'hello')
book2.save('stu.xls')
实战代码
from selenium import webdriver
from time import sleep
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait as Wait
from selenium.webdriver.common.by import By
from xlutils.copy import copy
import xlrd
url = 'http://mlldoc.meilele.com/DocLib99/Forms/AllItems.aspx'
d = webdriver.Chrome() # 创建浏览器对象
d.get(url)
d.maximize_window()
# 写入Excel表表格
def update(i, j, value):
book1 = xlrd.open_workbook('stu_1.xls') # 打开Excel表格
book2 = copy(book1) # 拷贝一份原来的excel
sheet = book2.get_sheet(0) # 获取第几个sheet页,book2现在的是xlutils里的方法,不是xlrd的
sheet.write(i, j, value) # 写入数据
book2.save('stu_1.xls') # 保存
def aaa(qqq):
for i in range(3, 33):
o = str(i)
values = []
for j in range(4, 11):
p = str(j)
# 循环获取表格数据 o表示第几行,p表示第几列
text = d.find_element_by_xpath('//*[@id="onetidDoclibViewTbl0"]/tbody/tr[' + o + ']/td[' + p + ']').text
values.append(text)
# update(qqq + i - 3, j - 4, text)
d.find_element_by_xpath('//*[@id="onetidDoclibViewTbl0"]/tbody/tr[' + o + ']/td[3]').click()
value = 'FormControl_V1_I1_S4_I3_T20'
Wait(d, 100).until(EC.presence_of_element_located((By.ID, value)))
element = d.find_element(By.ID, value)
text2 = element.get_attribute('value')
values.append(text2)
book1 = xlrd.open_workbook('stu_1.xls') # 打开Excel表格
book2 = copy(book1) # 拷贝一份原来的excel
sheet = book2.get_sheet(0) # 获取第几个sheet页,book2现在的是xlutils里的方法,不是xlrd的
row = qqq + i - 3
for col in range(0, len(values)):
# print(values[col])
sheet.write(row, col, values[col]) # 写入数据
book2.save('stu_1.xls') # 保存
d.back()
print('第' + str(qqq + i - 3) + '行爬取成功')
def xyy():
nextPage = '//*[@id="bottomPagingCellWPQ2"]/table/tbody/tr//*[@alt="下一页"]'
target = d.find_element_by_xpath(nextPage)
d.execute_script("arguments[0].scrollIntoView();", target) # 拖动到可见的元素去
target.click()
if __name__ == '__main__':
a = 0
# 控制台返回的数字/30得到页码
b = int(827 / 30)
# 从第几页开始
for i in range(1, b + 1):
xyy()
a = i
for i in range(a, 145):
aaa(i * 30)
xyy()
# d.quit()