import os
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.webdriver.common.by import By

os.chdir('C:/Users/wenwen/Desktop')
# --- Single-page scrape: fetch one forum thread, extract its title and body. ---
# NOTE: the original had a raw REPL output line pasted here with a full-width
# closing quote ('...’), which made the whole file a SyntaxError; removed.
url = 'http://www.chntsxz.cn/forum.php?mod=viewthread&tid=1425'
driver = webdriver.Chrome()
driver.get(url)
time.sleep(5)  # crude wait for the JS-rendered page; WebDriverWait would be more robust
html = driver.page_source
soup = bs(html, 'html.parser')
title = soup.title
print(title.string)  # bare expression in the original did nothing in script mode
body = soup.find(class_='t_fsz')  # Discuz post-content container — TODO confirm selector
body = body.text
# Strip layout noise and attachment boilerplate from the post text.
body = body.replace('\n', '')
body = body.replace('\xa0', '')
body = body.replace('下载次数', '')
body = body.replace('下载附件', '')
body = body.replace('上传', '')
print(body)
# The original called find_element_by_class_name (removed in Selenium 4) on an
# Ant-Design pagination class; this Discuz page likely has no such element, so
# guard the click instead of letting NoSuchElementException abort the script.
try:
    driver.find_element(By.CLASS_NAME, 'ant-pagination-next').click()
except Exception:
    pass  # no next-page control on this page — best-effort navigation only
print(soup.find_all(class_='y'))
# Thread ids to harvest from the forum.
page = [1425, 1411, 1399, 1391, 1383, 1380, 1369, 1366, 1355, 1346, 1324, 1301,
        1290, 1283, 1281, 1248, 1246, 1225, 1222, 1215, 1210, 1195, 1174, 1172,
        1141, 1129, 1114, 1102, 1093, 1089]
# Flat list of alternating entries: [title, body, title, body, ...].
paper = []


def _clean(text):
    """Strip newlines, non-breaking spaces and attachment boilerplate."""
    for junk in ('\n', '\xa0', '下载次数', '下载附件', '上传'):
        text = text.replace(junk, '')
    return text


# One browser for the whole crawl. The original spawned a fresh Chrome per
# thread inside the loop and only quit the last one, leaking ~30 processes.
driver = webdriver.Chrome()
try:
    for tid in page:
        url = 'http://www.chntsxz.cn/forum.php?mod=viewthread&tid={}'.format(tid)
        driver.get(url)
        time.sleep(5)  # wait for JS-rendered content
        soup = bs(driver.page_source, 'html.parser')
        title = soup.title.string
        print(title)
        body = _clean(soup.find(class_='t_fsz').text)
        print(body)
        paper.append(title)
        paper.append(body)
        # encoding='utf-8' explicitly: the platform default codec may not be
        # able to encode the Chinese text and would raise UnicodeEncodeError.
        with open(r'D:\特色小镇.txt', 'a', encoding='utf-8') as f:
            f.write(title)
            f.write(body)
            f.write('\n')
finally:
    driver.quit()
# Split the alternating [title, body, title, body, ...] entries in `paper`
# into parallel lists. The original hard-coded range(154), but only len(page)
# threads were scraped (paper holds 2*len(page) entries), so indexing past
# the end raised IndexError — slice by the actual length instead.
title_all = paper[0::2]
for t in title_all:
    print(t)
paper_all = paper[1::2]