import urllib.request
from lxml import etree
# Fetch one article page from www.jiangxi.gov.cn, extract the text found in
# the <p> elements directly under <div id="zoom">, then print the pieces
# individually and merged into a single string.
url = 'https://www.jiangxi.gov.cn/art/2024/10/5/art_393_5024212.html'
# Send a desktop-browser User-Agent string with the request.
headers = {
    'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0',
}
request = urllib.request.Request(url=url, headers=headers)
# The context manager closes the HTTP response even if reading or decoding
# raises, so the connection is not leaked.
with urllib.request.urlopen(request) as response:
    # NOTE(review): assumes the page is served as UTF-8 -- confirm.
    content = response.read().decode('utf-8')
tree = etree.HTML(content)
# text1 is a list: one entry per text node that is a direct child of a <p>
# inside div#zoom.
text1 = tree.xpath('//div[@id="zoom"]/p/text()')
# Reuse the list already extracted instead of running the same XPath twice.
text2 = ' '.join(text1)
print(text1)
# Print the first, second and third entries. Slicing (rather than indexing
# [0], [1], [2]) avoids an IndexError when the page yields fewer than three.
for paragraph in text1[:3]:
    print(paragraph)
print("\n\n\n以下是整合:\n", text2)
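# NOTE: the lines below are not Python; they appear to be web-page residue
# copied along with the snippet. Kept as comments so the file still parses.
# 把多段文字合并  ("Merge multiple paragraphs of text")
# 22万+  ("220k+")
#
# 被折叠的 条评论  ("folded comments")
# 为什么被折叠?  ("Why were they folded?")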



