from bs4 import BeautifulSoup
from w3lib.html import remove_comments # 删除页面中的注释
from w3lib.html import replace_entities
# Read the HTML document from disk. The encoding is passed explicitly so that
# decoding does not depend on the platform's default locale encoding.
# NOTE(review): UTF-8 is assumed here; change it if the file is saved in
# another encoding (e.g. GBK).
with open("...html", "r", encoding="utf-8") as f:
    html = f.read()

# Strip HTML comments from the markup, then parse it with the built-in
# "html.parser" backend.
soup = BeautifulSoup(remove_comments(html), "html.parser")

# Keep the extracted text in a variable; as a bare expression the value was
# discarded when this ran as a script.
text = soup.text  # all text content of the document
使用 bs4 解析 HTML 文件中的文本
最新推荐文章于 2023-12-25 23:40:23 发布