目录
一、独立标签采集
import requests
from bs4 import BeautifulSoup
import re
import bs4
# HTTP headers passed to requests.get() by getHTMLText.
# NOTE(review): the Mozilla/5.0 User-Agent presumably keeps the site from
# rejecting the default python-requests client -- confirm.
kv = {'User-Agent': 'Mozilla/5.0'}
def getHTMLText(url):
    """Fetch ``url`` and return the response body as text.

    The request is sent with the module-level ``kv`` headers and a
    30-second timeout. Before the text is read, the response encoding is
    replaced with ``r.apparent_encoding``.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text, or an empty string when the request fails
        or the server answers with an error status.
    """
    try:
        r = requests.get(url, headers=kv, timeout=30)
        r.raise_for_status()  # raises HTTPError on a 4xx/5xx status
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best-effort fetch: connection, timeout and HTTP-status failures
        # all collapse to "". Unrelated errors (KeyboardInterrupt, bugs)
        # are deliberately no longer swallowed.
        return ""
def fillList(ulist, html):
    """Print the ``<p>`` tags found in each row of the first ``<tbody>``.

    Args:
        ulist: Accepted for interface compatibility; this function neither
            reads nor modifies it.
        html: HTML markup to parse. May be empty.

    Returns:
        None. One list of ``<p>`` tags is printed per child tag of the
        ``<tbody>``. Nothing is printed when the markup has no ``<tbody>``.
    """
    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find('tbody')
    if tbody is None:
        # Empty or unexpected markup (getHTMLText returns "" on failure):
        # there is nothing to walk, so avoid AttributeError on None.
        return
    for tr in tbody.children:
        # Children may include non-Tag nodes (e.g. text between rows).
        if isinstance(tr, bs4.element.Tag):
            tds = tr('p')  # calling a Tag is shorthand for find_all('p')
            print(tds)
def main():
    """Download the fixed phb123.com page and hand its HTML to ``fillList``."""
    collected = []
    page = getHTMLText("https://www.phb123.com/city/renkou/rk.html")
    fillList(collected, page)
# Script entry point: runs the scrape as soon as this file is executed.
main()
输出:
[<p>中国</