# coding=UTF-8
from urllib2 import urlopen, HTTPError
from bs4 import BeautifulSoup
def getName(url):
    """Fetch ``url`` and print the text of every ``<span class="green">`` tag.

    Args:
        url: Address of the page to download and parse.

    Returns:
        None when the page was fetched and processed. If ``urlopen`` raises
        HTTPError, or an AttributeError occurs while parsing/printing, the
        exception object is returned (not raised) to the caller.
    """
    try:
        html = urlopen(url)
    except HTTPError as e:
        return e
    try:
        bs = BeautifulSoup(html.read(), 'html.parser')
        # get_text() returns the text content inside the tag.
        for name in bs.findAll('span', {'class': 'green'}):
            print(name.get_text())
    except AttributeError as e:
        return e
    finally:
        # Release the connection even when parsing or printing fails.
        html.close()
if __name__ == '__main__':
    # Script entry point: run the span-text scraper against the placeholder
    # URL and keep whatever getName() hands back.
    title = getName(url='网页URL')
# Get the specified child tags (print the contents of every child tag of the table)
def getChildren(url='网页URL'):
    """Print every direct child of the ``<table id="list">`` element.

    Args:
        url: Page to download. Defaults to the placeholder address the
            function originally had hard-coded.

    Returns:
        None. Nothing is printed when the page contains no matching table.
    """
    html = urlopen(url)
    try:
        # BeautifulSoup reads the file-like response while constructing the tree.
        bs = BeautifulSoup(html, 'html.parser')
    finally:
        # Release the connection whether or not parsing succeeded.
        html.close()

    table = bs.find('table', {'id': 'list'})
    if table is None:
        # find() returns None when nothing matches; accessing .children on it
        # would raise AttributeError.
        return
    for child in table.children:
        print(child)
# Get sibling tags (print the contents of all tags in the table except the first-row header)
def getNext():
html = urlopen('网页URL')
bs = BeautifulSoup(htm