import re
import urllib
from bs4 import BeautifulSoup
def gethtml(url):
    """Download *url* and return the raw response body.

    Args:
        url: Address to fetch; passed unchanged to ``urlopen``.

    Returns:
        The undecoded body exactly as returned by the response's ``read()``
        (a byte string).

    Raises:
        IOError: (``OSError``/``URLError`` on Python 3) if the resource
            cannot be opened. Note that Python 3's ``urlopen`` also raises
            ``HTTPError`` for HTTP error statuses.
    """
    # Resolve urlopen locally so the function works on both Python 3
    # (urllib.request) and Python 2 (urllib), whatever the file imported.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    page = urlopen(url)
    try:
        return page.read()
    finally:
        # Always release the underlying connection/file handle, even when
        # read() fails part-way through.
        page.close()
# Fetch the 58.com listing page at the URL below and dump its parsed markup.
html = gethtml("http://zjk.58.com/shouji/?PGTID=0d000000-0000-0ab0-fc5a-bd32d8a09b17&ClickID=1&qq-pf-to=pcqq.group")
# Name the parser explicitly: without it bs4 picks whichever parser happens
# to be installed (and warns), so the resulting tree can differ by machine.
soup = BeautifulSoup(html, "html.parser")
# Pretty-print the whole document so the page structure can be inspected.
print(soup.prettify())
这一小段代码输出 58 页面的 HTML 源码,方便查看。
翻阅 BeautifulSoup 文档,查找出两个方法,在标题上试了试:
# Collect every <a> tag whose CSS class is "t" (presumably the listing
# titles -- confirm against the page markup) and print each tag's text.
title = soup.find_all("a", class_="t")
for tlt in title:
    # Single-argument print(...) produces the same output under Python 2's
    # print statement and is also valid Python 3 syntax.
    print(tlt.get_text())
    # NOTE(review): the original indentation was lost; this separator print
    # is assumed to belong inside the loop (extra blank lines between items).
    print("\n")