# -*- coding: utf-8 -*-
import httplib2
import re
import json
from bs4 import BeautifulSoup

h = httplib2.Http()
# List page of Weibo-verified wealth-management experts
url = "http://verified.weibo.com/fame/licaizhuanjia/?rt=0&srt=4&letter=l"
resp, content = h.request(url)

# Weibo embeds each page fragment in an STK.pageletM.view(...) script call;
# capture the JSON argument of every such call
pattern = re.compile(r'<script>STK && STK\.pageletM && STK\.pageletM\.view\((.*?)\)</script>')
result = pattern.findall(content)

# The fourth pagelet carries the expert list; its 'html' field is the markup we need
jsonResult = json.loads(result[3])
soup = BeautifulSoup(jsonResult['html'], 'html.parser')
print soup

# Grab the <div class="select_user"> blocks that wrap the expert entries
results = soup.find_all('div', 'select_user')
print results
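Once the select_user blocks are in hand, a minimal follow-up sketch for pulling each expert's display name and profile link could look like the following; the assumption that every entry wraps its name and link in an <a> tag is mine and is not confirmed by the markup above.

for block in results:
    # Assumed structure: each entry exposes its name and profile URL via an <a> tag
    for link in block.find_all('a'):
        name = link.get_text().strip()
        href = link.get('href')
        if name and href:
            print name, href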
This post introduces a way to scrape the information of Weibo-verified finance experts from a specific page with Python. It parses the HTTP response content and extracts the required data using a regular expression together with the BeautifulSoup library, with the focus on how to locate the HTML elements that contain the expert information.
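Because httplib2 plus Python 2 print statements dates the script, here is a hypothetical Python 3 sketch of the same fetch-and-parse flow using the requests library; the pagelet index [3] and the select_user class are carried over unchanged from the script above and may need adjusting if the page layout has since changed.

import re
import json
import requests
from bs4 import BeautifulSoup

url = "http://verified.weibo.com/fame/licaizhuanjia/?rt=0&srt=4&letter=l"
content = requests.get(url).text

# Same pagelet-extraction regex as the original script
pattern = re.compile(r'<script>STK && STK\.pageletM && STK\.pageletM\.view\((.*?)\)</script>')
matches = pattern.findall(content)

pagelet = json.loads(matches[3])
soup = BeautifulSoup(pagelet['html'], 'html.parser')
experts = soup.find_all('div', class_='select_user')
print(experts)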