#coding=utf8
'''
题目一: 写一个网页数据操作类。完成下面的功能:
提示:需要用到urllib模块
get_httpcode()获取网页的状态码,返回结果例如:200,301,404等 类型为int
get_htmlcontent() 获取网页的内容。返回类型:str
get_linknum()计算网页的链接数目。
'''
import urllib
import re
class mywebapp(object):
    """Small helper for inspecting a web page.

    Provides the three operations named in the exercise at the top of the
    file: fetch the HTTP status code, fetch the page body, and count the
    links found in the page. Every method returns its result instead of
    printing it, so callers decide what to do with the value.
    """

    # Absolute http/ftp/https URLs. All groups are non-capturing so that
    # findall() yields one whole-URL string per match rather than a tuple
    # of sub-groups.
    _URL_PATTERN = re.compile(
        r'(?:http|ftp|https)://[\w\-]+(?:\.[\w\-]+)+'
        r'(?:[\w\-.,@?^=%&:/~+#]*[\w\-@?^=%&/~+#])?',
        re.I)

    def __init__(self):
        pass

    @staticmethod
    def _check_scheme(url):
        """Raise TypeError unless *url* starts with http:// or https://."""
        # TypeError (rather than ValueError) is kept because existing
        # callers may already catch it.
        if not url.startswith(('http://', 'https://')):
            raise TypeError('Url must start with http:// or https:// .....')

    @staticmethod
    def _open(url):
        """Open *url* and return the response object."""
        # Imported here so the class works on both Python 3
        # (urllib.request.urlopen) and Python 2 (urllib.urlopen) without
        # touching the file-level imports.
        try:
            from urllib.request import urlopen
        except ImportError:
            from urllib import urlopen
        return urlopen(url)

    def _read_text(self, url):
        """Return the body served at *url* as a native ``str``."""
        response = self._open(url)
        try:
            raw = response.read()
            if isinstance(raw, str):
                # Python 2: read() already returns str.
                return raw
            # Python 3: read() returns bytes; decode with the charset the
            # response declares, falling back to UTF-8.
            get_charset = getattr(response.headers, 'get_content_charset', None)
            charset = (get_charset() if get_charset else None) or 'utf-8'
            try:
                return raw.decode(charset, 'replace')
            except LookupError:
                # The response declared a codec Python does not know.
                return raw.decode('utf-8', 'replace')
        finally:
            response.close()

    def get_httpcode(self, url):
        """Return the HTTP status code of *url* as an int (e.g. 200, 404).

        Raises:
            TypeError: if *url* does not start with http:// or https://.
        """
        self._check_scheme(url)
        try:
            response = self._open(url)
        except IOError as err:
            # On Python 3, urlopen raises HTTPError for 4xx/5xx responses;
            # the status is carried on the exception. Errors without a
            # status code (e.g. connection failures) are re-raised.
            status = getattr(err, 'code', None)
            if status is None:
                raise
            return status
        try:
            return response.getcode()
        finally:
            response.close()

    def get_htmlcontent(self, url):
        """Return the content of the page at *url* as a ``str``.

        Raises:
            TypeError: if *url* does not start with http:// or https://.
        """
        self._check_scheme(url)
        return self._read_text(url)

    def get_linknum(self, url):
        """Return how many absolute http/ftp/https URLs the page contains.

        The URL scheme of *url* itself is not checked by this method.
        """
        # The text is searched as-is: stripping spaces first would glue
        # neighbouring URLs together and under-count them.
        return len(self._URL_PATTERN.findall(self._read_text(url)))
# Demo run: call each operation on the same page and print what it returns.
myapp = mywebapp()
print(myapp.get_httpcode("http://www.baidu.com"))
print(myapp.get_htmlcontent('http://www.baidu.com'))
print(myapp.get_linknum('http://www.baidu.com'))
# Python self-study - homework 12
#
# (Source-page footer: latest recommended article published 2024-04-21 08:36:27)
