# coding: utf-8
'''
Created on 2014-7-24
@author: Administrator
'''
import urllib2
from urllib2 import Request
import re
import sys
def p(f):
    """Call ``f`` with no arguments and print its qualified name and result.

    Args:
        f: A zero-argument callable that exposes ``__module__`` and
            ``__name__``.

    The printed line has the form ``module.name(): result``. Nothing is
    returned.
    """
    # A single parenthesised argument keeps this line valid both as a
    # Python 2 print statement and as a Python 3 print() call.
    print('%s.%s(): %s' % (f.__module__, f.__name__, f()))
# Report the default string encoding used by the interpreter.
p(sys.getdefaultencoding)

# Fetch the article page; the request carries an explicit User-Agent header.
req = Request('http://www.qiushibaike.com/article/62599902?list=hot&s=4689411')
req.add_header('User-Agent', 'aa')
response = urllib2.urlopen(req)
try:
    html = response.read()
finally:
    # Release the connection even if reading the body fails.
    response.close()

# Each match is a 2-tuple: the value of the title attribute and the text
# between the tag and the next closing </div>. re.S lets '.' span newlines.
myItems = re.findall(
    r'<div.*?class="content".*?title="(.*?)">(.*?)</div>', html, re.S)

# Printing the list object itself renders every string through repr(), which
# shows non-ASCII text as escape sequences instead of readable characters.
# Format the tuples by hand so the raw text is written out as-is.
print('[%s]' % ', '.join('(%s)' % ', '.join(item) for item in myItems))

# myItems is a list; every entry is a tuple of the two captured groups.
# Print each captured field on its own line.
for item in myItems:
    for field in item:
        print(field)
直接打印整个列表(例如 print myItems)时,中文会显示成类似乱码的转义序列;而逐个迭代、单独打印每个元素时,中文却能正常显示。乍看很奇怪,其实是因为 Python 2 打印列表时会对其中的每个字符串调用 repr(),而单独打印字符串时使用的是 str()。