项目4:新闻聚合-Python基础教程
from nntplib import NNTP, decode_header
from urllib.request import urlopen
import textwrap
import re
class Newsagent:
    """
    Distribute news items from registered sources to registered destinations.

    A source is any object with a ``get_items()`` method returning an iterable
    of news items; a destination is any object with a
    ``receive_items(items)`` method.
    """

    def __init__(self):
        self.sources = []
        self.destinations = []

    def add_source(self, source):
        """Register *source* so that distribute() pulls items from it."""
        self.sources.append(source)

    def add_destination(self, dest):
        """Register *dest* so that distribute() delivers items to it."""
        self.destinations.append(dest)

    # Original camelCase spelling, kept so existing callers continue to work.
    addDestination = add_destination

    def distribute(self):
        """
        Fetch all items from every source and pass them to every destination.

        The items are collected into a single list first, so each destination
        receives the same complete list.
        """
        items = []
        for source in self.sources:
            items.extend(source.get_items())
        for dest in self.destinations:
            dest.receive_items(items)
class NewsItem:
    """
    A simple news item consisting of a title and a body text.
    """

    def __init__(self, title, body):
        self.title = title
        self.body = body
class NNTPsource:
    """
    A news source that retrieves news items from an NNTP newsgroup.
    """

    def __init__(self, servername, group, howmany):
        self.servername = servername
        self.group = group
        self.howmany = howmany

    def get_items(self):
        """
        Yield a NewsItem for each of the last ``howmany`` articles in the group.

        The connection is opened when iteration starts and is closed when the
        generator finishes, is closed early, or fails with an exception.
        """
        # The context manager sends QUIT and closes the connection on exit,
        # even if the consumer stops iterating part-way through.
        with NNTP(self.servername) as server:
            _resp, _count, _first, last, _name = server.group(self.group)
            start = last - self.howmany + 1
            _resp, overviews = server.over((start, last))
            for article_id, over in overviews:
                title = decode_header(over['subject'])
                _resp, info = server.body(article_id)
                body = '\n'.join(line.decode('latin') for line in info.lines) + '\n\n'
                yield NewsItem(title, body)

    # Original (misspelled) method name, kept so existing callers continue to
    # work; Newsagent.distribute() calls get_items().
    get_item = get_items
class SimpleWebSource:
    """
    A news source that extracts news items from a web page using regular
    expressions.

    ``title_pattern`` and ``body_pattern`` are regular expressions applied with
    ``findall`` to the decoded page text; the n-th title match is paired with
    the n-th body match.
    """

    def __init__(self, url, title_pattern, body_pattern, encoding='utf-8'):
        self.url = url
        self.title_pattern = re.compile(title_pattern)
        self.body_pattern = re.compile(body_pattern)
        self.encoding = encoding

    def get_items(self):
        """
        Download the page and yield one NewsItem per (title, body) match pair.

        Body text is re-wrapped with textwrap.fill() and terminated by a
        newline. Errors raised by urlopen() (for example URLError when the
        host cannot be reached) propagate to the caller.
        """
        # Close the HTTP response as soon as the page has been read.
        with urlopen(self.url) as response:
            text = response.read().decode(self.encoding)
        titles = self.title_pattern.findall(text)
        bodies = self.body_pattern.findall(text)
        for title, body in zip(titles, bodies):
            yield NewsItem(title, textwrap.fill(body) + '\n')
class PlainDestination:
    """
    A news destination that prints all news items as plain text.
    """

    def receive_items(self, items):
        """Print each item's title, a dashed underline of equal length, and its body."""
        for item in items:
            print(item.title)
            print('-' * len(item.title))
            print(item.body)
class HTMLDestination:
    """
    A news destination that writes all news items to a single HTML file.

    The page contains a linked table of contents followed by one section per
    item.
    """

    def __init__(self, filename):
        self.filename = filename

    def receive_items(self, items):
        """
        Write *items* to ``self.filename`` as an HTML page, replacing the file.

        NOTE(review): titles and bodies are inserted verbatim, without HTML
        escaping. Whether escaping is appropriate depends on whether the
        sources deliver plain text or HTML fragments -- confirm before
        changing.
        """
        # The items are traversed twice (index, then sections); materialize
        # them so a one-shot iterator does not leave the second pass empty.
        items = list(items)
        with open(self.filename, 'w') as out:
            print("""
<html>
<head>
<title>Today's News</title>
</head>
<body>
<h1>Today's News</h1>
""", file=out)
            print('<ul>', file=out)
            for number, item in enumerate(items, start=1):
                print('<li><a href="#{}">{}</a></li>'.format(number, item.title), file=out)
            print('</ul>', file=out)
            for number, item in enumerate(items, start=1):
                print('<h2><a name="{}">{}</a></h2>'.format(number, item.title), file=out)
                print('<pre>{}</pre>'.format(item.body), file=out)
            print("""
</body>
</html>
""", file=out)
def runDefaultSetup():
    """
    Run the agent with a default configuration of sources and destinations.

    Sources: a Reuters web page scraped with regular expressions, and the last
    10 articles of comp.lang.python.announce on an NNTP server.
    Destinations: plain text on standard output and the file ``news.html``.

    Both sources need network access; an unreachable host makes distribute()
    fail with the error raised by the source (for example urllib's URLError).
    """
    agent = Newsagent()

    # A SimpleWebSource that retrieves news from Reuters.
    reuters_url = 'http://www.reuters.com/news/world'
    # NOTE(review): the spaces around '=' in 'href = ' look like a paste
    # artifact and would not match typical markup -- confirm the pattern.
    reuters_title = r'<h2><a href = "[^"]*"\s*>(.*?)</a>'
    reuters_body = r'</h2><p>(.*?)</p>'
    reuters = SimpleWebSource(reuters_url, reuters_title, reuters_body)
    agent.add_source(reuters)

    # An NNTPsource that retrieves news from comp.lang.python.announce.
    clpa_server = 'news.ntnu.no'
    clpa_group = 'comp.lang.python.announce'
    clpa_howmany = 10
    clpa = NNTPsource(clpa_server, clpa_group, clpa_howmany)
    agent.add_source(clpa)

    # Plain-text and HTML destinations.
    agent.addDestination(PlainDestination())
    agent.addDestination(HTMLDestination('news.html'))

    # Fetch from every source and deliver to every destination.
    agent.distribute()
# Run the default setup only when this file is executed as a script.
if __name__ == '__main__':
    runDefaultSetup()
出现报错:
/usr/local/bin/python3.6 /Users/ben.fei/Desktop/python基础教程/23zhang/newagent2.py
Traceback (most recent call last):
File “/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py”, line 1318, in do_open
encode_chunked=req.has_header(‘Transfer-encoding’))
File “/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py”, line 1239, in request
self._send_request(method, url, body, headers, encode_chunked)
File “/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py”, line 1285, in _send_request
self.endheaders(body, encode_chunked=encode_chunked)
File “/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py”, line 1234, in endheaders
self._send_output(message_body, encode_chunked=encode_chunked)
File “/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py”, line 1026, in _send_output
self.send(msg)
File “/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py”, line 964, in send
self.connect()
File “/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py”, line 936, in connect
(self.host,self.port), self.timeout, self.source_address)
File “/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/socket.py”, line 724, in create_connection
raise err
File “/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/socket.py”, line 713, in create_connection
sock.connect(sa)
TimeoutError: [Errno 60] Operation timed out
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/Users/ben.fei/Desktop/python基础教程/23zhang/newagent2.py", line 166, in <module>
if __name__ == '__main__': runDefaultSetup()
File “/Users/ben.fei/Desktop/python基础教程/23zhang/newagent2.py”, line 163, in runDefaultSetup
agent.distribute()
File “/Users/ben.fei/Desktop/python基础教程/23zhang/newagent2.py”, line 28, in distribute
items.extend(source.get_items())
File “/Users/ben.fei/Desktop/python基础教程/23zhang/newagent2.py”, line 81, in get_items
text = urlopen(self.url).read().decode(self.encoding)
File “/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py”, line 223, in urlopen
return opener.open(url, data, timeout)
File “/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py”, line 526, in open
response = self._open(req, data)
File “/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py”, line 544, in _open
‘_open’, req)
File “/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py”, line 504, in _call_chain
result = func(*args)
File “/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py”, line 1346, in http_open
return self.do_open(http.client.HTTPConnection, req)
File “/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py”, line 1320, in do_open
raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 60] Operation timed out>
Process finished with exit code 1
请问这是为什么呢？