项目4:新闻聚合-Python基础教程

本文介绍了一个基于Python的新闻聚合器的设计与实现过程,该聚合器能够从不同的新闻源如网页和NNTP新闻组抓取新闻,通过定义的新闻源和目的地类,将抓取的新闻以纯文本和HTML格式展示。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

项目4:新闻聚合-Python基础教程

from nntplib import NNTP, decode_header
from urllib.request import urlopen
import textwrap
import re
class Newsagent:
    """
    Distribute news items from news sources to news destinations.

    A source is any object with a ``get_items()`` method that returns an
    iterable of news items; a destination is any object with a
    ``receive_items(items)`` method.
    """

    def __init__(self):
        self.sources = []
        self.destinations = []

    def add_source(self, source):
        """Register *source* as a provider of news items."""
        self.sources.append(source)

    def addDestination(self, dest):
        """Register *dest* as a receiver of news items."""
        self.destinations.append(dest)

    # snake_case spelling, consistent with add_source(); the camelCase
    # name stays available for existing callers.
    add_destination = addDestination

    def distribute(self):
        """
        Retrieve all news items from all sources and hand the complete
        list to every destination.
        """
        items = []
        for source in self.sources:
            items.extend(source.get_items())
        for dest in self.destinations:
            dest.receive_items(items)

class NewsItem:
    """A simple news item consisting of a title and a body text."""

    def __init__(self, title, body):
        self.title = title
        self.body = body

    def __repr__(self):
        return '{}(title={!r}, body={!r})'.format(
            type(self).__name__, self.title, self.body)

class NNTPsource:
    """
    A news source that retrieves news items from an NNTP newsgroup.
    """

    def __init__(self, servername, group, howmany):
        """
        Store the connection settings; no connection is opened here.

        servername -- host name of the NNTP server
        group      -- name of the newsgroup to read
        howmany    -- number of most recent articles to fetch
        """
        self.servername = servername
        self.group = group
        self.howmany = howmany

    def get_items(self):
        """
        Yield a NewsItem for each of the last ``howmany`` articles.

        The server connection is opened when iteration starts and is
        closed when the generator finishes, fails, or is closed early.
        """
        server = NNTP(self.servername)
        try:
            _resp, _count, first, last, _name = server.group(self.group)

            # Use the instance attribute (a bare ``howmany`` is undefined
            # here) and never ask for articles below the group's first one.
            start = max(first, last - self.howmany + 1)

            _resp, overviews = server.over((start, last))

            for article_id, over in overviews:
                title = decode_header(over['subject'])
                _resp, info = server.body(article_id)
                body = '\n'.join(
                    line.decode('latin') for line in info.lines) + '\n\n'
                yield NewsItem(title, body)
        finally:
            server.quit()

    # Former (misspelled) method name, kept so existing callers still work.
    # Newsagent.distribute() calls get_items().
    get_item = get_items

class SimpleWebSource:
    """
    A news source that extracts news items from a web page using
    regular expressions.
    """

    def __init__(self, url, title_pattern, body_pattern, encoding='utf-8',
                 timeout=None):
        """
        url           -- address of the page to download
        title_pattern -- regex whose matches are the item titles
        body_pattern  -- regex whose matches are the item bodies
        encoding      -- character encoding used to decode the page
        timeout       -- optional timeout in seconds for the download;
                         None leaves urlopen() at its default behaviour
        """
        self.url = url
        self.title_pattern = re.compile(title_pattern)
        self.body_pattern = re.compile(body_pattern)
        self.encoding = encoding
        self.timeout = timeout

    def get_items(self):
        """
        Download the page and yield one NewsItem per (title, body) pair.

        Titles and bodies are paired in order of appearance; surplus
        matches of either pattern are dropped by zip().
        """
        if self.timeout is None:
            response = urlopen(self.url)
        else:
            response = urlopen(self.url, timeout=self.timeout)
        # Close the response as soon as the page has been read.
        with response:
            text = response.read().decode(self.encoding)
        titles = self.title_pattern.findall(text)
        bodies = self.body_pattern.findall(text)
        for title, body in zip(titles, bodies):
            yield NewsItem(title, textwrap.fill(body) + '\n')

class PlainDestination:
    """
    A news destination that prints all news items as plain text.
    """

    def receive_items(self, items):
        """Print each item's title, a dashed underline, and its body."""
        for item in items:
            print(item.title)
            # Underline matches the title's length.
            print('-' * len(item.title))
            print(item.body)

class HTMLDestination:
    """
    A news destination that writes all news items to an HTML file.

    The page consists of a linked list of titles followed by one
    section (heading plus preformatted body) per item.
    """

    # Page prologue/epilogue, written verbatim around the generated parts.
    _HEADER = (
        "\n"
        "    <html>\n"
        "        <head>\n"
        "            <title>Today's News</title>\n"
        "        </head>\n"
        "        <body>\n"
        "        <h1>Today's News</h1>\n"
        "    "
    )
    _FOOTER = (
        "\n"
        "    </body>\n"
        "    </html>\n"
        "    "
    )

    def __init__(self, filename):
        """filename -- path of the HTML file to (over)write."""
        self.filename = filename

    def receive_items(self, items):
        """
        Write *items* to ``self.filename`` as an HTML page.

        NOTE: titles and bodies are inserted into the markup as-is,
        without HTML escaping.
        """
        # The items are walked twice (index, then sections), so a
        # one-shot iterable has to be materialised first.
        items = list(items)

        # The context manager guarantees the file is flushed and closed.
        with open(self.filename, 'w') as out:
            print(self._HEADER, file=out)

            # Index: one link per item, pointing at the anchor below.
            print('<ul>', file=out)
            for number, item in enumerate(items, start=1):
                print('<li><a href="#{}">{}</a></li>'.format(
                    number, item.title), file=out)
            print('</ul>', file=out)

            # Sections: anchor numbers match the index above.
            for number, item in enumerate(items, start=1):
                print('<h2><a name="{}">{}</a></h2>'.format(
                    number, item.title), file=out)
                print('<pre>{}</pre>'.format(item.body), file=out)

            print(self._FOOTER, file=out)

def runDefaultSetup():
    """
    Run the default configuration of news sources and destinations.

    Registers one web source and one NNTP source, a plain-text and an
    HTML destination (written to 'news.html'), then distributes the news.
    Both sources are fetched over the network.
    """
    agent = Newsagent()

    # A SimpleWebSource that retrieves news from the Reuters world page.
    reuters_url = 'http://www.reuters.com/news/world'
    reuters_title = r'<h2><a href = "[^"]*"\s*>(.*?)</a>'
    reuters_body = r'</h2><p>(.*?)</p>'
    reuters = SimpleWebSource(reuters_url, reuters_title, reuters_body)

    agent.add_source(reuters)

    # An NNTPsource that retrieves the latest announcements from
    # comp.lang.python.announce.
    clpa_server = 'news.ntnu.no'
    clpa_group = 'comp.lang.python.announce'
    clpa_howmany = 10
    clpa = NNTPsource(clpa_server, clpa_group, clpa_howmany)

    agent.add_source(clpa)

    # Destinations: console output and an HTML file.
    agent.addDestination(PlainDestination())
    agent.addDestination(HTMLDestination('news.html'))

    # Fetch from every source and deliver to every destination.
    agent.distribute()

# Run the default setup only when executed as a script, not on import.
if __name__ == '__main__':
    runDefaultSetup()

出现报错:
/usr/local/bin/python3.6 /Users/ben.fei/Desktop/python基础教程/23zhang/newagent2.py
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 1318, in do_open
    encode_chunked=req.has_header('Transfer-encoding'))
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 1239, in request
    self._send_request(method, url, body, headers, encode_chunked)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 1285, in _send_request
    self.endheaders(body, encode_chunked=encode_chunked)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 1234, in endheaders
    self._send_output(message_body, encode_chunked=encode_chunked)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 1026, in _send_output
    self.send(msg)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 964, in send
    self.connect()
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/http/client.py", line 936, in connect
    (self.host,self.port), self.timeout, self.source_address)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/socket.py", line 724, in create_connection
    raise err
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/socket.py", line 713, in create_connection
    sock.connect(sa)
TimeoutError: [Errno 60] Operation timed out

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/ben.fei/Desktop/python基础教程/23zhang/newagent2.py", line 166, in <module>
    if __name__ == '__main__': runDefaultSetup()
  File "/Users/ben.fei/Desktop/python基础教程/23zhang/newagent2.py", line 163, in runDefaultSetup
    agent.distribute()
  File "/Users/ben.fei/Desktop/python基础教程/23zhang/newagent2.py", line 28, in distribute
    items.extend(source.get_items())
  File "/Users/ben.fei/Desktop/python基础教程/23zhang/newagent2.py", line 81, in get_items
    text = urlopen(self.url).read().decode(self.encoding)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 223, in urlopen
    return opener.open(url, data, timeout)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 526, in open
    response = self._open(req, data)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 544, in _open
    '_open', req)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 504, in _call_chain
    result = func(*args)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 1346, in http_open
    return self.do_open(http.client.HTTPConnection, req)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/urllib/request.py", line 1320, in do_open
    raise URLError(err)
urllib.error.URLError: <urlopen error [Errno 60] Operation timed out>

Process finished with exit code 1
请问这是为什么呢?

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值