# Practice exercise: scrape the Douban Top 250 movie list.
from pyquery import PyQuery as pq
import json
# Scrape the Douban Top 250 list page by page and append one JSON object per
# film (one per line) to douban.txt, echoing each record to stdout as well.

# The list is served 25 films per page via the `start` query parameter, so
# 250 films span exactly 10 pages (start=0, 25, ..., 225). Requesting an 11th
# page (start=250) would only fetch a page past the end of the list.
PAGE_SIZE = 25
PAGE_COUNT = 10

# Open the output file once for the whole run rather than once per film.
with open('douban.txt', 'a', encoding='utf-8') as f:
    for page in range(PAGE_COUNT):
        doc = pq(url='https://movie.douban.com/top250?start=%d&filter=' % (page * PAGE_SIZE))
        # Every film entry is an <li> inside the page's <ol>.
        for item in doc.find('ol').find('li').items():
            # " ".join(text.split()) collapses any run of whitespace
            # (including line breaks) into a single space.
            film = {
                'title': " ".join(item(".title").text().split()),
                'directors': " ".join(item.find('p:first-child').text().split()),
                'comment': " ".join(item(".inq").text().split()),
                'score': " ".join(item(".rating_num").text().split()),
            }
            print(film)
            # ensure_ascii=False keeps Chinese text readable in the output file.
            f.write(json.dumps(film, ensure_ascii=False) + '\n')