抓取排行榜页面中书名所在的标签

import requests
from bs4 import BeautifulSoup
import bs4
def main(page):
    """Fetch one ranking page and append its book-name tags to the dump file.

    Downloads page ``page`` of the bang.dangdang.com "fivestars / recent30"
    listing (see the URL below), collects every ``<div>`` whose CSS class is
    ``name``, and appends the serialized markup of each one, followed by a
    newline, to ``../file/out.txt``.

    Args:
        page: Page number appended to the listing URL (converted with
            ``str``).

    Raises:
        requests.RequestException: If the download fails or times out.
        OSError: If the output file cannot be opened or written.
    """
    url = 'http://bang.dangdang.com/books/fivestars/01.00.00.00.00.00-recent30-0-0-1-' + str(page)
    # A timeout keeps an unresponsive server from hanging the crawl forever.
    response = requests.get(url, timeout=30)
    # NOTE(review): this only influences ``response.text``; the raw bytes in
    # ``response.content`` are what get parsed below, so BeautifulSoup does
    # its own encoding detection. Kept as in the original.
    response.encoding = 'gb2312'
    soup = BeautifulSoup(response.content, 'lxml')
    name_divs = soup.find_all('div', 'name')
    # The context manager closes the file even if a write raises.
    with open('../file/out.txt', mode='a+', encoding='utf-8') as out_file:
        out_file.writelines(str(tag) + '\n' for tag in name_divs)
if __name__ == '__main__':
    # Crawl listing pages 1 through 25, one call to main() per page.
    for page_number in range(1, 26):
        main(page_number)
进一步从已保存的标签中提取书名文本
import requests
from bs4 import BeautifulSoup
import bs4
def main():
    """Extract the link titles from the saved tag dump.

    Reads ``../file/out.txt`` line by line, parses each line as HTML, and
    writes the ``title`` attribute of the first ``<a>`` inside the first
    ``<div>`` to ``../file/out1.txt``, one title per line. The output file
    is truncated on every run.

    Lines that do not contain such an anchor (blank lines, or fragments of
    a tag whose markup spans several lines) are skipped instead of aborting
    the whole run.

    Raises:
        OSError: If either file cannot be opened, read, or written.
    """
    # Both handles are managed by ``with`` so they are closed even when
    # parsing or writing raises part-way through.
    with open('../file/out1.txt', mode='w+', encoding='utf-8') as out_file, \
            open('../file/out.txt', mode='r', encoding='utf-8') as in_file:
        for line in in_file:
            soup = BeautifulSoup(line, 'lxml')
            div = soup.div
            anchor = div.a if div is not None else None
            title = anchor.get('title') if anchor is not None else None
            if title is None:
                # No <div><a title=...> on this line; nothing to extract.
                continue
            out_file.write(str(title) + '\n')
if __name__ == '__main__':
    # Script entry point: run the extraction only when executed directly.
    main()