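# Scrape teacher profiles from the 兄弟连 (itxdl.cn) site. Portrait images are
# saved into a ./teacher folder, so make sure that folder exists.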
import json
import os
from urllib import request
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Listing page that holds one "div.php_jiangshi_liebiao" card per teacher.
base_url = 'http://www.itxdl.cn/activity/teacher/teacher_lieibiao/'
# Directory that receives the downloaded portrait images.
image_dir = './teacher'

# A timeout keeps the script from hanging on a stalled connection, and
# raise_for_status() stops us from parsing an HTTP error page as if it were
# the teacher listing.
response = requests.get(base_url, timeout=30)
response.raise_for_status()
# Force gb2312 decoding instead of relying on the encoding requests guesses.
response.encoding = 'gb2312'
html = BeautifulSoup(response.text, 'lxml')

# One element per teacher card.
teacher_list = html.select('div.php_jiangshi_liebiao')

# Create the image folder up front so urlretrieve() has somewhere to write.
os.makedirs(image_dir, exist_ok=True)

teachers = []
for teacher in teacher_list:
    name_tag = teacher.select_one('h1')
    industry_tag = teacher.select_one('p')
    img_tag = teacher.select_one('div.php_jiangshi_img img')
    # Skip cards that lack any of the expected nodes rather than aborting
    # the whole run with an IndexError/KeyError.
    if (
        name_tag is None
        or industry_tag is None
        or img_tag is None
        or not img_tag.get('src')
    ):
        continue

    name = name_tag.text.strip()
    industry = industry_tag.text
    # Resolve the src against the page URL so relative image paths can be
    # downloaded; absolute URLs pass through unchanged.
    img = urljoin(base_url, img_tag['src'])
    # The last path segment of the URL is used as the local file name.
    fname = img.split('/')[-1]
    request.urlretrieve(img, os.path.join(image_dir, fname))

    teachers.append({
        'name': name,
        'industry': industry,
        'img': img,
    })

# Write the JSON only after scraping has finished, and exactly once, so a
# failure part-way through never leaves a truncated or malformed teacher.json.
with open('teacher.json', 'w', encoding='utf-8') as f:
    json.dump(teachers, f, indent=4, ensure_ascii=False)
# Besides teacher.json, the teachers' portrait images are also generated
# (downloaded into the ./teacher folder).
# Learn Python with 兄弟连 (itxdl.cn).
# Python study and resource-sharing QQ group: 563626388