
import scrapy
class ItcastSpider(scrapy.Spider):
    """Spider that scrapes the ``div.li_txt`` entries from the start page.

    Each matched ``<div class="li_txt">`` is turned into one plain ``dict``
    item with the keys ``name``, ``title`` and ``desc``.
    """

    name = "itcast"
    # The start URL is served from itheima.com, so that domain is listed
    # alongside the original itcast.cn entry; otherwise any follow-up request
    # to the start page's own site would be treated as off-site.
    allowed_domains = ["itcast.cn", "itheima.com"]
    start_urls = ["https://www.itheima.com/teacher.html#ajavaee?cz-pc-dh"]

    def parse(self, response):
        """Yield one item per ``div.li_txt`` element found in *response*.

        Args:
            response: The downloaded page for one of ``start_urls``.

        Yields:
            dict: ``{"name": ..., "title": ..., "desc": ...}`` where the
            values are the first text node of the element's ``<h3>``,
            ``<h4>`` and ``<p>`` children respectively. A value is ``None``
            when the corresponding child (or its text) is absent.
        """
        for node in response.xpath('//div[@class="li_txt"]'):
            # ``.get()`` returns None instead of raising IndexError when the
            # selector matches nothing, so one malformed entry cannot abort
            # the whole callback and drop the remaining items.
            yield {
                'name': node.xpath('./h3/text()').get(),
                'title': node.xpath('./h4/text()').get(),
                'desc': node.xpath('./p/text()').get(),
            }