scrapy框架学习

最新推荐文章于 2025-12-01 22:36:13 发布

原创 · 最新推荐文章于 2025-12-01 22:36:13 发布 · 133 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#scrapy #学习 #python

import scrapy


class ItcastSpider(scrapy.Spider):
    """Spider that scrapes the teacher listing page configured in ``start_urls``.

    Every ``<div class="li_txt">`` element on the page is turned into a dict
    with the keys ``name``, ``title`` and ``desc``.
    """

    name = "itcast"
    # The start URL is served from itheima.com, so that domain has to be
    # allowed too; otherwise follow-up requests to it are filtered as offsite.
    # "itcast.cn" is kept so existing behaviour for that domain is unchanged.
    allowed_domains = ["itcast.cn", "itheima.com"]
    start_urls = ["https://www.itheima.com/teacher.html#ajavaee?cz-pc-dh"]

    def parse(self, response):
        """Yield one dict per ``li_txt`` card found in ``response``.

        Args:
            response: The downloaded page for one of ``start_urls``.

        Yields:
            dict: ``{'name': ..., 'title': ..., 'desc': ...}`` taken from the
            first text node of the card's ``<h3>``, ``<h4>`` and ``<p>``
            children. A field is ``None`` when the card lacks that element.
        """
        # Match cards by class instead of an absolute /html/body/... path so
        # the selector does not depend on the exact page layout.
        for node in response.xpath('//div[@class="li_txt"]'):
            # .get() returns None for a missing element instead of raising
            # IndexError, so one incomplete card no longer aborts the whole
            # page and loses the remaining items.
            yield {
                'name': node.xpath('./h3/text()').get(),
                'title': node.xpath('./h4/text()').get(),
                'desc': node.xpath('./p/text()').get(),
            }