Python 爬虫

泰国旅游信息爬取

# _*_ coding:utf-8 _*_ 
import requests
from lxml import etree
import json

class BangkokTravel:
    """
    Scrape the Thailand travel listing page on mafengwo.cn and store each
    listed product as one JSON object per line in a local file.
    """
    def __init__(self):
        pass

    @staticmethod
    def _first(matches, default=""):
        """
        Return the first item of an XPath result list, or ``default`` when
        the list is empty (i.e. the element is missing from the listing).
        """
        return matches[0] if matches else default

    def getPage(self):
        """
        Download the listing page, extract the fields of every product,
        print them, and append each record to the output file.

        Raises:
            requests.RequestException: if the request fails, times out, or
                the server answers with an HTTP error status.
        """
        url = "https://www.mafengwo.cn/sales/0--M11045P泰国-0-0-0-0-0.html"
        headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0"}
        # Without a timeout a stalled connection would block the script forever.
        response = requests.get(url, headers=headers, timeout=10)
        # Fail loudly on 4xx/5xx instead of silently parsing an error page.
        response.raise_for_status()
        response.encoding = "utf-8"
        tree = etree.HTML(response.text)
        first = self._first

        # Each product on the page is one <a class="item clearfix"> element.
        for node in tree.xpath('//a[@class="item clearfix"]'):
            # Link to the product's detail page.
            detail_url = first(node.xpath('./@href'))

            # Title; strip() removes the surrounding indentation/newlines
            # regardless of how many spaces the markup happens to use.
            tittle = first(node.xpath('.//h3/text()')).strip()

            # Price.
            price = first(node.xpath('.//span[@class="price"]/strong/text()'))

            # Image URL.
            image_url = first(node.xpath('.//img/@src'))

            # "Already sold" text.
            yet_sale = first(node.xpath('.//div[@class="info"]/p[1]/text()'))

            # Departure schedule text (named to avoid shadowing the
            # stdlib ``time`` module).
            departure = first(node.xpath('.//span[@class="t"]/text()'))

            record = (detail_url, tittle, price, image_url, yet_sale, departure)
            for value in record:
                print(value)
            self.write_Content(*record)

    def write_Content(self,detail_url,tittle,price,image_url,yet_sale,time):
        """
        Append one scraped record to ``泰国.json`` in the current working
        directory as a single JSON object followed by a newline.

        Args:
            detail_url: link to the product detail page.
            tittle: product title.
            price: product price text.
            image_url: product image URL.
            yet_sale: "already sold" text.
            time: departure schedule text.
        """
        items = {
            "详情连接":detail_url,
            "标题":tittle,
            "价格":price,
            "图片链接":image_url,
            "已经售出":yet_sale,
            "启程日期":time
        }

        # The ``with`` block closes the file; ensure_ascii=False keeps the
        # Chinese text readable in the output instead of \uXXXX escapes.
        with open("泰国.json","a",encoding="utf8") as f:
            f.write(json.dumps(items,ensure_ascii=False)+"\n")
        

        

if __name__ == "__main__":
    # Run the scraper once when this file is executed as a script.
    BangkokTravel().getPage()

在这里插入图片描述

在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值