Div和Span.htm

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">


<html xmlns="http://www.w3.org/1999/xhtml">
<head>
    <title></title>


    <link href="StyleSheet.css" rel="stylesheet" type="text/css" />
    <style type="text/css">
        input.cls
        {
            background-color: Silver;
            width: auto;
            height: auto;
        }
        div.cls
        {
            background-color: Yellow;
            width: 500px;
            height: 200px;
        }
    </style>


</head>
<body>
    <input type="button" name="button_1" value="我是按钮"  class="cls"/>
    <input type="text" name="text_1" value="我是文本框"  class="cls" />
    <br />
     <input type="button" name="button_1" value="我是按钮"  class="cls"/>
    <input type="text" name="text_1" value="我是文本框"  class="cls" />
    <br />
    
    <div class="cls">
        I am a div!!!
    </div>


    <div> 
        <p>
            <span>I am a span</span>
        </p>  
    </div>


</body>
</html>
import time
import random

import requests
import parsel

visited_urls = set()  # links that have already been crawled
page = 1
while True:  # crawl the list pages one by one
    url = f'https://travel.qunar.com/travelbook/list.htm?page={page}&order=hot_heat'
    html_data = requests.get(url).text
    selector = parsel.Selector(html_data)
    lis = selector.css('.list_item h2 a::attr(href)').getall()
    # walk every link on the current list page
    for li in lis:
        detail_url = 'https://travel.qunar.com' + li
        if detail_url in visited_urls:  # skip links that were already visited
            continue
        visited_urls.add(detail_url)  # remember this link
        detail_html = requests.get(detail_url).text
        time.sleep(random.randint(3, 5))
        detail_selector = parsel.Selector(detail_html)
        title = detail_selector.css('.b_crumb_cont *:nth-child(3)::text').get()
        comment = detail_selector.css('.title.white::text').get()
        view_count = detail_selector.css('.view_count::text').get()
        date = detail_selector.css('#js_mainleft > div.b_foreword > ul > li.f_item.when > p > span.data::text').get()
        days = detail_selector.css('#js_mainleft > div.b_foreword > ul > li.f_item.howlong > p > span.data::text').get()
        money = detail_selector.css('#js_mainleft > div.b_foreword > ul > li.f_item.howmuch > p > span.data::text').get()
        character = detail_selector.css('#js_mainleft > div.b_foreword > ul > li.f_item.who > p > span.data::text').get()
        play_list = detail_selector.css('#js_mainleft > div.b_foreword > ul > li.f_item.how > p > span.data > span::text').getall()
        for play in play_list:
            print(play)
        print(title, comment, view_count, date, days, money, character, play_list, detail_url)
        time.sleep(random.randint(3, 5))
    # stop when the page is empty or after 20 pages
    if len(lis) == 0 or page >= 20:
        break
    page += 1

How can this code save all of the scraped data to a CSV file?
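One way to do that, as a minimal sketch: collect the fields of each detail page into a dict and write everything out with Python's built-in csv module once the crawl finishes. The file name travel.csv, the FIELDS list, and the save_rows helper below are illustrative assumptions, not part of the original code.

import csv

FIELDS = ['title', 'comment', 'view_count', 'date', 'days',
          'money', 'character', 'play_list', 'detail_url']

def save_rows(rows, path='travel.csv'):
    # rows: a list of dicts whose keys match FIELDS
    # utf-8-sig keeps Chinese text readable when the file is opened in Excel
    with open(path, 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.DictWriter(f, fieldnames=FIELDS)
        writer.writeheader()
        writer.writerows(rows)

In the crawler above, append one dict per detail page instead of only printing, for example rows.append({'title': title, 'comment': comment, ..., 'play_list': '/'.join(play_list), 'detail_url': detail_url}), and call save_rows(rows) after the while loop exits. Alternatively, open the file and a csv writer before the loop and write each row as soon as it is scraped, so data is not lost if the crawl is interrupted.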