# Python crawler: Eastmoney "Guba" (股吧) stock-forum post scraper

import requests
from bs4 import BeautifulSoup
import time

# Request headers: a desktop Chrome user agent so the site serves the
# normal HTML layout instead of a bot/mobile page.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/83.0.4103.97 Safari/537.36"
    ),
}


def _strip_ws(text):
    """Return *text* with all spaces, newlines, and carriage returns removed."""
    for ch in (" ", "\n", "\r"):
        text = text.replace(ch, "")
    return text


def parse_onepage(page):
    """Scrape one listing page of the Eastmoney Guba forum and append rows
    to 股吧1.csv.

    Each row is: read-count, comment-count, title, source, timestamp.
    Rows whose timestamp equals the module-global ``t1`` (the last timestamp
    of the previous iteration) are skipped as duplicates.

    Parameters:
        page: 0-based page index; the site paginates in steps of 20.

    Side effects: updates module globals ``count`` (rows written so far) and
    ``t`` (timestamp of the last row seen); appends to 股吧1.csv; prints
    progress to stdout.
    """
    global count
    global t
    global t1

    url = "https://guba.eastmoney.com/default,1_{}.html".format(page * 20)
    resp = requests.get(url, headers=headers)
    soup = BeautifulSoup(resp.text, "lxml")
    posts = soup.findAll("ul", {"class": "newlist"})[0].findAll("li")

    for post in posts:
        cites = post.findAll("cite")
        read = _strip_ws(cites[0].text)       # read count
        comment = _strip_ws(cites[1].text)    # comment count
        title = post.span.a.attrs["title"]
        source = cites[2].text.replace("\n", "")  # poster/source column
        t = cites[3].text                     # post timestamp

        # BUG FIX: the original tested the module-global loop variable `k`
        # instead of this function's own `page` parameter (they happen to be
        # equal at the only call site). Use `page` so the function does not
        # depend on the caller's loop variable.
        if page > 1 and t == t1:
            print("跳过此条")
            continue

        print([read, comment, title, source, t])
        count += 1
        print(count)
        # BUG FIX: explicit utf-8 — the file holds Chinese text and the
        # platform default encoding (e.g. GBK on Windows) can crash or
        # produce mojibake.
        # NOTE(review): titles containing commas are written unescaped and
        # will break the CSV column layout — consider the csv module.
        with open("股吧1.csv", "a", encoding="utf-8") as f:
            f.write("{},{},{},{},{}\n".format(read, comment, title, source, t))

# Driver: crawl up to 1400 listing pages, remembering the last timestamp of
# the previous page (t1) so parse_onepage can skip the overlapping post.
count = 1
# BUG FIX: initialize t/t1 so `t1 = t` (and the comparison inside
# parse_onepage) cannot raise NameError if the first pages yielded no items.
t = None   # timestamp of the last post seen, set inside parse_onepage
t1 = None  # snapshot of `t` from the previous iteration
for k in range(1400):
    if k > 1:
        t1 = t
    parse_onepage(k)
    time.sleep(0.5)  # polite delay between requests
    



评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

山林里的迷路人

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值