import csv
import time

import requests
from bs4 import BeautifulSoup
# Browser-style User-Agent so Eastmoney serves the normal listing page
# instead of rejecting the scraper as a bot.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/83.0.4103.97 Safari/537.36"
    ),
}
def parse_onepage(page):
    """Scrape one Eastmoney Guba listing page and append its rows to 股吧1.csv.

    Columns written per post: read count, comment count, title, source, time.

    Module globals used (shared with the driver loop at the bottom of the file):
      count -- running total of rows written (incremented here)
      t     -- post time of the most recently processed item (written here)
      t1    -- last post time from the previous page, set by the driver loop;
               items matching it are treated as page-boundary duplicates
      k     -- current page index from the driver loop (read only)

    :param page: page index; the URL takes page*20 as its offset segment.
    """
    global count
    global t
    global t1
    url = "https://guba.eastmoney.com/default,1_{}.html".format(page * 20)
    resp = requests.get(url, headers=headers)
    soup = BeautifulSoup(resp.text, "lxml")
    posts = soup.findAll("ul", {"class": "newlist"})[0].findAll("li")
    # Open the file once per page (not once per row) and let the csv module
    # handle quoting, so titles containing commas or newlines no longer
    # corrupt the CSV.  Explicit utf-8 avoids the locale-dependent default
    # codec (gbk on Chinese Windows); newline="" is required by csv.writer.
    with open("股吧1.csv", "a", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        for item in posts:
            cites = item.findAll("cite")
            # Collapse all whitespace (spaces, \n, \r, tabs) out of the counts.
            read = "".join(cites[0].text.split())
            comment = "".join(cites[1].text.split())
            title = item.span.a.attrs["title"]
            source = cites[2].text.replace("\n", "")
            t = cites[3].text
            # Listing pages shift while crawling, so the first items of a new
            # page may repeat the previous page's tail; skip those duplicates.
            if k > 1 and t == t1:
                print("跳过此条")
                continue
            print([read, comment, title, source, t])
            count += 1
            print(count)
            writer.writerow([read, comment, title, source, t])
count = 1  # running row counter, incremented by parse_onepage via `global`

# Crawl 1400 listing pages.  t1 carries the last post time seen on the
# previous page so parse_onepage can skip items repeated across the page
# boundary (t is set by parse_onepage itself).
for k in range(1400):
    if k > 1:
        t1 = t
    try:
        parse_onepage(k)
    except requests.exceptions.RequestException as e:
        # A single transient network failure should not abort a
        # 1400-page crawl; log it and move on to the next page.
        print("请求失败, 跳过第{}页: {}".format(k, e))
    time.sleep(0.5)  # throttle requests to be polite to the server
# --- Trailing page residue from the hosting site (not code); kept as
# --- comments so the file remains valid Python.
# python爬虫-股吧
# 最新推荐文章于 2024-03-30 13:15:46 发布