查找某人在某微博裏留下的所有留言

最新推荐文章于 2024-11-23 19:45:46 发布

KalariKong

最新推荐文章于 2024-11-23 19:45:46 发布

阅读量3.4k

点赞数 1

CC 4.0 BY-SA版权

分类专栏： Python黑科技

本文链接：https://blog.youkuaiyun.com/ssy8stephy/article/details/51013339

Python黑科技专栏收录该内容

20 篇文章

订阅专栏

本文介绍了一种使用Python爬取指定微博用户页面上特定评论ID链接的方法。通过发送请求获取网页内容，并利用正则表达式解析页面来抓取评论链接。此外，还包括了一个读取文本文件中的链接并进一步处理的流程。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

#coding=utf-8
__author__ = 'user'
import requests
import sys
import re
import time

def getcomment(url,cook,commentid,max_retries=None):
    """Append matching comment-page links from one listing page to 1.txt.

    Downloads ``url`` and looks for anchors of the form
    ``http://weibo.cn/comment/<path>&rl=0#cmtfrm``.  A link is appended to
    ``1.txt`` when the first ``[...]`` value in its anchor text is not ``'0'``
    (presumably the comment count of the post -- confirm against the page
    markup) and ``commentid`` is found in ``<path>``.

    When the page contains no comment anchors at all, the request is
    repeated after a 2 second pause until anchors show up.

    Args:
        url: Address of the listing page to download.
        cook: Cookie mapping handed to ``requests.get``.
        commentid: Regular-expression pattern searched for in ``<path>``.
        max_retries: Optional upper bound on the number of re-fetches.  The
            default ``None`` keeps retrying indefinitely.

    Returns:
        None.  Results are appended to ``1.txt``.
    """
    # The raw HTML may carry the separator either as '&' or as the entity
    # '&amp;', so both spellings are accepted.
    link_pattern=re.compile(
        r'</a> <a href="http://weibo.cn/comment/(.*?)&(?:amp;)?rl=0#cmtfrm(.*?)</a>',
        re.S)
    count_pattern=re.compile(r'\[(.*?)\]',re.S)

    retries=0
    # A loop instead of self-recursion: a page that stays empty for a long
    # time can no longer exhaust the interpreter's recursion limit.
    while True:
        response=requests.get(url,cookies=cook)
        html=response.text
        print(html.encode('utf-8'))
        print('')

        matches=link_pattern.findall(html)

        # Append mode creates 1.txt when it is missing and keeps the links
        # collected from earlier pages; 'with' closes the file on errors too.
        with open('1.txt','a') as f:
            for path,label in matches:
                counts=count_pattern.findall(label)
                # Skip anchors without a bracketed value, or with value '0'.
                if not counts or counts[0]=='0':
                    continue
                path=path.replace('&amp;','&')
                if re.search(commentid,path):
                    f.write('http://weibo.cn/comment/'+path+'&rl=0#cmtfrm'+'\n')

        print(len(matches))
        if matches:
            return
        if max_retries is not None and retries>=max_retries:
            return
        retries+=1
        time.sleep(2)

if __name__=='__main__':
    # Cookie mapping sent with every request; the value is empty here
    # (presumably a placeholder to be filled in before running -- confirm).
    cook={"Cookie":""}
    # Pattern that a comment link must contain to be recorded.
    commentid='111111'

    first_page=1
    last_page=76
    # Visit the listing pages one by one.
    for page in range(first_page,last_page+1):
        print('start%d'%page)
        url='http://weibo.cn/u/111111?page=%d'%page
        getcomment(url,cook,commentid)

#coding=utf-8
__author__ = 'user'
import requests
import sys
import re
import time

#<meta http-equiv="refresh"


def readtxt(start=1):
    """Run ``test`` on every link stored in 1.txt.

    Reads ``1.txt`` line by line, prints the 1-based line number, and calls
    ``test(link, link)`` for each non-empty line whose number is at least
    ``start``.

    Args:
        start: 1-based line number of the first link to process.  The default
            of 1 processes the whole file; a larger value skips the lines
            before it (useful to resume an interrupted run).

    Returns:
        None.
    """
    # 'with' guarantees the file is closed even if a request fails midway.
    with open('1.txt','r') as ff:
        for k,line in enumerate(ff,1):
            print(k)
            # Drop the trailing newline so it ends up neither in the
            # requested URL nor in the recorded link.
            link=line.strip()
            if k<start or not link:
                continue
            print(link)
            test(link,link)

def test(url0,url,target='123456'):
    """Record ``url0`` in 2.txt if ``target`` appears on its comment pages.

    Downloads ``url`` and then follows the "next page" link page by page:

    * If the response does not contain the text ``comment``, it is treated
      as not being a real comment page (possibly a redirect or throttling
      page -- confirm) and the same URL is requested again after 1 second.
    * If ``target`` is found in the page, ``url0`` is appended to ``2.txt``
      and the search stops.
    * Otherwise the link following ``method="post"><div>`` is taken as the
      next page.  When no such link exists the page HTML is printed and the
      search stops without recording anything.

    Args:
        url0: Link to record in ``2.txt`` on success (the first comment page).
        url: Page to start searching from.
        target: Regular-expression pattern to look for in each page.
            Defaults to the id that was previously hard-coded.

    Returns:
        None.
    """
    cook={"Cookie":""}

    # Lines read from a file carry a trailing newline; strip it so it does
    # not corrupt the request URL or add blank lines to 2.txt.
    url0=url0.strip()
    url=url.strip()

    # Iterating instead of recursing keeps long comment threads and long
    # retry streaks from hitting the recursion limit.
    while True:
        response=requests.get(url,cookies=cook)
        html=response.text
        print('read')

        if not re.search('comment',html,re.S):
            time.sleep(1)
            continue

        if re.search(target,html,re.S):
            # Append mode creates 2.txt when it does not exist yet.
            with open('2.txt','a') as f:
                f.write(url0+'\n')
            return

        nexturl=re.search('method="post"><div><a href="(.*?)">',html,re.S)
        if not nexturl:
            print(html.encode('utf-8'))
            return

        # The href is taken from raw HTML, so '&' is written as '&amp;' and
        # has to be unescaped before the URL can be requested.
        url='http://weibo.cn'+nexturl.group(1).replace('&amp;','&')

if __name__ == "__main__":
    # Script entry point: check every link stored in 1.txt.
    readtxt()

最後，用戶 123456 在用戶 111111 的微博下留下的所有留言的鏈接（LINK）都保存在 2.txt 中了。