问卷星python自动提交

本文介绍了一种利用Python实现的针对带有验证码的在线问卷调查网站的爬虫技术。通过抓取网页请求、分析Cookie、识别验证码及构造POST数据等步骤,实现了自动化填写并提交问卷的功能。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

这是针对需要验证码的问卷的爬虫;对于不需要验证码的问卷,只需在此爬虫的基础上稍作修改即可。
1.下载fiddler4(用于抓包)
方式自行百度
2.查看并分析cookie
里面的变量记得保证随机

        'Host': 'www.wjx.cn',#host地址
        'Connection': 'keep-alive',
        'X-Forwarded-For': ip,#自行设置ip,随机ip99%为外国ip,如果要中国大陆的,自行搜索
        'Origin': 'https://www.wjx.cn',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36 EXT/6d8a2f10c62d11e7gqpxa53987ed19aa47e3/2.4',#伪装浏览器
        'Content-Type': 'application/x-www-form-urlencoded',
        'Accept': '*/*',
        'Referer': 'https://www.wjx.cn/jq/'+qid+'.aspx',#问卷地址
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cookie': '.ASPXANONYMOUS=Se6Dlf-S0wEkAAAAMzEyZGYyZmUtYzBmYi00YWM3LWIyMTEtMTEzZWI0YzkzMmZhi6xL6iHoMTghIlPoznFqbYuLd1s1; spiderregkey=www.wjx.cn%c2%a7%c2%a71; baidutgkey=%u95EE%u5377%u661FBH%7C2%7Cbaidu; _uab_collina=151065406900158178719624; SojumpSurvey=01022D8896C0612BD508FE2D28A847832BD508000670002D00740065007300740000012F00FF29B0D12A4780F0718D63D71441EC14F08F69B611; lllogcook=1; LastCheckUpdateDate=1; ASP.NET_SessionId=4mbujabo1zx2a1imb0pw40k0; LastActivityJoin=16276361,101135464182; _umdata=C234BF9D3AFA6FE7FD70ECA73142BFB1DAA8AC4CAD8E980472CE17B2B4815B078B6B64C8E7D1428ACD43AD3E795C914CB6CD457CEA3135697A8EEEB6A2679E66; Hm_lvt_21be24c80829bd7a683b2c536fcf520b=1510624314,1510653859,1510658882,1510665316; Hm_lpvt_21be24c80829bd7a683b2c536fcf520b='+timep,#cookie是最重要的,如果本cookie不能用,抓包换cookie,记得Hm_lpvt_21be24c80829bd7a683b2c536fcf520b='+timep(这句)变量改成这样
        'RA-Ver': '2.4',
        'RA-Sid': '6d8a2f10c62d11e7gqpxa53987ed19aa47e3',

3.识别验证码
参考 http://blog.csdn.net/gcs1024/article/details/77807537
4.添加data
抓包分析传输数据(选项),每个问卷都不一样
5.其他杂项
其余细节请自行抓包分析。
示例代码

import random
import requests
import urllib.parse
import urllib.request
from PIL import Image
import pytesseract
import os
import random
from time import time,strftime, localtime
import time as t
# Questionnaire id; it is concatenated into the wjx.cn URLs and the Referer header.
qid = '16454455'
# Value sent as the `rn` query parameter of the submit URL.
rnqian = '2063096382'
def download(qid,header,i):
    """Fetch the verification image for questionnaire *qid* and save it as ``<i>.gif``.

    Args:
        qid: Questionnaire id as a string; inserted into the request URL.
        header: Dict of HTTP headers sent with the request.
        i: Integer used to name the output file (``'%d.gif' % i``), which is
            written relative to the current working directory.

    Returns:
        None. The image bytes are written to disk.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request fails.
        OSError: Propagated when the output file cannot be written.
    """
    # The `t` query parameter is the current time in milliseconds.
    url = ('https://www.wjx.cn/AntiSpamImageGen.aspx?q=' + qid
           + '&t=' + str(int(time() * 1000)))
    req = urllib.request.Request(url, headers=header)
    # Context managers close the HTTP response and the file even on error;
    # the original left both to the garbage collector.
    with urllib.request.urlopen(req) as resp:
        data = resp.read()
    with open('%d.gif' % i, 'wb') as pic:
        pic.write(data)
def binarizing(img, threshold=30):  # input: gray image
    """Binarize a grayscale image in place and return it.

    Every pixel whose value is strictly greater than *threshold* becomes 255;
    every other pixel becomes 0.

    Args:
        img: Image object exposing ``load()`` (pixel access indexed by
            ``[x, y]``) and ``size`` (``(width, height)``).
        threshold: Cutoff gray level. Defaults to 30, the value that was
            previously hard-coded.

    Returns:
        The same *img* object, modified in place.
    """
    pixdata = img.load()
    width, height = img.size
    for y in range(height):
        for x in range(width):
            pixdata[x, y] = 255 if pixdata[x, y] > threshold else 0
    return img
def depoint(img):   # input: gray image
    """Whiten interior pixels that are mostly surrounded by bright pixels.

    For each pixel that is not on the image border, the four direct
    neighbours (up, down, left, right) are inspected. If at least three of
    them are brighter than 245, the pixel is set to 255. The image is
    modified in place while scanning row by row, so earlier changes are
    visible to later neighbour checks.

    Args:
        img: Image object exposing ``load()`` (pixel access indexed by
            ``[x, y]``) and ``size`` (``(width, height)``).

    Returns:
        The same *img* object, modified in place.
    """
    pixels = img.load()
    width, height = img.size
    for row in range(1, height - 1):
        for col in range(1, width - 1):
            neighbours = (
                pixels[col, row - 1],
                pixels[col, row + 1],
                pixels[col - 1, row],
                pixels[col + 1, row],
            )
            bright = sum(1 for value in neighbours if value > 245)
            if bright >= 3:
                pixels[col, row] = 255
    return img
def shibie(img):
    """Run OCR on *img* and return the recognized text with whitespace stripped.

    The image is converted to grayscale, reduced to a 1-bit image with a
    fixed cutoff of 140, and passed to ``pytesseract.image_to_string``. The
    recognized text is also printed.

    Args:
        img: Image object exposing ``convert()``; the converted image must
            expose ``point()``.

    Returns:
        The stripped OCR result as a ``str``.
    """
    threshold = 140
    # Lookup table for point(): gray levels below the cutoff map to 0, the rest to 1.
    table = [0 if level < threshold else 1 for level in range(256)]
    out = img.convert('L').point(table, '1')
    # Run OCR once and reuse the result; the original invoked it twice
    # (once for print, once for return).
    text = str(pytesseract.image_to_string(out)).strip()
    print(text)
    return text  # intended for simple verification-code images
def post(qid,rnqian,i):
    """Download and OCR the verification image, then POST one randomized answer set.

    Steps, in order: take a timestamp, sleep 10 s, take a second timestamp,
    build the GET and POST header dicts, download the verification image via
    ``download()``, sleep 5 s, pre-process the saved image with
    ``binarizing()`` and ``depoint()``, OCR it with ``shibie()``, build the
    form payload and submit URL, sleep 10 s, send the POST and print the
    response body.

    Args:
        qid: Questionnaire id string; concatenated into the Referer headers
            and the submit URL, and passed to ``download()``.
        rnqian: String sent as the ``rn`` query parameter of the submit URL.
        i: Integer passed to ``download()`` and used to open ``'%d.gif' % i``.

    Returns:
        None. The response text is printed.
    """
    # Millisecond timestamp appended to the Hm_lpvt cookie of the image request.
    timeg=str(int(time() * 1000))
    t.sleep(10)
    # Second millisecond timestamp (taken after the sleep) for the POST cookie.
    timep=str(int(time() * 1000))
    # Dotted quad of four random integers, each in 1..4, sent as X-Forwarded-For.
    ip=str(random.randint(1,4))+'.'+str(random.randint(1,4))+'.'+str(random.randint(1,4))+'.'+str(random.randint(1,4))
    # NOTE(review): rnhou is generated but never used in this function.
    rnhou=str(random.randint(10000000,99999999))
    # Headers for the verification-image GET request (passed to download()).
    # NOTE(review): the Cookie value is a hard-coded captured session string.
    headerget={
        'Host': 'www.wjx.cn',
        'Connection': 'keep-alive',
        'X-Forwarded-For': ip,
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)      Chrome/62.0.3202.89 Safari/537.36 EXT/6d8a2f10c62d11e7gqpxa53987ed19aa47e3/2.4',
        'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
        'Referer': 'https://www.wjx.cn/jq/'+qid+'.aspx',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cookie': '.ASPXANONYMOUS=Se6Dlf-S0wEkAAAAMzEyZGYyZmUtYzBmYi00YWM3LWIyMTEtMTEzZWI0YzkzMmZhi6xL6iHoMTghIlPoznFqbYuLd1s1; spiderregkey=www.wjx.cn%c2%a7%c2%a71; baidutgkey=%u95EE%u5377%u661FBH%7C2%7Cbaidu; _uab_collina=151065406900158178719624; SojumpSurvey=01022D8896C0612BD508FE2D28A847832BD508000670002D00740065007300740000012F00FF29B0D12A4780F0718D63D71441EC14F08F69B611;  lllogcook=1; LastCheckUpdateDate=1; ASP.NET_SessionId=4mbujabo1zx2a1imb0pw40k0; _umdata=C234BF9D3AFA6FE7FD70ECA73142BFB1DAA8AC4CAD8E980472CE17B2B4815B078B6B64C8E7D1428ACD43AD3E795C914CB6CD457CEA3135697A8EEEB6A2679E66; LastActivityJoin=16276361,101135441472; Hm_lvt_21be24c80829bd7a683b2c536fcf520b=1510624314,1510653859,1510658882,1510665316;    Hm_lpvt_21be24c80829bd7a683b2c536fcf520b='+timeg,
        'RA-Ver': '2.4',
        'RA-Sid': '6d8a2f10c62d11e7gqpxa53987ed19aa47e3',
    }
    # Headers for the form POST; same shape as above plus Origin and Content-Type,
    # with the later timestamp (timep) in the cookie.
    headerpost = {
        'Host': 'www.wjx.cn',
        'Connection': 'keep-alive',
        'X-Forwarded-For': ip,
        'Origin': 'https://www.wjx.cn',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36 EXT/6d8a2f10c62d11e7gqpxa53987ed19aa47e3/2.4',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Accept': '*/*',
        'Referer': 'https://www.wjx.cn/jq/'+qid+'.aspx',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cookie': '.ASPXANONYMOUS=Se6Dlf-S0wEkAAAAMzEyZGYyZmUtYzBmYi00YWM3LWIyMTEtMTEzZWI0YzkzMmZhi6xL6iHoMTghIlPoznFqbYuLd1s1; spiderregkey=www.wjx.cn%c2%a7%c2%a71; baidutgkey=%u95EE%u5377%u661FBH%7C2%7Cbaidu; _uab_collina=151065406900158178719624; SojumpSurvey=01022D8896C0612BD508FE2D28A847832BD508000670002D00740065007300740000012F00FF29B0D12A4780F0718D63D71441EC14F08F69B611; lllogcook=1; LastCheckUpdateDate=1; ASP.NET_SessionId=4mbujabo1zx2a1imb0pw40k0; LastActivityJoin=16276361,101135464182; _umdata=C234BF9D3AFA6FE7FD70ECA73142BFB1DAA8AC4CAD8E980472CE17B2B4815B078B6B64C8E7D1428ACD43AD3E795C914CB6CD457CEA3135697A8EEEB6A2679E66; Hm_lvt_21be24c80829bd7a683b2c536fcf520b=1510624314,1510653859,1510658882,1510665316; Hm_lpvt_21be24c80829bd7a683b2c536fcf520b='+timep,
        'RA-Ver': '2.4',
        'RA-Sid': '6d8a2f10c62d11e7gqpxa53987ed19aa47e3',
    }
    # Save the verification image to '<i>.gif'.
    download(qid,headerget,i)
    t.sleep(5)
    # Re-open the saved image as grayscale, clean it up, then OCR it.
    img = Image.open('%d.gif'%(i)).convert("L")
    img = binarizing(img)
    img = depoint(img)
    yanzhengma=shibie(img)
    # Millisecond timestamp sent as the `t` query parameter of the submit URL.
    timec=str(int(time() * 1000))
    # Form payload with a single 'submitdata' field. Presumably each
    # '<n>$<value>' segment is one question's answer, separated by '}' —
    # the layout is specific to one questionnaire; verify against captured traffic.
    # Most values are random; the segments for 12 and 14 are fixed.
    thedata = {'submitdata': '1$'+str(random.randint(1,5))+'}2$'+str(random.randint(1,10))+'}3$'+str(random.randint(1,3))+'}4$'+str(random.randint(1,4))+'}5$1<'+str(random.randint(1,9))+',2<'+str(random.randint(1,5))+',3<'+str(random.randint(1,5))+',4<'+str(random.randint(1,5))+',5<'+str(random.randint(1,5))+',6<'+str(random.randint(1,5))+',7<'+str(random.randint(1,5))+',8<'+str(random.randint(1,5))+',9<'+str(random.randint(1,5))+'}6$'+str(random.randint(1,3))+'}7$'+str(random.randint(1,7))+'}8$'+str(random.randint(1,3))+'|'+str(random.randint(3,6))+'|'+str(random.randint(7,9))+'}9$'+str(random.randint(1,4))+'|'+str(random.randint(5,7))+'}10$'+str(random.randint(1,3))+'}11$'+str(random.randint(1,4))+'}12$1<1,2<4,3<6,4<3,5<8,6<3,7<6,8<5}13$'+str(random.randint(1,4))+'|'+str(random.randint(5,7))+'}14$2|5}15$'+str(random.randint(1,2))+'}16$'+str(random.randint(1,2))+'}17$'+str(random.randint(1,2))+'}18$'+str(random.randint(1,2))+'}19$'+str(random.randint(1,2))+'}20$'+str(random.randint(1,4))+'}21$'+str(random.randint(1,3))}
    # Submit URL; '/' and ':' in starttime and sd are percent-encoded by hand.
    # NOTE(review): the strftime format has no separator between date and time
    # ("%d%H"), and the OCR text is inserted without URL-encoding — confirm intended.
    url1='https://www.wjx.cn/handler/processjq.ashx?submittype=1&curID='+qid+'&t='+timec+'&starttime='+(str(strftime("%Y/%m/%d%H:%M:%S", localtime())).replace('/','%2F')).replace(':','%3A')+'&validate_text='+str(yanzhengma)+'&rn='+rnqian+'&sd='+('https://www.wjx.cn/'.replace('/','%2F')).replace(':','%3A')
    # Author's note: change rn (supplied via the rnqian argument).
    t.sleep(10)

    # Redirects are not followed; the raw response body is printed.
    r = requests.post(url1, headers = headerpost,data = thedata,allow_redirects=False)
    print(r.text)
main函数需自行编写
(可参考 http://download.csdn.net/download/gcs1024/10122645)
main(qid,rnqian)
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值