python用爬虫实现有道词典翻译

本文介绍了使用Python调用有道翻译接口的代码实现,包括请求头和代理设置。请求返回50错误后,发现可能是由反爬机制导致;将URL中translate_o的_o删掉后可以继续翻译,但翻译质量下降。参考链接提供了一种可能的解决方案,但只能翻译部分内容。文章最后表达了对研究反爬策略和翻译API优化的兴趣。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

import requests


# Send both http:// and https:// requests through the proxy listening on
# 127.0.0.1:8080 (presumably a local intercepting/debugging proxy).
# NOTE: the scheme inside each value is the protocol spoken to the proxy
# itself, not to the target site. One host:port cannot be both a plain-HTTP
# and a TLS endpoint, so the 'https' entry must use 'http://' too; with
# 'https://' newer urllib3 releases try a TLS handshake with the proxy and
# HTTPS requests fail.
proxies = {
    'http': 'http://127.0.0.1:8080',
    'https': 'http://127.0.0.1:8080',
}


# Endpoint the translation form is POSTed to.
postUrl = "http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"

# HTTP headers sent with the request (presumably captured from a real
# browser session - the Cookie value is a fixed snapshot).
header = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:83.0) "
        "Gecko/20100101 Firefox/83.0"
    ),
    "Accept": "application/json, text/javascript, */*; q=0.01",
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    "Referer": "http://fanyi.youdao.com/",
    "Cookie": (
        'OUTFOX_SEARCH_USER_ID_NCOO=1422156429.9226038; '
        '_ga=GA1.2.1838985276.1602664866; '
        'OUTFOX_SEARCH_USER_ID="1822624125@10.108.160.17"; '
        'YOUDAO_MOBILE_ACCESS_TYPE=1; '
        'JSESSIONID=aaayS7bFJT-DNchaEz9yx; '
        '___rl__test__cookies=1607350003985'
    ),
}

# Form fields of the request body. "i" is the text to translate.
# NOTE(review): "salt", "sign" and "lts" are hard-coded; they look like
# values copied from one recorded request - confirm whether the server
# accepts them for any other text or at any later time.
postData = {
    "i": (
        "An issue was discovered in SonicBOOM riscv-boom 3.0.0. "
        "For LR, it does not avoid acquiring a reservation in the case "
        "where a load translates successfully but still generates an exception."
    ),
    "from": "AUTO",
    "to": "AUTO",
    "smartresult": "dict",
    "client": "fanyideskweb",
    "salt": "16073500039953",
    "sign": "ca5cbc4b7613777d6546e147622e5c47",
    "lts": "1607350003995",
    "bv": "abf85f8020851128b561472c8a7b924d",
    "doctype": "json",
    "version": "2.1",
    "keyfrom": "fanyi.web",
    "action": "FY_BY_REALTlME",
    "Accept": "application/json, text/javascript, */*; q=0.01",
}


def youdaofangyi(postUrl, postData, header, proxies, timeout=10):
    """POST the translation form and return the raw HTTP response.

    Args:
        postUrl: URL the form is sent to.
        postData: Mapping of form fields sent as the request body.
        header: Mapping of HTTP headers sent with the request.
        proxies: Proxy mapping handed to ``requests`` unchanged.
        timeout: Seconds to wait for the server (or proxy) before giving up.
            ``requests`` waits indefinitely unless a timeout is passed, so
            an unresponsive endpoint would otherwise hang the script.

    Returns:
        The ``requests.Response`` object; decoding it is left to the caller.

    Raises:
        requests.exceptions.RequestException: If the connection fails or
            the timeout expires.
    """
    print("开始")
    responseRes = requests.post(
        url=postUrl,
        proxies=proxies,
        data=postData,
        headers=header,
        timeout=timeout,
    )

    # Print the HTTP status so a blocked or failed request is visible
    # before the caller tries to decode the body.
    print(f"statusCode = {responseRes.status_code}")
    return responseRes

# Send the request through the proxy, then print the reply decoded as JSON.
ire = youdaofangyi(
    postUrl=postUrl,
    postData=postData,
    header=header,
    proxies=proxies,
)
print(ire.json())

拼接翻译结果(将返回的各段译文合并为一个字符串)

import json

import requests


# Endpoint the translation form is POSTed to.
postUrl = "http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"

# HTTP headers sent with the request (presumably captured from a real
# browser session - the Cookie value is a fixed snapshot).
header = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:83.0) "
        "Gecko/20100101 Firefox/83.0"
    ),
    "Accept": "application/json, text/javascript, */*; q=0.01",
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    "Referer": "http://fanyi.youdao.com/",
    "Cookie": (
        'OUTFOX_SEARCH_USER_ID_NCOO=1422156429.9226038; '
        '_ga=GA1.2.1838985276.1602664866; '
        'OUTFOX_SEARCH_USER_ID="1822624125@10.108.160.17"; '
        'YOUDAO_MOBILE_ACCESS_TYPE=1; '
        'JSESSIONID=aaayS7bFJT-DNchaEz9yx; '
        '___rl__test__cookies=1607350003985'
    ),
}

# Form fields of the request body. "i" is the text to translate.
# NOTE(review): "salt", "sign" and "lts" are hard-coded; they look like
# values copied from one recorded request - confirm whether the server
# accepts them for any other text or at any later time.
postData = {
    "i": (
        "An issue was discovered in SonicBOOM riscv-boom 3.0.0. "
        "For LR, it does not avoid acquiring a reservation in the case "
        "where a load translates successfully but still generates an exception."
    ),
    "from": "AUTO",
    "to": "AUTO",
    "smartresult": "dict",
    "client": "fanyideskweb",
    "salt": "16073500039953",
    "sign": "ca5cbc4b7613777d6546e147622e5c47",
    "lts": "1607350003995",
    "bv": "abf85f8020851128b561472c8a7b924d",
    "doctype": "json",
    "version": "2.1",
    "keyfrom": "fanyi.web",
    "action": "FY_BY_REALTlME",
    "Accept": "application/json, text/javascript, */*; q=0.01",
}

def youdaofangyi(postUrl, postData, header, proxies=None, timeout=10):
    """POST the translation form and return the raw HTTP response.

    Args:
        postUrl: URL the form is sent to.
        postData: Mapping of form fields sent as the request body.
        header: Mapping of HTTP headers sent with the request.
        proxies: Optional proxy mapping handed to ``requests``; ``None``
            (the default) sends the request without an explicit proxy
            mapping, as before.
        timeout: Seconds to wait for the server before giving up.
            ``requests`` waits indefinitely unless a timeout is passed, so
            an unresponsive endpoint would otherwise hang the script.

    Returns:
        The ``requests.Response`` object; decoding it is left to the caller.

    Raises:
        requests.exceptions.RequestException: If the connection fails or
            the timeout expires.
    """
    return requests.post(
        url=postUrl,
        data=postData,
        headers=header,
        proxies=proxies,
        timeout=timeout,
    )

def youdaojieguo(postUrl, postData, header):
    """Request a translation and return all translated pieces as one string.

    The decoded JSON reply is expected to hold, under ``"translateResult"``,
    a sequence of groups, each group being a sequence of mappings with a
    ``"tgt"`` entry. All ``"tgt"`` values are concatenated in order, with
    no separator.

    Args:
        postUrl: URL the form is sent to.
        postData: Mapping of form fields sent as the request body.
        header: Mapping of HTTP headers sent with the request.

    Returns:
        The concatenation of every ``"tgt"`` value in the reply.

    Raises:
        KeyError: If the reply has no ``"translateResult"`` entry. The
            message includes the reply's ``errorCode`` (if any) so a
            rejected request can be told apart from a parsing problem.
    """
    result = youdaofangyi(postUrl, postData, header).json()

    try:
        groups = result["translateResult"]
    except KeyError:
        # Same exception type as a plain lookup would raise, but with the
        # server-reported error code attached for diagnosis.
        raise KeyError(
            "response has no 'translateResult' "
            f"(errorCode={result.get('errorCode')!r})"
        ) from None

    # str.join avoids rebuilding the string on every piece.
    return "".join(piece["tgt"] for group in groups for piece in group)


# Every call to youdaojieguo sends a network request, so call it exactly
# once and print that result instead of requesting the translation twice.
print(youdaojieguo(postUrl, postData, header))

正当我兴冲冲地准备翻译时,却发现接口报了50错误[哭]

比较了一下发送的包,应该是做了反爬机制。

找到的一个解决方案是:将url中translate_o的_o删掉即可,但是!翻译质量直线下降。

参考链接中的做法是利用爬虫,但是只能翻译前两句?😭白嫖不到

有空研究一下[生活不易,🐱🐱叹息]

参考链接:

https://blog.youkuaiyun.com/mr_amnesia/article/details/109701417

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值