aiohttp_requests库的学习 json(content_type='??')

本文介绍了一个关于使用aiohttp库进行异步HTTP请求时遇到的问题:尝试以不正确的MIME类型解码JSON数据导致的ContentTypeError错误。通过调整aiohttp的json解析参数,成功解决了这一问题。
from aiohttp_requests import requests

async def req(URL):
    headers = {"Accept": 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        "Accept-Language": "zh-CN,zh;q=0.9", "Host": "xin.baidu.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36", }
    resp = await requests.get(URL,headers=headers,timeout=10,verify_ssl=False)
    resp_text = await resp.json(encoding='utf-8')
    return resp_text

if __name__ == "__main__":
    print("HELLO WORLD")
    URL = "https://xin.baidu.com/detail/basicAjax?pid=xlTM-TogKuTw9PzC-u6VwZxUBuZ5J7WMewmd"
    loop = asyncio.get_event_loop()
    loop.run_until_complete(req(URL))

运行上面的程序会报错

aiohttp.client_exceptions.ContentTypeError: 0, message='Attempt to decode JSON with unexpected mimetype: text/html; charset=utf-8'

解决方案

from aiohttp_requests import requests

async def req(URL):
    headers = {"Accept": 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        "Accept-Language": "zh-CN,zh;q=0.9", "Host": "xin.baidu.com",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36", }
    resp = await requests.get(URL,headers=headers,timeout=10,verify_ssl=False)
    resp_text = await resp.json(content_type='text/html',encoding='utf-8')
    return resp_text

if __name__ == "__main__":
    print("HELLO WORLD")
    URL = "https://xin.baidu.com/detail/basicAjax?pid=xlTM-TogKuTw9PzC-u6VwZxUBuZ5J7WMewmd"
    loop = asyncio.get_event_loop()
    loop.run_until_complete(req(URL))

 

import requests import sseclient import json import time from datetime import datetime import logging # 配置日志记录器,将日志输出到group_log.log文件 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') def assistant_service(query: str, citics_cus_app_id: str): data = { 'messages': [], 'question_time': datetime.now().strftime("%Y-%m-%d %H:%M:%S"), 'message_id': 1, 'question': [{'type': 'text', 'value': query}], 'web_search_flag': False, 'session_id': '', 'app_id': citics_cus_app_id, "stream":False } url = 'http://ai-api.citicsinfo.com/assistant/chat' headers = { 'API-KEY': 'CAP_ass1stanT', } response = requests.post(url, json=data, headers=headers, stream=False) return response.json() def agent_callback(message): agent_id = message['agent_id'] query = message['query'] resp = assistant_service(query=query, citics_cus_app_id=agent_id) return resp['answer'][0]['total_answer'] def receive_message(): msg = agent_consumer.receive() message_data = msg.data().decode('utf-8') message = json.loads(message_data) content = agent_callback(message) agent_consumer.acknowledge(msg) logging.info(content) return message_data if __name__=='__main__': from db.client import pulsar_client from pulsar import ConsumerType agent_consumer = pulsar_client.subscribe(topic='iops/cap/wework_group_agent', subscription_name='fundop-subsrcibe-shared', consumer_type=ConsumerType.Shared) while True: message_data = receive_message() time.sleep(0.5) 将这个代码改写成异步任务
07-23
我现在有两个脚本,login.py和zaixian.py,两个脚本有依赖关系,现在需要你把两个脚本优化合并成1个脚本,要求合并后的脚本功能不变。请完整输出合并后的脚本内容 login.py的原始脚本内容是: #!/usr/bin/python3 # coding=utf-8 import io import sys import time import requests import json import re import base64 from urllib.parse import urlparse, urljoin, quote import urllib3 import gzip import zlib import brotli import chardet from typing import Optional, Tuple, Dict # 禁用SSL警告 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') SUC_RES = { 'resCode': 200, 'resTime': 0, 'keyword': 'SUCCESS', 'message': [] } FAIL_RES = { 'resCode': 500, 'resTime': 0, 'keyword': 'FAILED', 'message': [] } # 封装解码 class HttpResponseProcessor: def __init__(self, url: str, headers: Optional[Dict] = None): """ 初始化响应处理器 :param url: 请求的URL :param headers: 请求头,默认为None """ self.url = url self.headers = headers or {} self.response = None self.raw_content = None self.text_content = None self.encoding = None self.status_code = None def fetch_response(self): """ 发送HTTP请求并获取响应 :return: None """ try: self.response = requests.get( url=self.url, headers=self.headers, allow_redirects=False, # 禁用自动重定向 stream=True, # 流模式获取原始响应 verify=False ) self.status_code = self.response.status_code self.raw_content = self.response.content except Exception as e: raise Exception(f"请求失败: {str(e)}") def print_response_headers(self): """ 打印响应头信息 :return: None """ if not self.response: raise Exception("尚未获取响应,请先调用 fetch_response()") def decode_content(self) -> str: """ 尝试解码内容为文本 :return: 解码后的文本内容 """ if not self.raw_content: raise Exception("尚未获取原始内容,请先调用 fetch_response()") try: # 检测内容编码 result = chardet.detect(self.raw_content) encoding_detected = result.get('encoding') if result else None # 尝试解码 if encoding_detected: try: self.text_content = self.raw_content.decode(encoding_detected) self.encoding = encoding_detected return self.text_content except UnicodeDecodeError: # 如果检测到的编码解码失败,则尝试其他编码 pass # 尝试常见编码 for encoding in ['utf-8', 'gbk', 'gb2312', 'latin1']: try: self.text_content = self.raw_content.decode(encoding) self.encoding = encoding break except: continue else: # 如果都无法解码,则使用替换错误字符的方式解码 try: self.text_content = self.raw_content.decode('utf-8', errors='replace') self.encoding = 'utf-8' except: self.text_content = "无法解码内容" self.encoding = None return self.text_content except Exception as e: # 将内容保存到文件以便分析 with open('response.bin', 'wb') as f: f.write(self.raw_content) raise Exception(f"内容解码失败: {str(e)}") def process_response(self) -> Tuple[int, Optional[str], Optional[str]]: """ 完整处理响应的便捷方法 :return: (status_code, text_content, encoding) """ self.fetch_response() self.print_response_headers() text = self.decode_content() return self.status_code, text, self.encoding def print_err_result(e): FAIL_RES['error'] = e exit(1) def make_request(url, params=None, data=None, method='get', session=None): try: start = time.time() req_func = session.get if session else requests.get if method.lower() == 'post': req_func = session.post if session else requests.post headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', 'Accept': 'application/json,text/plain,text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', 'X-Requested-With': 'XMLHttpRequest', 'accept-encoding': 'gzip, deflate, br,zstd' } response = req_func( url, params=params, data=data, verify=False, headers=headers ) res_time = (time.time() - start) * 1000 if response.status_code in [200, 302]: SUC_RES['resTime'] = int(res_time) SUC_RES['message'].append(f"请求 {url} 成功") return response else: FAIL_RES[ 'error'] = f"请求失败,状态码: {response.status_code}, 响应内容: {response.text}, 头信息:{session.headers if session else None}" FAIL_RES['message'].append(f"请求 {url} 失败") return None except Exception as e: print_err_result(f"请求过程中发生错误: {str(e)}") return None def cas_login(username, password) -> Tuple[Optional[str], Optional[dict]]: # 使用会话保持cookies session = requests.Session() token = None try: # 第一步:获取lt令牌 params1 = { 'service': 'https://www.fifedu.com/iplat/ssoservice', 'get-lt': 'true', 'n': str(int(time.time() * 1000)), 'callback': 'jsonpcallback', '_': str(int(time.time() * 1000)) } url1 = "https://cycore.fifedu.com/cas-server/login" response1 = make_request(url1, params=params1, session=session) if not response1: return None, {} # 1. 检查响应是否以jsonpcallback开头 if not response1.text.startswith('jsonpcallback'): raise ValueError("响应格式不符合预期,不是JSONP格式") # 2. 提取括号内的JSON部分 json_str = response1.text[len('jsonpcallback('):-2] # 去掉首尾的jsonpcallback(); # 3. 将字符串解析为字典 try: data = json.loads(json_str) except json.JSONDecodeError: raise ValueError("JSON解析失败,响应内容: " + response1.text) # 4. 提取所需的值 lt = data.get('lt', '') execution = data.get('execution', '') if not lt or not execution: raise ValueError("响应中缺少lt或execution字段") # 第二步:提交登录表单 # 注意:密码是base64编码的,但这里我们直接使用传入的密码(原始代码中密码是base64编码的,所以这里我们直接使用) # 实际上,在登录请求中,密码应该是明文还是编码?根据观察,原始代码中密码是base64编码的,但登录表单提交的是原始密码还是编码后的? # 由于我们传入的password已经是base64编码(从get_credentials中获取的),但实际登录接口可能需要明文,所以这里需要先解码? # 但是,在原始代码中,密码是直接以base64字符串形式传入的,而登录接口是否要求base64编码?需要根据实际接口要求。 # 由于我们不清楚,所以先按照原始代码的方式,直接传入base64字符串作为密码。 data2 = { 'service': 'https://cycore.fifedu.com/iplat/ssoservice', 'callback': 'logincallback', 'isajax': 'true', 'isframe': 'true', '_eventId': 'submit', 'serviceURL': 'null', 'lt': lt, 'type': 'pwd', 'execution': execution, 'username': username, 'password': password, '_': str(int(time.time() * 1000)) } url2 = "https://cycore.fifedu.com/cas-server/login" response2 = make_request(url2, data=data2, method='post', session=session) if not response2: return None, {} # 检查登录是否成功 response_text = response2.text.strip() if response_text.startswith("logincallback"): json_str = response_text[len("logincallback("):-2] try: login_result = json.loads(json_str) except json.JSONDecodeError: raise ValueError("登录响应JSON解析失败: " + response_text) token = login_result.get("token", "") ticket = login_result.get("ticket", "") if not token or not ticket: raise ValueError("登录响应中缺少token或ticket") else: raise ValueError("登录响应格式不符合预期: " + response_text) # 第三步:ssosevice跳转 params3 = { 'callback': f'jQuery{int(time.time() * 1000000)}_{int(time.time() * 1000)}', 'action': 'login', '_': str(int(time.time() * 1000)) } # 更新请求头,注意:这里我们不再手动设置Cookie,而是由session自动管理 session.headers.update({ 'Referer': 'https://www.fifedu.com/iplat/fifLogin/scuai/index.html?service=https://assess.fifedu.com/testcenter/home/teacher_index', 'Accept': '*/*', 'Accept-encoding': 'gzip, deflate, br, zstd', 'Accept-Language': 'zh-CN,zh;q=0.9', 'cache-control': 'no-cache', 'pragma': 'no-cache', 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36' }) url3 = "https://www.fifedu.com/iplat/ssoservice" response3 = session.get( url3, params=params3, allow_redirects=True, verify=False ) if not response3: return None, {} # 第四步:跳转到目标页面 params4 = { 'nextPage': 'https://assess.fifedu.com/testcenter/home/teacher_index', } url4 = "https://www.fifedu.com/iplat/ssoservice" # 注意:这里我们不再手动设置Cookie头,而是由session自动管理 session.headers.update({ 'Referer': 'https://www.fifedu.com/iplat/fifLogin/scuai/index.html?service=https://assess.fifedu.com/testcenter/home/teacher_index', 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', 'Accept-encoding': 'gzip, deflate, br, zstd', 'Accept-Language': 'zh-CN,zh;q=0.9', 'cache-control': 'no-cache', 'pragma': 'no-cache', 'priority': 'u=0, i', 'Upgrade-Insecure-Requests': '1', 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36', }) response4 = session.get(url4, params=params4, verify=False) if not response4: return None, {} # ...(前面的代码保持不变)... # 第五步:跳转到业务接口 url5 = "https://assess.fifedu.com/testcenter/home/getUser" session.headers.update({ 'Referer': 'https://assess.fifedu.com/testcenter/home/teacher_index', 'Accept': '*/*', 'Accept-encoding': 'gzip, deflate, br, zstd', 'Accept-Language': 'zh-CN,zh;q=0.9', 'priority': 'u=0, i', 'Cookie': f'prod-token={token}', # 设置token到Cookie 'cache-control': 'no-cache', 'pragma': 'no-cache', 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36' }) response5 = session.get(url5, verify=False) if not response5: return None, {} # 检查第五步的响应 print(f"\n===== 第五步响应信息 =====") print(f"响应码: {response5.status_code}") print(f"响应内容 (前1000字符):\n{response5.text[:1000]}") print(f"Session Cookies: {session.cookies.get_dict()}") print(f"Token: {token}") print("=" * 30) # 获取session中的cookies sess = session.cookies.get_dict() if token and sess: return token, sess else: return None, {} # ...(后面的代码保持不变)... except Exception as e: print(f"CAS登录过程中发生错误: {str(e)}", file=sys.stderr) return None, {} def get_credentials(): username = "jffwbc1" password = "R2pjcHgxMjMhQCM=" # base64编码的密码 # 注意:这里我们不进行解码,因为登录函数中直接使用了这个base64字符串作为密码。 # 但是,根据实际接口,可能需要明文密码,那么就需要先解码: # password = base64.b64decode(password).decode('utf-8') # 但是,原始代码中直接使用base64字符串作为密码,所以我们先保持原样。 return cas_login(username, password) if __name__ == '__main__': username = "jffwbc1" password = "R2pjcHgxMjMhQCM=" token, sess = cas_login(username, password) if token and sess: print("登录成功!") print(f"Token: {token}") print(f"Session Cookies: {sess}") else: print("登录失败!") zaixian.py的原始脚本内容是: #!/usr/bin/python3 # coding=utf-8 from zaixian_cas_login import HttpResponseProcessor from zaixian_cas_login import get_credentials import sys import time import requests import json from urllib.parse import urlparse, urljoin SUC_RES = { 'resCode': 200, 'resTime': 0, 'keyword': 'SUCCESS', 'message': "调用成功", 'apiMessage': None # 新增字段,用于存储接口返回的message } FAIL_RES = { 'resCode': 500, 'resTime': 0, 'keyword': 'FAILED', 'message': "调用失败", 'apiMessage': None # 新增字段,用于存储接口返回的message } def print_err_result(e): FAIL_RES['error'] = e print(json.dumps(FAIL_RES, ensure_ascii=False)) exit(1) def _requests(full_url, params='{}'): try: # 处理null参数的情况 if params is None or params.lower() == 'null': params = '{}' # 解析参数 param = params.replace("'", '"') pars = json.loads(param) # 获取请求数据,默认为空字典 data = pars.get('data', {}) # 获取请求方法,默认为GET method = pars.get('method', 'GET').upper() # 获取期望的message值,用于断言判断 expected_message = pars.get('expectedMessage', None) # 添加协议前缀(如果不存在) if not full_url.startswith(('http://', 'https://')): full_url = 'https://' + full_url # 解析URL以验证格式 parsed_url = urlparse(full_url) if not parsed_url.netloc: raise ValueError("无效的URL格式,缺少域名部分") # 确保路径以/开头 if not parsed_url.path.startswith('/'): full_url = urljoin(full_url, '/') isSuccess = True start = time.time() response_data = None # 用于存储解析后的响应数据 api_message = None # 用于存储接口返回的message try: # 获取token和session token, sess = get_credentials() if token is None or sess is None: raise ValueError("无法获取有效的token或session") # 设置请求头,包括Cookie和Session headers = { 'Cookie': f'prod-token={token}', 'Content-Type': 'application/json' } # 根据method参数决定使用GET还是POST if method == 'POST': res = requests.post(url=full_url, json=data, headers=headers, verify=False) else: res = requests.get(url=full_url, json=data, headers=headers, verify=False) # 新增解压解码处理过程 processor = HttpResponseProcessor(full_url, headers=res.headers) processor.response = res processor.raw_content = res.content processor.status_code = res.status_code try: # 解码内容 text_content = processor.decode_content() # 如果内容是JSON,可以解析为字典 try: response_data = json.loads(text_content) # 尝试获取接口返回的message字段 api_message = response_data.get('message', None) except json.JSONDecodeError: pass except Exception as e: raise e except requests.exceptions.SSLError as e: message = 'SSL证书验证失败' FAIL_RES['message'] = message print_err_result(str(e)) except Exception as e: message = '调用出现异常' FAIL_RES['message'] = message print_err_result(str(e)) # 计算耗时 res_time = (time.time() - start) * 1000 # 解析响应 try: if response_data is None: response_data = res.json() statusCode = response_data.get('statusCode', res.status_code) except ValueError: statusCode = res.status_code # 判断请求是否成功 if 200 != res.status_code: isSuccess = False FAIL_RES['error'] = '调用网关拨测中间服务失败' message = 'resCode:' + str(res.status_code) FAIL_RES['message'] = message FAIL_RES['apiMessage'] = api_message if 200 != statusCode: isSuccess = False FAIL_RES['error'] = '调用失败' try: message = 'resInfo:' + str(response_data.get('responseBody', 'No response body')) FAIL_RES['message'] = message FAIL_RES['apiMessage'] = api_message except (ValueError, AttributeError): message = 'resInfo: Invalid JSON response' FAIL_RES['message'] = message FAIL_RES['apiMessage'] = api_message # 成功处理 SUC_RES['resTime'] = int(res_time) SUC_RES['apiMessage'] = api_message # 如果有预期的message值,进行断言判断 if expected_message is not None and api_message != expected_message: isSuccess = False FAIL_RES['error'] = 'message断言失败' FAIL_RES['message'] = f"接口返回的message({api_message})与预期({expected_message})不符" FAIL_RES['apiMessage'] = api_message # 输出结果 if isSuccess: print(json.dumps(SUC_RES, ensure_ascii=False)) else: print(json.dumps(FAIL_RES, ensure_ascii=False)) except json.JSONDecodeError as e: print_err_result(f"JSON参数解析失败: {str(e)}") except ValueError as e: print_err_result(str(e)) except Exception as e: print_err_result(f"未知错误: {str(e)}") if __name__ == '__main__': # args = sys.argv[1:] # if len(args) < 1: # raise Exception(''' # 参数不足 # 用法: ./http_requests.py 完整URL [JSON参数] # 示例: ./http_requests.py "https://api.example.com/endpoint" '{"data":{"key":"value"}, "method":"POST", "expectedMessage":"预期消息"}' # ''') # # full_url = args[0] # # 处理null参数的情况 # params = args[1] if len(args) > 1 and args[1].lower() != 'null' else '{}' full_url = "https://assess.fifedu.com/testcenter/home/getSysSubjectList" params = '{"data":{"isSwitch":""},"method":"POST"}' _requests(full_url, params) 我现在有两个脚本,login.py和zaixian.py,两个脚本有依赖关系,现在需要你把两个脚本优化合并成1个脚本,要求合并后的脚本功能不变。请完整输出合并后的脚本内容
08-08
评论 4
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值