import requests
import chardet
url = 'http://ggzyjy.xzfwzx.putian.gov.cn/images/bg.jpg'
resp = requests.get(url)
cookies = resp.cookies
cookie = requests.utils.dict_from_cookiejar(cookies)
head = resp.headers
# get_encodings_from_content() returns a list of charset candidates parsed
# from the body; take the first one (the list may be empty for a binary response)
encodings = requests.utils.get_encodings_from_content(resp.text)
if encodings:
    resp.encoding = encodings[0]
print(cookie, head, resp.encoding)
def http_take(url1, url2=None, head=None, body=None):
    '''
    body is supplied by the caller, e.g.:
    body = {
        'username': 'xxx',
        'password': 'xxx',
    }
    '''
    session = requests.session()
    # pass headers and body by keyword: the second positional parameter of
    # post() is `data`, so post(url1, head, body) would send the headers
    # dict as the form body
    resp = session.post(url1, headers=head, data=body)
    '''
    Fields we need to check on the response:
    Connection        # "keep-alive" means a persistent connection
    Content-Encoding  # tells us which decoding/decompression to apply
    Content-Type      # payload type; text payloads need decoding
    Cookie            # user identity marker, reused over the persistent connection
    Status Code       # response status, used to diagnose failures
    '''
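    # Sketch of reading those fields off the response object; the cookie
    # comes from the session's jar rather than the response headers
    print(resp.status_code)                      # Status Code
    print(resp.headers.get('Connection'))        # keep-alive => persistent connection
    print(resp.headers.get('Content-Encoding'))  # e.g. gzip
    print(resp.headers.get('Content-Type'))      # decides whether to decode as text
    print(session.cookies.get_dict())            # session/identity cookies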
    # Session.get() only accepts keyword arguments after the URL, so the
    # headers must be passed as headers=...
    resp2 = session.get(url2, headers=head)
    return resp.status_code, resp2, session
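# Usage sketch for http_take(); the login URL, follow-up URL and credentials
# below are placeholders (assumptions), not endpoints from this project.
login_url = 'http://example.com/login'      # hypothetical
page_url = 'http://example.com/protected'   # hypothetical
status, page_resp, sess = http_take(login_url, page_url,
                                    body={'username': 'xxx', 'password': 'xxx'})
print(status, page_resp.status_code)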
resp = requests.get(url)
# chardet infers the charset from the raw response bytes, which is more
# reliable than trusting the server's declared encoding
resp.encoding = chardet.detect(resp.content)['encoding']
cookie = resp.cookies
print(cookie)
def get_cookies():
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4515.159 Safari/537.36'
    }
    url = 'http://ggzyjy.xzfwzx.putian.gov.cn/ptsq/005002/005002003/005002003001/subPageright.html'
    session = requests.session()
    s = session.post(url, headers=headers)
    # get_encodings_from_content() returns a list; only set the encoding
    # when the page actually declares one
    encodings = requests.utils.get_encodings_from_content(s.text)
    if encodings:
        s.encoding = encodings[0]
    cookie = session.cookies
    a = cookie.get_dict()
    return a, s
print(get_cookies())
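# Sketch: reuse the harvested cookies on a plain requests call. The target
# path below is a placeholder, not a page taken from the original script.
cookies_dict, _ = get_cookies()
follow_up = requests.get('http://ggzyjy.xzfwzx.putian.gov.cn/',  # hypothetical
                         cookies=cookies_dict,
                         headers={'User-Agent': 'Mozilla/5.0'})
print(follow_up.status_code)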