import requests
import chardet
url = 'http://ggzyjy.xzfwzx.putian.gov.cn/images/bg.jpg'
resp = requests.get(url)
cookies = resp.cookies
cookie = requests.utils.dict_from_cookiejar(cookies)
head = resp.headers
# get_encodings_from_content() returns a list of charset candidates parsed
# from the body; take the first one (the list may be empty for a binary response)
encodings = requests.utils.get_encodings_from_content(resp.text)
if encodings:
    resp.encoding = encodings[0]
print(cookie, head, resp.encoding)
def http_take(url1, url2=None, head=None, body=None):
    '''
    body is supplied by the caller, e.g.:
    body = {
        'username': 'xxx',
        'password': 'xxx',
    }
    '''
    session = requests.session()
    # pass headers and body by keyword: the second positional parameter of
    # post() is `data`, so post(url1, head, body) would send the headers
    # dict as the form body
    resp = session.post(url1, headers=head, data=body)
    '''
    Fields we need to check on the response:
    Connection        # "keep-alive" means a persistent connection
    Content-Encoding  # tells us which decoding/decompression to apply
    Content-Type      # payload type; text payloads need decoding
    Cookie            # user identity marker, reused over the persistent connection
    Status Code       # response status, used to diagnose failures
    '''
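    # Sketch of reading those fields off the response object; the cookie
    # comes from the session's jar rather than the response headers
    print(resp.status_code)                      # Status Code
    print(resp.headers.get('Connection'))        # keep-alive => persistent connection
    print(resp.headers.get('Content-Encoding'))  # e.g. gzip
    print(resp.headers.get('Content-Type'))      # decides whether to decode as text
    print(session.cookies.get_dict())            # session/identity cookies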
    # Session.get() only accepts keyword arguments after the URL, so the
    # headers must be passed as headers=...
    resp2 = session.get(url2, headers=head)
    return resp.status_code, resp2, session
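# Usage sketch for http_take(); the login URL, follow-up URL and credentials
# below are placeholders (assumptions), not endpoints from this project.
login_url = 'http://example.com/login'      # hypothetical
page_url = 'http://example.com/protected'   # hypothetical
status, page_resp, sess = http_take(login_url, page_url,
                                    body={'username': 'xxx', 'password': 'xxx'})
print(status, page_resp.status_code)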
resp = requests.get(url)
# chardet infers the charset from the raw response bytes, which is more
# reliable than trusting the server's declared encoding
resp.encoding = chardet.detect(resp.content)['encoding']
cookie = resp.cookies
print(cookie)
def get_cookies():
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4515.159 Safari/537.36'
    }
    url = 'http://ggzyjy.xzfwzx.putian.gov.cn/ptsq/005002/005002003/005002003001/subPageright.html'
    session = requests.session()
    s = session.post(url, headers=headers)
    # get_encodings_from_content() returns a list; only set the encoding
    # when the page actually declares one
    encodings = requests.utils.get_encodings_from_content(s.text)
    if encodings:
        s.encoding = encodings[0]
    cookie = session.cookies
    a = cookie.get_dict()
    return a, s
print(get_cookies())
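# Sketch: reuse the harvested cookies on a plain requests call. The target
# path below is a placeholder, not a page taken from the original script.
cookies_dict, _ = get_cookies()
follow_up = requests.get('http://ggzyjy.xzfwzx.putian.gov.cn/',  # hypothetical
                         cookies=cookies_dict,
                         headers={'User-Agent': 'Mozilla/5.0'})
print(follow_up.status_code)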