urllib 学习记录

# Study note 1: fetch a page's source with urllib.

import urllib.request

# 1. The address we want to request
url = "http://www.baidu.com/"

# 2. Open the URL, simulating a browser request to the server
response = urllib.request.urlopen(url)

# 3. read() returns the body as raw bytes; decode() converts those
#    UTF-8 bytes into a str (bytes -> str is "decoding")
content = response.read().decode('utf-8')

# 4. Show the page source
print(content)
url = 'https://www.baidu.com'
# https://www.baidu.com/s?tn=49055317_28_hao_pg&ie=utf-8&wd=%E5%91%A8%E6%9D%B0%E4%BC%A6
# Anatomy of a URL:
# http/https        www.baidu.com     80/443      s       wd  =  周杰伦        #
#   scheme               host           port      path        query         fragment
# Common default ports:
# http 80
# https 443
# mysql 3306
# oracle 1521
# redis 6379
# mongodb 27017

# UA (User-Agent): identifies the requesting browser / OS to the server
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36"
}

# urlopen() cannot take a headers dict directly, so we build a Request
# object instead ("request customization").
# NOTE: Request's positional order is (url, data, headers) — data sits
# between them — so headers must be passed as a keyword argument.
request = urllib.request.Request(url=url, headers=headers)

response = urllib.request.urlopen(request)
# Fix: use 'utf-8' (was 'utf8') for consistency with every other
# decode() call in this file; both spellings name the same codec.
content = response.read().decode('utf-8')
print(content)

get请求的quote方法

# GET request: the quote method

# https://www.baidu.com/s?tn=49055317_28_hao_pg&ie=utf-8&wd=%E5%91%A8%E6%9D%B0%E4%BC%A6
# %E5%91%A8%E6%9D%B0%E6%B0%B4 -- more precisely, %E5%91%A8%E6%9D%B0%E4%BC%A6 is
# the percent-encoded (UTF-8) form of 周杰伦

# Goal: fetch the source of
# https://www.baidu.com/s?tn=49055317_28_hao_pg&ie=utf-8&wd=周杰伦
url = 'https://www.baidu.com/s?tn=49055317_28_hao_pg&ie=utf-8&wd='

# Customizing the request object is the first line of defence against
# anti-scraping checks
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36'
}

import urllib.parse

# quote() percent-encodes the non-ASCII characters using UTF-8
name = urllib.parse.quote('周杰伦')
print(name)

# Append the encoded keyword to the base URL
url += name

# Build the Request carrying our headers
request = urllib.request.Request(url=url, headers=headers)

# Send the request as a browser would
response = urllib.request.urlopen(request)

# Decode the byte response into a string
content = response.read().decode('utf-8')

print(content)

对字典数据进行 urlencode 编码：将字典形式的多个参数转换为 URL 查询字符串（注意：处理的是 Python 字典，而非 JSON 数据）

# urlencode use case: encoding several query parameters at once
import urllib.parse

# dict(...) builds the same mapping a literal would
data = dict(wd='周杰伦', sex='男')

# urlencode() percent-encodes every key=value pair and joins them with '&'
result = urllib.parse.urlencode(data)
print(result)

get请求的urlencode方法使用实例

# GET request: urlencode in practice

import urllib.parse
import urllib.request

base_url = "https://www.baidu.com/s?"

params = {
    'wd': '周杰伦',
    'sex': '男',
    'location': '中国台湾省'
}

# Encode every parameter pair into a single query string
query_string = urllib.parse.urlencode(params)

# Full resource path = base URL + encoded query
url = base_url + query_string

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36'
}

# Build a Request so the headers are sent with the request
request = urllib.request.Request(url=url, headers=headers)

# Issue the request like a browser would
response = urllib.request.urlopen(request)

# Read and decode the page source
content = response.read().decode('utf-8')

print(content)

urllib_post请求方法使用

获取百度翻译接口

（原文此处为空的代码占位符；具体代码见下文 “urllib post请求百度翻译” 示例）

urllib post请求百度翻译

# _*_ coding : utf-8 _*_
# @Time : 2023/1/11 16:28
# @Author : 李阶熊
# @Project : pythonProject
#
# POST request example: Baidu Translate's "sug" (suggestion) endpoint.
import json
import urllib.request
import urllib.parse

url = 'https://fanyi.baidu.com/sug'

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36'
}

data = {
    'kw': '我爱你'
}

# POST parameters must be urlencoded AND then encoded to bytes:
# data = urllib.parse.urlencode(data).encode('utf-8')
data = urllib.parse.urlencode(data).encode('utf-8')

# Unlike GET, POST parameters are not appended to the URL — they are
# passed to Request via the data argument.
request = urllib.request.Request(url=url, data=data, headers=headers)

# Send the request as a browser would
response = urllib.request.urlopen(request)

# Read the response body as a str, then parse the JSON into a dict
content = response.read().decode('utf-8')
print(type(content))
content = json.loads(content)
print(type(content))
print(content)

urllib post请求百度翻译之详细翻译

# _*_ coding : utf-8 _*_
# @Time : 2023/1/18 17:13
# @Author : 李阶熊
# @File : urllib_post request to Baidu Translate (detailed translation endpoint)
# @Project : pythonProject
#
# POST request to Baidu Translate's v2transapi endpoint. Unlike the /sug
# example above, this endpoint checks the Cookie header (and the
# sign/token form fields), so the request fails without a valid session.
import urllib.request
import urllib.parse

url = 'https://fanyi.baidu.com/v2transapi?from=zh&to=en'

# Headers captured from browser DevTools. Only Cookie is left active;
# the rest are kept commented out for reference — presumably Cookie
# alone was sufficient for this endpoint (TODO confirm, sessions expire).
headers = {
    # 'Accept': '*/*',
    # 'Accept-Encoding': 'gzip, deflate, br',
    # 'Accept-Language': 'zh-CN,zh;q=0.9',
    # 'Acs-Token': '1674029128306_1674033085914_gNF6xEf4eCcQZnLEAQDq4dA12UqNF/gQddV4G64w+VHdFqNXJWTuTGiPN2mIUfYwJeyEQqETIfTkSq1/gnEuDj6SsEPIZSLS4CFyhwsbR9CuWdJF3OTUwVWSKSTFuI+ojk2YGE72wY0tl72XCQ9iqIaiDmKwQHwN1jHRp0UHm0zKkNd24djmoJhjJ6uCOUcYH2U49zkk/VviyDiWswm3q/fx2s9n45C7RRsX5n6fc4e45FVH4/RX3ptG71pa8z28C5+/U/e9vb8eD4svR8cxUAXkxdoqagaxTTgw+O1WMBOEqGBuHCwydqAFiyMq7hARTWXXPyCdNz2KB652DG3C7w==',
    # 'Connection': 'keep-alive',
    # 'Content-Length': '153',
    # 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
    'Cookie': 'BIDUPSID=EEB732549A53EA0043F3E08B1641A2BD; PSTM=1668129337; BAIDUID=EEB732549A53EA00E6279CEFC52A05E2:FG=1; BDUSS=2JmblRGRk1mfmNMa2JJY0ZDNnRIcGxoTmYzaWNPVjFaOG13YUFJR3Zqdk1qdHhqSVFBQUFBJCQAAAAAAAAAAAEAAACn7Ch4Vm9saXRpb26yu8Lkz6YAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMwBtWPMAbVjT; BDUSS_BFESS=2JmblRGRk1mfmNMa2JJY0ZDNnRIcGxoTmYzaWNPVjFaOG13YUFJR3Zqdk1qdHhqSVFBQUFBJCQAAAAAAAAAAAEAAACn7Ch4Vm9saXRpb26yu8Lkz6YAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAMwBtWPMAbVjT; APPGUIDE_10_0_2=1; REALTIME_TRANS_SWITCH=1; FANYI_WORD_SWITCH=1; HISTORY_SWITCH=1; SOUND_SPD_SWITCH=1; SOUND_PREFER_SWITCH=1; BAIDUID_BFESS=EEB732549A53EA00E6279CEFC52A05E2:FG=1; BA_HECTOR=85818l0k0480800h00052l671hsen7l1l; ZFY=soomrl29iucGWlulnuViVnKN4R2AYdRUqwhqjQGdu5E:C; BDORZ=FFFB88E999055A3F8A630C64834BD6D0; BDRCVFR[GHvuHTY4eos]=thN3igd4QH3uhuMuLf8mvqV; delPer=0; PSINO=6; H_PS_PSSID=; Hm_lvt_64ecd82404c51e03dc91cb9e8c025574=1673425168,1674032887; Hm_lpvt_64ecd82404c51e03dc91cb9e8c025574=1674032919; ab_sr=1.0.1_YTFiNGRkZWYzMGM3NmMwOTI4OWUwYTc0MDNlNzU2MzE1MDQzNDBkNDYyYTkzYzQzZGQ4MmZhYWU0MWU2MWFlMTNmNWEzMTVkMzI5YjEyMjA5YzE5MWEwOWI1MTZmYWZjMzczY2IzNjNmZTU3ZDZhMzg0ZGY4ZmJkNTYxNjQ1MjkwYjA1NmMyODNjODhmNzFmNmY1Mzk4NmQwOTkyMzc1M2JlZmQxNmMyYzQ5NWFmMDg1YjNhZTkxNTc1MzA2NTFi; Hm_lvt_246a5e7d3670cfba258184e42d902b31=1674032921; Hm_lpvt_246a5e7d3670cfba258184e42d902b31=1674032922',
    # 'Host': 'fanyi.baidu.com',
    # 'Origin': 'https://fanyi.baidu.com',
    # 'Referer': 'https://fanyi.baidu.com/translate',
    # 'sec-ch-ua': '"Not_A Brand";v="99", "Google Chrome";v="109", "Chromium";v="109"',
    # 'sec-ch-ua-mobile': '?0',
    # 'sec-ch-ua-platform': '"Windows"',
    # 'Sec-Fetch-Dest': 'empty',
    # 'Sec-Fetch-Mode': 'cors',
    # 'Sec-Fetch-Site': 'same-origin',
    # 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
    # 'X-Requested-With': 'XMLHttpRequest',
}

# Form fields captured from the browser.
# NOTE(review): 'sign' and 'token' look like session-specific
# anti-scraping values tied to the Cookie above — confirm before reuse.
data = {
    'from': 'zh',
    'to': 'en',
    'query': '我是谁',
    'transtype': 'realtime',
    'simple_means_flag': 3,
    'sign': '325815.7046',
    'token': '3255152326bd7f84caa845278f282c2c',
    'domain': 'common'
}

# POST parameters must be urlencoded AND then encoded to bytes
data = urllib.parse.urlencode(data).encode('utf-8')

# Build the customized Request (passing data makes it a POST)
request = urllib.request.Request(url=url, data=data, headers=headers)

# Send the request as a browser would
response = urllib.request.urlopen(request)

# Read and decode the JSON response body
content = response.read().decode('utf-8')
print(content)
import json

# Parse the JSON string into a Python dict
obj = json.loads(content)
print(obj)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值