I. GET requests with the requests module
1. Example without parameters
import requests
ret = requests.get('http://www.autohome.com.cn/news/')
ret.encoding = 'gbk'  # switch to GBK so the Chinese page decodes correctly
print(ret.url)   # print the request URL
print(ret.text)  # print the response body
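An optional sketch against the same URL: instead of hard-coding 'gbk', requests can guess the encoding from the response body via apparent_encoding, and the response object also exposes the status code and headers.
import requests

ret = requests.get('http://www.autohome.com.cn/news/')
ret.encoding = ret.apparent_encoding        # let requests guess the encoding instead of hard-coding 'gbk'
print(ret.status_code)                      # HTTP status code
print(ret.headers.get('Content-Type'))      # response headers are a dict-like object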
2. Example with parameters
import requests
params_dic = {
    'hostid': '10107',
    'elementid': '23',
    'screen': '1',
    'name': '10.28.142.240'
}
ret = requests.get(url='http://zabbix.test.com/screens.php', params=params_dic)
print(ret.url)
print(ret.text)
II. POST requests with the requests module
1. Basic POST usage
import requests
data = {
    'hostid': '10107',
    'elementid': '23',
    'screen': '1',
    'name': '10.28.142.240'
}
ret = requests.post('http://zabbix.test.com/screens.php', data=data)
print(ret.text)
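A quick sketch of data= versus json= (the public echo service httpbin.org is used purely for illustration): data= sends a form-encoded body, while json= serializes the dict to JSON and sets the Content-Type header accordingly.
import requests

r1 = requests.post('http://httpbin.org/post', data={'a': '1'})  # form-encoded body
r2 = requests.post('http://httpbin.org/post', json={'a': 1})    # JSON body
print(r1.request.headers['Content-Type'])  # application/x-www-form-urlencoded
print(r2.request.headers['Content-Type'])  # application/json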
2. POST with form data (Zabbix login)
import requests
from bs4 import BeautifulSoup
url = 'http://zabbix.kuaikuaidai.com:8888/index.php'
ret = requests.get(url)
soup = BeautifulSoup(ret.text, 'html.parser')
tag = soup.find(attrs={'name': 'sid'})            # hidden form field: session id
sid = tag.attrs['value']
tag1 = soup.find(attrs={'name': 'form_refresh'})  # hidden form field: form refresh counter
form_refresh = tag1.attrs['value']
data = {
    'sid': sid,
    'form_refresh': form_refresh,
    'name': 'admin',
    'password': 'kkdai123',
    'autologin': '1',
    'enter': 'Sign in'
}
headers = {'content-type': 'text/html'}
zabbix_ret = requests.post(url, data=data, headers=headers)
print(zabbix_ret.status_code)
print(zabbix_ret.text)
3. Other request methods
requests.get(url, params=None, **kwargs)
requests.post(url, data=None, json=None, **kwargs)
requests.put(url, data=None, **kwargs)
requests.head(url, **kwargs)
requests.delete(url, **kwargs)
requests.patch(url, data=None, **kwargs)
requests.options(url, **kwargs)
Note: all of the methods above are built on requests.request(method, url, **kwargs).
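A minimal sketch confirming the note above (httpbin.org stands in as a test URL): the helpers just fill in the HTTP verb and delegate to requests.request.
import requests

r1 = requests.get('http://httpbin.org/get', params={'k': 'v'})             # helper form
r2 = requests.request('GET', 'http://httpbin.org/get', params={'k': 'v'})  # equivalent underlying call
print(r1.url == r2.url)  # True: both build the same request URL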
III. Parsing HTML documents with the BeautifulSoup module
import requests
from bs4 import BeautifulSoup

# fetch the Autohome news page
auto_home = requests.get(url='http://www.autohome.com.cn/news/')
auto_home.encoding = 'gbk'
auto_home_news = auto_home.text
soup = BeautifulSoup(auto_home_news, 'html.parser')  # returns the parsed document tree
tag = soup.find(name='h3')  # find the first h3 tag
print(tag.name)  # print the tag name
1. name, the tag's name
tag = soup.find(name='h3')
name = tag.name
tag.name = 'h4'  # set the tag name
2. attrs, the tag's attributes
print(tag.attrs)  # get all attributes of the tag
tag.attrs['id'] = '123'  # set a tag attribute
3. children, all direct child nodes
tag = soup.find(name='body')
v = tag.children
print(v)
4. descendants, all descendant nodes
tag = soup.find(name='body')
v = tag.descendants
print(v)
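A self-contained sketch (a tiny inline fragment rather than the Autohome page) showing the difference: children yields only direct child nodes, while descendants walks the entire subtree.
from bs4 import BeautifulSoup
from bs4.element import Tag

soup = BeautifulSoup('<body><div><a>link</a></div><p>text</p></body>', 'html.parser')
body = soup.find(name='body')
print([c.name for c in body.children if isinstance(c, Tag)])     # ['div', 'p']
print([d.name for d in body.descendants if isinstance(d, Tag)])  # ['div', 'a', 'p']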
5. clear, empty the tag of all its descendants
tag = soup.find(name='body')
tag.clear()
print(tag)
6. decompose, recursively remove and destroy the tag and everything inside it
tag = soup.find(name='body')
tag.decompose()
print(tag)
7. extract, remove the tag from the tree and return it
tag = soup.find(name='body')
v = tag.extract()
print(v)
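To contrast the three removal methods above, a small sketch on a throwaway fragment: clear empties the tag but keeps it, decompose destroys the tag and its contents, extract removes the tag but hands it back.
from bs4 import BeautifulSoup

html = '<div><span>hi</span></div>'
soup = BeautifulSoup(html, 'html.parser')
soup.find('span').clear()            # <div><span></span></div>: tag kept, contents gone
print(soup)
soup = BeautifulSoup(html, 'html.parser')
soup.find('span').decompose()        # <div></div>: tag and contents destroyed
print(soup)
soup = BeautifulSoup(html, 'html.parser')
span = soup.find('span').extract()   # <div></div>, and the removed tag is returned for reuse
print(soup, span)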
8. decode, serialize to a string (including the current tag); decode_contents (excluding the current tag)
tag = soup.find(name='body')
v = tag.decode()
v1 = tag.decode_contents()
print(v)
9. encode, serialize to bytes (including the current tag); encode_contents (excluding the current tag)
tag = soup.find(name='body')
v = tag.encode()
v1 = tag.encode_contents()
print(v)
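A quick sketch of all four serializers on an inline fragment:
from bs4 import BeautifulSoup

div = BeautifulSoup('<div><b>x</b></div>', 'html.parser').find('div')
print(div.decode())           # '<div><b>x</b></div>' (str, tag included)
print(div.decode_contents())  # '<b>x</b>' (str, children only)
print(div.encode())           # b'<div><b>x</b></div>' (bytes, tag included)
print(div.encode_contents())  # b'<b>x</b>' (bytes, children only)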
10. find, get the first matching tag
tag = soup.find(name='body')
11. find_all, get all matching tags
tag = soup.find_all(name='h3')
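find and find_all accept more filters than just the tag name; a sketch of the common ones on a throwaway fragment:
from bs4 import BeautifulSoup

soup = BeautifulSoup('<a id="x">1</a><a>2</a><b>3</b>', 'html.parser')
print(soup.find_all('a', limit=1))       # stop after the first match
print(soup.find_all(attrs={'id': 'x'}))  # filter by attributes
print(soup.find_all(['a', 'b']))         # match any name in a list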
12. has_attr, check whether the tag has a given attribute
tag = soup.find(name='h3')
v = tag.has_attr('id')
print(v)
13. get_text, get the tag's text content
tag = soup.find(name='h3')
v = tag.get_text()
print(v)
14. is_empty_element, whether the tag is an empty or self-closing tag (such as <br/>)
# tag = soup.find('br')
# v = tag.is_empty_element
# print(v)
15. Navigation attributes for related tags (a runnable sketch follows the list)
# soup.next
# soup.next_element
# soup.next_elements
# soup.next_sibling
# soup.next_siblings
#
# tag.previous
# tag.previous_element
# tag.previous_elements
# tag.previous_sibling
# tag.previous_siblings
#
# tag.parent
# tag.parents
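A small sketch of these navigation attributes on a tiny inline fragment:
from bs4 import BeautifulSoup

soup = BeautifulSoup('<div><p>one</p><p>two</p></div>', 'html.parser')
first_p = soup.find('p')
print(first_p.next_sibling)      # <p>two</p> (no whitespace between them here)
print(first_p.previous_sibling)  # None: nothing before the first <p>
print(first_p.parent.name)       # div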
16. Search methods for related tags (a sketch follows the list)
# tag.find_next(...)
# tag.find_all_next(...)
# tag.find_next_sibling(...)
# tag.find_next_siblings(...)
# tag.find_previous(...)
# tag.find_all_previous(...)
# tag.find_previous_sibling(...)
# tag.find_previous_siblings(...)
# tag.find_parent(...)
# tag.find_parents(...)
# these take the same parameters as find_all
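A matching sketch for two of these search variants, again on a throwaway fragment:
from bs4 import BeautifulSoup

soup = BeautifulSoup('<div><p id="a">one</p><p id="b">two</p></div>', 'html.parser')
tag = soup.find('p', attrs={'id': 'a'})
print(tag.find_next_sibling('p'))   # <p id="b">two</p>
print(tag.find_parent('div').name)  # div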
17. string, get or set the tag's content
tag = soup.find(name='h3')
tag.string = 'new content'  # set the tag's content
v = tag.string
print(v)
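One caveat worth showing: .string returns None when a tag has more than one child node; get_text collects all nested text instead.
from bs4 import BeautifulSoup

p = BeautifulSoup('<p>one<b>two</b></p>', 'html.parser').find('p')
print(p.string)    # None: the tag has more than one child node
print(p.get_text())  # 'onetwo': joins all nested text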
18. append, append a tag at the end of the current tag's contents
# tag = soup.find('body')
# tag.append(soup.find('a'))
# print(soup)
#
# from bs4.element import Tag
# obj = Tag(name='i',attrs={'id': 'it'})
# obj.string = 'I am new here'
# tag = soup.find('body')
# tag.append(obj)
# print(soup)
19. insert, insert a tag at a given position inside the current tag
# from bs4.element import Tag
# obj = Tag(name='i', attrs={'id': 'it'})
# obj.string = 'I am new here'
# tag = soup.find('body')
# tag.insert(2, obj)
# print(soup)
Logging in to Chouti (dig.chouti.com) and upvoting a post
Method 1: Chouti ties authorization to the gpsd cookie issued on the first GET, so the vote request must carry that original cookie rather than the one returned by the login POST.
import requests
i1 = requests.get(url='http://dig.chouti.com/')
i1_cookies = i1.cookies.get_dict()
#print(i1_cookies)
i2 = requests.post(
    url="http://dig.chouti.com/login",
    data={
        'phone': '86xxxxxxxxx',
        'password': 'xxxxxxx',
        'oneMonth': "",
    },
    cookies=i1_cookies
)
gpsd = i1_cookies['gpsd']
i3 = requests.post(
    url="http://dig.chouti.com/link/vote?linksId=14723416",
    cookies={'gpsd': gpsd}
)
print(i3.text)
Method 2: a Session keeps cookies across requests automatically, so no manual cookie handling is needed.
import requests
headers = {
    "Host": "dig.chouti.com",
    "Referer": "http://dig.chouti.com/",
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0',
    'X-Requested-With': 'XMLHttpRequest'
}
session = requests.Session()
i1 = session.get(url="http://dig.chouti.com/", headers=headers)
i2 = session.post(
    url="http://dig.chouti.com/login",
    data={
        'phone': '86xxxxxxxxxxx',
        'password': 'xxxxxxx',
        'oneMonth': ""
    },
    headers=headers
)
i3 = session.post(
    url="http://dig.chouti.com/link/vote?linksId=14723416",
    headers=headers
)
print(i3.text)
Logging in to a GitHub account and listing its repositories
Method 1
import requests
from bs4 import BeautifulSoup
i1 = requests.get('https://github.com/login')
soup1 = BeautifulSoup(i1.text, features='lxml')
tag = soup1.find(name='input', attrs={'name': 'authenticity_token'})  # hidden CSRF token field
authenticity_token = tag.get('value')
c1 = i1.cookies.get_dict()
i1.close()
form_data = {
    "authenticity_token": authenticity_token,
    'utf8': "",
    'commit': "Sign in",
    'login': '758109577@qq.com',
    'password': 'xxxxxxxx'
}
i2 = requests.post('https://github.com/session', data=form_data, cookies=c1)
c2 = i2.cookies.get_dict()
c1.update(c2)
i3 = requests.get('https://github.com/settings/repositories', cookies=c1)
soup3 = BeautifulSoup(i3.text, features='lxml')
list_group = soup3.find(name='div', class_='listgroup')
from bs4.element import Tag
for child in list_group.children:
    if isinstance(child, Tag):
        project_tag = child.find(name='a', class_='mr-1')
        size_tag = child.find(name='small')
        temp = "project: %s (%s); path: %s" % (project_tag.string, size_tag.string, project_tag.get('href'))
        print(temp)
Method 2
import requests
from bs4 import BeautifulSoup
session = requests.Session()
i1 = session.get('https://github.com/login')
soup1 = BeautifulSoup(i1.text, features='lxml')
tag = soup1.find(name='input', attrs={'name': 'authenticity_token'})  # hidden CSRF token field
authenticity_token = tag.get('value')
c1 = i1.cookies.get_dict()
i1.close()
form_data = {
    "authenticity_token": authenticity_token,
    'utf8': "",
    'commit': "Sign in",
    'login': '758109577@qq.com',
    'password': 'xxxxxxxxxx'
}
i2 = session.post('https://github.com/session', data=form_data)
c2 = i2.cookies.get_dict()
c1.update(c2)
i3 = session.get('https://github.com/settings/repositories')
soup3 = BeautifulSoup(i3.text, features='lxml')
list_group = soup3.find(name='div', class_='listgroup')
from bs4.element import Tag
for child in list_group.children:
    if isinstance(child, Tag):
        project_tag = child.find(name='a', class_='mr-1')
        size_tag = child.find(name='small')
        temp = "project: %s (%s); path: %s" % (project_tag.string, size_tag.string, project_tag.get('href'))
        print(temp)
Zhihu login
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import time
import requests
from bs4 import BeautifulSoup
session = requests.Session()
i1 = session.get(
    url='https://www.zhihu.com/#signin',
    headers={
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36',
    }
)
soup1 = BeautifulSoup(i1.text, 'lxml')
xsrf_tag = soup1.find(name='input', attrs={'name': '_xsrf'})  # hidden CSRF token field
xsrf = xsrf_tag.get('value')
current_time = time.time()
i2 = session.get(
    url='https://www.zhihu.com/captcha.gif',
    params={'r': current_time, 'type': 'login'},
    headers={
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36',
    })
with open('zhihu.gif', 'wb') as f:
    f.write(i2.content)
captcha = input('Open zhihu.gif, view the image, and enter the captcha: ')
form_data = {
    "_xsrf": xsrf,
    'password': 'xxxxxx',
    "captcha": captcha,
    'email': '7xxxxx@qq.com'
}
i3 = session.post(
    url='https://www.zhihu.com/login/email',
    data=form_data,
    headers={
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36',
    }
)
i4 = session.get(
    url='https://www.zhihu.com/settings/profile',
    headers={
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36',
    }
)
soup4 = BeautifulSoup(i4.text, 'lxml')
tag = soup4.find(id='rename-section')
nick_name = tag.find('span', class_='name').string
print(nick_name)
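Since every call above repeats the same User-Agent header, note that a Session can also carry default headers; a minimal sketch:
import requests

session = requests.Session()
session.headers.update({  # set once; sent with every request made through this session
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36',
})
# now session.get('https://www.zhihu.com/#signin') needs no headers= argument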
Reposted from: https://blog.51cto.com/haoyonghui/1972754