给我爬!10-22

该博客介绍了Python网络爬虫的基础操作,包括使用requests库发送GET和POST请求,处理响应状态码、编码问题,下载网页内容和图片,以及设置User-Agent和SSL证书。同时,展示了如何处理请求参数、表单数据,以及使用cookies进行会话管理。

import urllib3
import requests
import json
def write_to_file(filename, html):
    """Write ``html`` to ``filename`` as UTF-8 text, replacing any existing file.

    Args:
        filename: Path of the file to create or overwrite.
        html: Text content to write.
    """
    # The context manager guarantees the handle is closed, even if the
    # write raises.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(html)

def load_page_with_urllib(filename, res):
    """Decode a response body as UTF-8, print it, and save it to ``filename``.

    Args:
        filename: Destination path, forwarded to ``write_to_file``.
        res: Object whose ``read()`` returns the body as bytes
            (presumably a urllib/urllib3 response -- confirm against callers).
    """
    html = res.read().decode('utf-8')
    print(html)
    write_to_file(filename, html)

def load_page_with_requests(filename, res):
    """Decode a ``requests`` response body as UTF-8, print it, and save it.

    Args:
        filename: Destination path, forwarded to ``write_to_file``.
        res: Response object exposing the raw body bytes as ``content``.
    """
    html = res.content.decode('utf-8')
    print(html)
    write_to_file(filename, html)

def send_url():
    """GET http://httpbin.org/get and print the main response attributes.

    Prints the status code, encoding, headers and text, saves the body to
    ``send_url.html``, then prints the ``Host`` header found in the JSON body.
    """
    url = "http://httpbin.org/get"
    # A timeout keeps the demo from hanging forever on an unresponsive server.
    res = requests.get(url, timeout=10)
    print(res.status_code)
    print(res.encoding)
    print(res.headers)
    print(res.text)
    load_page_with_requests('send_url.html', res)
    print("Host:" + res.json()['headers']['Host'])

def send_url_with_pic():
    """Download an image over HTTPS and save its raw bytes to ``kkk.png``.

    Prints the response status code before writing the file.
    """
    url = "https://www.baidu.com/img/PCtm_d9c8750bed0b3c7d089fa7d55720d6cf.png"
    # A timeout keeps the demo from hanging forever on an unresponsive server.
    res = requests.get(url, timeout=10)
    print(res.status_code)
    # Binary mode: the body is image data, not text. The ``with`` block
    # closes the file, so no explicit close() is needed.
    with open('kkk.png', 'wb') as f:
        f.write(res.content)

def send_with_text_encoding():
    """Show how ``res.encoding`` affects the decoded ``res.text``.

    Fetches http://www.google.cn/ and prints the Content-Type header, the
    encoding requests selected, the encoding detected from the body
    (``apparent_encoding``) and the text. It then switches ``res.encoding``
    to the detected encoding and prints the text again for comparison.
    """
    url = "http://www.google.cn/"
    # A timeout keeps the demo from hanging forever on an unresponsive server.
    res = requests.get(url, timeout=10)
    print(res.headers.get("Content-Type"))
    print(res.encoding)
    print(res.apparent_encoding)
    print(res.text)
    # Re-decode the same body using the encoding detected from its content.
    res.encoding = res.apparent_encoding
    print(res.text)

def send_with_text_encoding_baidu():
    """Fetch https://www.baidu.com/ with a browser User-Agent and print encodings.

    Prints the Content-Type header, the encoding requests selected, the
    encoding detected from the body, and the decoded text.
    """
    url = "https://www.baidu.com/"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36"
    }
    # A timeout keeps the demo from hanging forever on an unresponsive server.
    res = requests.get(url, headers=headers, timeout=10)
    print(res.headers.get("Content-Type"))
    print(res.encoding)
    print(res.apparent_encoding)
    print(res.text)

def send_get_with_param():
    """Run a Bing search via GET query parameters and save the result page.

    The query is passed through ``params`` so requests URL-encodes it; the
    response body is printed and written to ``send_get_with_search.html``.
    """
    url = "http://cn.bing.com/search"
    word = {"q": "Python网络爬虫"}
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36"
    }
    # A timeout keeps the demo from hanging forever on an unresponsive server.
    res = requests.get(url, params=word, headers=headers, timeout=10)
    load_page_with_requests('send_get_with_search.html', res)

def send_post_with_form():
    """POST a form to the Youdao translate endpoint and print the translation.

    Prints the status code and raw response text, then the source and
    translated strings taken from the first entry of ``translateResult``
    in the JSON body.
    """
    url = "http://fanyi.youdao.com/translate?smartresult=dic&client=fanyideskweb"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36"
    }
    # NOTE(review): salt/sign/lts/bv look like values captured from a single
    # browser session -- confirm whether the server still accepts them.
    formdata = {
        "i": "我觉得你人不错",
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": "16348709989680",
        "sign": "dae5fd6b6f4221ca9cda39bb46c9bfec",
        "lts": "1634870998968",
        "bv": "7b07590bbf1761eedb1ff6dbfac3c1f0",
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_REALTlME",
    }
    # ``data=`` sends the dict form-encoded in the request body.
    # A timeout keeps the demo from hanging forever on an unresponsive server.
    res = requests.post(url, data=formdata, headers=headers, timeout=10)
    print(res.status_code)
    print(res.text)
    # Parse the JSON body once and reuse the first translation entry.
    first = res.json()['translateResult'][0][0]
    print("翻译前:" + first['src'])
    print("翻译后:" + first['tgt'])

def send_with_ssl(ca_bundle="D:/temp.txt"):
    """GET an HTTPS page while verifying the server against a given CA bundle.

    Two other variants can be tried by hand for comparison:
    ``requests.get(url)`` uses the default certificate verification, and
    ``requests.get(url, verify=False)`` disables verification entirely.

    Args:
        ca_bundle: Path handed to ``verify=``; requests treats a string
            there as the location of a CA bundle to trust.
    """
    url = "https://kyfw.12306.cn/otn/leftTicket/init"
    # A timeout keeps the demo from hanging forever on an unresponsive server.
    res003 = requests.get(url, verify=ca_bundle, timeout=10)
    print(res003.status_code)
    print(res003.text)

def send_with_cookies():
    """Send cookies to http://httpbin.org/cookies in two different ways.

    The first request carries the cookies as a raw ``Cookie`` header; the
    second passes them through the ``cookies=`` argument. The status code
    and body of each response are printed.
    """
    url = "http://httpbin.org/cookies"
    user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36"

    # Variant 1: raw Cookie header, written as "name=value; name=value".
    # Spaces inside names or around "=" would produce malformed cookies.
    headers = {
        "User-Agent": user_agent,
        "Cookie": "OUTFOX_SEARCH_USER_ID=-1942405773@10.108.160.101; JSESSIONID=aaaUTOVsl72cZgmRt4LYx; OUTFOX_SEARCH_USER_ID_NCOO=710165529.6036291; fanyi-ad-id=118539; fanyi-ad-closed=1; ___rl__test__cookies=1634870989954",
    }
    # A timeout keeps the demo from hanging forever on an unresponsive server.
    res001 = requests.get(url, headers=headers, timeout=10)
    print(res001.status_code)
    print(res001.text)

    # Variant 2: ``cookies=`` takes one dict entry per cookie (name -> value).
    # Passing the whole header string under a single "Cookie" key would
    # send one cookie literally named "Cookie".
    headers = {"User-Agent": user_agent}
    cookies = {
        "JSESSIONID": "aaaUTOVsl72cZgmRt4LYx",
        "OUTFOX_SEARCH_USER_ID_NCOO": "710165529.6036291",
        "fanyi-ad-id": "118539",
        "fanyi-ad-closed": "1",
        "___rl__test__cookies": "1634870989954",
    }
    res002 = requests.get(url, headers=headers, cookies=cookies, timeout=10)
    print(res002.status_code)
    print(res002.text)

# Run every demo in order when the file is executed as a script.
if __name__ == '__main__':
    send_url()
    send_url_with_pic()
    send_with_text_encoding()
    send_with_text_encoding_baidu()
    send_get_with_param()
    send_post_with_form()
    # send_with_ssl() stays disabled: it passes a local file path to verify=.
    # send_with_ssl()
    send_with_cookies()

<ul class="thrui"> <li> <div class="th200">2024-01-01 星期一 </div> <div class="th140">23℃</div> <div class="th140">14℃</div> <div class="th140">多云~阴</div> <div class="th140">东南风 2级</div> <!-- <div class="th150"></div> --> </li> <li> <div class="th200">2024-01-02 星期二 </div> <div class="th140">22℃</div> <div class="th140">14℃</div> <div class="th140">阴~多云</div> <div class="th140">东南风 2级</div> <!-- <div class="th150"></div> --> </li> <li> <div class="th200">2024-01-03 星期三 </div> <div class="th140">23℃</div> <div class="th140">14℃</div> <div class="th140">多云</div> <div class="th140">东北风 3级</div> <!-- <div class="th150"></div> --> </li> <li> <div class="th200">2024-01-04 星期四 </div> <div class="th140">23℃</div> <div class="th140">15℃</div> <div class="th140">多云</div> <div class="th140">东南风 1级</div> <!-- <div class="th150"></div> --> </li> <li> <div class="th200">2024-01-05 星期五 </div> <div class="th140">24℃</div> <div class="th140">16℃</div> <div class="th140">多云</div> <div class="th140">东南风 1级</div> <!-- <div class="th150"></div> --> </li> <li> <div class="th200">2024-01-06 星期六 </div> <div class="th140">25℃</div> <div class="th140">14℃</div> <div class="th140">多云~阴</div> <div class="th140">南风 1级</div> <!-- <div class="th150"></div> --> </li> <li> <div class="th200">2024-01-07 星期日 </div> <div class="th140">18℃</div> <div class="th140">14℃</div> <div class="th140">阴</div> <div class="th140">东南风 3级</div> <!-- <div class="th150"></div> --> </li> <li> <div class="th200">2024-01-08 星期一 </div> <div class="th140">23℃</div> <div class="th140">17℃</div> <div class="th140">晴~多云</div> <div class="th140">东南风 2级</div> <!-- <div class="th150"></div> --> </li> <li> <div class="th200">2024-01-09 星期二 </div> <div class="th140">26℃</div> <div class="th140">16℃</div> <div class="th140">阴~多云</div> <div class="th140">西北风 1级</div> <!-- <div class="th150"></div> --> </li> <li> <div class="th200">2024-01-10 星期三 </div> <div class="th140">24℃</div> <div class="th140">15℃</div> <div 
class="th140">晴</div> <div class="th140">东北风 3级</div> <!-- <div class="th150"></div> --> </li> <div class="lishidesc2" style="display: none;">查看更多<img src="/static/images/ckgd.png" alt=""></div> <li><div class="th200">2024-01-11 星期四</div><div class="th140">23℃</div><div class="th140">16℃</div><div class="th140">多云</div><div class="th140">南风 1级</div></li><li><div class="th200">2024-01-12 星期五</div><div class="th140">22℃</div><div class="th140">16℃</div><div class="th140">阴~多云</div><div class="th140">南风 1级</div></li><li><div class="th200">2024-01-13 星期六</div><div class="th140">25℃</div><div class="th140">16℃</div><div class="th140">阴~多云</div><div class="th140">东南风 2级</div></li><li><div class="th200">2024-01-14 星期日</div><div class="th140">26℃</div><div class="th140">16℃</div><div class="th140">阴~多云</div><div class="th140">东南风 1级</div></li><li><div class="th200">2024-01-15 星期一</div><div class="th140">27℃</div><div class="th140">15℃</div><div class="th140">晴~多云</div><div class="th140">东南风 2级</div></li><li><div class="th200">2024-01-16 星期二</div><div class="th140">24℃</div><div class="th140">16℃</div><div class="th140">多云</div><div class="th140">东南风 3级</div></li><li><div class="th200">2024-01-17 星期三</div><div class="th140">21℃</div><div class="th140">17℃</div><div class="th140">阴~多云</div><div class="th140">东南风 2级</div></li><li><div class="th200">2024-01-18 星期四</div><div class="th140">26℃</div><div class="th140">19℃</div><div class="th140">多云</div><div class="th140">东南风 1级</div></li><li><div class="th200">2024-01-19 星期五</div><div class="th140">27℃</div><div class="th140">19℃</div><div class="th140">晴~多云</div><div class="th140">东南风 2级</div></li><li><div class="th200">2024-01-20 星期六</div><div class="th140">26℃</div><div class="th140">13℃</div><div class="th140">多云</div><div class="th140">北风 2级</div></li><li><div class="th200">2024-01-21 星期日</div><div class="th140">21℃</div><div class="th140">12℃</div><div class="th140">阴~多云</div><div class="th140">东北风 2级</div></li><li><div 
class="th200">2024-01-22 星期一</div><div class="th140">15℃</div><div class="th140">5℃</div><div class="th140">阴~小雨</div><div class="th140">东北风 4级</div></li><li><div class="th200">2024-01-23 星期二</div><div class="th140">8℃</div><div class="th140">4℃</div><div class="th140">小雨~多云</div><div class="th140">东北风 3级</div></li><li><div class="th200">2024-01-24 星期三</div><div class="th140">10℃</div><div class="th140">6℃</div><div class="th140">晴~多云</div><div class="th140">东北风 3级</div></li><li><div class="th200">2024-01-25 星期四</div><div class="th140">15℃</div><div class="th140">9℃</div><div class="th140">晴~多云</div><div class="th140">东北风 2级</div></li><li><div class="th200">2024-01-26 星期五</div><div class="th140">17℃</div><div class="th140">12℃</div><div class="th140">多云~阴</div><div class="th140">北风 2级</div></li><li><div class="th200">2024-01-27 星期六</div><div class="th140">17℃</div><div class="th140">10℃</div><div class="th140">阴</div><div class="th140">北风 2级</div></li><li><div class="th200">2024-01-28 星期日</div><div class="th140">14℃</div><div class="th140">12℃</div><div class="th140">阴</div><div class="th140">东北风 2级</div></li><li><div class="th200">2024-01-29 星期一</div><div class="th140">18℃</div><div class="th140">16℃</div><div class="th140">阴~小雨</div><div class="th140">东北风 1级</div></li><li><div class="th200">2024-01-30 星期二</div><div class="th140">22℃</div><div class="th140">18℃</div><div class="th140">阴</div><div class="th140">东南风 1级</div></li><li><div class="th200">2024-01-31 星期三</div><div class="th140">24℃</div><div class="th140">19℃</div><div class="th140">阴~多云</div><div class="th140">东南风 2级</div></li></ul>
12-17
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值