1.获取需求
from day1tuozhan_all import get
1.1 模块'day1tuozhan_all'的内容如下(封装了 get/post 请求):
from urllib import request, parse
from urllib.error import HTTPError, URLError
def get(url, headers=None):
    """Fetch ``url`` without form data and return the response body.

    Args:
        url: Address to request.
        headers: Optional dict of request headers, forwarded unchanged.

    Returns:
        Whatever ``urlrequests`` returns for the same ``url`` and ``headers``.
    """
    return urlrequests(url, form=None, headers=headers)
def post(url, form, headers=None):
    """Submit ``form`` to ``url`` and return the response body.

    Args:
        url: Address to request.
        form: Form fields, forwarded to ``urlrequests`` as its ``form``.
        headers: Optional dict of request headers, forwarded unchanged.

    Returns:
        Whatever ``urlrequests`` returns for the same arguments.
    """
    return urlrequests(url, form=form, headers=headers)
def urlrequests(url, form=None, headers=None):
    """Request ``url`` and return the raw response body as bytes.

    When ``form`` is truthy it is URL-encoded, encoded as UTF-8 and attached
    as the request body (the POST path); otherwise the request carries no
    body (the GET path).

    Args:
        url: Address to request.
        form: Optional form fields accepted by ``urllib.parse.urlencode``.
        headers: Optional dict of request headers. When omitted, a single
            default ``User-Agent`` header is sent; when given, it replaces
            the default entirely.

    Returns:
        The response body, or ``b''`` when the request fails with
        ``URLError``/``HTTPError`` (the error is printed, not raised).
    """
    user_agent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
    # Caller-supplied headers take over completely; the default is only
    # used when nothing was passed in.
    if headers is None:
        headers = {'User-Agent': user_agent}

    if form:
        # POST: urlencode to str, then to bytes as urllib requires.
        form_bytes = parse.urlencode(form).encode('utf-8')
        req = request.Request(url, data=form_bytes, headers=headers)
    else:
        # GET: no request body.
        req = request.Request(url, headers=headers)

    html_bytes = b''
    try:
        # The context manager closes the response even if read() fails.
        with request.urlopen(req) as response:
            html_bytes = response.read()
    except URLError as e:
        # HTTPError is a subclass of URLError, so this covers both.
        print(e)
    return html_bytes
if __name__ == '__main__':
    # Manual smoke test: fetch the Baidu home page and print the raw bytes.
    demo_url = 'http://www.baidu.com'
    print(get(demo_url))
2.根据需求,寻找网站
# Target page on renren.com to download; the path is a numeric id
# (presumably the account's own page — confirm).
url = 'http://www.renren.com/967454542'
3.设置请求头信息
# Request headers consisting of a single Cookie string (presumably copied
# from a logged-in browser session so the request is treated as that user).
# NOTE(review): this hard-codes session credentials in the file — confirm it
# is demo-only and that the session has been invalidated.
headers = {
'Cookie':'anonymid=jkt65len42djig; depovince=JS; _r01_=1; ick_login=df4c932e-6c35-4052-8e25-1a4b44486bd1; XNESSESSIONID=7efa10c69935; ick=7599d90a-bd99-4395-8f02-3244c5b3a361; __utma=151146938.1136706232.1534236405.1534236405.1534236405.1; __utmc=151146938; __utmz=151146938.1534236405.1.1.utmcsr=renren.com|utmccn=(referral)|utmcmd=referral|utmcct=/SysHome.do; first_login_flag=1; ln_uact=14747475006; ln_hurl=http://head.xiaonei.com/photos/0/0/men_main.gif; wp_fold=0; jebecookies=2fe119fa-c8b3-4564-afa1-bcc03a50d3e1|||||; jebe_key=bff9faea-3f67-45f0-bbb7-88e87eb50dc3%7Ccff1de20e10bde56e4cce05effa8e048%7C1534265050003%7C1%7C1534266569662; _de=FEF18D3E9BEC2817C8DC921217ED3186; p=81b5bee8193e2a3fe43719851ef384ea2; t=99acc304ef4599da60442ae1bb19ced22; societyguester=99acc304ef4599da60442ae1bb19ced22; id=967454542; xnsid=c695f93b; ver=7.0; loginfrom=null'
}
4.获取及定位数据
# Download the target page, sending the custom headers (including the Cookie)
# instead of the helper's default headers.
html_bytes = get(url,headers=headers)
5.存储
# Persist the downloaded bytes unchanged (binary mode, no decoding step).
with open('renren.html', 'wb') as out_file:
    out_file.write(html_bytes)
6.保存到'renren.html'中的页面内容如下:
<!Doctype html>
<html class="nx-main760">
<head>
<meta name="Description" content="" />
<meta name="Keywords" content="" />
<title>人人网 - 斯钦布赫</title>
<meta charset="utf-8"/>
<link rel="shortcut icon" type="image/x-icon" href="http://a.xnimg.cn/favicon-rr.ico?ver=3" />
<link rel="apple-touch-icon" href="http://a.xnimg.cn/wap/apple_icon_.png" />
<link rel="stylesheet" type="text/css" href="http://s.xnimg.cn/a86614/nx/core/base.css">
<script type="text/javascript">
if(typeof nx === 'undefined'){
var nx = {};
}
nx.log = {
startTime : + new Date()
};
nx.user = {
id : "967454542",
ruid:"967454542",
tinyPic : "http://head.xiaonei.com/photos/0/0/men_tiny.gif ",
name : "斯钦布赫",
privacy: "99",
requestToken : '-967158768',
_rtk : '921b526a'
};nx.user.isvip = false;nx.user.hidead = false;nx.webpager = nx.webpager || {};
nx.production = true;
</script>
<script type="text/javascript" src="http://s.xnimg.cn/a83151/nx/core/libs.js"></script>
<script type="text/javascript">
define.config({map:{
"backbone":"http://s.xnimg.cn/a75208/nx/core/backbone.js",
"ui/draggable":"http://s.xnimg.cn/a70750/nx/core/ui/draggable.js",
"ui/menu":"http://s.xnimg.cn/a70736/nx/core/ui/menu.js",
"ui/resizable":"http://s.xnimg.cn/a70738/nx/core/ui/resizable.js",
"ui/sortable":"http://s.xnimg.cn/a70749/nx/core/ui/sortable.js",
"ui/tabs":"http://s.xnimg.cn/a78333/nx/core/ui/tabs.js",
"ui/ceiling":"http://s.xnimg.cn/a76297/nx/core/ui/ceiling.js",
"ui/columns":"http://s.xnimg.cn/a68070/nx/core/ui/columns.js",
"ui/dialog":"http://s.xnimg.cn/a76395/nx/core/ui/dialog.js",
"ui/fileupload":"http://s.xnimg.cn/a81310/nx/core/ui/fileupload.js",
"ui/pagination":"http://s.xnimg.cn/a70307/nx/core/ui/pagination.js",
"ui/placeholder":"http://s.xnimg.cn/a79685/nx/core/ui/placeholder.js",
"ui/progressbar":"http://s.xnimg.cn/a62964/nx/core/ui/progressbar.js",
"ui/rows":"http://s.xnimg.cn/a62964/nx/core/ui/rows.js",
"ui/scroll":"http://s.xnimg.cn/a61518/nx/core/ui/scroll.js",
"ui/scrollbar":"http://s.xnimg.cn/a76868/nx/core/ui/scrollbar.js",
"ui/select":"http://s.xnimg.cn/a82096/nx/core/ui/select.js",
"ui/slideshow":"http://s.xnimg.cn/a72804/nx/core/ui/slideshow.js",
"ui/speech":"http://s.xnimg.cn/a77631/nx/core/ui/speech.js",
"ui/textbox":"http://s.xnimg.cn/a79526/nx/core/ui/textbox.js",
"ui/renren/textbox":"http://s.xnimg.cn/a92727/nx/core/ui/renren/textbox.js",
"ui/tooltip":"http://s.xnimg.cn/a73228/nx/core/ui/tooltip.js",
"ui/renren/addfriend":"http://s.xnimg.cn/a78457/nx/core/ui/renren/addFriendLayer.js",
"ui/renren/at":"http://s.xnimg.cn/a78409/nx/core/ui/renren/atAndEmotion.js",
"ui/renren/emotion":"http://s.xnimg.cn/a78409/nx/core/ui/renren/atAndEmotion.js",
"ui/renren/commentCenter":"http://s.xnimg.cn/a83569/nx/core/ui/renren/commentCenter.js",
"ui/renren/friendgroup":"http://s.xnimg.cn/a62964/nx/core/ui/renren/friendGroup.js",
"ui/renren/friendListSelector":"http://s.xnimg.cn/a78513/nx/core/ui/renren/friendListSelector.js",
"ui/renren/like":"http://s.xnimg.cn/a83569/nx/core/ui/renren/like.js",
"nx/namecard":"http://s.xnimg.cn/a77668/nx/core/ui/renren/namecard.js",
"ui/renren/pagelayer":"http://s.xnimg.cn/a62964/nx/core/ui/renren/pageLayer.js",
"ui/renren/photoupload":"http://s.xnimg.cn/a82833/nx/core/ui/renren/photoupload.js",
"ui/renren/privacy":"http://s.xnimg.cn/a76680/nx/core/ui/renren/privacy.js",
"ui/renren/share":"http://s.xnimg.cn/a78999/nx/core/ui/renren/share.js",
"ui/renren/vocal":"http://s.xnimg.cn/a77347/nx/core/ui/renren/vocal.js",
"ui/renren/mvideo":"http://s.xnimg.cn/a80641/nx/core/ui/renren/mvideo.js",
"ui/renren/with":"http://s.xnimg.cn/a82994/nx/core/ui/renren/with.js",
"ui/clipboard":"http://s.xnimg.cn/a63417/nx/core/ui/clipboard.js",
"app/publisher":"http://s.xnimg.cn/a91505/nx/core/app/publisher.js",
"viewer":"http://s.xnimg.cn/a83025/nx/photo/viewer/js/viewer.js",
"media/player": "http://s.xnimg.cn/nx/photo/viewer/js/mediaplayer.js",
"ui/renren/like/commentseed":"http://s.xnimg.cn/a64636/nx/core/ui/renren/like.seed.comment.js",
"ui/renren/like/seed":"http://s.xnimg.cn/a62964/nx/core/ui/renren/like.seed.js",
"ui/renren/share/seed":"http://s.xnimg.cn/a62964/nx/core/ui/renren/share.seed.js",
"ui/renren/follow":"http://s.xnimg.cn/a78456/nx/core/ui/renren/follow.js",
"ui/renren/relationFollow":"http://s.xnimg.cn/a78457/nx/core/ui/renren/relationFollow.js",
"ui/autocomplete":"http://s.xnimg.cn/a70736/nx/core/ui/autocomplete.js",
"ui/showCommonFriend":"http://s.xnimg.cn/a78917/nx/core/ui/renren/showcommonfriend.js",
"photo/circler":"http://s.xnimg.cn/a73344/nx/photo/phototerminal/js/circler.js",
"ui/friendSearch":"http://s.xnimg.cn/a64338/nx/core/ui/renren/friendSearch.js",
"ui/renren/replyOption":"http://s.xnimg.cn/a68256/nx/core/ui/renren/replyOption.js",
"photo/avatarUpload": "http://s.xnimg.cn/a77340/nx/photo/upload-avata/js/avatarUpload.js",
"ui/renren/school":"http://s.xnimg.cn/a85689/nx/core/ui/renren/school.js"
}});
nx.data.isDoubleFeed = Boolean(false);
nx.data.isDoubleFeedGuide = Boolean(false);
</script>
<script type="text/javascript" src="http://s.xnimg.cn/a88603/nx/core/base.js"></script>
<!--[if lt IE 9]>
<script type="text/javascript">
document.execCommand("BackgroundImageCache", false, true);
</script>
<![endif]-->
<link rel="stylesheet" type="text/css" href="http://s.xnimg.cn/a90333/nx/home/css/home.css"/>
<link rel="stylesheet" href="http://s.xnimg.cn/a93851/wap/mobile/2018activity/advice/advice.css">
<!-- <link rel="stylesheet" href="http://s.xnimg.cn/a93818/wap/mobile/2018activity/newr/xcx/swiper-4.3.3.min.css"> -->
<style>
.swiper-container {
margin: 0 auto 15px;
}
</style>
<script type="text/javascript">
nx.user.pageid = 'home';
var hiddenAd = isV7guide = true;
if (isV7guide) nx.webpager.fold = true;
//左侧应用中心广告弹窗
var show_activity_flag_49999999 = '' === 'true';
//右下角广告弹窗
var show_activity_flag_49999998 = '' === 'true';
nx.load('http://s.xnimg.cn/a91508/nx/home/js/home-all.js');
require.config({
paths: {
"viparea/privilege": "http://s.xnimg.cn/a78332/nx/home/app/viparea/addons.js",
"viparea/decorate/deco": "http://s.xnimg.cn/a78332/nx/home/app/viparea/addons.js",
"viparea/gift/send": "http://s.xnimg.cn/a78332/nx/home/app/viparea/addons.js",
"vip/plugin": "http://s.xnimg.cn/a78332/nx/home/app/viparea/addons.js"
}
});
(function() {
var temp = "[{}]";
try {
temp = '' || "[{}]";
}
catch (e) {
console.log("新鲜事的中某些商业活动ICON配置有问题!!");
}
if (typeof (nx.data.feed) == "undefined") {
nx.data.feed = {};
}
nx.data.feed.showact = true;
nx.data.feed.fedact = jQuery.parseJSON(temp);
})();
</script>
<link rel="stylesheet" type="text/css" href="http://s.xnimg.cn/a75676/nx/apps/guide/css/register-guide-v7.css">
<script>
nx.load('http://s.xnimg.cn/a78451/nx/apps/guide/js/register-guide-v7.js');
</script>
<link rel="stylesheet" type="text/css" href="http://s.xnimg.cn/a90191/nx/apps/guide/css/hotlive.css">
</head>
<body>
<div id="nxContainer" class="nx-container">
<div class="nx-main">
<div id="zidou_template" style="display:none"></div>
<div id="hometpl_style" data-notshow="" style="display:none">
<br />
<style></style>
</div>
<div id="nxHeader" class="hd-wraper ">
<div class="hd-fixed-wraper clearfix">
<div class="hd-main">
<h1 class="hd-logo">
<a href="http://www.renren.com/" title="人人网 renren.com - 人人网校内是一个真实的社交网络,联系朋友,一起玩游戏">人人网</a>
</h1>
<div class="hd-nav clearfix">
<div class="hd-search">
<input type="text" id="hd-search" class="hd-search-input" placeholder="搜索好友,公共主页,状态" />
<a href="javascript:;" class="hd-search-btn"></a>
</div>
<dl class="hd-account clearfix">
<dt>
<a href="http://www.renren.com/967454542/profile">
<img class="hd-avatar" width="30" height="30" src="http://head.xiaonei.com/photos/0/0/men_tiny.gif" alt="斯钦布赫" />
</a>
</dt>
<dd>
<a class="hd-name" href="http://www.renren.com/967454542/profile" title="斯钦布赫">斯钦布赫</a>
<a id="hdLoginDays" target="_blank" class="hd-logindays" href="http://sc.renren.com/scores/mycalendar" data-tips="{'days':'2','range':'1','score':'5','vipLevel':'1'}">2天</a>
</dd>
</dl>
<div class="hd-account-action">
<a href="#" class="account-more"><span class="ui-icon ui-icon-setting ui-iconfont"></span></a>
<div class="nx-drop-box nx-simpl