# -*- coding: UTF-8 -*-
import urllib, urllib2, cookielib, re, time, os
import requests
# Announce the tool and collect Zhihu credentials interactively from stdin.
# NOTE(review): the password is echoed to the terminal (raw_input);
# getpass would be the safer choice — confirm before changing UX.
print 'zhihu_QA'
print 'Please input your email:'
MyEmail = raw_input()
print 'Please input your password:'
MyPassWord = raw_input()
print '---------------------------------------------'
#基本信息
Url = 'http://www.zhihu.com/login'
#User_Agent每个电脑都不一样
User_Agent = **********************************************'
#MyReferer = 'http://www.zhihu.com/'
MyValues = {'email' : MyEmail, 'password' : MyPassWord}
MyHeaders = {'User-Agent' : User_Agent, }
MyRequests = requests.session()
MyCont = MyRequests.post(Url, data = MyValues, headers = MyHeaders)
MyCont2 = MyCont.text.encode('UTF-8')
print MyCont2
def GetNowTime():
    """Return the current local time as a 'YYYY-MM-DD_HH-MM-SS' string.

    Used to stamp the output .docx filename so each run writes a new file.
    """
    # strftime formats the current local time when given localtime()
    # with no argument — identical to passing time.localtime(time.time()).
    return time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())
# Output path for the generated Word document, stamped with the run time.
# Bug fixed: the original used r'D:\\MyZhiHu' — in a raw string BOTH
# backslashes survive, embedding a literal double backslash in the path.
# A plain string with one escaped backslash yields the intended
# D:\MyZhiHu<timestamp>.docx.
WordPathName = 'D:\\MyZhiHu' + GetNowTime() + '.docx'
# Tag names searched for in the feed: question titles live in <h2>,
# answer bodies in <div>.
String1 = 'h2'
String2 = 'div'
from bs4 import BeautifulSoup
# Parse the login response with an explicit parser.  Fix: the original
# omitted the parser argument, which makes bs4 emit a warning and pick
# whichever parser happens to be installed — results then vary by machine.
MySoup1 = BeautifulSoup(MyCont2, 'html.parser')
def has_need(tag):
    """BeautifulSoup filter: match Zhihu feed-item containers.

    True for tags that have a ``class`` attribute containing the
    ``feed-item-a`` class AND a ``data-type`` attribute.

    Bug fixed: the original tested ``tag.has_attr('feed-item-a')``,
    i.e. looked for an HTML *attribute* named ``feed-item-a``.  That is
    a CSS *class value* on Zhihu's feed divs, so the filter could never
    match; the check now tests membership in the tag's class list.
    """
    return (tag.has_attr('class')
            and 'feed-item-a' in tag['class']
            and tag.has_attr('data-type'))
def has_need_photo(tag):
    """BeautifulSoup filter: match tags carrying both 'class' and 'src'.

    Intended to pick out image tags whose source URL can be downloaded.
    """
    required = ('class', 'src')
    return all(tag.has_attr(name) for name in required)
# Running counter for the questions written into the output document.
QuestionNum = 1
# python-docx: Document builds the .docx; Inches sizes embedded pictures.
from docx import Document
from docx.shared import Inches
MyPhotoPath =
# (CSDN page-scrape artifact, not code) Article title: "Python网络爬虫对知乎首页进行爬取"
# ("Python web crawler scraping the Zhihu homepage"); page metadata: latest recommended article published 2024-04-29 10:03:11.