import requests
import htmllib
import re
import urllib2
import json
import leancloud
from lxml import etree
import sys
# Python 2-only workaround: `sys.setdefaultencoding` is removed from the `sys`
# module during interpreter start-up, and `reload(sys)` brings it back so the
# implicit str <-> unicode codec can be switched to UTF-8 for the whole process.
# NOTE(review): this affects every module in the process and has no Python 3
# equivalent; the two statements must stay in this order.
reload(sys)
sys.setdefaultencoding('utf8')
def savefile(string):
    """Append ``string`` to the hard-coded movie-list text file.

    The file is opened in append mode, so existing content is preserved.

    Args:
        string: Data handed to ``file.writelines``; a plain string or any
            iterable of strings is accepted.
    """
    # The context manager closes the handle even if the write raises, which
    # the manual open()/close() pair did not guarantee.
    with open("/Users/macbook/python/代码/电影名单.txt", "a") as f:
        f.writelines(string)
def getFilmWithPage(page):
    """Scrape one listing page and hand every movie on it to ``leanCloudSave``.

    The listing page ``http://hehe/list3/<page>.html`` is fetched and each
    ``<a>`` directly under a ``<div class="video_box">`` is processed:

    1. the video key is extracted from the anchor's ``href`` (``/vod/<key>.html``);
    2. the jwplayer config for that key is requested;
    3. the text of ``//config/file`` in that response is taken as the media URL;
    4. the ``<img>`` title, the media URL and the ``<img>`` src are passed to
       ``leanCloudSave``.

    Entries that lack any of the expected attributes, whose ``href`` does not
    match the pattern, or whose config has no ``file`` node are skipped so a
    single malformed entry does not abort the whole page.

    Args:
        page: Integer page number substituted into the listing URL with ``%d``.
    """
    # Compiled once per call instead of once per entry; the dot before "html"
    # is escaped so it only matches a literal ".".
    vod_key_re = re.compile(r'/vod/(.*?)\.html', re.M | re.I | re.S)

    # The second positional parameter of requests.get is ``params``; passing
    # 'GET' there appended "GET" to the query string, so it is dropped.
    listing = requests.get('http://hehe/list3/%d.html' % page)
    anchors = etree.HTML(listing.text).xpath('//div[@class="video_box"]/a')

    for each in anchors:
        hrefs = each.xpath('@href')
        titles = each.xpath('img/@title')
        images = each.xpath('img/@src')
        if not (hrefs and titles and images):
            # Incomplete entry: nothing useful to store.
            continue

        match = vod_key_re.search(hrefs[0])
        if match is None:
            # Link does not point at a /vod/<key>.html page.
            continue

        config_url = 'http://www.hehe.co/jwplayer/jwconfig.php?vkey=%s.html' % match.group(1)
        config = requests.get(config_url)
        file_urls = etree.HTML(config.text).xpath('//config/file/text()')
        if not file_urls:
            # Config response carries no media URL.
            continue

        leanCloudSave(titles[0], file_urls[0], images[0])

    # Progress marker; the parenthesised form prints the same text on Python 2.
    print(page)
def leanCloudSave(MovieName, MovieUrl, MovieImageUrl):
    """Build a ``MyselfMovie`` LeanCloud object from the arguments and save it.

    Args:
        MovieName: Value stored under the ``MovieName`` key.
        MovieUrl: Value stored under the ``MovieUrl`` key.
        MovieImageUrl: Value stored under the ``MovieImageUrl`` key.
    """
    movie_class = leancloud.Object.extend('MyselfMovie')
    record = movie_class()

    # Fields are set in the same order as before: image, url, name.
    fields = (
        ('MovieImageUrl', MovieImageUrl),
        ('MovieUrl', MovieUrl),
        ('MovieName', MovieName),
    )
    for key, value in fields:
        record.set(key, value)

    record.save()
# Initialise the LeanCloud SDK before any object is saved.
# NOTE(review): the two literals look like an application id / key pair
# committed in source -- confirm, and consider loading them from outside the file.
leancloud.init("eb5QvTbUUq06b0qvyLE", "iu2j1fbxBOJi")

# Scrape listing pages 19 through 29 (the upper bound of range is exclusive).
for num in range(19, 30):
    getFilmWithPage(num)