百度迁徙爬虫
一、原由
学校表白墙有偿爬取百度迁徙数据,就拿下了。
程序爬取每天的数据信息,并根据情况生成三个 Excel 文件。
二、部分代码
def JsonTextConvert(text):
    r"""Unwrap a JSONP response and decode its backslash escapes.

    The input is expected to look like ``callback({...})``. Backslash
    escapes such as ``\uXXXX`` are replaced by the characters they denote,
    then the text between the first ``(`` and the last ``)`` is returned.

    Args:
        text: JSONP response body as ``str``.

    Returns:
        The text inside the wrapper. An empty string when ``text`` contains
        no ``(``; everything after the first ``(`` when no ``)`` follows it.
        Anything after the last ``)`` (for example a trailing ``;``) is
        dropped along with the parenthesis.
    """
    # Encode with latin-1 + backslashreplace instead of utf-8: characters
    # above U+00FF are written as \uXXXX escapes and 0x80-0xFF bytes decode
    # back to themselves, so literal non-ASCII text survives the
    # unicode_escape round trip instead of turning into mojibake.
    decoded = text.encode('latin-1', 'backslashreplace').decode('unicode_escape')
    _, _, tail = decoded.partition('(')
    # Only the last ")" belongs to the wrapper; any ")" inside the payload
    # (e.g. within a string value) is data and must be kept.
    body, closer, _ = tail.rpartition(')')
    return body if closer else tail
def UrlFormate(rankMethod, dt, name, migrationType, date):
list_date = list(date)
list_date.insert(4, '-')
list_date.insert(7, '-')
formatDate = ''.join(list_date)
formatDate = formatDate + " 00:00:00"
timeArray = time.strptime(formatDate, "%Y-%m-%d %H:%M:%S")
timeUnix = time.mktime(timeArray)
ID = code[name]
if migrationType == 'in' or migrationType == 'out' or rankMethod == 'historycurve':
url = 'http://huiyan.baidu.com/migration/{0}.jsonp