起初想用shell脚本来写:先获取网络内容,再用 JSON.sh 解析其中的json数据,结果到解析这一步就卡住了(数据格式不对等问题)。考虑到机器上有python,就转而使用python(毕竟有经验,写起来快)。麻烦在于:
python使用不了pip(奈何神器不能用);
在本地给python安装mysqldb的过程中,几次遇到需要先安装xxx模块之类的依赖问题(结果是无疾而终:系统是定制的linux,连rpm都装不了)(linux要是能把装东西的步骤优化成一键完成,绝对是最畅销的)。
纯shell走不通,纯python又缺胳膊少腿,那就两者混着用:没有mysqldb,python不是还能调用系统命令么。于是乎,下面这个不伦不类的实现就出现了。
import json
import os
import shlex
import subprocess
import sys
import time
import urllib
import urllib2
# Test database: mysql client command prefix. The SQL statement to run is
# appended right after the trailing "-e ".
# NOTE(review): the credentials are embedded in the command line.
g_mysqlDB = "mysql -h10.204.28.101 -P3470 -umtt -pmtt@sn -Dxxxx -e "
# Production database: swap with the line above to run against it.
#g_mysqlDB = "mysql -h10.147.23.120 -P3302 -umtt -pmtt@sn -Dxxxx -e "
# strftime format of the timestamp written to post_time. The time part is
# spelled out as %H:%M:%S rather than %X, because %X is the locale's own time
# representation and is not guaranteed to be HH:MM:SS.
g_isoTimeFormat = '%Y-%m-%d %H:%M:%S'
# Page downloaded by getSougouDomain().
g_url = "http://xxxx.com.cn"
#1 Fetch the domain list from the web page
def getSougouDomain():
    """Download g_url and collect the domains found in its "matchList" object.

    The page body is cut down to the first ``{...}`` that follows the text
    ``matchList``; that slice is parsed as JSON. Every value of the parsed
    object is iterated, and each element has ``http://`` removed and one
    trailing ``/`` stripped.
    NOTE(review): this assumes the object is flat (no nested braces) and that
    its values are lists of URL strings -- confirm against the real page.

    Returns:
        dict: ``{"domainmap": [domain, ...]}``. Entries that are empty after
        normalisation are left out.

    Raises:
        ValueError: if ``matchList``, ``{`` or ``}`` cannot be found, or the
            extracted slice is not valid JSON.
    """
    response = urllib2.build_opener().open(g_url)
    print(g_url)
    try:
        content = response.read()
    finally:
        # Release the connection even when reading fails.
        response.close()

    # Narrow the page down to the first {...} after the "matchList" marker.
    content = content[content.index("matchList"):]
    content = content[content.index("{"):]
    content = content[:content.index("}") + 1]

    # Parse to JSON.
    contentJSON = json.loads(content)
    print(type(contentJSON))

    domainSougouList = []
    for urlList in contentJSON.values():
        for elem in urlList:
            elem = elem.replace("http://", "")
            # endswith() is safe on an empty string, unlike elem[-1].
            if elem.endswith("/"):
                elem = elem[:-1]
            if not elem:
                # Nothing left (e.g. the entry was just "http://"): skip it
                # instead of handing an empty domain to the database step.
                continue
            domainSougouList.append(elem)
    print(len(domainSougouList))
    return {"domainmap": domainSougouList}
#2 Load the current rows from the database
def getDBDataByShell():
    """Dump the type=66 rows of t_qua_domain with the mysql client and parse them.

    The mysql command line is run through the shell with its output
    redirected to ``domain.data`` in the current directory; that file is then
    read back as tab-separated ``domain<TAB>status`` lines.

    Returns:
        dict: maps each domain to its status column, kept as a string.

    Raises:
        RuntimeError: if the mysql command exits with a non-zero status.
            Continuing with an empty result would make every web domain look
            new and insert it again.
    """
    mysqlShell = g_mysqlDB + "'select domain,status from t_qua_domain where type=66' > domain.data"
    print(mysqlShell)
    shellResult = os.system(mysqlShell)
    if shellResult != 0:
        raise RuntimeError("mysql query failed (status %s): %s" % (shellResult, mysqlShell))

    # Read the dump back.
    domainDBDict = {}
    with open("domain.data", "r") as fileHandler:
        for line in fileHandler:
            line = line.strip()
            if not line:
                continue
            fields = line.split('\t')
            if fields == ["domain", "status"]:
                # Column header emitted by the mysql client. Matching the
                # whole header keeps real rows whose domain merely contains
                # the text "domain".
                continue
            if len(fields) < 2:
                # Malformed row without a status column: ignore it.
                continue
            domainDBDict[fields[0]] = fields[1]
    print(domainDBDict)
    print(len(domainDBDict))
    print("\n+++++++++++++++++++++++++++++++++++++++++++\n")
    return domainDBDict
#3 Compare web data with database data and derive what to add, delete and change
def dealEletronyDomain(domainFromWeb, domainFromDB):
    """Diff the web domain list against the database rows and apply the result.

    Three lists are built and each is passed to wiriteNewDomainToDB():
      * "add":    web domains that have no database row;
      * "change": web domains whose database status is "0";
      * "del":    database rows with status "1" that are not in the web list.

    Each web domain is considered once, so a domain that appears several
    times in the web list cannot be queued for insertion more than once.
    domainFromDB is only read, never modified.

    Args:
        domainFromWeb: dict with a "domainmap" key holding the web domains.
        domainFromDB: dict mapping domain to its status string.
    """
    domainAddList = []
    domainChange = []
    seenWebDomains = set()
    for domainWeb in domainFromWeb["domainmap"]:
        if domainWeb in seenWebDomains:
            continue  # duplicate entry on the web side
        seenWebDomains.add(domainWeb)
        if domainWeb not in domainFromDB:
            domainAddList.append(domainWeb)  # insert
        elif domainFromDB[domainWeb] == "0":
            domainChange.append(domainWeb)  # change valid

    # Rows still marked "1" in the database but absent from the web list.
    domainDelList = [
        domain
        for domain, status in domainFromDB.items()
        if status == "1" and domain not in seenWebDomains
    ]

    print("++++++++++++++++++domainAddList+++++++++++++++++++++++++")
    print(domainAddList)
    print(len(domainAddList))
    wiriteNewDomainToDB(domainAddList, "add")
    print("++++++++++++++++++domainDelList+++++++++++++++++++++++++")
    print(domainDelList)
    print(len(domainDelList))
    wiriteNewDomainToDB(domainDelList, "del")
    print("++++++++++++++++++domainChange+++++++++++++++++++++++++")
    print(domainChange)
    print(len(domainChange))
    wiriteNewDomainToDB(domainChange, "change")
    print("\n+++++++++++++++++++++++++++++++++++++++++++\n")
#4 Write the add / delete / change results to the database
# SQL template per sqlType. %(domain)s receives the escaped domain and
# %(time)s the current timestamp.
_DOMAIN_SQL_TEMPLATES = {
    "add": ('insert into t_qua_domain(domain, qua_ids, status, post_time, type, sort_id) '
            'values("%(domain)s", "1960;1961", 1, "%(time)s", 66, 1)'),
    "del": ('update t_qua_domain set status=0, post_time="%(time)s" '
            'where type=66 and domain="%(domain)s"'),
    "change": ('update t_qua_domain set status=1, post_time="%(time)s" '
               'where type=66 and domain="%(domain)s"'),
}


def _escapeSQLString(value):
    """Escape value for embedding in a double-quoted MySQL string literal."""
    # NOTE(review): relies on backslash escapes being enabled on the server
    # (i.e. sql_mode without NO_BACKSLASH_ESCAPES) -- confirm.
    return value.replace("\\", "\\\\").replace('"', '\\"')


def wiriteNewDomainToDB(domainList, sqlType):
    """Run one mysql statement per domain for the given kind of change.

    The domains come from a downloaded web page, so they are treated as
    untrusted: each one is escaped before it is placed in the SQL text, and
    the mysql client is started directly from an argument list rather than
    through a shell, so quotes or shell metacharacters in a domain cannot
    alter the command.

    Args:
        domainList: iterable of domain strings to process.
        sqlType: "add" inserts a row, "del" sets status=0, "change" sets
            status=1. Any other value does nothing.
    """
    template = _DOMAIN_SQL_TEMPLATES.get(sqlType)
    if template is None:
        return

    updateTime = time.strftime(g_isoTimeFormat, time.localtime())
    print("\n++++++++++++++++++%s domain++++++++++++++++++++++\n" % sqlType)

    # g_mysqlDB is a shell-style command prefix ending in "-e"; split it once
    # into an argument list and append the statement as the final argument.
    mysqlArgs = shlex.split(g_mysqlDB)
    for domain in domainList:
        sql = template % {"domain": _escapeSQLString(domain), "time": updateTime}
        print(g_mysqlDB + "'" + sql + "'")
        status = subprocess.call(mysqlArgs + [sql])
        if status != 0:
            # Keep going with the remaining domains, but do not hide the failure.
            print("mysql command failed with status %s for domain %s" % (status, domain))
# Script entry point
if __name__ == "__main__":
    # Remove the dump left behind by a previous run.
    os.system("rm -f domain.data")
    # Fetch the web list first, then the database rows, then reconcile them.
    webDomains = getSougouDomain()
    dbDomains = getDBDataByShell()
    dealEletronyDomain(webDomains, dbDomains)
上面的代码主要是对数据库的操作,通过python执行shell命令的方式实现。