测试API: http://192.168.100.75XXXXXXXXXXXXXXXXXXXXX
解析json:
{'result': True,
'MetaList': [{
'ID': '-1769765650_2108236046',
'Data':
{'Extra': {'来源链接': '', '答案': '您好,九龙湖校区馆长室在李文正图书馆A506,电话025-52090321。\r\n四牌楼校区馆长室在四牌楼图书馆205,电话025-83793481。', '来源': '', '完整答案': '', '问题': '馆长室在哪?'},
'Domain': '图书馆_业务数据',
'Question': '馆长室在哪?',
'Answer': '您好,九龙湖校区馆长室在李文正图书馆A506,电话025-52090321。\r\n四牌楼校区馆长室在四牌楼图书馆205,电话025-83793481。'},
'DataType': 0
}]
}
# -*- coding: utf-8 -*-
"""
Created on Mon Apr 15 15:00:50 2019
@author: xiaocui
"""
import json
import pandas as pd
import urllib
import urllib.request as urllib2
import random
import socket
# Default timeout, in seconds, for sockets created after this point.
socket.setdefaulttimeout(30)

# Pool of User-Agent header values; ExtractInfo picks one at random for
# every request it sends.
user_agents = [
    'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20130406 Firefox/23.0',
    'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:18.0) Gecko/20100101 Firefox/18.0',
    ('Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533+ '
     '(KHTML, like Gecko) Element Browser 5.0'),
    'IBM WebExplorer /v0.94',
    'Galaxy/1.0 [en] (Mac OS X 10.5.6; U; en)',
    'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)',
    'Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14',
    ('Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 '
     '(KHTML, like Gecko) '
     'Version/6.0 Mobile/10A5355d Safari/8536.25'),
    ('Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) '
     'Chrome/28.0.1468.0 Safari/537.36'),
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0; TheWorld)',
]
def ExtractInfo(question):
    """Query the QA service for *question* and collect the matches it returns.

    The question is URL-quoted into the ``GetAnswer`` endpoint and sent with
    a User-Agent picked at random from ``user_agents``. The decoded JSON
    reply is printed and then unpacked into parallel lists.

    Args:
        question: Question text to look up.

    Returns:
        A 4-tuple ``(ID, domain, matchQustion, answer)`` of lists.

        * Reply with a true ``result`` flag: one entry per ``MetaList`` item,
          taken from ``item['ID']`` and ``item['Data']``'s ``Domain``,
          ``Question`` and ``Answer`` fields.
        * Reply with any other ``result`` value: ``ID`` is empty and the
          other three lists each hold the single placeholder ``'无答案'``.
        * Request failure or a body that is not valid UTF-8 JSON: four empty
          lists.

    Raises:
        KeyError: If the reply lacks one of the fields read above.
    """
    link = 'http://XXXXXXXXXX/dn.qa.api/GetAnswer?appid=lib_neu&aid=ee831eebc9f01c3f18d6c2198ff879b2&q={0}'.format(urllib.parse.quote(question))
    request = urllib2.Request(link)
    request.add_header('User-agent', random.choice(user_agents))
    request.add_header('connection', 'keep-alive')

    try:
        # The context manager closes the connection as soon as the body has
        # been read, including when read() itself fails.
        with urllib2.urlopen(request) as response:
            body = response.read()
        js = json.loads(body.decode('utf-8'))
    except (OSError, ValueError) as err:
        # OSError covers HTTPError, URLError and socket timeouts; ValueError
        # covers undecodable bytes and malformed JSON. One bad request must
        # not abort a whole batch run, so report it and return "no result".
        print('ExtractInfo failed for {0!r}: {1}'.format(question, err))
        return [], [], [], []
    print(js)

    # IDs of the matched entries
    ID = []
    # Domain of each matched entry
    domain = []
    # Stored question each match corresponds to
    matchQustion = []
    # Answer text of each match
    answer = []

    # Kept as an equality test so exactly the same replies are accepted as
    # before (a JSON true, or the number 1).
    if js['result'] == True:
        for item in js['MetaList']:
            data = item['Data']
            ID.append(item['ID'])
            domain.append(data['Domain'])
            matchQustion.append(data['Question'])
            answer.append(data['Answer'])
    else:
        # ID is deliberately left empty here: callers use an empty ID list
        # to detect "no answer".
        domain.append('无答案')
        matchQustion.append('无答案')
        answer.append('无答案')
    return ID, domain, matchQustion, answer
# Questions to send to the QA service: a GB18030-encoded text file with one
# question per line.
file_path = r"E:\工作\东南大学图书馆\0527测试不通过问题.txt"
with open(file_path, encoding="gb18030") as question_file:
    # Blank lines (and an empty file) are dropped so they are not sent to
    # the service as empty questions.
    questions = [
        line
        for line in question_file.read().strip().split('\n')
        if line.strip()
    ]
######################################################################################
# Parallel output columns: position i in every list describes one output row.
match_ids = []
match_domain = []
match_questions = []
match_answers = []
match_dataType = []
# Original question text (left blank on the extra rows of a multi-match reply)
valid_q = []
# Match status label of each row
is_match = []
for q in questions:
    print(q)
    ID, domain, matchQustion, answer = ExtractInfo(q)
    valid_q.append(q)
    if not ID:
        # Nothing matched (or the request failed): emit one placeholder row.
        is_match.append('无答案')
        match_ids.append('')
        match_domain.append('')
        match_questions.append('')
        match_answers.append('')
        continue
    # The first match shares its row with the original question.
    is_match.append('可以匹配')
    match_ids.append(ID[0])
    match_domain.append(domain[0])
    match_questions.append(matchQustion[0])
    match_answers.append(answer[0])
    # Every further match gets its own row with the question column blank.
    # The loop names are distinct from ``q`` so the outer loop variable is
    # not overwritten.
    for extra_id, extra_domain, extra_question, extra_answer in zip(
            ID[1:], domain[1:], matchQustion[1:], answer[1:]):
        valid_q.append('')
        is_match.append('Y')
        match_ids.append(extra_id)
        match_domain.append(extra_domain)
        match_questions.append(extra_question)
        match_answers.append(extra_answer)
######################################################################################
# Assemble the collected columns into a table, in this left-to-right order,
# and export it as an Excel workbook.
df = pd.DataFrame()
for column_name, column_values in (
    ('原问题', valid_q),
    ('是否匹配', is_match),
    ('匹配的ID', match_ids),
    ('匹配的问题', match_questions),
    ('匹配的领域', match_domain),
    ('匹配的答案', match_answers),
):
    df[column_name] = column_values
df.to_excel('test_result_0418-1.xlsx')