基于百度AI的微博内容情感分析
在使用代码之前，需要先到百度AI官网注册一个账号，获取密钥（APP_ID、API_KEY、SECRET_KEY）。
from aip import AipNlp
import pandas as pd
import numpy as np
import time
import random
""" 你的 APPID AK SK """
# Credentials for the Baidu AI service (app id, API key, secret key).
# A Baidu AI account must be registered to obtain them; they are left blank here.
APP_ID = ''
API_KEY = ''
SECRET_KEY = ''
# Module-level NLP client built from the three credentials above.
client = AipNlp(APP_ID, API_KEY, SECRET_KEY)
def is_chinese(uchar):
    """Return True if *uchar* should be kept as Chinese text.

    A value is accepted when it compares within the range U+4E00..U+9FA5
    (CJK ideographs) or when it is one of the whitelisted punctuation marks.
    Everything else yields False.
    """
    kept_punctuation = ('“', '”', ',', ':', '。', '(', ')', '!', '?', '【', '】', '《', '》', ';')
    return u'\u4e00' <= uchar <= u'\u9fa5' or uchar in kept_punctuation
def format_str(content):
    """Return *content* with every character rejected by ``is_chinese`` removed.

    The surviving characters keep their original order; an empty input gives
    an empty string.
    """
    return ''.join(char for char in content if is_chinese(char))
# Load the input table; later code reads its 'mid' and 'text' columns.
datass = pd.read_csv('result.csv')
# In[]
# Replace missing texts with the placeholder '0', then coerce every entry to str.
datass['text'] = datass['text'].fillna('0').map(str)
# In[]
# Per-row result columns, appended in lock-step with the rows of ``datass``.
ls3 = []   # positive_prob
ls4 = []   # confidence
ls5 = []   # negative_prob
ls6 = []   # sentiment
ls11 = []  # 'mid' value of each row
ls12 = []  # 'text' value of each row


def _first_item(response):
    """Return ``response['items'][0]``, or None when the response has no usable item."""
    try:
        return response['items'][0]
    except (KeyError, IndexError, TypeError):
        return None


def _field(item, key):
    """Return ``item[key]``, or NaN when the field is missing."""
    try:
        return item[key]
    except (KeyError, IndexError, TypeError):
        return np.nan


def _classify_filtered(text):
    """Classify *text* after reducing it with ``format_str``.

    Returns the API response, or None when the call raises, so that a single
    failing row cannot abort the whole run and discard the results so far.
    """
    try:
        return client.sentimentClassify(str(format_str(text)))
    except Exception as exc:  # best-effort boundary around the remote call
        print('sentimentClassify failed:', exc)
        return None


for i in range(len(datass)):
    text = datass['text'].iloc[i]
    ls11.append(datass['mid'].iloc[i])
    ls12.append(text)
    snippet = text[:1000]  # only the first 1000 characters are sent

    # Query Baidu AI. If the raw text makes the call raise (presumably
    # characters the SDK cannot handle -- TODO confirm), retry with the
    # filtered text instead.
    try:
        datas = client.sentimentClassify(snippet)
    except Exception:
        datas = _classify_filtered(snippet)

    data = _first_item(datas)
    # error_code 18 is handled as "requests too fast": wait, then retry once.
    # ``.get`` avoids the KeyError the old ``datas['error_code']`` lookup
    # raised for responses carrying neither items nor an error code.
    if data is None and isinstance(datas, dict) and datas.get('error_code') == 18:
        print('速度太快了!!!')
        time.sleep(3)
        datas = _classify_filtered(snippet)
        data = _first_item(datas)

    if data is None:
        # No result for this row: keep all four columns aligned with NaN.
        ls3.append(np.nan)
        ls4.append(np.nan)
        ls5.append(np.nan)
        ls6.append(np.nan)
    else:
        print(i, datas)
        ls3.append(_field(data, 'positive_prob'))
        ls4.append(_field(data, 'confidence'))
        ls5.append(_field(data, 'negative_prob'))
        ls6.append(_field(data, 'sentiment'))

    # Random 1-2 s pause between rows to stay under the request rate limit.
    time.sleep(random.uniform(1, 2))
# In[]
# Attach the four sentiment columns to the input table, then export it as CSV
# without the index column.
sentiment_columns = {
    'positive_prob': ls3,
    'confidence': ls4,
    'negative_prob': ls5,
    'sentiment': ls6,
}
for column_name, column_values in sentiment_columns.items():
    datass[column_name] = column_values
datass.to_csv('情感分析.csv', index=None)
# In[]
# Build a compact table holding only the id, the text and the four sentiment
# fields, one column at a time in a fixed order.
results = pd.DataFrame()
for header, column in zip(
    ('id', 'text', 'positive_prob', 'confidence', 'negative_prob', 'sentiment'),
    (ls11, ls12, ls3, ls4, ls5, ls6),
):
    results[header] = column
# Optional Excel export, currently disabled:
#results.to_excel('E:/all.xlsx',index = None)