今天,输出了交叉验证(cross validation)中每个类别的模型评估值。
主要有下面2步:
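# Step 1: helper that parses the text output of classification_report into a dict, which can then be loaded into a pandas DataFrame.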
from sklearn.metrics import classification_report
from collections import defaultdict
def report2dict(cr):
    """Parse a plain-text classification report into nested dictionaries.

    The first non-blank line of *cr* is treated as the header holding the
    metric names. Every later non-blank line becomes one entry: its trailing
    numeric tokens are the metric values and everything before them is the
    row label, so multi-word labels such as ``macro avg`` or ``avg / total``
    stay intact.

    A row with fewer numbers than the header has columns (for example an
    ``accuracy`` row) is right-aligned: its values are assigned to the last
    header columns.

    Args:
        cr: Report text, a header row followed by one row per label.

    Returns:
        A ``defaultdict(dict)`` mapping row label -> {metric name: float}.
        It is empty when *cr* contains no non-blank lines.
    """
    # Tokenise on any run of whitespace and drop blank lines.
    rows = [line.split() for line in cr.splitlines()]
    rows = [tokens for tokens in rows if tokens]

    D_class_data = defaultdict(dict)
    if not rows:
        return D_class_data

    measures = rows[0]
    for tokens in rows[1:]:
        # Collect numbers from the right. The first token is always kept as
        # (part of) the label, and at most one value per header column is
        # read, so numeric class labels such as "0" are not mistaken for data.
        values = []
        for token in reversed(tokens[1:][-len(measures):]):
            try:
                values.append(float(token))
            except ValueError:
                break
        if not values:
            # Nothing numeric on this line; there is no metric to record.
            continue
        values.reverse()

        class_label = " ".join(tokens[:len(tokens) - len(values)])
        for name, value in zip(measures[-len(values):], values):
            D_class_data[class_label][name] = value
    return D_class_data
# Step 2: compute the per-class evaluation metrics and average them over the cross-validation folds.
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn import metrics
from sklearn.metrics import classification_report,accuracy_score,confusion_matrix
from sklearn.model_selection import cross_val_score
# Per-class evaluation metrics, averaged over stratified cross-validation folds.
# NOTE(review): `text`, `lab`, `pipe` and `pd` (pandas) are not defined in the
# visible part of this file; they are assumed to come from earlier code.
from sklearn.base import clone

X = text
y = lab
skf = StratifiedKFold(n_splits=10)
print(skf)
# Keep the real fold count so the final average does not rely on a literal 10.
n_folds = skf.get_n_splits(X, y)

# Not filled in by this script; kept because later code may still refer to
# these names.
accuracy = []
precision = []
recall = []
fScore = []

# Running element-wise sum of the per-fold report tables.
pd_ = pd.DataFrame()
df_add = pd_

# Convert once so the fold indices can be used for fancy indexing.
X_arr = np.array(X)
y_arr = np.array(y)

for train_index, test_index in skf.split(X, y):
    X_train, X_test = X_arr[train_index], X_arr[test_index]
    y_train, y_test = y_arr[train_index], y_arr[test_index]

    # Fit a fresh copy on the training fold. Previously `pipe` was used as-is,
    # so the training fold never influenced the scores, and fitting `pipe`
    # itself would overwrite the caller's model.
    fold_model = clone(pipe)
    fold_model.fit(X_train, y_train)
    predictions = fold_model.predict(X_test)

    # Pass `labels` together with `target_names` so the report rows always
    # line up with the model's classes, even if a fold lacks one of them.
    fold_labels = fold_model.classes_
    fold_report = classification_report(
        y_test,
        predictions,
        labels=fold_labels,
        target_names=[str(label) for label in fold_labels],
    )

    # Rows are report labels, columns are metrics.
    pd1_ = pd.DataFrame(report2dict(fold_report)).T
    df_add = pd_.add(pd1_, fill_value=0)
    pd_ = df_add

# Mean of every column (including support) across the folds.
print(df_add / n_folds)