绘制sklearn分类结果图

本文介绍了一种使用Matplotlib将scikit-learn的分类报告转化为可视化的图表的方法,通过代码示例展示了如何将分类报告的精度、召回率、F1分数和支持值以热力图形式呈现。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

想解决这样一个问题:

怎样使用matplotlib画出scikit-learn的分类报告,也就是 classification_report的输出。例如下面这个report:

print('\n*Classification Report:\n', classification_report(y_test, predictions),
    confusion_matrix_graph = confusion_matrix(y_test, predictions))

生成总结报告:

Clasification Report:
             precision    recall  f1-score   support

          1       0.62      1.00      0.76        66
          2       0.93      0.93      0.93        40
          3       0.59      0.97      0.73        67
          4       0.47      0.92      0.62       272
          5       1.00      0.16      0.28       413

avg / total       0.77      0.57      0.49       858

下面代码是具体实现及结果图:

import matplotlib.pyplot as plt
import numpy as np

def show_values(pc, fmt="%.2f", **kw):
    '''
    Heatmap with text in each cell with matplotlib's pyplot
    Source: https://stackoverflow.com/a/25074150/395857 
    By HYRY
    '''
    from itertools import izip
    pc.update_scalarmappable()
    ax = pc.get_axes()
    for p, color, value in izip(pc.get_paths(), pc.get_facecolors(), pc.get_array()):
        x, y = p.vertices[:-2, :].mean(0)
        if np.all(color[:3] > 0.5):
            color = (0.0, 0.0, 0.0)
        else:
            color = (1.0, 1.0, 1.0)
        ax.text(x, y, fmt % value, ha="center", va="center", color=color, **kw)


def cm2inch(*tupl):
    '''
    Specify figure size in centimeter in matplotlib
    Source: https://stackoverflow.com/a/22787457/395857
    By gns-ank
    '''
    inch = 2.54
    if type(tupl[0]) == tuple:
        return tuple(i/inch for i in tupl[0])
    else:
        return tuple(i/inch for i in tupl)


def heatmap(AUC, title, xlabel, ylabel, xticklabels, yticklabels, figure_width=40, figure_height=20, correct_orientation=False, cmap='RdBu'):
    '''
    Inspired by:
    - https://stackoverflow.com/a/16124677/395857 
    - https://stackoverflow.com/a/25074150/395857
    '''

    # Plot it out
    fig, ax = plt.subplots()    
    #c = ax.pcolor(AUC, edgecolors='k', linestyle= 'dashed', linewidths=0.2, cmap='RdBu', vmin=0.0, vmax=1.0)
    c = ax.pcolor(AUC, edgecolors='k', linestyle= 'dashed', linewidths=0.2, cmap=cmap)

    # put the major ticks at the middle of each cell
    ax.set_yticks(np.arange(AUC.shape[0]) + 0.5, minor=False)
    ax.set_xticks(np.arange(AUC.shape[1]) + 0.5, minor=False)

    # set tick labels
    #ax.set_xticklabels(np.arange(1,AUC.shape[1]+1), minor=False)
    ax.set_xticklabels(xticklabels, minor=False)
    ax.set_yticklabels(yticklabels, minor=False)

    # set title and x/y labels
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)      

    # Remove last blank column
    plt.xlim( (0, AUC.shape[1]) )

    # Turn off all the ticks
    ax = plt.gca()    
    for t in ax.xaxis.get_major_ticks():
        t.tick1On = False
        t.tick2On = False
    for t in ax.yaxis.get_major_ticks():
        t.tick1On = False
        t.tick2On = False

    # Add color bar
    plt.colorbar(c)

    # Add text in each cell 
    show_values(c)

    # Proper orientation (origin at the top left instead of bottom left)
    if correct_orientation:
        ax.invert_yaxis()
        ax.xaxis.tick_top()       

    # resize 
    fig = plt.gcf()
    #fig.set_size_inches(cm2inch(40, 20))
    #fig.set_size_inches(cm2inch(40*4, 20*4))
    fig.set_size_inches(cm2inch(figure_width, figure_height))



def plot_classification_report(classification_report, title='Classification report ', cmap='RdBu'):
    '''
    Plot scikit-learn classification report.
    Extension based on https://stackoverflow.com/a/31689645/395857 
    '''
    lines = classification_report.split('\n')

    classes = []
    plotMat = []
    support = []
    class_names = []
    for line in lines[2 : (len(lines) - 2)]:
        t = line.strip().split()
        if len(t) < 2: continue
        classes.append(t[0])
        v = [float(x) for x in t[1: len(t) - 1]]
        support.append(int(t[-1]))
        class_names.append(t[0])
        print(v)
        plotMat.append(v)

    print('plotMat: {0}'.format(plotMat))
    print('support: {0}'.format(support))

    xlabel = 'Metrics'
    ylabel = 'Classes'
    xticklabels = ['Precision', 'Recall', 'F1-score']
    yticklabels = ['{0} ({1})'.format(class_names[idx], sup) for idx, sup  in enumerate(support)]
    figure_width = 25
    figure_height = len(class_names) + 7
    correct_orientation = False
    heatmap(np.array(plotMat), title, xlabel, ylabel, xticklabels, yticklabels, figure_width, figure_height, correct_orientation, cmap=cmap)


def main():
    sampleClassificationReport = """             precision    recall  f1-score   support

          Acacia       0.62      1.00      0.76        66
          Blossom       0.93      0.93      0.93        40
          Camellia       0.59      0.97      0.73        67
          Daisy       0.47      0.92      0.62       272
          Echium       1.00      0.16      0.28       413

        avg / total       0.77      0.57      0.49       858"""


    plot_classification_report(sampleClassificationReport)
    plt.savefig('test_plot_classif_report.png', dpi=200, format='png', bbox_inches='tight')
    plt.close()

if __name__ == "__main__":
    main()
    #cProfile.run('main()') # if you want to do some profiling

输出结果图:

 

当类别变得特别多时,例如40个类别:

enter image description here

 

好的,我来回答你的问题。手写数字识别是一个常见的机器学习问题,KNN算法是其中一种经典的算法。下面是一个简单的Python代码实现KNN算法进行手写数字识别,不使用sklearn库: ```python import numpy as np import operator def classify(input_data, data_set, labels, k): # 计算距离 data_size = data_set.shape[0] diff_mat = np.tile(input_data, (data_size, 1)) - data_set sq_diff_mat = diff_mat ** 2 sq_distances = sq_diff_mat.sum(axis=1) distances = sq_distances ** 0.5 # 选择距离最小的k个点 sorted_dist_indices = distances.argsort() class_count = {} for i in range(k): vote_label = labels[sorted_dist_indices[i]] class_count[vote_label] = class_count.get(vote_label, 0) + 1 # 返回最终分类结果 sorted_class_count = sorted(class_count.items(), key=operator.itemgetter(1), reverse=True) return sorted_class_count[0][0] def load_data(file_path): data_set = [] labels = [] with open(file_path) as f: for line in f.readlines(): line = line.strip() if not line: continue data = list(map(int, line.split(','))) data_set.append(data[1:]) labels.append(data[0]) return np.array(data_set), np.array(labels) if __name__ == '__main__': train_data, train_labels = load_data('train.csv') test_data, test_labels = load_data('test.csv') num_test = test_data.shape[0] error_count = 0 for i in range(num_test): predict_label = classify(test_data[i], train_data, train_labels, k=3) if predict_label != test_labels[i]: error_count += 1 print('错误率为:%f' % (error_count / float(num_test))) ``` 以上代码主要包含了三个函数:classify()、load_data()和main()。其中,classify()函数是KNN算法的核心实现,用于对一个测试样本进行分类;load_data()函数用于加载数据集;main()函数用于测试KNN算法,并输出错误率。 在上述代码中,我们首先使用load_data()函数加载数据集,这里我们使用的是MNIST手写数字数据集。然后,对于测试集中的每一个样本,我们将其传入classify()函数进行分类,并将分类结果与真实标签进行比较,以计算错误率。 需要说明的是,由于本代码实现简单,所以对于大规模的数据集,其运行速度可能会比较慢,因此在实际使用中需要进行优化。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值