# -*- coding: utf-8 -*-
from sklearn.cluster import Birch
from time import time
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import datasets
from sklearn.manifold import TSNE
# Configure matplotlib so that Chinese labels render correctly (SimHei font)
# and so that the minus sign on negative numbers is displayed properly.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
def get_tsne_data(dataname=u"../datasets/one_hot_kdd_tsne.csv", nrows=10000):
    """Load a header-less, UTF-8 encoded CSV file into a DataFrame.

    Args:
        dataname: Path of the CSV file to read.
        nrows: Maximum number of rows to read from the top of the file.
            Defaults to 10000 (the previously hard-coded limit); pass
            ``None`` to read the whole file.

    Returns:
        pandas.DataFrame holding the rows read. Because the file is read
        with ``header=None``, the columns are labelled 0, 1, 2, ...
    """
    return pd.read_csv(dataname, encoding="utf-8", header=None, nrows=nrows)
def get_data(dataname=u"../datasets/one_hot_kdd.csv", nrows=10000, label_col=38):
    """Load a header-less, UTF-8 encoded CSV file and split off its label column.

    Args:
        dataname: Path of the CSV file to read.
        nrows: Maximum number of rows to read from the top of the file.
            Defaults to 10000 (the previously hard-coded limit); pass
            ``None`` to read the whole file.
        label_col: Column label (an integer position, since the file is read
            with ``header=None``) of the column to return as the label.
            Defaults to 38, the previously hard-coded column.

    Returns:
        A ``(dataset, label)`` tuple: ``dataset`` is the DataFrame without
        the label column (remaining columns keep their original integer
        labels), and ``label`` is the removed column as a Series.

    Raises:
        KeyError: If ``label_col`` is not a column of the loaded file.
    """
    dataset = pd.read_csv(dataname, encoding="utf-8", header=None, nrows=nrows)
    # DataFrame.pop removes the column in place, so after this call
    # `dataset` holds only the non-label columns.
    label = dataset.pop(label_col)
    return dataset, label
def birch(data, tsnedata):
X = data
birch = Birch(n_clusters=4, threshold=0.6)
##训练数据
labels = birc