KNN

本文介绍了如何使用Python实现KD树及其在K近邻算法中的应用。首先,详细讲解了KD树的节点结构和构建过程,包括选择最佳分割特征、中位数划分等步骤。接着,展示了MaxHeap数据结构用于存储最近邻节点。最后,通过实例演示了KD树在K近邻算法中的搜索过程,并进行了训练和测试数据的分割、数据标准化以及预测准确性评估。

理论知识是参考的《统计学习方法》

代码基本上是参考别人写的,然后整理的。

先占个位置,周日写,别忘了
这是参考别人的实现,基本是在理解之后整理照抄的。
这是数据集,将数据集与下面的代码放到同一目录下即可正常运行

注释周日写,别忘了

class Node(object):
    """A single KD-tree node.

    Attributes:
        father: parent Node, or None for the root.
        left / right: child Nodes, or None.
        feature: index of the feature this node splits on (None for leaves).
        split: (sample, label) tuple stored at this node.
    """

    def __init__(self):
        self.father = None
        self.left = None
        self.right = None
        self.feature = None
        self.split = None

    def __str__(self):
        return "feature: %s, split: %s" % (str(self.feature), str(self.split))

    @property
    def brother(self):
        """Return this node's sibling, or None when it has no father.

        Fix: the original allocated a throwaway Node() into ``ret`` before
        unconditionally overwriting it; early returns avoid that.
        """
        if not self.father:
            return None
        if self.father.left is self:
            return self.father.right
        return self.father.left

# def testNode():
#     n1 = Node()
#     n1.split = [(0,1)]
#     n1.feature = 0
#
#     n2 = Node()
#     n2.split = [(1, 2)]
#     n2.feature = 1
#
#     n3 = Node()
#     n3.split = [(2, 3)]
#     n3.feature = 2
#
#     n1.left = n2
#     n2.father = n1
#     n1.right = n3
#     n3.father = n1
#     print(n2.brother)
# testNode()

class KDtree(object):
    """KD-tree built over a sample matrix X with labels y.

    Every node stores its (sample, label) pair in ``split``; internal nodes
    additionally store the index of the splitting feature in ``feature``
    (leaves keep ``feature`` = None).
    """

    def __init__(self):
        self.root = Node()

    def __str__(self):
        # Breadth-first debug dump of the tree.
        # NOTE(review): ``level`` is incremented once per visited node, not
        # once per tree level, so the printed "preLevel->level" pairs are
        # only a visit order, not true depths — confirm before relying on it.
        nd = self.root
        level = 0
        queue = [(nd, -1)]
        res = []
        while queue:
            nd, preLevel = queue.pop(0)
            res.append("%d->%d" % (preLevel, level) + str(nd))
            if nd.left is not None:
                queue.append((nd.left, level))
            if nd.right is not None:
                queue.append((nd.right, level))
            level += 1
        return "\n".join(res)

    def get_feature_s2(self, X, feature, idx):
        # Variance of ``feature`` over the rows of X listed in idx,
        # computed as E[x^2] - (E[x])^2.
        ex = 0
        ex2 = 0
        n = len(idx)
        for i in idx:
            ex += X[i][feature]
            ex2 += X[i][feature] ** 2
        return ex2 / n - (ex / n) ** 2

    def get_best_feature(self, X, idx):
        # Pick the feature with the largest variance over idx (defaults to
        # feature 0 when every feature has zero variance).
        best_feature = 0
        maxS2 = 0
        for f in range(len(X[0])):
            curS2 = self.get_feature_s2(X, f, idx)
            if curS2 > maxS2:
                maxS2 = curS2
                best_feature = f
        return best_feature

    def get_median(self, X, best_feature, idx):
        # Return the first index in idx whose value on best_feature equals
        # the (upper) median of that feature over idx.
        midVal = sorted(X[i][best_feature] for i in idx)[len(idx) // 2]
        res = idx[0]
        for i in idx:
            if X[i][best_feature] == midVal:
                res = i
                break
        return res

    def split_by_median(self, X, best_feature, mid, idx):
        # Partition idx (excluding mid itself) into rows strictly below the
        # median value (div[0]) and rows at or above it (div[1]).
        div = [[], []]
        for i in idx:
            if i == mid:
                continue
            if X[i][best_feature] < X[mid][best_feature]:
                div[0].append(i)
            else:
                div[1].append(i)
        return div

    def create_KDtree(self, X, y):
        # Build the tree breadth-first; each queue entry is a node plus the
        # row indices that fall in its region.
        queue = [(self.root,range(len(X)))]
        while queue:
            node,idx = queue.pop(0)
            n = len(idx)
            if n == 1:
                # Leaf: store the single remaining sample; ``feature`` stays None.
                node.split = (X[idx[0]],y[idx[0]])
                continue
            best_feature = self.get_best_feature(X,idx)
            median = self.get_median(X,best_feature,idx)
            div = self.split_by_median(X,best_feature,median,idx)
            node.feature = best_feature
            node.split = (X[median],y[median])
            if div[0] != []:   # note: == / != compare values; is / is not compare identity
                node.left = Node()
                # node.left.father = Node()
                node.left.father = node
                queue.append((node.left,div[0]))
            if div[1] != []:
                node.right = Node()
                # node.right.father = Node()
                node.right.father = node
                queue.append((node.right,div[1]))
    def get_eu_dist(self,Xi,node):
        # Euclidean distance between the query point Xi and node's sample.
        dist2 = 0
        for i in range(len(Xi)):
            dist2 += (node.split[0][i] - Xi[i]) ** 2
        return dist2 ** 0.5
    def get_hyper_dist(self,Xi,node):
        # Distance from Xi to the splitting hyperplane of an internal node.
        return abs(Xi[node.feature] - node.split[0][node.feature])
    def search_leave(self,Xi,node_ori):
        # Descend from node_ori to the leaf whose region contains Xi,
        # going left on strictly-smaller feature values, right otherwise.
        node = node_ori
        while node.left or node.right:
            if not node.left:
                node = node.right
            elif not node.right:
                node = node.left
            else:
                if Xi[node.feature] < node.split[0][node.feature]:
                    node = node.left
                else:
                    node = node.right
        return node

# def testKDtree():
#     t1 = KDtree()
#     t1.create_KDtree([[6,2],[3,5],[8,1],[6,3],[4,9],[5,0],[1,2]],[1,2,3,4,5,6,7])
#     # print(t1.get_eu_dist([2,8],t1.root.left.left))
#     print(t1.root.left.left.brother())
# testKDtree()


class MaxHeap(object):
    """Fixed-capacity max-heap that retains the ``maxSize`` smallest items.

    ``fn`` maps an item to its comparison key.  While not full, items are
    simply inserted; once full, a new item replaces the root only if its key
    is smaller than the current maximum, so the heap ends up holding the
    smallest items seen, with the largest of them at index 0.
    """

    def __init__(self, maxSize, fn):
        self.max_size = maxSize
        self._items = [None] * maxSize   # slots [0:size] hold live items
        self.size = 0
        self.fn = fn

    def insert(self, item):
        """Insert ``item``; when full, keep it only if it beats the root."""
        if self.size == self.max_size:
            if self.fn(item) < self.fn(self._items[0]):
                self._items[0] = item
                self.shift_down(0)
        else:
            self.size += 1
            self._items[self.size - 1] = item
            self.shift_up(self.size - 1)

    def shift_down(self, parent):
        """Restore the max-heap property downward from ``parent``."""
        child = parent * 2 + 1
        while child < self.size:
            # Choose the larger of the two children, if both exist.
            if child + 1 < self.size and self.fn(self._items[child]) < self.fn(self._items[child + 1]):
                child += 1
            if self.fn(self._items[child]) > self.fn(self._items[parent]):
                self._items[child], self._items[parent] = self._items[parent], self._items[child]
                parent = child
                child = parent * 2 + 1
            else:
                break

    def shift_up(self, child):
        """Restore the max-heap property upward from ``child``.

        Fix: the original broke out only on a strict ``<`` comparison and so
        swapped equal-keyed items all the way up; ``<=`` stops on ties.
        """
        parent = (child - 1) // 2
        while child > 0:
            if self.fn(self._items[child]) <= self.fn(self._items[parent]):
                break
            self._items[child], self._items[parent] = self._items[parent], self._items[child]
            child = parent
            parent = (child - 1) // 2

# mp = MaxHeap(20,lambda x:x)
# mp.insert(1)
# mp.insert(3)
# mp.insert(4)
# mp.insert(2)
# for i in range(mp.size):
#     print(mp._items[i])

class KNeighbors(object):
    """K-nearest-neighbour classifier backed by a KD-tree.

    Labels are assumed binary (0/1); ``predict`` returns the majority vote
    of the k nearest training samples.
    """

    def __init__(self):
        self.kneighbors = 0
        self.tree = KDtree()

    def fit(self, X, y, kneighbors):
        """Build the KD-tree over training samples X / labels y; k = kneighbors."""
        self.kneighbors = kneighbors
        self.tree = KDtree()
        self.tree.create_KDtree(X, y)

    def knn_search(self, Xi):
        """Return a MaxHeap holding the k nearest tree nodes to query point Xi."""
        heap = MaxHeap(self.kneighbors, lambda x: x.dist)
        tree = self.tree
        leave = tree.search_leave(Xi, tree.root)
        queue = [(tree.root, leave)]
        while queue:
            nd_root, nd_cur = queue.pop(0)
            nd_root.dist = tree.get_eu_dist(Xi, nd_root)
            heap.insert(nd_root)
            # Walk back up from the leaf toward the subtree root; visit a
            # sibling subtree only when the splitting hyperplane is closer
            # than the current k-th best distance (or the heap isn't full).
            while nd_cur is not nd_root:
                nd_cur.dist = tree.get_eu_dist(Xi, nd_cur)
                heap.insert(nd_cur)
                hyper_dist = tree.get_hyper_dist(Xi, nd_cur.father)
                if nd_cur.brother and (heap.size < heap.max_size or hyper_dist < heap.fn(heap._items[0])):
                    _nd = tree.search_leave(Xi, nd_cur.brother)
                    queue.append((nd_cur.brother, _nd))
                nd_cur = nd_cur.father
        return heap

    def predict(self, Xi):
        """Return True when the majority of the k nearest labels are 1.

        Fix: the original compared the vote sum against ``len(Xi) // 2`` —
        half the number of FEATURES of the query point — instead of half the
        number of neighbours actually found.
        """
        heap = self.knn_search(Xi)
        votes = sum(heap._items[i].split[1] for i in range(heap.size))
        return votes > heap.size // 2

from numpy.random import seed, choice
import numpy as np

def train_test_split(data, label=None, prob=0.7, random_state=None):
    """Randomly split ``data`` (and optionally ``label``) into train/test parts.

    Args:
        data: 2-D numpy array of samples (rows).
        label: optional 1-D numpy array of labels aligned with ``data``.
        prob: fraction of rows assigned to the training split.
        random_state: optional seed for a reproducible split.

    Returns:
        (data_train, data_test) or, when ``label`` is given,
        (data_train, data_test, label_train, label_test).
    """
    if random_state is not None:
        seed(random_state)
    n_rows, _ = data.shape
    k = int(n_rows * prob)
    train_indexes = choice(range(n_rows), size=k, replace=False)
    # Fix: membership testing against a set is O(1) per row; the original
    # tested ``i not in train_indexes`` against a numpy array, which scans
    # the whole array each time (accidental O(n^2) overall).
    train_index_set = set(train_indexes.tolist())
    test_indexes = np.array([i for i in range(n_rows) if i not in train_index_set])
    data_train = data[train_indexes]
    data_test = data[test_indexes]

    if label is not None:
        label_train = label[train_indexes]
        label_test = label[test_indexes]
        ret = (data_train, data_test, label_train, label_test)
    else:
        ret = (data_train, data_test)

    if random_state is not None:
        # Re-randomize the global RNG so later callers aren't deterministic.
        seed(None)
    return ret

def normalization_x(X):
    """Min-max scale every feature column of X in place to [0, 1].

    Fix: a constant column made ``maxVal - minVal`` zero and raised
    ZeroDivisionError in the original; such columns are now set to 0.0.
    The column scan for min/max is also done once instead of twice.

    Returns X (mutated in place) for call-chaining convenience.
    """
    n_rows = len(X)
    for f in range(len(X[0])):
        col = [X[i][f] for i in range(n_rows)]
        minVal = min(col)
        maxVal = max(col)
        span = maxVal - minVal
        for i in range(n_rows):
            X[i][f] = (X[i][f] - minVal) / span if span else 0.0
    return X

def main():
    """Run the KNN experiment end to end and return test-set accuracy.

    Loads 'breast_cancer.csv' from the current directory (last column is the
    label), min-max normalizes the features, does a 70/30 split with a fixed
    seed, fits a 21-nearest-neighbour classifier and scores it on the test set.
    """
    data = np.loadtxt('breast_cancer.csv',delimiter=',')
    X = data[:,:-1]
    y = data[:,-1]
    X = normalization_x(X)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, random_state=10)
    knn = KNeighbors()
    knn.fit(X_train, y_train, 21)
    y_predict = []
    for i in range(len(y_test)):
        # Progress indicator: one line per test sample (search is slow).
        print(i)
        y_predict.append(knn.predict(X_test[i]))

    #        print(y_predict[i],y_test[i])

    acc = sum(y_predict[i] == y_test[i] for i in range(len(y_test))) / (len(y_test))
    return acc
# Runs the experiment at import/script time and reports accuracy as a percentage.
acc = main()
print("accuracy: %.2f%%"%(acc * 100))


【复现】并_离网风光互补制氢合成氨系统容量-调度优化分析(Python代码实现)内容概要:本文围绕“并_离网风光互补制氢合成氨系统容量-调度优化分析”的主题,提供了基于Python代码实现的技术研究与复现方法。通过构建风能、太阳能互补的可再生能源系统模型,结合电解水制氢与合成氨工艺流程,对系统的容量配置与运行调度进行联合优化分析。利用优化算法求解系统在不同运行模式下的最优容量配比和调度策略,兼顾经济性、能效性和稳定性,适用于并网与离网两种场景。文中强调通过代码实践完成系统建模、约束设定、目标函数设计及求解过程,帮助读者掌握综合能源系统优化的核心方法。; 适合人群:具备一定Python编程基础和能源系统背景的研究生、科研人员及工程技术人员,尤其适合从事可再生能源、氢能、综合能源系统优化等相关领域的从业者;; 使用场景及目标:①用于教学与科研中对风光制氢合成氨系统的建模与优化训练;②支撑实际项目中对多能互补系统容量规划与调度策略的设计与验证;③帮助理解优化算法在能源系统中的应用逻辑与实现路径;; 阅读建议:建议读者结合文中提供的Python代码进行逐模块调试与运行,配合文档说明深入理解模型构建细节,重点关注目标函数设计、约束条件设置及求解器调用方式,同时可对比Matlab版本实现以拓宽工具应用视野。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值