numpy：将数组中nan填充为均值

最新推荐文章于 2025-04-01 18:00:52 发布

y hat

最新推荐文章于 2025-04-01 18:00:52 发布

阅读量2.5k

点赞数 1

文章标签： numpy python

本文链接：https://blog.youkuaiyun.com/zhangqiqiyihao/article/details/109050804

版权

import numpy as np

# Replace every NaN in a 2-D array with the mean of its column
# (columns are used because a column usually holds one attribute).
def full_Ndarray(t1):
    """Fill the NaN entries of each column with that column's mean.

    The mean is computed over the non-NaN entries of the column only.
    The array is modified in place (each column is accessed as a view)
    and the same array object is returned for convenience.

    Args:
        t1: Two-dimensional NumPy array; ``t1.shape[1]`` is used as the
            number of columns. It should have a float dtype so that it
            can hold NaN and the computed means.

    Returns:
        The same array ``t1``, with NaN entries replaced. A column that
        consists entirely of NaN has no defined mean and is left as is.
    """
    for col_idx in range(t1.shape[1]):
        column = t1[:, col_idx]  # basic slice: a view onto t1, not a copy
        # NaN is the only value that compares unequal to itself, so this
        # mask is True exactly where the column holds NaN.
        nan_mask = column != column
        if not nan_mask.any():
            continue  # nothing to fill in this column
        if nan_mask.all():
            continue  # no valid values to average; leave the column as NaN
        # Writing through the view updates t1 itself.
        column[nan_mask] = column[~nan_mask].mean()
    # Return only after every column has been processed.
    return t1

if __name__ == '__main__':
    # Each demo is (array shape, index of the cells to overwrite with NaN).
    demos = (
        ((3, 4), (1, slice(1, None))),  # row 1, columns 1 onward
        ((4, 6), (1, slice(1, 3))),     # row 1, columns 1 and 2
    )
    for shape, nan_cells in demos:
        rows, cols = shape
        sample = np.arange(rows * cols).reshape(shape).astype(float)
        sample[nan_cells] = np.nan
        print(sample)  # array as passed to full_Ndarray
        sample = full_Ndarray(sample)
        print(sample)  # array as returned by full_Ndarray