K-Means 聚类分析:银行数据分析记录
调用的包
import seaborn as sns
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestRegressor
# Load the raw data set.
# The source file is an Excel workbook (.xlsx), which is a binary zip
# container; pd.read_csv cannot parse it, so read it with pd.read_excel.
# NOTE(review): if 'data.xlsx' is really CSV text with a misleading
# extension, rename the file to .csv and use pd.read_csv instead.
df = pd.read_excel('data.xlsx')
- 数据预处理
①异常值处理(将年龄≤0 的记录替换为有效年龄的均值)
# Replace invalid (non-positive) ages with the mean of the valid ages.
a = df[df['年龄'] > 0]  # rows whose age is strictly positive
b = a['年龄']  # the valid ages; b.mean() is the replacement value

# Vectorised replacement: Series.mask swaps in the mean wherever age <= 0.
# This replaces the per-row chained assignment df['年龄'][i] = ..., which
# raises SettingWithCopyWarning, silently does nothing under pandas
# copy-on-write, and only works when the index is the default 0..n-1 range.
# NaN ages compare False against <= 0 and are therefore left untouched here.
df['年龄'] = df['年龄'].mask(df['年龄'] <= 0, b.mean())
②缺失值处理(用有效年龄的均值填充年龄缺失值)
# Fill missing (NaN) ages with the mean of the valid ages held in `b`.
df['年龄'] = df['年龄'].fillna(value=b.mean())
df['职业'] = df['职业'].</