main.py
import numpy as np
def three_sigma(ser1, n_sigma=3):
    """Return the elements of *ser1* lying outside mean +/- n_sigma * std.

    Args:
        ser1: A pandas Series of numeric values (it must provide ``mean``,
            ``std`` and ``iloc``).
        n_sigma: Width of the accepted band, expressed as a multiple of the
            standard deviation. Defaults to 3 (the classic three-sigma rule).

    Returns:
        The subset of ``ser1`` whose values are strictly below the lower
        bound or strictly above the upper bound, with original index labels
        preserved. Empty when nothing falls outside the band.
    """
    mean_value = ser1.mean()
    spread = n_sigma * ser1.std()
    lower_bound = mean_value - spread
    upper_bound = mean_value + spread

    # Strict comparisons: values exactly on a bound are not reported.
    outlier_mask = (ser1 < lower_bound) | (ser1 > upper_bound)

    # Select by position so the result does not depend on the index labels.
    positions = np.flatnonzero(outlier_mask.to_numpy())
    return ser1.iloc[positions]
test.py
import pandas as pd
import matplotlib.pyplot as plt
import main
# Load the housing sales data. The file is still opened with open() (instead
# of handing the path to read_csv) so the same default text encoding is used
# to decode it; the `with` block makes sure the handle is closed after parsing.
with open("E:/某地区房屋销售数据.csv") as file:
    df = pd.read_csv(file)
print(df)

# Report the prices that fall outside the three-sigma band.
print(main.three_sigma(df["房屋价格"]))

# Small hand-made example containing one obviously extreme price (388).
data = {"菜谱": ["红烧肉", "铁板鱿鱼", "小炒肉", "干锅鸭掌", "酸菜鱼"],
        "价格": [39, 30, 26, 388, 35]}
df = pd.DataFrame(data)
print(df)

# Presumably selected so the Chinese title and labels render correctly.
plt.rcParams['font.sans-serif'] = ['Simsun']

# Horizontal box plot of the example frame, with a faint dashed grid.
df.plot.box(title="菜谱异常值", vert=False)
plt.grid(linestyle="--", alpha=0.3)
plt.show()