import pandas as pd
import numpy as np
# Categorical data conversion
# Load the order-detail data
detail = pd.read_excel('meal_order_detail.xlsx')
# print(detail.columns)
# Dummy-variable (one-hot) conversion --> sparse matrix
# data = pd.get_dummies(detail['dishes_name'], prefix='菜品', prefix_sep='_')
# print(data)
# Continuous data conversion
# Discretize the 'amounts' column of detail
# print(detail['amounts'].max())
# print(detail['amounts'].min())
# Equal-width binning
# bins = [1, 40, 80, 160,200]
# Convert the continuous values into discrete categories
# res = pd.cut(detail['amounts'], bins=bins)
# print(res)
# num = pd.value_counts(res)
# print(num)
# data = pd.get_dummies(res)
# print(data)
# Equal-frequency binning
# Compute the quartiles of 'amounts' and use them as bin edges.
# quantile() returns the values at the requested quantile levels;
# linspace gives exactly 0, 0.25, 0.5, 0.75, 1 without float-step surprises.
bins = detail['amounts'].quantile(np.linspace(0, 1, 5))
# print(bins)
# Group the rows by quartile.
# include_lowest=True keeps rows equal to the minimum: the first interval is
# otherwise left-open, so those rows would fall outside every bin (NaN).
# duplicates='drop' avoids a ValueError when two quartile edges coincide.
data = pd.cut(
    detail['amounts'],
    bins=bins,
    include_lowest=True,
    duplicates='drop',
)
# Count the number of rows in each bin
# print(data)
# Series.value_counts() replaces the deprecated top-level pd.value_counts().
num = data.value_counts()
# print(num)
# Convert to a dummy-variable matrix, i.e. a sparse matrix
res = pd.get_dummies(data)
# print(res)
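# NOTE(review): the two lines below look like leftover web-page text, not code;
# kept as comments so the file parses.
# Dummy-variable data conversion, sparse matrix
# Latest recommended article published on 2023-06-27 00:00:00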