Recently

    Recently, my mind has not been steady, and I don't know why. I want to analyze it and adjust myself as soon as possible, but I can't; the feeling is complicated and hard to control.

    And life seems more normal and tranquil now, yet many things are waiting for me. I must tackle them with a steady mind; I need success too much. I have a dream, and I will succeed.

```python
# Import the required libraries (consistent with the assignment document)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
import warnings
warnings.filterwarnings('ignore')

# ==============================
# Task 1: Data preparation (load the data under the variable names from the document)
# ==============================
try:
    order_data = pd.read_csv('meal_order_info.csv', encoding='gbk')
    history_order = pd.read_csv('info_new.csv', encoding='gbk')
    user_info = pd.read_csv('users.csv', encoding='gbk')  # make sure this variable is loaded
    user_loss = pd.read_csv('user_loss.csv', encoding='gbk')
    print("Data preview:")
    # The original printed an undefined variable `data`; preview order_data instead
    print(order_data.head().to_csv(sep='\t', na_rep='nan'))
except Exception as e:
    print(f"An error occurred: {e}")  # catch and report loading errors

# ==============================
# Task 2: Daily diner counts and sales (the document requires order status == 1)
# ==============================
def analyze_daily_sales(data):
    """Aggregate daily diner counts and sales over valid orders (status == 1)."""
    valid_data = data[data['order_status'] == 1].copy()
    # Extract the date (assumes a 'YYYY-MM-DD HH:MM' format; keep the first 10 characters)
    valid_data['use_start_time'] = valid_data['use_start_time'].str[:10]
    daily_stats = valid_data.groupby('use_start_time').agg(
        daily_diners=('number_consumers', 'sum'),
        daily_sales=('expenditure', 'sum')
    ).reset_index()

    # Line chart, as required by the document
    plt.figure(figsize=(12, 6))
    plt.plot(daily_stats['use_start_time'], daily_stats['daily_diners'],
             label='Daily diners', marker='o')
    plt.plot(daily_stats['use_start_time'], daily_stats['daily_sales'],
             label='Daily sales', marker='s')
    plt.title('Daily business trend', fontsize=14)
    plt.xlabel('Date', fontsize=12)
    plt.ylabel('Value', fontsize=12)
    plt.xticks(rotation=45)
    plt.legend()
    plt.grid(True)
    plt.show()
    return daily_stats

# Call the function (order_data is defined by now)
daily_trends = analyze_daily_sales(order_data)

# ==============================
# Task 3: Preprocessing (build the RFM and churn features)
# ==============================
# -------------------------
# Customer value analysis: RFM features (R/F/M as defined in the document)
# -------------------------
def build_rfm(order_data, user_info, rfm_end='2016-08-31'):
    merged = pd.merge(user_info, order_data, on='USER_ID', how='left')
    valid_orders = merged[merged['order_status'] == 1].copy()
    # Convert to datetime so the recency subtraction below works
    valid_orders['use_start_time'] = pd.to_datetime(valid_orders['use_start_time'])
    rfm = valid_orders.groupby('USER_ID').agg({
        'use_start_time': lambda x: (pd.to_datetime(rfm_end) - x.max()).days,
        'order_number': 'count',
        'expenditure': 'sum'
    }).reset_index()
    rfm.columns = ['USER_ID', 'R', 'F', 'M']
    rfm.fillna({'R': rfm['R'].max(), 'F': 0, 'M': 0}, inplace=True)
    return rfm

# Run the RFM analysis (user_info is loaded by now)
rfm_data = build_rfm(order_data, user_info)

# -------------------------
# Churn prediction: build the churn features (the 4 indicators in the document)
# -------------------------
def build_churn(user_loss_data, history_order_data, churn_end='2016-07-31'):
    churn_merged = pd.merge(user_loss_data, history_order_data, on='USER_ID', how='left')
    churn_merged['use_start_time'] = pd.to_datetime(churn_merged['use_start_time'])
    churn_features = churn_merged.groupby('USER_ID').agg({
        'order_number': 'count',                                                 # frequence
        'use_start_time': lambda x: (pd.to_datetime(churn_end) - x.max()).days,  # recently
        'expenditure': ['sum', lambda x: x.sum() / x.count() if x.count() != 0 else 0]  # amount, average
    }).reset_index()
    churn_features.columns = ['USER_ID', 'frequence', 'recently', 'amount', 'average']
    # Label churned customers (the document gives no threshold; use recency > 90 days)
    churn_features['churn_status'] = np.where(churn_features['recently'] > 90, 1, 0)
    return churn_features

churn_data = build_churn(user_loss, history_order)

# ==============================
# Task 4: K-Means clustering (3 clusters per the document)
# ==============================
scaler = StandardScaler()
rfm_scaled = scaler.fit_transform(rfm_data[['R', 'F', 'M']])
kmeans = KMeans(n_clusters=3, random_state=42)
rfm_data['cluster'] = kmeans.fit_predict(rfm_scaled)

# Report the cluster centres (the document asks for an analysis of each segment)
cluster_centers = pd.DataFrame(scaler.inverse_transform(kmeans.cluster_centers_),
                               columns=['R', 'F', 'M'],
                               index=['Segment 1', 'Segment 2', 'Segment 3'])
print("Segment feature centres:\n", cluster_centers.round(2))

# ==============================
# Task 5: Radar chart visualisation (the document requires a radar chart)
# ==============================
def plot_radar_chart(centers, features):
    n_clusters = centers.shape[0]
    angles = np.linspace(0, 2 * np.pi, len(features), endpoint=False).tolist()
    angles += angles[:1]  # close the polygon
    plt.figure(figsize=(8, 8))
    ax = plt.subplot(111, polar=True)  # a radar chart needs polar axes
    for i in range(n_clusters):
        values = centers.iloc[i].tolist() + [centers.iloc[i, 0]]
        ax.plot(angles, values, label=f'Segment {i + 1}')
        ax.fill(angles, values, alpha=0.2, edgecolor='black')
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(features, fontsize=10)
    ax.set_title('Customer value clusters (radar chart)', fontsize=14)
    ax.legend(loc='upper right')
    ax.grid(True, linestyle='--', alpha=0.7)
    plt.show()

plot_radar_chart(cluster_centers, ['Recency (R)', 'Frequency (F)', 'Monetary (M)'])

# ==============================
# Task 6: Decision tree model (the document uses the CART algorithm)
# ==============================
X = churn_data[['frequence', 'recently', 'average', 'amount']]
y = churn_data['churn_status']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
cart_model = DecisionTreeClassifier(criterion='gini', max_depth=3, random_state=42)
cart_model.fit(X_train, y_train)

# ==============================
# Task 7: Model evaluation (confusion matrix, as the document requires)
# ==============================
y_pred = cart_model.predict(X_test)
cm = confusion_matrix(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
print("Confusion matrix:\n", cm)
print(f"Precision: {precision:.2f}, Recall: {recall:.2f}, F1: {f1:.2f}")
```
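If you also want to see the rules the CART model actually learned, scikit-learn's `export_text` prints the fitted tree as plain text. A minimal sketch, assuming the model and feature columns from the script above:

```python
from sklearn.tree import export_text

# Print the decision rules of the fitted CART model as indented text.
# The feature names mirror the churn feature columns used for training above.
print(export_text(cart_model, feature_names=['frequence', 'recently', 'average', 'amount']))
```

With `max_depth=3`, the printed tree is small enough to read at a glance.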