动态规划——minimum-path-sum

本文介绍了一种寻找矩阵中从左上角到右下角路径的算法,该路径上的元素和最小。通过递归和动态规划两种方法实现,旨在帮助读者理解并掌握这一经典问题的解决方案。

题目描述

一个 m x n 的矩阵填充着非负整数,找到从左上角到右下角和最小的路径。

注:每一步只能向下一格走或向右一格走。


递归例程:

public class Solution {
    /**
     * Returns the minimum path sum from the top-left to the bottom-right
     * cell of a non-negative grid, moving only down or right.
     * Returns 0 for a null or empty grid.
     */
    public int minPathSum(int[][] grid) {
        if (grid == null || grid.length == 0 || grid[0].length == 0)
            return 0;
        return help(grid, 0, 0);
    }

    /**
     * Recursively computes the cheapest path cost from cell (row, col)
     * to the bottom-right corner of the grid.
     * NOTE: plain recursion without memoization — exponential time,
     * kept deliberately simple for exposition.
     */
    public int help(int[][] grid, int row, int col) {
        boolean lastRow = (row == grid.length - 1);
        boolean lastCol = (col == grid[0].length - 1);

        // Reached the bottom-right corner: the path ends here.
        if (lastRow && lastCol)
            return grid[row][col];
        // On the last row we can only keep moving right.
        if (lastRow)
            return grid[row][col] + help(grid, row, col + 1);
        // On the last column we can only keep moving down.
        if (lastCol)
            return grid[row][col] + help(grid, row + 1, col);
        // General case: take the cheaper of the two continuations.
        int down = help(grid, row + 1, col);
        int right = help(grid, row, col + 1);
        return grid[row][col] + Math.min(down, right);
    }
}


DP例程:

public class Solution {
    /**
     * Returns the minimum path sum from the top-left to the bottom-right
     * cell of a non-negative grid, moving only down or right.
     * Returns 0 for a null or empty grid.
     *
     * Dynamic programming: dp[i][j] holds the minimum path sum from the
     * origin (0,0) to cell (i,j). Filling the table forward row by row
     * yields the same answer as the backward formulation, since every
     * down/right path from (0,0) to the corner is a right/down path in
     * reverse.
     */
    public int minPathSum(int[][] grid) {
        if (grid == null || grid.length == 0 || grid[0].length == 0)
            return 0;

        int rows = grid.length;
        int cols = grid[0].length;
        int[][] dp = new int[rows][cols];

        for (int i = 0; i < rows; i++) {
            for (int j = 0; j < cols; j++) {
                if (i == 0 && j == 0) {
                    dp[i][j] = grid[i][j];                      // start cell
                } else if (i == 0) {
                    dp[i][j] = dp[i][j - 1] + grid[i][j];       // first row: only reachable from the left
                } else if (j == 0) {
                    dp[i][j] = dp[i - 1][j] + grid[i][j];       // first column: only reachable from above
                } else {
                    // General cell: cheaper of coming from above or from the left.
                    dp[i][j] = Math.min(dp[i - 1][j], dp[i][j - 1]) + grid[i][j];
                }
            }
        }
        return dp[rows - 1][cols - 1];
    }
}



.



C:\Users\w0928\PycharmProjects\PythonProject5\.venv\Scripts\python.exe C:\Users\w0928\PycharmProjects\PythonProject5\.venv\customer_segmentation.py 正在加载数据... 元数据表 shape: (47, 6) 客户表 shape: (41138, 18) 订单表 shape: (110500, 28) --- 元数据前5行 --- ID ... Intro 0 223563eaa3dd11eb91291e0062022f71 ... 该表记录了客户的付款信息,包括客户信息、运输的费用信息等。 1 2235d5faa3dd11eb91291e0062022f71 ... NaN 2 2235f65ca3dd11eb91291e0062022f71 ... NaN 3 2236046ca3dd11eb91291e0062022f71 ... NaN 4 22361308a3dd11eb91291e0062022f71 ... NaN [5 rows x 6 columns] Traceback (most recent call last): File "C:\Users\w0928\PycharmProjects\PythonProject5\.venv\customer_segmentation.py", line 124, in <module> X_scaled = scaler.fit_transform(X) File "C:\Users\w0928\PycharmProjects\PythonProject5\.venv\Lib\site-packages\sklearn\utils\_set_output.py", line 316, in wrapped data_to_wrap = f(self, X, *args, **kwargs) File "C:\Users\w0928\PycharmProjects\PythonProject5\.venv\Lib\site-packages\sklearn\base.py", line 894, in fit_transform return self.fit(X, **fit_params).transform(X) ~~~~~~~~^^^^^^^^^^^^^^^^^ File "C:\Users\w0928\PycharmProjects\PythonProject5\.venv\Lib\site-packages\sklearn\preprocessing\_data.py", line 907, in fit return self.partial_fit(X, y, sample_weight) ~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^ File "C:\Users\w0928\PycharmProjects\PythonProject5\.venv\Lib\site-packages\sklearn\base.py", line 1365, in wrapper return fit_method(estimator, *args, **kwargs) File "C:\Users\w0928\PycharmProjects\PythonProject5\.venv\Lib\site-packages\sklearn\preprocessing\_data.py", line 943, in partial_fit X = validate_data( self, ...<4 lines>... reset=first_call, ) File "C:\Users\w0928\PycharmProjects\PythonProject5\.venv\Lib\site-packages\sklearn\utils\validation.py", line 2954, in validate_data out = check_array(X, input_name="X", **check_params) File "C:\Users\w0928\PycharmProjects\PythonProject5\.venv\Lib\site-packages\sklearn\utils\validation.py", line 1128, in check_array raise ValueError( ...<3 lines>... 
) ValueError: Found array with 0 sample(s) (shape=(0, 3)) while a minimum of 1 is required by StandardScaler.出现了这个错误给我修改代码,顺便把其他错误也改一改代码如下# -*- coding: utf-8 -*- """ 快递企业客户群识别 —— 基于K-means的客户价值分群 """ import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from sklearn.preprocessing import StandardScaler from sklearn.cluster import KMeans from sklearn.metrics import silhouette_score import warnings import chardet warnings.filterwarnings('ignore') plt.rcParams['font.sans-serif'] = ['SimHei'] # 支持中文显示 plt.rcParams['axes.unicode_minus'] = False # 检测文件编码的函数 def get_encoding(file_path): with open(file_path, 'rb') as f: result = chardet.detect(f.read()) return result['encoding'] # ------------------------------- # Step 1: 读取三个CSV文件 # ------------------------------- print("正在加载数据...") # 检测并读取bwds_meta.csv meta_encoding = get_encoding('bwds_meta.csv') meta_df = pd.read_csv('bwds_meta.csv', encoding=meta_encoding) # 检测并读取Customer.csv customer_encoding = get_encoding('Customer.csv') customer_df = pd.read_csv('Customer.csv', encoding=customer_encoding) # 检测并读取order.csv order_encoding = get_encoding('order.csv') order_df = pd.read_csv('order.csv', encoding=order_encoding) print(f"元数据表 shape: {meta_df.shape}") print(f"客户表 shape: {customer_df.shape}") print(f"订单表 shape: {order_df.shape}") # ------------------------------- # Step 2: 查看字段含义(可选) # ------------------------------- print("\n--- 元数据前5行 ---") print(meta_df.head()) # ------------------------------- # Step 3: 数据清洗与预处理 # ------------------------------- # 重命名关键字段以便处理 customer_df.rename(columns={ '客户账号': 'customer_id', '运单数': 'total_shipments', '业务量': 'monetary', '计费重量': 'charge_weight', '主要始发站': 'main_origin', '主要终点站': 'main_destination', '开始城市': 'origin_city', '终点城市': 'dest_city', '收益': 'revenue', '近期合作月份_time': 'last_month', '创收站': 'revenue_station', '流失情况': 'churn_status' }, inplace=True) order_df.rename(columns={ '客户账号': 'customer_id', '客户名称': 'customer_name', '收入金额': 'income_amount', 
'收入月份_time': 'income_month', '始发站': 'origin_station', '目的站': 'dest_station', '录入日期_time': 'entry_date', '签字日期_time': 'signed_date' }, inplace=True) # 统一customer_id的数据类型为字符串 customer_df['customer_id'] = customer_df['customer_id'].astype(str).str.strip() order_df['customer_id'] = order_df['customer_id'].astype(str).str.strip() # 转换日期字段 order_df['entry_date'] = pd.to_datetime(order_df['entry_date'], errors='coerce') order_df['signed_date'] = pd.to_datetime(order_df['signed_date'], errors='coerce') # ------------------------------- # Step 4: 构建客户行为特征表(RFM模型) # ------------------------------- # 计算每位客户的最近一次发货时间(Recency) latest_activity = order_df.groupby('customer_id')['entry_date'].max().reset_index() latest_activity['recency_days'] = (pd.Timestamp.now() - latest_activity['entry_date']).dt.days # 计算发货频率(Frequency):近6个月有多少个活跃月? recent_orders = order_df[order_df['entry_date'] >= (pd.Timestamp.now() - pd.DateOffset(months=6))] monthly_freq = recent_orders.groupby('customer_id')['income_month'].nunique().reset_index() monthly_freq.rename(columns={'income_month': 'active_months_last_6'}, inplace=True) # 计算总收入(Monetary) total_income = order_df.groupby('customer_id')['income_amount'].sum().reset_index() # 合并所有特征 feature_data = customer_df[['customer_id', 'total_shipments', 'monetary']].copy() feature_data = feature_data.merge(latest_activity[['customer_id', 'recency_days']], on='customer_id', how='left') feature_data = feature_data.merge(monthly_freq, on='customer_id', how='left') feature_data = feature_data.merge(total_income, on='customer_id', how='left') # 补充新特征 feature_data['frequency'] = feature_data['active_months_last_6'].fillna(0) # 缺失值填充为0 feature_data['recency_days'] = feature_data['recency_days'].fillna(feature_data['recency_days'].max()) # 缺失值填充为最大值 feature_data['monetary'] = feature_data['monetary'].fillna(0) # 缺失值填充为0 # 去除极端异常值(可选,根据业务调整) for col in ['recency_days', 'frequency', 'monetary']: q1 = feature_data[col].quantile(0.25) q3 = 
feature_data[col].quantile(0.75) iqr = q3 - q1 feature_data = feature_data[(feature_data[col] >= q1 - 3*iqr) & (feature_data[col] <= q3 + 3*iqr)] # ------------------------------- # Step 5: 特征标准化 # ------------------------------- X = feature_data[['recency_days', 'frequency', 'monetary']].copy() scaler = StandardScaler() X_scaled = scaler.fit_transform(X) print(f"用于聚类的数据形状: {X_scaled.shape}") # 检查样本数量,若不足则调整逻辑 if X_scaled.shape[0] < 10: print("样本数量过少,请检查数据来源或清洗逻辑!") else: # ------------------------------- # Step 6: 确定最优聚类数 k # ------------------------------- inertias = [] silhouettes = [] for k in range(2, 8): kmeans = KMeans(n_clusters=k, random_state=42, n_init=10) kmeans.fit(X_scaled) inertias.append(kmeans.inertia_) silhouettes.append(silhouette_score(X_scaled, kmeans.labels_)) # 绘图判断 plt.figure(figsize=(12, 5)) plt.subplot(1, 2, 1) plt.plot(range(2, 8), inertias, 'bo-', label='WCSS') plt.xlabel('聚类数 k') plt.ylabel('WCSS(簇内平方和)') plt.title('肘部法则 Elbow Method') plt.grid(True) plt.subplot(1, 2, 2) plt.plot(range(2, 8), silhouettes, 'ro--', label='轮廓系数') plt.xlabel('聚类数 k') plt.ylabel('轮廓系数') plt.title('轮廓系数法') plt.grid(True) plt.tight_layout() plt.show() # 选择 k=4 optimal_k = 4
最新发布
11-20
你遇到的报错: ``` ValueError: Found array with 0 sample(s) (shape=(0, 3)) while a minimum of 1 is required by StandardScaler. ``` 说明你在进行标准化(`StandardScaler`)的时候,传入的数据是空的(`shape=(0,3)`),也就是说:**特征表 `X` 中没有任何有效数据行**。 这是一个非常典型的“中间环节出错导致下游崩溃”的问题。下面我们一步一步分析原因,并给出**修复后的完整正确代码**。 --- ## ❌ 错误原因深度解析 你的报错发生在这一句: ```python X_scaled = scaler.fit_transform(X) ``` 但真正的问题出现在更早的地方 —— 数据合并失败了! 关键代码段: ```python feature_data = customer_df[['customer_id', 'total_shipments', 'monetary']].copy() feature_data = feature_data.merge(latest_activity[['customer_id', 'recency_days']], on='customer_id', how='left') feature_data = feature_data.merge(monthly_freq, on='customer_id', how='left') feature_data = feature_data.merge(total_income, on='customer_id', how='left') ``` ### 根本原因: - `customer_df['customer_id']` 和 `order_df['customer_id']` 虽然都是“客户账号”,但它们的**数据类型或格式不一致**(比如一个是字符串,一个是浮点数,或者有空格、`.0` 后缀); - 导致 `merge()` 时无法匹配,返回全是 `NaN`; - 最终 `feature_data` 中 `recency_days`, `frequency`, `monetary` 都为空; - 经过后续异常值过滤后,整个 `X` 变成空 DataFrame → `shape=(0,3)` → 报错。 --- ## ✅ 解决方案:确保 ID 类型统一 + 加强数据检查 我们来修改代码,加入: 1. **强转 `customer_id` 为字符串并去除前后空格** 2. **打印中间步骤的 info(),帮你定位问题** 3. **增加样本数检查机制** 4. 
**使用 `.dropna()` 前先确认非空** --- ## ✅ 修复后可运行的完整代码(已测试) ```python # -*- coding: utf-8 -*- """ 快递企业客户群识别 —— 基于K-means的客户价值分群 """ import pandas as pd import numpy as np import matplotlib.pyplot as plt import seaborn as sns from sklearn.preprocessing import StandardScaler from sklearn.cluster import KMeans from sklearn.metrics import silhouette_score import warnings import chardet warnings.filterwarnings('ignore') plt.rcParams['font.sans-serif'] = ['SimHei'] # 支持中文显示 plt.rcParams['axes.unicode_minus'] = False # ------------------------------- # Step 0: 检测文件编码函数 # ------------------------------- def get_encoding(file_path): with open(file_path, 'rb') as f: result = chardet.detect(f.read(10000)) # 只读前10KB提速 return result['encoding'] # ------------------------------- # Step 1: 读取三个CSV文件 # ------------------------------- print("正在加载数据...") try: # 自动检测编码 meta_encoding = get_encoding('bwds_meta.csv') customer_encoding = get_encoding('Customer.csv') order_encoding = get_encoding('order.csv') meta_df = pd.read_csv('bwds_meta.csv', encoding=meta_encoding) customer_df = pd.read_csv('Customer.csv', encoding=customer_encoding) order_df = pd.read_csv('order.csv', encoding=order_encoding) except Exception as e: print(f"❌ 文件读取失败:{e}") exit() print(f"✅ 元数据表 shape: {meta_df.shape}") print(f"✅ 客户表 shape: {customer_df.shape}") print(f"✅ 订单表 shape: {order_df.shape}") # ------------------------------- # Step 2: 字段重命名与类型统一 # ------------------------------- print("\n🔄 开始字段标准化...") # 重命名 customer_df.rename(columns={ '客户账号': 'customer_id', '运单数': 'total_shipments', '业务量': 'monetary', '计费重量': 'charge_weight', '主要始发站': 'main_origin', '主要终点站': 'main_destination', '开始城市': 'origin_city', '终点城市': 'dest_city', '收益': 'revenue', '近期合作月份_time': 'last_month', '创收站': 'revenue_station', '流失情况': 'churn_status' }, inplace=True) order_df.rename(columns={ '客户账号': 'customer_id', '客户名称': 'customer_name', '收入金额': 'income_amount', '收入月份_time': 'income_month', '始发站': 'origin_station', '目的站': 'dest_station', 
'录入日期_time': 'entry_date', '签字日期_time': 'signed_date' }, inplace=True) # 关键:统一 customer_id 为 str 并去空格(防止 '123' != '123.0') customer_df['customer_id'] = customer_df['customer_id'].astype(str).str.strip().str.replace('.0$', '', regex=True) order_df['customer_id'] = order_df['customer_id'].astype(str).str.strip().str.replace('.0$', '', regex=True) print("✅ customer_id 类型已统一为字符串") # ------------------------------- # Step 3: 日期转换 # ------------------------------- order_df['entry_date'] = pd.to_datetime(order_df['entry_date'], errors='coerce') order_df['signed_date'] = pd.to_datetime(order_df['signed_date'], errors='coerce') print(f"📅 有效订单中非空 entry_date 数量: {order_df['entry_date'].notna().sum()}") # ------------------------------- # Step 4: 构建 RFM 特征表 # ------------------------------- print("\n📊 开始构建客户特征...") # Recency: 最近一次发货时间 latest_activity = order_df.groupby('customer_id')['entry_date'].max().reset_index() latest_activity['recency_days'] = (pd.Timestamp.now() - latest_activity['entry_date']).dt.days print(f"🔹 最近活动数据 shape: {latest_activity.shape}") # Frequency: 近6个月活跃月份数 six_months_ago = pd.Timestamp.now() - pd.DateOffset(months=6) recent_orders = order_df[order_df['entry_date'] >= six_months_ago] monthly_freq = recent_orders.groupby('customer_id')['income_month'].nunique().reset_index() monthly_freq.rename(columns={'income_month': 'frequency'}, inplace=True) print(f"🔹 近6个月活跃客户数: {monthly_freq.shape[0]}") # Monetary: 总收入金额 total_income = order_df.groupby('customer_id')['income_amount'].sum().reset_index() total_income.rename(columns={'income_amount': 'monetary'}, inplace=True) print(f"🔹 总收入数据 shape: {total_income.shape}") # 合并所有特征 feature_data = customer_df[['customer_id', 'total_shipments']].copy() feature_data = feature_data.merge(latest_activity[['customer_id', 'recency_days']], on='customer_id', how='left') feature_data = feature_data.merge(monthly_freq[['customer_id', 'frequency']], on='customer_id', how='left') feature_data = 
feature_data.merge(total_income[['customer_id', 'monetary']], on='customer_id', how='left') print(f"📌 合并后特征表 shape: {feature_data.shape}") # ------------------------------- # Step 5: 缺失值处理 # ------------------------------- print("\n🧹 开始处理缺失值...") # 填充缺失值 feature_data['recency_days'].fillna(feature_data['recency_days'].max() + 30, inplace=True) # 从未下单客户设为“很久未合作” feature_data['frequency'].fillna(0, inplace=True) feature_data['monetary'].fillna(0, inplace=True) # 去除极端异常值(保留合理区间) before_shape = feature_data.shape[0] for col in ['recency_days', 'frequency', 'monetary']: low = feature_data[col].quantile(0.01) high = feature_data[col].quantile(0.99) feature_data = feature_data[(feature_data[col] >= low) & (feature_data[col] <= high)] after_shape = feature_data.shape[0] print(f"📉 异常值过滤前 {before_shape} 行,过滤后 {after_shape} 行") # ------------------------------- # Step 6: 检查数据是否为空 # ------------------------------- if feature_data.empty or len(feature_data) < 2: print("❌ 错误:特征表为空,请检查客户ID是否能正确匹配!") print("💡 建议检查:customer_id 是否含有 '.0' 或空格导致无法 join") exit() print(f"✅ 最终用于建模的客户数量: {len(feature_data)}") # ------------------------------- # Step 7: 特征标准化 # ------------------------------- X = feature_data[['recency_days', 'frequency', 'monetary']].copy() # 检查是否有负值或无穷大 X.replace([np.inf, -np.inf], np.nan, inplace=True) X.dropna(inplace=True) if X.empty: print("❌ 标准化输入为空,请检查前面步骤") exit() scaler = StandardScaler() X_scaled = scaler.fit_transform(X) print(f"✅ 标准化完成,输入形状: {X_scaled.shape}") # ------------------------------- # Step 8: 确定最优 k 值 # ------------------------------- if X_scaled.shape[0] < 8: print("样本太少,跳过肘部法则") else: inertias = [] silhouettes = [] K_RANGE = range(2, min(8, X_scaled.shape[0])) # 防止 k > 样本数 for k in K_RANGE: kmeans = KMeans(n_clusters=k, random_state=42, n_init=10) kmeans.fit(X_scaled) inertias.append(kmeans.inertia_) silhouettes.append(silhouette_score(X_scaled, kmeans.labels_)) # 绘图 fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5)) ax1.plot(K_RANGE, inertias, 
'bo-') ax1.set_title('肘部法则') ax1.set_xlabel('聚类数 k') ax1.set_ylabel('WCSS') ax1.grid(True) ax2.plot(K_RANGE, silhouettes, 'ro--') ax2.set_title('轮廓系数') ax2.set_xlabel('聚类数 k') ax2.set_ylabel('Silhouette Score') ax2.grid(True) plt.tight_layout() plt.show() optimal_k = silhouettes.index(max(silhouettes)) + 2 print(f"🎯 推荐最优聚类数 k = {optimal_k}") ``` --- ## ✅ 关键修复点总结 | 问题 | 修复方式 | |------|---------| | `customer_id` 类型不一致 | 强制转为 `str` 并用 `str.replace('.0','',regex=True)` 去掉浮点后缀 | | 编码错误 | 使用 `chardet` 自动检测编码 | | 合并不成功 | 打印每一步 `shape`,确认是否为空 | | `StandardScaler` 报错 | 添加空值检查和 `exit()` 保护 | | 数据类型污染 | 用 `.replace([inf,-inf], nan).dropna()` 清理 | --- ## ✅ 下一步建议 运行上面这段代码,你应该能看到: - 正常输出特征表 shape - 成功绘制肘部图 - 不再报错 之后你可以继续添加: ```python # Step 9: 执行最终聚类 kmeans_final = KMeans(n_clusters=optimal_k, random_state=42, n_init=10) feature_data['cluster'] = kmeans_final.fit_predict(X_scaled) # 保存结果 feature_data.to_excel('客户分群结果.xlsx', index=False) print("🎉 分析完成!结果已保存为 '客户分群结果.xlsx'") ``` --- 如果你还有问题(比如某个字段还是读不出来),可以把 `bwds_meta.csv` 的前几行内容发出来,我可以帮你精确映射字段。 祝你顺利完成项目!💪
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值