直接安装mlxtend库出错

直接使用 pip install mlxtend 安装失败。
解决方法:
1. 打开 Anaconda Prompt
2. 输入 conda update --all
3. 输入 conda install -c conda-forge mlxtend 完成安装

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.frequent_patterns import association_rules

# Columns that main() prints from the rules table; used to build an empty
# rules frame when there is nothing to mine.
_RULE_COLUMNS = ['antecedents', 'consequents', 'support', 'confidence', 'lift']


def _split_skills(raw):
    """Split a comma-separated skills string into a list of clean skill names.

    Non-string values (e.g. NaN for a missing cell) yield an empty list, and
    surrounding whitespace / empty fragments are dropped so that "SQL" and
    " SQL" are treated as the same item.
    """
    if not isinstance(raw, str):
        return []
    return [part.strip() for part in raw.split(',') if part.strip()]


# Step 1: load and preprocess the data
def load_and_preprocess(file_path):
    """Load the CSV and return the high-salary rows with a parsed skills column.

    Args:
        file_path: Path to a UTF-8 CSV containing the columns ``salary_usd``
            and ``required_skills`` (comma-separated skill names).

    Returns:
        A DataFrame holding only the rows whose ``salary_usd`` is strictly
        above the 75th percentile, with an extra ``skills`` column (list of
        str). Rows without any skill are removed.
    """
    df = pd.read_csv(file_path, encoding='utf-8')

    # High-salary threshold = 75th percentile (top 25%). nanpercentile ignores
    # missing salaries; plain percentile would return NaN and select nothing.
    salary_threshold = np.nanpercentile(df['salary_usd'], 75)
    print(f"高薪阈值设定为: ${salary_threshold:,.0f} USD/年")

    # Keep only the high-salary positions.
    high_salary_df = df[df['salary_usd'] > salary_threshold].copy()

    # Parse the skills column into lists of cleaned skill names.
    high_salary_df['skills'] = high_salary_df['required_skills'].apply(_split_skills)

    # Drop rows that ended up with no skills at all.
    high_salary_df = high_salary_df[high_salary_df['skills'].apply(len) > 0]
    return high_salary_df


# Step 2: apply the FP-Growth algorithm
def apply_fpgrowth(df, min_support=0.05, min_confidence=0.7):
    """Mine frequent skill sets and association rules with FP-Growth.

    Args:
        df: DataFrame with a ``skills`` column whose cells are lists of str.
        min_support: Minimum support passed to ``fpgrowth``.
        min_confidence: Minimum confidence passed to ``association_rules``.

    Returns:
        A ``(frequent_itemsets, rules)`` tuple of DataFrames. ``rules`` only
        contains rules with lift > 1.0, sorted by lift then confidence in
        descending order. When there is nothing to mine, ``rules`` is an
        empty DataFrame carrying the columns in ``_RULE_COLUMNS``.
    """
    # Convert to a list of transactions.
    transactions = df['skills'].tolist()
    if not transactions:
        return (
            pd.DataFrame(columns=['support', 'itemsets']),
            pd.DataFrame(columns=_RULE_COLUMNS),
        )

    # One-hot encode the transactions.
    te = TransactionEncoder()
    te_ary = te.fit(transactions).transform(transactions)
    df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

    # Mine the frequent itemsets.
    frequent_itemsets = fpgrowth(
        df_encoded,
        min_support=min_support,
        use_colnames=True,
    )

    # association_rules rejects an empty itemset table, so short-circuit.
    if frequent_itemsets.empty:
        return frequent_itemsets, pd.DataFrame(columns=_RULE_COLUMNS)

    # Generate the association rules.
    rules = association_rules(
        frequent_itemsets,
        metric="confidence",
        min_threshold=min_confidence,
    )

    # Keep positively correlated rules only (lift > 1) and rank them.
    rules = rules[rules['lift'] > 1.0]
    rules = rules.sort_values(['lift', 'confidence'], ascending=False)
    return frequent_itemsets, rules


# Step 3: visualize the results
def visualize_results(rules, top_n=10):
    """Plot lift and confidence of the first ``top_n`` rules.

    Args:
        rules: Rules DataFrame with ``antecedents``, ``consequents``,
            ``lift`` and ``confidence`` columns (already sorted).
        top_n: Number of leading rules to show.

    Returns:
        A copy of the first ``top_n`` rules with an extra ``rule`` column
        holding the "A, B → C" label. If there are no rules, the (empty)
        frame is returned and no figure is drawn.
    """
    # Take the first N rules.
    top_rules = rules.head(top_n).copy()

    # Build the rule labels with a plain list instead of
    # DataFrame.apply(axis=1): on an empty frame apply() returns a DataFrame,
    # which is what triggered "Cannot set a DataFrame with multiple columns to
    # the single column rule". Items are sorted because frozenset iteration
    # order is not stable between runs.
    top_rules['rule'] = [
        f"{', '.join(sorted(lhs))} → {', '.join(sorted(rhs))}"
        for lhs, rhs in zip(top_rules['antecedents'], top_rules['consequents'])
    ]

    if top_rules.empty:
        print("未找到满足条件的关联规则, 请尝试降低 min_support 或 min_confidence")
        return top_rules

    # NOTE(review): the labels below are Chinese; matplotlib's default font
    # may not contain these glyphs — configure a CJK font if they render as
    # empty boxes.
    plt.figure(figsize=(12, 8))

    # Overlay lift and confidence as horizontal bars.
    plt.barh(
        y=top_rules['rule'],
        width=top_rules['lift'],
        color='skyblue',
        label='提升度',
    )
    plt.barh(
        y=top_rules['rule'],
        width=top_rules['confidence'],
        color='orange',
        alpha=0.5,
        label='置信度',
    )

    plt.xlabel('指标值')
    plt.ylabel('关联规则')
    plt.title(f'高薪岗位技能关联规则分析 (Top {top_n})')
    plt.legend()
    plt.tight_layout()
    plt.show()

    return top_rules


# Entry point
def main():
    """Run the full pipeline: load data, mine rules, print and plot them."""
    file_path = r"C:\Users\f'b\PyCharmMiscProject\AI市场.csv"

    # Load and preprocess the data.
    high_salary_df = load_and_preprocess(file_path)
    print(f"高薪岗位数量: {len(high_salary_df)}")

    # Apply the FP-Growth algorithm.
    frequent_itemsets, rules = apply_fpgrowth(
        high_salary_df,
        min_support=0.05,  # tunable
        min_confidence=0.7,  # tunable
    )

    # Show the frequent itemsets.
    print("\n频繁技能组合 (Top 10):")
    print(frequent_itemsets.sort_values('support', ascending=False).head(10))

    # Show the association rules.
    print("\n关联规则 (Top 10):")
    top_rules = visualize_results(rules)
    print(top_rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])


if __name__ == "__main__":
    main()

# Error originally reported with this script (addressed in visualize_results):
# ValueError: Cannot set a DataFrame with multiple columns to the single column rule
最新发布
06-30
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值