import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import os
import time
import warnings
from io import BytesIO
import platform
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, f1_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from imblearn.over_sampling import SMOTE
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
# Configure matplotlib so Chinese text in titles and axis labels renders.
# SimHei stays first so machines that have it behave as before; the remaining
# families are fallbacks for macOS / Linux, where SimHei is normally not
# installed and CJK glyphs would otherwise be drawn as empty boxes.
plt.rcParams['font.sans-serif'] = [
    'SimHei',
    'Microsoft YaHei',
    'PingFang SC',
    'Arial Unicode MS',
    'WenQuanYi Micro Hei',
    'Noto Sans CJK SC',
    'DejaVu Sans',
]
# Draw the minus sign with an ASCII hyphen; many CJK fonts lack U+2212.
plt.rcParams['axes.unicode_minus'] = False
def safe_path(path):
    """Return a resolved version of *path* on Windows; pass it through elsewhere.

    On Windows the path is resolved with ``ntpath.realpath``. If that fails
    with an OS-level or value error, ``pathlib.Path.resolve`` is used as a
    fallback and its result is returned as a string. On every other platform
    the argument is returned unchanged.

    Args:
        path: Path to normalise (string or path-like).

    Returns:
        The resolved path string on Windows, otherwise ``path`` itself.
    """
    if platform.system() != 'Windows':
        return path
    try:
        import ntpath
        return ntpath.realpath(path)
    except (OSError, ValueError):
        # Only filesystem / malformed-path failures trigger the fallback;
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        return str(Path(path).resolve())
# Silence every warning category for the whole app.
warnings.filterwarnings(action="ignore")

# Page-level Streamlit settings (title, icon, wide layout, open sidebar).
page_settings = dict(
    page_title="精准营销系统",
    page_icon="📊",
    layout="wide",
    initial_sidebar_state="expanded",
)
st.set_page_config(**page_settings)
# Custom stylesheet injected into the page: app background, header banner,
# cards, buttons, feature/result/model boxes, progress bar, metric cards,
# highlights and the conversion colour classes.
CUSTOM_CSS = """
<style>
.stApp {
background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
font-family: 'Helvetica Neue', Arial, sans-serif;
}
.header {
background: linear-gradient(90deg, #1a237e 0%, #283593 100%);
color: white;
padding: 1.5rem;
border-radius: 0.75rem;
box-shadow: 0 4px 12px rgba(0,0,0,0.1);
margin-bottom: 2rem;
}
.card {
background: white;
border-radius: 0.75rem;
padding: 1rem;
margin-bottom: 1.5rem;
box-shadow: 0 4px 12px rgba(0,0,0,0.08);
transition: transform 0.3s ease;
}
.card:hover {
transform: translateY(-5px);
box-shadow: 0 6px 16px rgba(0,0,0,0.12);
}
.stButton button {
background: linear-gradient(90deg, #3949ab 0%, #1a237e 100%) !important;
color: white !important;
border: none !important;
border-radius: 0.5rem;
padding: 0.75rem 1.5rem;
font-size: 1rem;
font-weight: 600;
transition: all 0.3s ease;
width: 100%;
}
.stButton button:hover {
transform: scale(1.05);
box-shadow: 0 4px 8px rgba(57, 73, 171, 0.4);
}
.feature-box {
background: linear-gradient(135deg, #e3f2fd 0%, #bbdefb 100%);
border-radius: 0.75rem;
padding: 1.5rem;
margin-bottom: 1.5rem;
}
.result-box {
background: linear-gradient(135deg, #e8f5e9 0%, #c8e6c9 100%);
border-radius: 0.75rem;
padding: 1.5rem;
margin-top: 1.5rem;
}
.model-box {
background: linear-gradient(135deg, #fff3e0 0%, #ffe0b2 100%);
border-radius: 0.75rem;
padding: 1.5rem;
margin-top: 1.5rem;
}
.stProgress > div > div > div {
background: linear-gradient(90deg, #2ecc71 0%, #27ae60 100%) !important;
}
.metric-card {
background: white;
border-radius: 0.75rem;
padding: 1rem;
text-align: center;
box-shadow: 0 4px 8px rgba(0,0,0,0.06);
}
.metric-value {
font-size: 1.8rem;
font-weight: 700;
color: #1a237e;
}
.metric-label {
font-size: 0.9rem;
color: #5c6bc0;
margin-top: 0.5rem;
}
.highlight {
background: linear-gradient(90deg, #ffeb3b 0%, #fbc02d 100%);
padding: 0.2rem 0.5rem;
border-radius: 0.25rem;
font-weight: 600;
}
.stDataFrame {
border-radius: 0.75rem;
box-shadow: 0 4px 8px rgba(0,0,0,0.06);
}
.convert-high {
background-color: #c8e6c9 !important;
color: #388e3c !important;
font-weight: 700;
}
.convert-low {
background-color: #ffcdd2 !important;
color: #c62828 !important;
font-weight: 600;
}
</style>
"""
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
def preprocess_data_train(df):
    """Clean the training data and collect the parameters needed at inference.

    Steps, in order:
      1. Keep only the known feature columns plus the target ``is_rh_next``.
      2. Convert numeric columns to numbers. Text that is not a number
         (``'-'``, ``'N/A'``, ``'未知'`` ...) becomes NaN; for ``IF_YHTS`` the
         text flags ``'是'`` / ``'否'`` are mapped to 1 / 0 first.
      3. Strip categorical columns and remove characters other than ASCII
         letters, digits and CJK characters. Missing values stay missing
         instead of turning into the literal string ``'nan'``.
      4. Drop rows whose target is missing.
      5. Clip numeric columns to the 1.5 * IQR fences. Columns whose IQR is
         zero (e.g. a mostly-zero binary flag) are left unclipped, because
         the fences would collapse every value to a single constant.
      6. Fill remaining gaps with the column mean (numeric) or mode
         (categorical), the same statistics that are returned for inference.

    Args:
        df: Raw training DataFrame. It is not modified.

    Returns:
        ``(data, preprocessor_params)``. ``preprocessor_params`` is a dict with
        the keys ``numerical_means``, ``categorical_modes``, ``features``,
        ``numeric_cols``, ``categorical_cols`` and ``outlier_bounds``. When the
        target column is absent an error is shown through Streamlit and
        ``(copy of df, None)`` is returned.
    """
    target_col = 'is_rh_next'
    numeric_cols = ['AGE', 'ONLINE_DAY', 'TERM_CNT', 'PROM_AMT_MONTH', 'IF_YHTS']
    categorical_cols = ['GENDER', 'MKT_STAR_GRADE_NAME']
    # Text encodings of boolean flags, keyed by column name.
    text_flag_mappings = {'IF_YHTS': {'是': 1, '否': 0}}

    data = df.copy()
    wanted = set(numeric_cols) | set(categorical_cols) | {target_col}
    available_features = [col for col in data.columns if col in wanted]
    if target_col not in available_features:
        st.error("错误:数据集中缺少目标变量 'is_rh_next'")
        return data, None
    # Explicit copy so the column assignments below never write into a view.
    data = data[available_features].copy()

    present_numeric = [col for col in numeric_cols if col in data.columns]
    present_categorical = [col for col in categorical_cols if col in data.columns]

    # 1. Numeric features: normalise text, then coerce to numbers.
    for col in present_numeric:
        column = data[col]
        if not pd.api.types.is_numeric_dtype(column):
            column = column.map(lambda v: v.strip() if isinstance(v, str) else v)
            mapping = text_flag_mappings.get(col)
            if mapping:
                column = column.map(
                    lambda v, m=mapping: m.get(v, v) if isinstance(v, str) else v
                )
        # Placeholders and any other non-numeric text become NaN here.
        data[col] = pd.to_numeric(column, errors='coerce')

    # 2. Categorical features: strip and remove illegal characters.
    for col in present_categorical:
        raw = data[col]
        text = raw.astype(str).str.strip()
        text = text.str.replace('[^a-zA-Z0-9\u4e00-\u9fa5]', '', regex=True)
        # Restore missing values that astype(str) turned into text, and treat
        # values that cleaned down to nothing as missing too.
        data[col] = text.where(raw.notna() & (text != ''))

    # 3. Rows without a target cannot be used for training.
    if data[target_col].isnull().any():
        st.warning(f"目标变量 'is_rh_next' 中存在 {data['is_rh_next'].isnull().sum()} 个缺失值,将删除这些行")
        data = data.dropna(subset=[target_col])

    # 4. Outlier clipping and mean imputation for numeric features.
    outlier_bounds = {}
    numerical_means = {}
    for col in present_numeric:
        q1 = data[col].quantile(0.25)
        q3 = data[col].quantile(0.75)
        iqr = q3 - q1
        if pd.notna(iqr) and iqr > 0:
            bounds = (q1 - 1.5 * iqr, q3 + 1.5 * iqr)
            data[col] = data[col].clip(*bounds)
            outlier_bounds[col] = bounds
        numerical_means[col] = data[col].mean()
        data[col] = data[col].fillna(numerical_means[col])

    # 5. Mode imputation for categorical features.
    categorical_modes = {}
    for col in present_categorical:
        modes = data[col].mode()
        categorical_modes[col] = modes.iloc[0] if len(modes) > 0 else '未知'
        data[col] = data[col].fillna(categorical_modes[col])

    # 6. Everything the inference path needs to repeat the same treatment.
    preprocessor_params = {
        'numerical_means': numerical_means,
        'categorical_modes': categorical_modes,
        'features': available_features,
        'numeric_cols': numeric_cols,
        'categorical_cols': categorical_cols,
        'outlier_bounds': outlier_bounds,
    }
    return data, preprocessor_params
def preprocess_data_inference(df, preprocessor_params):
    """Apply the training-time cleaning to new data using stored parameters.

    Steps, in order:
      1. Keep the training feature columns (the target ``is_rh_next`` is
         excluded). A feature missing from ``df`` raises ``KeyError``.
      2. Convert numeric columns to numbers. For ``IF_YHTS`` the text flags
         ``'是'`` / ``'否'`` are mapped to 1 / 0 *before* the numeric coercion,
         so they are no longer wiped out as unparseable text.
      3. Strip categorical columns and remove characters other than ASCII
         letters, digits and CJK characters, keeping missing values missing.
      4. Fill gaps with the training means / modes.
      5. Clip numeric columns to the stored training outlier bounds.

    Args:
        df: Raw DataFrame to score. It is not modified.
        preprocessor_params: Dict as returned by ``preprocess_data_train``;
            every key is optional and simply skips its step when absent.

    Returns:
        A new DataFrame containing the cleaned feature columns.
    """
    # Text encodings of boolean flags, keyed by column name.
    text_flag_mappings = {'IF_YHTS': {'是': 1, '否': 0}}

    data = df.copy()
    if 'features' in preprocessor_params:
        features = [f for f in preprocessor_params['features'] if f != 'is_rh_next']
        data = data[features].copy()

    # Numeric features: normalise text, map text flags, then coerce.
    numeric_cols = preprocessor_params.get('numeric_cols', [])
    for col in numeric_cols:
        if col not in data.columns:
            continue
        column = data[col]
        if not pd.api.types.is_numeric_dtype(column):
            column = column.map(lambda v: v.strip() if isinstance(v, str) else v)
            mapping = text_flag_mappings.get(col)
            if mapping:
                column = column.map(
                    lambda v, m=mapping: m.get(v, v) if isinstance(v, str) else v
                )
        # Placeholders such as '-' or 'N/A' become NaN here.
        data[col] = pd.to_numeric(column, errors='coerce')

    # Categorical features: strip and remove illegal characters.
    categorical_cols = preprocessor_params.get('categorical_cols', [])
    for col in categorical_cols:
        if col not in data.columns:
            continue
        raw = data[col]
        text = raw.astype(str).str.strip()
        text = text.str.replace('[^a-zA-Z0-9\u4e00-\u9fa5]', '', regex=True)
        # Keep real gaps as NaN so the mode fill below can act on them.
        data[col] = text.where(raw.notna() & (text != ''))

    # Missing numeric values take the training mean. The result is assigned
    # back because in-place fillna on a selected column does not reliably
    # update the frame.
    for col, mean_val in preprocessor_params.get('numerical_means', {}).items():
        if col in data.columns:
            if not pd.api.types.is_numeric_dtype(data[col]):
                data[col] = pd.to_numeric(data[col], errors='coerce')
            data[col] = data[col].fillna(mean_val)

    # Missing categorical values take the training mode.
    for col, mode_val in preprocessor_params.get('categorical_modes', {}).items():
        if col in data.columns:
            data[col] = data[col].fillna(mode_val)

    # Clip to the bounds learned on the training set.
    for col, bounds in preprocessor_params.get('outlier_bounds', {}).items():
        if col in data.columns and pd.api.types.is_numeric_dtype(data[col]):
            lower_bound, upper_bound = bounds
            data[col] = data[col].clip(lower_bound, upper_bound)

    return data
# Title banner shown at the top of the page.
HEADER_HTML = """
<div class="header">
<h1 style='text-align: center; margin: 0;'>精准营销系统</h1>
<p style='text-align: center; margin: 0.5rem 0 0; font-size: 1.1rem;'>基于机器学习的单宽转融预测</p>
</div>
"""
# Left-column cards: short introduction and the feature list.
INTRO_CARD_HTML = """
<div class="card">
<h2>📱 智能营销系统</h2>
<p>预测单宽带用户转化为融合套餐用户的可能性</p>
</div>
"""
FEATURES_CARD_HTML = """
<div class="card">
<h4>📈 系统功能</h4>
<ul>
<li>用户转化可能性预测</li>
<li>高精度机器学习模型</li>
<li>可视化数据分析</li>
<li>精准营销策略制定</li>
</ul>
</div>
"""
# Online placeholder image displayed between the two cards.
INTRO_IMAGE_URL = "https://images.unsplash.com/photo-1551836022-d5d88e9218df?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1200&q=80"

st.markdown(HEADER_HTML, unsafe_allow_html=True)

# Two-column page layout; the right column is 1.5x as wide as the left.
col1, col2 = st.columns([1, 1.5])

# Left column: introduction card, picture, feature list.
with col1:
    st.markdown(INTRO_CARD_HTML, unsafe_allow_html=True)
    st.image(INTRO_IMAGE_URL, caption="精准营销系统示意图", width=600)
    st.markdown(FEATURES_CARD_HTML, unsafe_allow_html=True)
# Right-hand column: operation selector plus the analysis and prediction pages.
#
# NOTE(review): SOURCE lost its indentation, so the original nesting cannot be
# recovered exactly. This version assumes the whole workflow lives inside
# ``with col2:`` and that the results section belongs to the prediction page.
NUMERIC_FEATURES = ['AGE', 'ONLINE_DAY', 'TERM_CNT', 'PROM_AMT_MONTH', 'IF_YHTS']
CATEGORICAL_FEATURES = ['GENDER', 'MKT_STAR_GRADE_NAME']
TARGET_COLUMN = 'is_rh_next'
MODEL_PATH = "marketing_model.pkl"
PARAMS_PATH = 'preprocessor_params.pkl'
# Display order of the likelihood levels; the bar colours follow this order.
LIKELIHOOD_LEVELS = ["高可能性", "中可能性", "低可能性"]
LIKELIHOOD_COLORS = ['#4CAF50', '#FFC107', '#F44336']


def _show_figure(fig):
    """Render a matplotlib figure, then close it so reruns do not pile up figures."""
    st.pyplot(fig)
    plt.close(fig)


def _metric_card(container, value, label):
    """Write one styled metric card (big value, small label) into *container*."""
    container.markdown(f"""
<div class="metric-card">
<div class="metric-value">{value}</div>
<div class="metric-label">{label}</div>
</div>
""", unsafe_allow_html=True)


def _render_distributions(processed_data):
    """Plot the target distribution, numeric histograms and the correlation heatmap."""
    st.subheader("数据分布分析")

    # Target distribution.
    st.markdown("**目标变量分布 (is_rh_next)**")
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.countplot(x=TARGET_COLUMN, data=processed_data, palette='viridis', ax=ax)
    ax.set_title('用户转化分布 (0:未转化, 1:转化)')
    ax.set_xlabel('是否转化')
    ax.set_ylabel('用户数量')
    _show_figure(fig)

    # Numeric feature histograms; only columns that actually exist are drawn.
    st.markdown("**数值特征分布**")
    numeric_present = [c for c in NUMERIC_FEATURES if c in processed_data.columns]
    if numeric_present:
        ncols = 2
        nrows = (len(numeric_present) + ncols - 1) // ncols  # ceiling division
        fig, axes = plt.subplots(nrows, ncols, figsize=(14, 4 * nrows), squeeze=False)
        axes = axes.flatten()
        for axis, col in zip(axes, numeric_present):
            sns.histplot(processed_data[col], kde=True, ax=axis, color='skyblue')
            axis.set_title(f'{col}分布')
            axis.set_xlabel('')
        # Hide the unused trailing subplot when the feature count is odd.
        for axis in axes[len(numeric_present):]:
            axis.set_visible(False)
        fig.tight_layout()
        _show_figure(fig)
    else:
        st.warning("没有可用的数值特征")

    # Correlation heatmap over the numeric features and the target.
    st.markdown("**特征相关性热力图**")
    corr_cols = numeric_present + [TARGET_COLUMN]
    if len(corr_cols) > 1:
        corr_data = processed_data[corr_cols].corr()
        fig, ax = plt.subplots(figsize=(12, 8))
        sns.heatmap(corr_data, annot=True, fmt=".2f", cmap='coolwarm', ax=ax)
        ax.set_title('特征相关性热力图')
        _show_figure(fig)
    else:
        st.warning("特征不足,无法生成相关性热力图")


def _train_and_report(processed_data, preprocessor_params, test_size,
                      random_state, n_estimators, max_depth):
    """Train the random-forest pipeline, persist it and display its evaluation.

    The data is split first and SMOTE runs as a pipeline step, so synthetic
    samples are generated from the encoded training fold only and never reach
    the test fold. The fitted pipeline and the preprocessing parameters are
    written together so the two files on disk always belong to the same run.
    """
    # Local import: a sampler step is only valid in imbalanced-learn's Pipeline.
    from imblearn.pipeline import Pipeline as ImbPipeline

    with st.spinner("模型训练中,请稍候..."):
        progress_bar = st.progress(0)

        # Step 1: features and target, row-aligned.
        X = processed_data.drop(TARGET_COLUMN, axis=1)
        y = processed_data[TARGET_COLUMN]
        if y.isnull().any():
            st.warning(f"目标变量中仍有 {y.isnull().sum()} 个缺失值,将删除这些行")
            has_target = y.notna()
            X, y = X[has_target], y[has_target]

        numeric_features = [c for c in NUMERIC_FEATURES if c in X.columns]
        categorical_features = [c for c in CATEGORICAL_FEATURES if c in X.columns]

        # Imputers keep the pipeline usable even when the incoming frame
        # still contains gaps.
        transformers = []
        if numeric_features:
            transformers.append(('num', Pipeline(steps=[
                ('imputer', SimpleImputer(strategy='mean')),
                ('scaler', StandardScaler()),
            ]), numeric_features))
        if categorical_features:
            transformers.append(('cat', Pipeline(steps=[
                ('imputer', SimpleImputer(strategy='most_frequent')),
                ('onehot', OneHotEncoder(handle_unknown='ignore')),
            ]), categorical_features))
        preprocessor = ColumnTransformer(transformers=transformers)

        # Step 2: hold out the test set before any resampling.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state, stratify=y
        )
        progress_bar.progress(30)
        time.sleep(0.5)

        # Step 3: encode -> oversample (fit time only) -> classify.
        clf = ImbPipeline(steps=[
            ('preprocessor', preprocessor),
            ('smote', SMOTE(random_state=random_state)),
            ('classifier', RandomForestClassifier(
                n_estimators=n_estimators,
                max_depth=max_depth,
                random_state=random_state,
                n_jobs=-1,
            )),
        ])
        clf.fit(X_train, y_train)
        progress_bar.progress(80)
        time.sleep(0.5)

        # Step 4: evaluate on the untouched test fold.
        y_pred = clf.predict(X_test)
        y_proba = clf.predict_proba(X_test)[:, 1]
        accuracy = accuracy_score(y_test, y_pred)
        auc = roc_auc_score(y_test, y_proba)
        f1 = f1_score(y_test, y_pred)

        # Persist model and parameters together.
        joblib.dump(clf, MODEL_PATH)
        joblib.dump(preprocessor_params, PARAMS_PATH)
        st.session_state.model = clf
        st.session_state.preprocessor_params = preprocessor_params
        progress_bar.progress(100)
        st.success("🎉 模型训练完成!")

    # Headline metrics.
    st.subheader("模型性能评估")
    acc_col, auc_col, f1_col = st.columns(3)
    _metric_card(acc_col, f"{accuracy*100:.1f}%", "准确率")
    _metric_card(auc_col, f"{auc:.3f}", "AUC 分数")
    _metric_card(f1_col, f"{f1:.3f}", "F1 分数")

    # Confusion matrix.
    st.subheader("混淆矩阵")
    cm = confusion_matrix(y_test, y_pred)
    fig, ax = plt.subplots(figsize=(6, 4))
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
    ax.set_xlabel("预测标签")
    ax.set_ylabel("真实标签")
    ax.set_title("混淆矩阵")
    _show_figure(fig)

    # Feature importances; names follow the ColumnTransformer output order
    # (numeric columns first, then the one-hot columns).
    st.subheader("特征重要性")
    feature_names = list(numeric_features)
    if categorical_features:
        cat_pipeline = clf.named_steps['preprocessor'].named_transformers_['cat']
        ohe = cat_pipeline.named_steps['onehot']
        feature_names.extend(ohe.get_feature_names_out(categorical_features))
    importance_df = pd.DataFrame({
        "特征": feature_names,
        "重要性": clf.named_steps['classifier'].feature_importances_,
    }).sort_values("重要性", ascending=False).head(10)
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x="重要性", y="特征", data=importance_df, palette="viridis", ax=ax)
    ax.set_title("Top 10 重要特征")
    _show_figure(fig)


def _render_training_page():
    """Upload a training CSV, explore it and optionally train the model."""
    st.markdown("""
<div class="card">
<h3>数据分析与模型训练</h3>
<p>上传数据并训练预测模型</p>
</div>
""", unsafe_allow_html=True)

    train_file = st.file_uploader("上传数据集 (CSV格式, GBK编码)", type=["csv"])
    if train_file is None:
        return

    try:
        train_data = pd.read_csv(train_file, encoding='GBK')

        with st.expander("数据预览", expanded=True):
            st.dataframe(train_data.head())
            left, right = st.columns(2)
            left.metric("总样本数", train_data.shape[0])
            right.metric("特征数量", train_data.shape[1] - 1)

        # Data-quality hint; this can only run once the file has been read.
        for col in NUMERIC_FEATURES:
            if col in train_data.columns and \
                    (train_data[col].astype(str).str.strip() == '').any():
                st.warning(f"警告:列 '{col}' 中存在空字符串,请检查数据质量")

        st.subheader("数据预处理")
        with st.spinner("数据预处理中..."):
            processed_data, preprocessor_params = preprocess_data_train(train_data)
        if preprocessor_params is None:
            # The preprocessing step already reported the missing target.
            return
        if processed_data[TARGET_COLUMN].isnull().any():
            st.warning(f"目标变量 'is_rh_next' 中仍有 {processed_data['is_rh_next'].isnull().sum()} 个缺失值,将删除这些行")
            processed_data = processed_data.dropna(subset=[TARGET_COLUMN])
        st.success("✅ 数据预处理完成")

        _render_distributions(processed_data)

        st.subheader("模型训练")
        left, right = st.columns(2)
        test_size = left.slider("测试集比例", 0.1, 0.4, 0.2, 0.05)
        random_state = right.number_input("随机种子", 0, 100, 42)
        n_estimators = left.slider("树的数量", 10, 500, 100, 10)
        max_depth = right.slider("最大深度", 2, 30, 10, 1)

        if st.button("开始训练模型", use_container_width=True):
            _train_and_report(
                processed_data, preprocessor_params,
                test_size, random_state, n_estimators, max_depth,
            )
    except Exception as e:
        # Top-level UI boundary: show the failure instead of a stack trace.
        st.error(f"数据处理错误: {str(e)}")


def _likelihood_style(val):
    """Return the CSS for one cell of the likelihood column."""
    styles = {
        "高可能性": "background-color: #c8e6c9; color: #388e3c;",
        "中可能性": "background-color: #fff9c4; color: #f57f17;",
    }
    return styles.get(val, "background-color: #ffcdd2; color: #c62828;")


def _render_prediction_results():
    """Show the stored prediction results: summary cards, chart, table, download."""
    if "prediction_results" not in st.session_state:
        return

    st.markdown("""
<div class="card">
<h3>预测结果</h3>
<p>用户转化可能性评估报告</p>
</div>
""", unsafe_allow_html=True)
    result_df = st.session_state.prediction_results

    # Summary cards, one per likelihood level.
    st.subheader("转化可能性分布概览")
    level_counts = {
        level: (result_df["转化可能性"] == level).sum() for level in LIKELIHOOD_LEVELS
    }
    for container, level in zip(st.columns(3), LIKELIHOOD_LEVELS):
        _metric_card(container, level_counts[level], f"{level}用户")

    # Bar chart in a fixed level order so each colour always means the same
    # level (value_counts alone orders bars by frequency).
    fig, ax = plt.subplots(figsize=(8, 5))
    conv_counts = result_df["转化可能性"].value_counts().reindex(
        LIKELIHOOD_LEVELS, fill_value=0
    )
    conv_counts.plot(kind='bar', color=LIKELIHOOD_COLORS, ax=ax)
    ax.set_title('用户转化可能性分布')
    ax.set_xlabel('可能性等级')
    ax.set_ylabel('用户数量')
    _show_figure(fig)

    # Detailed, colour-coded table.
    st.subheader("详细预测结果")
    display_df = result_df[["客户ID", "转化概率", "预测标签", "转化可能性"]]
    styler = display_df.style.format({"转化概率": "{:.2%}"})
    # Styler.applymap was renamed to Styler.map in pandas 2.1; use whichever exists.
    apply_cellwise = getattr(styler, "map", None) or styler.applymap
    styled_df = apply_cellwise(_likelihood_style, subset=["转化可能性"])
    st.dataframe(styled_df, height=400)

    # CSV download.
    csv = display_df.to_csv(index=False).encode("utf-8")
    st.download_button(
        label="下载预测结果",
        data=csv,
        file_name="用户转化预测结果.csv",
        mime="text/csv",
        use_container_width=True,
    )


def _render_prediction_page():
    """Upload a CSV, score it with the saved model and show the results."""
    st.markdown("""
<div class="card">
<h3>用户转化预测</h3>
<p>预测单宽带用户转化为融合套餐的可能性</p>
</div>
""", unsafe_allow_html=True)

    predict_file = st.file_uploader("上传预测数据 (CSV格式, GBK编码)", type=["csv"])
    if predict_file is not None:
        try:
            predict_data = pd.read_csv(predict_file, encoding='GBK')
            with st.expander("数据预览", expanded=True):
                st.dataframe(predict_data.head())

            # Both artefacts written by the training page are required.
            if not os.path.exists(MODEL_PATH) or not os.path.exists(PARAMS_PATH):
                st.warning("⚠️ 未找到训练好的模型,请先训练模型")
                st.stop()

            if st.button("开始预测", use_container_width=True):
                with st.spinner("预测进行中,请稍候..."):
                    progress_bar = st.progress(0)

                    # Both files are produced by this app's training page.
                    preprocessor_params = joblib.load(PARAMS_PATH)
                    processed_data = preprocess_data_inference(
                        predict_data, preprocessor_params
                    )
                    progress_bar.progress(30)
                    time.sleep(0.5)

                    model = joblib.load(MODEL_PATH)
                    predictions = model.predict(processed_data)
                    probas = model.predict_proba(processed_data)[:, 1]
                    progress_bar.progress(80)
                    time.sleep(0.5)

                    # Use the customer id column when present, else 1..n.
                    if 'CCUST_ROW_ID' in predict_data.columns:
                        customer_ids = predict_data['CCUST_ROW_ID']
                    else:
                        customer_ids = range(1, len(predict_data) + 1)
                    result_df = pd.DataFrame({
                        "客户ID": customer_ids,
                        "转化概率": probas,
                        "预测结果": predictions,
                    })
                    result_df['预测标签'] = np.where(
                        result_df['预测结果'] == 1, "可能转化", "可能不转化"
                    )
                    # Bucket probabilities: [0, 0.3], (0.3, 0.7], (0.7, 1].
                    result_df['转化可能性'] = pd.cut(
                        result_df['转化概率'],
                        bins=[0, 0.3, 0.7, 1],
                        labels=["低可能性", "中可能性", "高可能性"],
                        include_lowest=True,
                    )

                    st.session_state.prediction_results = result_df
                    progress_bar.progress(100)
                    st.success("✅ 预测完成!")
        except Exception as e:
            # Top-level UI boundary: show the failure instead of a stack trace.
            st.error(f"预测错误: {str(e)}")

    _render_prediction_results()


with col2:
    st.markdown("""
<div class="card">
<h3>📋 请选择操作类型</h3>
<p>您可以选择数据分析或使用模型进行预测</p>
</div>
""", unsafe_allow_html=True)

    # Operation selector; the label is hidden because the card above acts as it.
    option = st.radio(
        "请选择操作类型",
        ["📊 数据分析 - 探索数据并训练模型", "🔍 预测分析 - 预测用户转化可能性"],
        index=0,
        label_visibility="hidden",
    )

    if "数据分析" in option:
        _render_training_page()
    else:
        _render_prediction_page()
# Page footer: a horizontal rule followed by the centred copyright line.
FOOTER_HTML = """
<div style="text-align: center; color: #5c6bc0; font-size: 0.9rem; padding: 1rem;">
© 2023 精准营销系统 | 基于Sklearn和Streamlit开发
</div>
"""
st.markdown("---")
st.markdown(FOOTER_HTML, unsafe_allow_html=True)
请根据上述代码,结合下面数据集 markting_datav3.csv 的部分数据示例修改代码,并给出修改后的完整代码。
PROM_TYPES_包3年 PROM_TYPES_包年 PROM_TYPES_包月 PROM_TYPES_无包年以及包月 ASSET_ROW_ID MKT_CHANNEL_NAME MKT_CHANNEL_SUB_NAME PREPARE_FLG SERV_START_DT FIBER_ACCESS_CATEGORY ... AVG_STMT_AMT_LV is_kdts is_itv_up is_mobile_up if_zzzw_up itv_cnt itv_day serv_in_time PROM_AMT_MONTH is_rh_next
0 True False False False 1-1E6Z49HF NaN 其它部门-未知部门细分-未知 ... 0 20140126 0 ... c30-59 0 0 0 0 0 0 41 44.44 0.0
1 False True False False 3-J591KYI NaN 其它部门-未知部门细分-未知 ... 0 20160406 0 ... e89-129 0 0 0 0 0 0 14 100.00 0.0
2 True False False False 1-F3YGP4D 营业厅 营业厅-营业服务中心-城市 ... 0 20100112 0 ... c30-59 0 0 0 0 0 28 89 44.44 0.0
3 True False False False 1-1AITRLCN NaN 其它部门-未知部门细分-未知 ... 0 20131017 0 ... c30-59 1 0 0 0 0 10 44 55.56 0.0
4 False False True False 1-132ZSIVX 10000号 其它部门-10000客服部-城市 ... 0 20130209 0 ... d59-89 0 0 0 0 0 0 52 0.00 0.0
5 rows × 92 columns