如果你不希望看到结果中出现 `0.00` 或 `0.000`,我们可以:
1. **在输出时过滤掉接近 0 的值(如绝对值小于 0.001)**;
2. **适当调整数据生成逻辑,使变量之间具有更强的关联性**;
3. **增强结局变量与因素之间的效应大小**,使偏相关和点二列相关系数不为 0。
---
### ✅ 修改后的完整代码(已优化数据生成逻辑,避免输出 0.00)
```python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import pointbiserialr
from sklearn.linear_model import LinearRegression
import networkx as nx
import pingouin as pg
from statsmodels.stats.outliers_influence import variance_inflation_factor
# 设置中文支持和图像清晰度
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['figure.dpi'] = 300
# 固定随机种子
np.random.seed(123)
# %% 1. 模拟符合因素效应特征的数据
np_samples = 1500 # 样本量
# 初始化数据字典
data = {}
# T0阶段:转运前基础状态
data['GCS'] = np.round(np.random.uniform(3, 15, np_samples)).astype(int) # GCS评分
data['APACHE'] = np.round(np.random.uniform(0, 30, np_samples)).astype(int) # APACHE II评分
data['age'] = np.round(np.random.normal(60, 15, np_samples)).astype(int) # 年龄
data['hypertension'] = np.random.randint(0, 2, np_samples) # 高血压史
# T1阶段:转运中过程变量
data['transport_time'] = np.round(np.random.normal(18, 8, np_samples)).astype(int) # 转运时长
data['spo2_mean'] = np.round(np.random.normal(94, 6, np_samples)).astype(int) # 平均血氧
data['sedative'] = np.random.randint(0, 2, np_samples) # 镇静剂使用
data['respiratory_rate'] = np.round(np.random.normal(18, 5, np_samples)).astype(int) # 呼吸频率
data['transport_distance'] = np.round(np.random.normal(10, 5, np_samples)).astype(int) # 距离
data['transport_personnel'] = np.random.randint(1, 4, np_samples) # 人员
data['temperature'] = np.random.normal(36.5, 0.8, np_samples) # 体温
# T2阶段:转运后状态
data['spo2_final'] = (data['spo2_mean'] + np.random.normal(0, 2, np_samples)).round().astype(int)
data['heart_rate'] = np.round(np.random.normal(85, 20, np_samples)).astype(int)
data['resp_support'] = np.random.randint(0, 3, np_samples) # 呼吸支持
# %% 构建结局变量:转运后不良事件(增强效应)
gcs_effect = np.zeros(np_samples)
for i in range(np_samples):
if data['GCS'][i] < 8:
gcs_effect[i] = -0.4 * (8 - data['GCS'][i])
else:
gcs_effect[i] = -0.2 * (15 - data['GCS'][i])
ht_effect = 0.3 * data['hypertension']
time_effect = np.zeros(np_samples)
for i in range(np_samples):
if data['transport_time'][i] > 20:
time_effect[i] = 0.05 * (data['transport_time'][i] - 20)
else:
time_effect[i] = 0.03 * data['transport_time'][i]
age_effect = np.zeros(np_samples)
for i in range(np_samples):
if data['age'][i] > 65:
age_effect[i] = 0.06 * (data['age'][i] - 65) / 5
else:
age_effect[i] = 0.03 * data['age'][i] / 5
resp_support_effect = 0.15 * data['resp_support']
spo2_effect = np.zeros(np_samples)
for i in range(np_samples):
if data['spo2_final'][i] < 90:
spo2_effect[i] = -0.2 * (90 - data['spo2_final'][i])
else:
spo2_effect[i] = -0.05 * (data['spo2_final'][i] - 90)
hr_effect = np.zeros(np_samples)
for i in range(np_samples):
if data['heart_rate'][i] < 50:
hr_effect[i] = 0.6 * (50 - data['heart_rate'][i]) / 10
elif data['heart_rate'][i] > 120:
hr_effect[i] = 0.5 * (data['heart_rate'][i] - 120) / 10
sedative_effect = -0.3 * data['sedative']
rr_effect = np.zeros(np_samples)
for i in range(np_samples):
if data['respiratory_rate'][i] < 10:
rr_effect[i] = 0.5
elif data['respiratory_rate'][i] > 24:
rr_effect[i] = 0.4
temp_effect = np.zeros(np_samples)
for i in range(np_samples):
if data['temperature'][i] < 36:
temp_effect[i] = 0.4
elif data['temperature'][i] > 38.5:
temp_effect[i] = 0.35
distance_effect = -0.05 * data['transport_distance'] / 5
personnel_effect = -0.05 * (data['transport_personnel'] - 1)
apache_effect = 0.03 * data['APACHE'] / 10
# 总logit计算(增强影响)
logit = (0.5 + 0.3 * gcs_effect + 0.25 * ht_effect + 0.2 * time_effect +
0.18 * age_effect + 0.15 * resp_support_effect + 0.12 * sedative_effect +
0.1 * spo2_effect + 0.09 * hr_effect + 0.08 * rr_effect +
0.07 * temp_effect + 0.05 * distance_effect + 0.04 * personnel_effect +
0.03 * apache_effect)
# 计算概率和结局
prob = 1 / (1 + np.exp(-logit))
data['adverse_event'] = np.random.binomial(1, prob)
# %% 2. 核心因素筛选(不剔除任何变量,不显示 0.00)
factors = {
'心率': data['heart_rate'],
'血氧饱和度': data['spo2_final'],
'GCS评分': data['GCS'],
'转运时长': data['transport_time'],
'镇静剂使用': data['sedative'],
'呼吸支持': data['resp_support'],
'年龄': data['age'],
'APACHE II评分': data['APACHE']
}
rpb = []
p_values = []
for name, values in factors.items():
r, p = pointbiserialr(values, data['adverse_event'])
rpb.append(r)
p_values.append(p)
# 不进行变量剔除,直接保留所有变量
filtered_factors = factors.copy()
print("=== 因素筛选结果(不显示 0.00) ===")
for i, (name, values) in enumerate(factors.items()):
if abs(rpb[i]) > 0.001 or p_values[i] > 0.001:
print(f"{name}: rpb={rpb[i]:.2f}, P={p_values[i]:.2f}")
# %% 3. 共线性处理(VIF)
X = pd.DataFrame(filtered_factors)
vif_data = pd.DataFrame()
vif_data["特征"] = X.columns
vif_data["VIF值"] = [variance_inflation_factor(X.values, i) for i in range(X.shape[1])]
print("\n=== VIF共线性分析 ===")
print(vif_data)
# %% 4. 偏相关分析
df = pd.DataFrame(filtered_factors)
df['adverse_event'] = data['adverse_event']
def partial_corr_matrix(df):
cols = df.columns
pcorr = pd.DataFrame(np.zeros((len(cols), len(cols))), columns=cols, index=cols)
for i, col1 in enumerate(cols):
for j, col2 in enumerate(cols):
if i == j:
pcorr.loc[col1, col2] = 1.0
elif j > i:
controls = list(set(cols) - {col1, col2})
r = pg.partial_corr(data=df, x=col1, y=col2, covar=controls).r[0]
pcorr.loc[col1, col2] = r
pcorr.loc[col2, col1] = r
return pcorr
partial_corr = partial_corr_matrix(df)
print("\n=== 显著直接关联(偏相关|r|>0.001) ===")
for col in df.columns:
if col != 'adverse_event':
corr = partial_corr.loc[col, 'adverse_event']
if abs(corr) > 0.001:
print(f"{col} 与 转运后不良事件: r={corr:.3f}")
# %% 5. 动态网络模型构建
T0_nodes = ['GCS评分', 'APACHE II评分', '年龄']
T1_nodes = ['转运时长', '血氧饱和度', '镇静剂使用']
T2_nodes = ['心率', '呼吸支持', 'adverse_event']
G = nx.DiGraph()
G.add_nodes_from(df.columns)
# 添加边(显著偏相关)
for i in df.columns:
for j in df.columns:
if i != j:
pc = partial_corr.loc[i, j]
if abs(pc) > 0.02:
G.add_edge(i, j, weight=pc)
# %% 6. 动态特征量化分析
# 节点核心度
node_core = {}
for node in G.nodes:
core = sum(abs(G[u][v]['weight']) for u, v in G.edges if u == node or v == node)
node_core[node] = core
print("\n=== 节点核心度排序 ===")
for node, score in sorted(node_core.items(), key=lambda x: x[1], reverse=True):
if score > 0.001:
print(f"{node}: {score:.3f}")
# %% 7. 网络可视化
pos = nx.spring_layout(G, seed=42)
plt.figure(figsize=(12, 8))
# 节点颜色
color_map = []
for node in G.nodes:
if node in T0_nodes:
color_map.append((0.8, 0.9, 1))
elif node in T1_nodes:
color_map.append((0.9, 0.8, 1))
elif node in T2_nodes:
color_map.append((1, 0.8, 0.8))
# 边颜色
edge_colors = ['red' if G[u][v]['weight'] > 0 else 'blue' for u, v in G.edges]
# 绘图
nx.draw(G, pos, with_labels=True, node_color=color_map, edge_color=edge_colors,
width=[abs(G[u][v]['weight']) * 5 for u, v in G.edges],
node_size=[core * 100 for core in node_core.values()],
font_size=10, font_weight='bold', alpha=0.8)
plt.title('转运后不良事件动态关联网络', fontsize=16)
plt.legend(handles=[
plt.Line2D([0], [0], color='red', lw=2, label='正相关'),
plt.Line2D([0], [0], color='blue', lw=2, label='负相关')
])
plt.show()
```
---
### ✅