jm3.py-20180919

本文采用K-means聚类算法对近3年的未知作案者恐怖袭击事件进行聚类分析,通过选择最优聚类数并评估聚类效果,确定9类事件集群。进一步分析各类事件的危害性,并对特定事件集进行分类预测。
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 17 03:01:06 2018

@author: vicky
"""
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns 
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans 
import copy
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
from scipy import stats
from pandas.core.frame import DataFrame

# -------------- Feature extraction --------------
# df = pd.read_excel('/Users/vicky/Desktop/建模/附件1.xlsx')
# Load the spreadsheet produced by question 1.
result = pd.read_excel('/Users/vicky/Desktop/建模/第一问/结果.xlsx')

# Keep only events attributed to an 'Unknown' group, then restrict them to
# the most recent three years (iyear >= 2015).
unknown_group_events = result[result['gname'] == 'Unknown']
data = unknown_group_events[unknown_group_events['iyear'] >= 2015]

# The nine feature columns used for clustering (all discrete-valued
# according to the original author's note).
feature_columns = [
    'iyear', 'imonth', 'country', 'region', 'suicide',
    'attacktype1', 'targtype1', 'weaptype1', 'ransom',
]
data1 = data[feature_columns]

# ------------- Missing / negative value handling -------------
# data1[data1.columns[9]].value_counts()
# Per the original author's inspection, only `ransom` holds NaN and negative
# values. (This expression only displays something in an interactive session.)
data1.isnull().sum()

# Work on an explicit copy so the assignments below modify data1 itself
# rather than a view/slice of `data` (avoids SettingWithCopyWarning).
data1 = data1.copy()

# Unknown ransom status (negative code or NaN) is treated as "no ransom".
# Only the `ransom` column is overwritten: the previous boolean-mask
# assignment `data1[mask] = 0` zeroed EVERY column of the matching rows,
# destroying year/country/etc. for those events.
data1.loc[data1['ransom'] < 0, 'ransom'] = 0
data1['ransom'] = data1['ransom'].fillna(0)
# data1['ransom'].value_counts()

# -------------- Correlation check --------------
# The author observed very low pairwise correlation between the features,
# so no decorrelation step is applied.
corr = data1.corr()
sns.heatmap(data=corr)

# --------------- Standardization ----------------
# fit_transform learns the per-column mean/std from data1 and applies them in
# one call; `scaler` is left fitted on data1. The resulting frame has a fresh
# 0..n-1 index and integer column labels.
scaler = StandardScaler()
standardized_values = scaler.fit_transform(data1)
data2 = pd.DataFrame(standardized_values)

# ---------------- K-means clustering ---------------
# ---- Choosing K ------
kmax = 15  # exclusive upper bound of the K values that are tried

# Elbow method: plot the within-cluster sum of squared errors (SSE) against
# K and look for the bend in the curve.
elbow_ks = range(1, kmax)
SSE = [
    KMeans(n_clusters=n_clusters, init='k-means++', random_state=1234)
    .fit(data2)
    .inertia_
    for n_clusters in elbow_ks
]

plt.plot(elbow_ks, SSE, 'o-', color='black')
plt.xlabel('k')
plt.ylabel('SSE')
plt.xticks(np.arange(1, kmax, 1))
plt.grid()
plt.show()
# Author's observation: the curve shows no clear elbow, so this method is
# inconclusive on its own.
#效果不好,看不出来拐点

# Silhouette method: the K with the largest silhouette coefficient is best.
silhouette_ks = range(2, kmax)  # the silhouette needs at least 2 clusters
Scores = []  # silhouette coefficient for each K in silhouette_ks
for n_clusters in silhouette_ks:
    fitted = KMeans(
        n_clusters=n_clusters, init='k-means++', random_state=1234
    ).fit(data2)
    Scores.append(
        silhouette_score(data2, fitted.labels_, metric='euclidean')
    )

plt.plot(silhouette_ks, Scores, 'o-', color='black')
plt.xlabel('k')
plt.ylabel('Silhouette Coefficient')
plt.xticks(np.arange(1, kmax, 1))
plt.grid()
plt.show()
# Author's conclusion: local maxima at k=3 and k=9; since the elbow plot
# shows the SSE is still large at k=3, k=9 is chosen.
# Scores[0] belongs to k=2, so k=9 sits at index 9 - 2 = 7.
score = Scores[9 - 2]

# --- Cluster with the chosen K -----
num = 9  # number of clusters selected above
clf = KMeans(n_clusters=num, init='k-means++', random_state=1234)
model = clf.fit(data2)

# Cluster centres (in the standardized feature space of data2).
centers = clf.cluster_centers_
print(centers)

# Within-cluster sum of squared errors of the final model.
print(clf.inertia_)

# Cluster assignment of every sample, in row order of data2.
label2 = list(clf.labels_)

# Number of samples per cluster.
set2 = {
    cluster_id: label2.count(cluster_id) for cluster_id in set(label2)
}
print(set2)

# Pair each event id with its cluster label and export the table.
result2 = DataFrame(copy.copy(data['eventid']))
result2.insert(loc=0, column='label2', value=label2)

result2.to_csv(
    '/Users/vicky/Desktop/建模/第二问/result2.csv',
    sep=',',
    header=True,
    index=False,
    encoding="utf_8_sig",
)




# -------- Rank clusters by harm, using the question-1 levels --------------
# Map the level names from question 1 to weights 5 (highest) .. 1 (lowest).
dic = {'最高': 5, '较高': 4, '中等': 3, '较低': 2, '最低': 1}
lev = list(data.level)

# One harm weight per event, in the same row order as label2.
wh = DataFrame([dic[level_name] for level_name in lev], columns=['weihai'])
wh.insert(0, 'label2', label2)

# Total harm weight per cluster.
wh_label = wh.groupby(wh['label2'])['weihai'].sum()
# Ranking recorded by the author, most to least harmful: 8,4,2,7,5,3,6,1,0


# ------ Extract the event set given in the problem statement
eid = [201701090031, 201702210037, 201703120023, 201705050009, 201705050010,
       201707010028, 201707020006, 201708110018, 201711010006, 201712010003]

# Attach the cluster label to a copy of the filtered events.
data3 = copy.copy(data)
data3.insert(0, 'label2', label2)

# Collect the requested events in the order listed in `eid`.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
test = pd.concat([data3[data3.eventid == x] for x in eid])

# Same nine features as used for clustering; .copy() so the clean-up below
# edits test1 itself rather than a slice of `test`.
test1 = test[['iyear', 'imonth', 'country', 'region', 'suicide', 'attacktype1',
              'targtype1', 'weaptype1', 'ransom']].copy()

# Unknown ransom status (negative code or NaN) counts as "no ransom". Only
# the `ransom` column is touched; the previous `test1[mask] = 0` zeroed every
# column of the matching rows.
test1.loc[test1['ransom'] < 0, 'ransom'] = 0
test1['ransom'] = test1['ransom'].fillna(0)

# Standardize with the scaler that was fitted on the full clustering data
# (data1). Re-fitting a scaler on these few rows, as was done before, puts
# the test points in a different coordinate system from `centers`, which
# makes the distances computed afterwards meaningless.
test2 = pd.DataFrame(scaler.transform(test1))


# ------ Distance from each test event to the centres of clusters 2,4,5,7,8
# (these are the first five clusters in the author's recorded harm ranking).
k = [2, 4, 5, 7, 8]

# Euclidean distance matrix of shape (number of test events, len(k)),
# computed by broadcasting instead of a hard-coded (10, 5) buffer so it
# stays correct when the number of events or clusters changes.
_diff = test2.values[:, np.newaxis, :] - centers[k][np.newaxis, :, :]
distance = np.sqrt((_diff ** 2).sum(axis=2))

# Rank within each row: the nearest cluster gets rank 1.
rank_dis = DataFrame(distance).rank(axis=1)








# Experiments with other clustering methods

# ---------------- MeanShift -----------
import numpy as np
from sklearn.cluster import MeanShift, estimate_bandwidth

# Fit MeanShift on the standardized data; the number of clusters is
# determined by the algorithm rather than given up front.
ms = MeanShift(bin_seeding=True).fit(data2)
labels = ms.labels_
cluster_centers = ms.cluster_centers_

# Count the distinct labels that were assigned.
labels_unique = np.unique(labels)
n_clusters_ = labels_unique.shape[0]
print("number of estimated clusters : %d" % n_clusters_)
# Author's recorded outcome: 8 clusters were found automatically.

# -------------- Spectral clustering -----------
from sklearn.cluster import SpectralClustering
from sklearn import metrics

# scikit-learn renamed `calinski_harabaz_score` to `calinski_harabasz_score`
# (the misspelled name was removed in 0.23). Prefer the current name and fall
# back to the old one on releases that predate the rename.
_ch_score = getattr(metrics, 'calinski_harabasz_score', None)
if _ch_score is None:
    _ch_score = metrics.calinski_harabaz_score

# NOTE: `result` is rebound here from the DataFrame loaded at the top of the
# script to a list of [k, score] pairs.
result = []
for k in range(2, kmax):
    y_pred = SpectralClustering(n_clusters=k).fit_predict(data2)
    # Compute the score once and reuse it for both the record and the log.
    ch_value = _ch_score(data2, y_pred)
    result.append([k, ch_value])
    print("Calinski-Harabasz Score with n_clusters=", k, "score:", ch_value)
# Author's note: this loop is very slow.

y_pred = SpectralClustering(n_clusters=8).fit_predict(data2)
print("Calinski-Harabasz Score", _ch_score(data2, y_pred))


# --------------- Hierarchical clustering -------------
from scipy.cluster import hierarchy
import scipy.spatial.distance as dist

# Condensed pairwise distance matrix between samples.
# NOTE(review): this uses the unscaled data1, whereas every other method in
# this script clusters the standardized data2 - confirm this is intended.
distMatrix = dist.pdist(data1)

# Agglomerative clustering with complete linkage.
Z = hierarchy.linkage(distMatrix, method='complete')

# Draw the resulting hierarchy as a dendrogram.
hierarchy.dendrogram(Z)

# Flatten the linkage matrix into cluster labels using the 'inconsistent'
# criterion with threshold 1; the largest label is taken as the cluster count.
cluster = hierarchy.fcluster(Z, t=1, criterion='inconsistent')
num_clusters = cluster.max()


# ------------- DBSCAN ---------------

from sklearn.cluster import DBSCAN
from sklearn import metrics
from sklearn.preprocessing import StandardScaler

# Run DBSCAN on the standardized data.
db = DBSCAN(eps=0.3, min_samples=10).fit(data2)
labels = db.labels_

# Boolean mask with True at the positions of the core samples.
core_samples_mask = np.zeros(labels.shape, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True

# Number of clusters found; the label -1 marks noise and is not counted.
distinct_labels = set(labels)
n_clusters_ = len(distinct_labels) - (1 if -1 in distinct_labels else 0)
print('Estimated number of clusters: %d' % n_clusters_)




 

(venv) gapinyc@DESKTOP-9QS7RL5:~/superset$ flask shell ?charset=utf8mb4ASE_URI: mysql+pymysql://superset_user Loaded your LOCAL configuration at [/home/gapinyc/superset/superset_config.py] 2025-10-25 07:16:58,944:DEBUG:superset.utils.logging_configurator:logging was configured successfully 2025-10-25 07:16:58,949:DEBUG:root:Configured event logger of type <class &#39;superset.utils.log.DBEventLogger&#39;> 2025-10-25 07:16:58,952:INFO:superset.initialization:Setting database isolation level to READ COMMITTED 2025-10-25 07:16:58,953:ERROR:flask_appbuilder.security.sqla.manager:DB Creation and initialization failed: (pymysql.err.OperationalError) (2003, "Can&#39;t connect to MySQL server on &#39;192.168.110.204\\r&#39; ([Errno -2] Name or service not known)") (Background on this error at: https://sqlalche.me/e/14/e3q8) (venv) gapinyc@DESKTOP-9QS7RL5:~/superset$ cat -A /home/gapinyc/superset/.env # M-fM-^UM-0M-fM-^MM-.M-eM-:M-^SM-hM-?M-^^M-fM-^NM-%$ DB_HOST=192.168.110.204$ DB_PORT=3306$ DB_USER=superset$ DB_PASSWORD=password$ DB_NAME=superset$ $ # Superset M-iM-^EM-^MM-gM-=M-.$ SECRET_KEY=kNeeshADyj2cmejJjWqJf590qM2Hkf6LrspLE8dgzatDmogrn4a8SAma(venv) gapinyc@DESKTOP-9QS7RL5:~/superset$ cat -A /home/gapinyc/superset/superset_config.pycat -A /home/gapinyc/superset/superset_config.py | grep -A5 -B5 &#39;\^M&#39; import os^M$ ^M$ # -------------------------------^M$ # M-fM-^UM-0M-fM-^MM-.M-eM-:M-^SM-iM-^EM-^MM-gM-=M-.M-oM-<M-^ZM-dM-=M-?M-gM-^TM-( MySQL^M$ # -------------------------------^M$ SQLALCHEMY_DATABASE_URI = (^M$ f"mysql+pymysql://{os.getenv(&#39;MYSQL_USER&#39;)}:{os.getenv(&#39;MYSQL_PASSWORD&#39;)}"^M$ f"@{os.getenv(&#39;MYSQL_HOST&#39;)}/{os.getenv(&#39;DATABASE&#39;)}?charset=utf8mb4"^M$ )^M$ ^M$ print("SQLALCHEMY_DATABASE_URI:", SQLALCHEMY_DATABASE_URI)^M$ ^M$ # -------------------------------^M$ # Superset M-eM-^EM-^CM-fM-^UM-0M-fM-^MM-.M-fM-^UM-0M-fM-^MM-.M-eM-:M-^S 
URIM-oM-<M-^HM-eM-^MM-3M-hM-^GM-*M-hM-:M-+M-eM-^EM-^CM-fM-^UM-0M-fM-^MM-.M-eM--M-^XM-eM-^BM-(M-dM-=M-^MM-gM-=M-.M-oM-<M-^I^M$ # -------------------------------^M$ SQLALCHEMY_TRACK_MODIFICATIONS = False^M$ ^M$ # -------------------------------^M$ # Flask App Builder M-iM-^EM-^MM-gM-=M-.^M$ # -------------------------------^M$ SECRET_KEY = os.getenv(&#39;SUPERSET_SECRET_KEY&#39;, &#39;your-super-secret-key-change-in-prod&#39;)^M$ ^M$ # -------------------------------^M$ # M-iM-^BM-.M-dM-;M-6M-iM-^EM-^MM-gM-=M-.M-oM-<M-^HM-eM-^OM-/M-iM-^@M-^IM-oM-<M-^I^M$ # -------------------------------^M$ # M-eM-&M-^BM-fM-^^M-^\M-iM-^\M-^@M-hM-&M-^AM-eM-^OM-^QM-iM-^BM-.M-dM-;M-6M-eM-^QM-^JM-hM--M-&M-gM--M-^IM-eM-^JM-^_M-hM-^CM-=M-oM-<M-^LM-hM-/M-7M-iM-^EM-^MM-gM-=M-.^M$ # MAIL_SERVER = &#39;localhost&#39;^M$ # MAIL_USE_TLS = False^M$ # MAIL_USERNAME = &#39;no-reply@example.com&#39;^M$ # MAIL_PASSWORD = &#39;&#39;^M$ # MAIL_DEFAULT_SENDER = &#39;no-reply@example.com&#39;^M$ ^M$ # -------------------------------^M$ # M-iM-^]M-^YM-fM-^@M-^AM-hM-5M-^DM-fM-:M-^PM-eM-^RM-^LM-gM-<M-^SM-eM--M-^XM-oM-<M-^HM-eM-^OM-/M-iM-^@M-^IM-dM-<M-^XM-eM-^LM-^VM-oM-<M-^I^M$ # -------------------------------^M$ # DATA_CACHE_CONFIG = {^M$ # &#39;CACHE_TYPE&#39;: &#39;simple&#39;^M$ # } (venv) gapinyc@DESKTOP-9QS7RL5:~/superset$ sed -i &#39;s/\r$//&#39; /home/gapinyc/superset/superset_config.py (venv) gapinyc@DESKTOP-9QS7RL5:~/superset$ cat -A /home/gapinyc/superset/superset_config.py | grep -A5 -B5 &#39;\^M&#39; import os$ $ # -------------------------------$ # M-fM-^UM-0M-fM-^MM-.M-eM-:M-^SM-iM-^EM-^MM-gM-=M-.M-oM-<M-^ZM-dM-=M-?M-gM-^TM-( MySQL$ # -------------------------------$ SQLALCHEMY_DATABASE_URI = ($ f"mysql+pymysql://{os.getenv(&#39;MYSQL_USER&#39;)}:{os.getenv(&#39;MYSQL_PASSWORD&#39;)}"$ f"@{os.getenv(&#39;MYSQL_HOST&#39;)}/{os.getenv(&#39;DATABASE&#39;)}?charset=utf8mb4"$ )$ $ print("SQLALCHEMY_DATABASE_URI:", SQLALCHEMY_DATABASE_URI)$ $ # -------------------------------$ # Superset 
M-eM-^EM-^CM-fM-^UM-0M-fM-^MM-.M-fM-^UM-0M-fM-^MM-.M-eM-:M-^S URIM-oM-<M-^HM-eM-^MM-3M-hM-^GM-*M-hM-:M-+M-eM-^EM-^CM-fM-^UM-0M-fM-^MM-.M-eM--M-^XM-eM-^BM-(M-dM-=M-^MM-gM-=M-.M-oM-<M-^I$ # -------------------------------$ SQLALCHEMY_TRACK_MODIFICATIONS = False$ $ # -------------------------------$ # Flask App Builder M-iM-^EM-^MM-gM-=M-.$ # -------------------------------$ SECRET_KEY = os.getenv(&#39;SUPERSET_SECRET_KEY&#39;, &#39;your-super-secret-key-change-in-prod&#39;)$ $ # -------------------------------$ # M-iM-^BM-.M-dM-;M-6M-iM-^EM-^MM-gM-=M-.M-oM-<M-^HM-eM-^OM-/M-iM-^@M-^IM-oM-<M-^I$ # -------------------------------$ # M-eM-&M-^BM-fM-^^M-^\M-iM-^\M-^@M-hM-&M-^AM-eM-^OM-^QM-iM-^BM-.M-dM-;M-6M-eM-^QM-^JM-hM--M-&M-gM--M-^IM-eM-^JM-^_M-hM-^CM-=M-oM-<M-^LM-hM-/M-7M-iM-^EM-^MM-gM-=M-.$ # MAIL_SERVER = &#39;localhost&#39;$ # MAIL_USE_TLS = False$ # MAIL_USERNAME = &#39;no-reply@example.com&#39;$ # MAIL_PASSWORD = &#39;&#39;$ # MAIL_DEFAULT_SENDER = &#39;no-reply@example.com&#39;$ (venv) gapinyc@DESKTOP-9QS7RL5:~/superset$ flask shell ?charset=utf8mb4ASE_URI: mysql+pymysql://superset_user Loaded your LOCAL configuration at [/home/gapinyc/superset/superset_config.py] 2025-10-25 07:18:41,114:DEBUG:superset.utils.logging_configurator:logging was configured successfully 2025-10-25 07:18:41,120:DEBUG:root:Configured event logger of type <class &#39;superset.utils.log.DBEventLogger&#39;> 2025-10-25 07:18:41,122:INFO:superset.initialization:Setting database isolation level to READ COMMITTED 2025-10-25 07:18:41,123:ERROR:flask_appbuilder.security.sqla.manager:DB Creation and initialization failed: (pymysql.err.OperationalError) (2003, "Can&#39;t connect to MySQL server on &#39;192.168.110.204\\r&#39; ([Errno -2] Name or service not known)") (Background on this error at: https://sqlalche.me/e/14/e3q8) (venv) gapinyc@DESKTOP-9QS7RL5:~/superset$
最新发布
10-26
你仍然遇到错误: ``` Can&#39;t connect to MySQL server on &#39;192.168.110.204\\r&#39; ``` 但最关键的是,从你的 `cat -A` 输出来看,**`.env` 文件已经被严重损坏或编码错误**! --- ## 🔥 问题定位:`.env` 文件是乱码 看这一行: ```bash # M-fM-^UM-0M-fM-^MM-.M-eM-:M-^SM-hM-?M-^^M-fM-^NM-%$ ``` 这是典型的 **UTF-8 字节序列被错误解释为 Latin-1 或其他单字节编码的结果**。 说明:你用一个不支持中文/特殊字符的编辑器打开了文件,或者复制粘贴时引入了编码污染。 更糟糕的是: ```bash SECRET_KEY=kNeeshADyj2cmejJjWqJf590qM2Hkf6LrspLE8dgzatDmogrn4a8SAma(venv) ``` → `SECRET_KEY` 值末尾直接拼上了 `(venv)`,说明你在终端里复制粘贴时不小心把命令提示符也粘进去了! --- ## ✅ 终极解决方案:重建 `.env` 文件 ### ✅ 步骤 1:删除并重建 `.env` ```bash rm /home/gapinyc/superset/.env nano /home/gapinyc/superset/.env ``` 输入以下内容(请根据实际情况修改): ```env # Database Settings MYSQL_USER=superset_user MYSQL_PASSWORD=your_real_password MYSQL_HOST=192.168.110.204:3306 DATABASE=superset # Superset Settings SUPERSET_SECRET_KEY=your-super-secret-key-change-in-prod ``` 📌 注意事项: - 不要加引号; - 使用纯英文注释(避免中文导致编码问题); - 每个值都不能有空格或换行残留; - `MYSQL_HOST` 包含端口没问题; - `SUPERSET_SECRET_KEY` 要足够长且随机(用于加密 session); 保存退出:`Ctrl+O` → 回车 → `Ctrl+X` --- ### ✅ 步骤 2:确保 `superset_config.py` 正确加载环境变量 确保文件开头有显式加载 `.env` 的代码: ```python import os from dotenv import load_dotenv # 显式加载 .env basedir = os.path.abspath(os.path.dirname(__file__)) load_dotenv(os.path.join(basedir, &#39;.env&#39;)) # 构建数据库 URI SQLALCHEMY_DATABASE_URI = ( f"mysql+pymysql://{os.getenv(&#39;MYSQL_USER&#39;)}:{os.getenv(&#39;MYSQL_PASSWORD&#39;)}" f"@{os.getenv(&#39;MYSQL_HOST&#39;)}/{os.getenv(&#39;DATABASE&#39;)}?charset=utf8mb4" ) print("SQLALCHEMY_DATABASE_URI:", repr(SQLALCHEMY_DATABASE_URI)) ``` > ⚠️ `repr()` 可以显示字符串中的隐藏字符如 `\r`, `\n` --- ### ✅ 步骤 3:验证 `.env` 是否正常加载 运行测试脚本: ```bash python -c " import os from dotenv import load_dotenv load_dotenv(&#39;.env&#39;) print(&#39;MYSQL_USER:&#39;, repr(os.getenv(&#39;MYSQL_USER&#39;))) print(&#39;MYSQL_PASSWORD:&#39;, repr(os.getenv(&#39;MYSQL_PASSWORD&#39;))) print(&#39;MYSQL_HOST:&#39;, repr(os.getenv(&#39;MYSQL_HOST&#39;))) print(&#39;DATABASE:&#39;, repr(os.getenv(&#39;DATABASE&#39;))) " ``` ✅ 正确输出应类似: ``` 
MYSQL_USER: &#39;superset_user&#39; MYSQL_PASSWORD: &#39;your_real_password&#39; MYSQL_HOST: &#39;192.168.110.204:3306&#39; DATABASE: &#39;superset&#39; ``` ❌ 如果出现 `&#39;192.168.110.204:3306\r&#39;` 或乱码,则说明文件仍有问题。 --- ### ✅ 步骤 4:再次运行 `flask shell` ```bash flask shell ``` 观察输出中是否还有 `\\r` 或乱码。 你应该会看到类似: ``` SQLALCHEMY_DATABASE_URI: &#39;mysql+pymysql://superset_user:your_real_password@192.168.110.204:3306/superset?charset=utf8mb4&#39; ``` 如果没有报错,并进入 Python shell,说明成功! --- ### ✅ 补充:防止未来出错的最佳实践 | 措施 | 方法 | |------|------| | **使用 LF 换行** | 在 VS Code 左下角确认是 `LF` 而非 `CRLF` | | **禁用富文本粘贴** | 不要从 Word、微信、网页直接复制内容到 `.env` | | **设置文件权限** | `chmod 600 .env` 防止泄露 | | **加入 .gitignore** | 确保 `.env` 不提交到 Git | | **使用 pre-commit 检查** | 自动检测 CRLF 和敏感词 | --- ### ✅ 最终检查清单 | 检查项 | 命令 | |--------|------| | 查看 .env 内容是否干净 | `cat -A .env` (不能有 `^M` 或乱码) | | 测试环境变量加载 | `python -c "import os; from dotenv import load_dotenv; load_dotenv(); print(repr(os.getenv(&#39;MYSQL_HOST&#39;)))"` | | 确认网络连通性 | `nc -zv 192.168.110.204 3306` | | 检查 MySQL 用户权限 | 登录 MySQL 执行 `SELECT User, Host FROM mysql.user WHERE User=&#39;superset_user&#39;;` | ---
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值