使用Python建立心理韧性数学模型

文章展示了使用Pandas进行Excel数据读取、时间差转换、多列分组聚合以及数据框操作。还涉及了模型保存与加载、性能评估方法,如R方值和误差计算。此外,提到了混淆矩阵和ROC曲线在模型评估中的应用。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

import pandas as pd
import numpy as np

# Column labels applied to the sheet, in order.
col = [
    '工号',
    '部门',
    '员工类别',
    '刷卡时间',
    '刷卡地点',
    '刷卡Site',
    'In/Out',
]
try:
    # Original author's note: the format used here is critical.
    # (Presumably refers to overriding the header via `names=` — TODO confirm.)
    data = pd.read_excel("../data/(PTMS)刷卡数据批量查询.xlsx", names=col)
except Exception as err:
    # Best effort: only report the failure; `data` stays unbound in that case.
    print(err)

data
时间差格式转换(转换为小时数)
def transform_hour(s):
    """Convert a duration value to a number of hours.

    The value is first turned into its string form and then parsed with
    ``pd.Timedelta``, so any object whose ``str()`` is a valid timedelta
    expression (for example ``"01:30:00"`` or ``"0 days 02:00:00"``) works.

    Args:
        s: The duration to convert.

    Returns:
        The duration expressed in hours, as a float.

    Raises:
        ValueError: If ``str(s)`` cannot be parsed as a timedelta.
    """
    # The str() round-trip is the step the original author flagged as
    # important; presumably the cells are not plain strings — TODO confirm.
    return pd.Timedelta(str(s)).total_seconds() / 3600

# Replace every IN_OUT value with the result of transform_hour (hours as float).
# NOTE(review): the column list passed to read_excel names this column
# 'In/Out', not 'IN_OUT' — confirm the frame was renamed or derived in between.
data['IN_OUT']=data['IN_OUT'].map(transform_hour)

# Bare expression: only displays the frame in a notebook.
data

pandas多列分组聚合

# Show the column labels of the frame.
data.columns

# Maximum IN_OUT value for each (工号, 刷卡日期) group.
# The result is not assigned; it is only displayed.
data.groupby(["工号",'刷卡日期'])['IN_OUT'].max()

pandas赋值

# Fill the '考勤系数' column row by row: the value is mon[i] divided by the
# constant for the month found in the second index level, rounded to 4 decimals.
#
# Writes go through .iloc on the frame itself. The previous chained form
# (data_[col][i] = ... / data_.loc[:, col][i] = ...) can end up writing to a
# temporary copy. np.float was only an alias of the builtin float and was
# removed in NumPy 1.24, so the builtin is used instead.
coef_col = data_.columns.get_loc('考勤系数')

# First pass (debugging version): July rows only, printing the value that is
# about to be overwritten next to the new one.
for i in range(data_.shape[0]):
    if data_.index[i][1] == 7:
        d1 = format(float(mon[i]) / Jul, '.4f')
        current = data_.iloc[i, coef_col]
        print("!!!!!!!!", current, current, float(mon[i]), d1)
        # The formatted string must be converted back to a float before storing.
        data_.iloc[i, coef_col] = float(d1)

# Second pass: same computation for July through December, without the print.
# NOTE(review): Jul..Dec are defined outside this snippet — presumably the
# per-month reference totals; confirm.
month_divisor = {7: Jul, 8: Aug, 9: Sep, 10: Oct, 11: Nov, 12: Dec}
for i in range(data_.shape[0]):
    divisor = month_divisor.get(data_.index[i][1])
    if divisor is not None:
        # Rows whose month is outside 7-12 are left untouched, as before.
        data_.iloc[i, coef_col] = float(format(float(mon[i]) / divisor, '.4f'))
data_

pandas转DataFrame

# reset_index() moves the index into ordinary columns; the result is rebound
# to the same name.
data_mon=data_mon.reset_index()## convert directly
data_mon

pandas:df.info()显示不全

pd.options.display.max_info_columns = 200  # set the number of columns shown by info() to 200

# Or, equivalently:

pd.set_option("display.max_info_columns", 200)   # set the number of columns shown by info() to 200

保存和调用模型

import sklearn.externals
import joblib

# Save the model
joblib.dump(forest, './model/others/train_model_forest.pkl')
# import pickle
# # Standardize the data to predict on
# scaler = pickle.load(open('sc.pkl', 'rb'))
# allmatrix = scaler.transform(<your prediction data>)
# Load the prediction model and predict
# NOTE(review): this loads a different file than the one dumped above
# ('./model/grade01/learner.pkl' vs './model/others/train_model_forest.pkl').
# joblib.load unpickles arbitrary objects — only load files you trust.
d=joblib.load("./model/grade01/learner.pkl")
c=d.predict(x_test)

模型评估

def good_or_bad_model(y_test, y_test_pred):
    """Print regression quality metrics for a set of predictions.

    Printed, in order: root mean squared error, mean squared error, mean
    absolute error, R-squared, median absolute error and explained variance.

    Args:
        y_test: Ground-truth target values.
        y_test_pred: Predicted target values, aligned with ``y_test``.

    Returns:
        None. The metrics are only written to standard output.
    """
    from sklearn.metrics import (
        explained_variance_score,
        mean_absolute_error,
        mean_squared_error,
        median_absolute_error,
        r2_score,
    )

    # Computed once and reused for both the RMSE and the MSE line.
    mse = mean_squared_error(y_test, y_test_pred)

    # NOTE(review): the stray '"' inside two of the labels is kept so the
    # printed text of those lines stays unchanged.
    print(f'均方根误差为": {np.sqrt(mse)} ')
    print('均方误差为:', mse)
    print(f'平均绝对误差为: {mean_absolute_error(y_test, y_test_pred)}')
    print(f'R方值为: {r2_score(y_test, y_test_pred)} ')
    # This label means "median absolute error"; it previously printed the
    # explained variance score by mistake.
    print(f'中值绝对误差为": {median_absolute_error(y_test, y_test_pred)} ')
    # Explained variance is now reported under its own label.
    print('可解释方差值为:', explained_variance_score(y_test, y_test_pred))

混淆矩阵、ROC曲线(AUC)

from sklearn.metrics import roc_auc_score, auc, roc_curve, recall_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import confusion_matrix
import itertools

import gc
# plot confusion matrix
def plot_confusion_matrix(cm: np.ndarray, classes: list, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    """Print a confusion matrix and draw it as an annotated heat map.

    Args:
        cm: The already computed confusion matrix (2-D array).
        classes: Labels for the rows/columns of the matrix, used as the tick
            labels on both axes.
        normalize: If True, each row is divided by its row sum and cells are
            shown with two decimals; if False, raw counts are shown.
        title: Title of the plot.
        cmap: Colormap passed to ``plt.imshow``.

    Returns:
        None. The matrix is printed and drawn on the current figure; the
        figure is not shown or saved here.

    Note:
        ``plt`` is not imported in the visible part of this file — presumably
        ``matplotlib.pyplot``; confirm it is imported before this definition.
    """
    if normalize:
        # Row-wise normalisation: every row is divided by its own sum.
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    # Cells above half of the maximum get white text so they stay readable.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    
# plot roc
def plot_roc_curve(y_pred, y_pred_score):
    """Draw the ROC curve for a set of scores and show the figure.

    Args:
        y_pred: Passed as the first argument of ``roc_curve``.
            NOTE(review): ``roc_curve`` expects the ground-truth labels in
            that position, so despite its name this should presumably hold
            the true labels — confirm against the callers.
        y_pred_score: Passed as the second argument of ``roc_curve`` (scores).

    Returns:
        None. The plot is displayed with ``plt.show()``.
    """
    # The thresholds returned by roc_curve are not needed for the plot.
    false_pos_rate, true_pos_rate, _ = roc_curve(y_pred, y_pred_score)
    area = auc(false_pos_rate, true_pos_rate)

    plt.title('Receiver Operating Characteristic')
    plt.plot(false_pos_rate, true_pos_rate, 'b', label='AUC = %0.3f' % area)
    plt.legend(loc='lower right')
    # Dashed red diagonal from (0, 0) to (1, 1).
    plt.plot([0, 1], [0, 1], 'r--')

    plt.xlim([-0.1, 1.0])
    plt.ylim([-0.1, 1.01])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.show()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值