针对轴承、齿轮箱的机器学习

该代码段展示了使用Python进行机器学习的流程,包括加载数据集、预处理、训练RandomForestClassifier模型、计算精度、保存模型以及模型评估。预处理步骤涉及数据缩放和特征选择,模型训练后进行交叉验证以得到平均得分。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

import os
from nltk import accuracy
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

from data.RandimForestFunctions import train_model, save_model

# Script-level configuration: resolve all paths relative to this file so the
# script works no matter which directory it is launched from.
# NOTE(review): the original used `name == 'main'` and bare `file` (the scrape
# dropped the double underscores and used curly quotes); fixed to real dunders.
current_dir = os.path.dirname(os.path.abspath(__file__))

# Path to the training dataset (CSV with a header row).
dataset_path = os.path.join(current_dir, "data", "TRAISN.csv")

# Path where the trained model is persisted.
saved_model_path = os.path.join(current_dir, "train_model.m")

def main(dataset_path, saved_model_path):
    """Train, persist and evaluate the random-forest model.

    Parameters
    ----------
    dataset_path : str
        Path to the training CSV consumed by ``train_model``.
    saved_model_path : str
        Destination file for the serialized model.
    """
    # Train the model; train_model also returns the held-out split so the
    # evaluation below can run on exactly the same test data.
    model, score_r, train_accuracy, class_accuracy, mean_score, Xtest, Ytest = train_model(dataset_path)

    # Persist the fitted model.
    save_model(model, saved_model_path)

    # Evaluate on the held-out test set.
    # NOTE(review): renamed the local from `accuracy` to avoid shadowing the
    # `accuracy` imported from nltk at the top of the file.
    Ypred = model.predict(Xtest)
    test_accuracy = accuracy_score(Ytest, Ypred)
    # NOTE(review): precision/recall/f1 were commented out in the original —
    # their binary default raises on multi-class labels; macro averaging
    # (zero_division=0 for classes absent from the test split) fixes that.
    precision = precision_score(Ytest, Ypred, average="macro", zero_division=0)
    recall = recall_score(Ytest, Ypred, average="macro", zero_division=0)
    f1 = f1_score(Ytest, Ypred, average="macro", zero_division=0)

    # Report scores and where the model was saved.
    print("Random Forest Score: {}".format(score_r))
    print("Ridge Mean Score: {}".format(mean_score))
    print("Model saved at: {}".format(saved_model_path))
    print("Accuracy: {}".format(test_accuracy))
    print("Training Accuracy: {}".format(train_accuracy))
    print("Class Accuracy: {}".format(class_accuracy))
    print("Precision: {}".format(precision))
    print("Recall: {}".format(recall))
    print("F1 Score: {}".format(f1))
    print("Done!")


# Entry point: run the pipeline only when executed as a script, not on import.
# NOTE(review): the original called main() unguarded at module level.
if __name__ == "__main__":
    main(dataset_path, saved_model_path)

封装.py
import numpy as np
from google.protobuf.internal import encoder
from pyts.preprocessing import StandardScaler, RobustScaler, MaxAbsScaler, PowerTransformer, QuantileTransformer, scaler
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.linear_model import Ridge
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder, LabelEncoder
import joblib
from sklearn.model_selection import train_test_split, cross_val_score
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import MinMaxScaler
import joblib
from sklearn.model_selection import train_test_split

# ---------------------------------------------------------------------------
# NOTE(review): the scratch lines below originally executed at import time,
# referenced an undefined `dataset`, and used full-width quote characters —
# importing this module crashed immediately. They are kept here, fully
# commented out, as the author's earlier drafts; the working versions are
# get_datasets() and train_model() defined further down.
# ---------------------------------------------------------------------------
# def get_datasets(dataset, way=None):
#     scaler = MinMaxScaler()
#     # Replace empty strings ("") with NaN.
#     dataset[dataset == ""] = np.nan
#     # Replace NaN with 0.
#     dataset = np.nan_to_num(dataset.astype(float))
#     normalized_data = scaler.fit_transform(dataset)
#     return normalized_data
#
# def train_model(dataset_path):
#     root = dataset_path
#     raw_datasets = np.genfromtxt(root, dtype=str, delimiter=",")[1:, 1:][:1000]
#     datasets = get_datasets(raw_datasets[:, :-1])
#     # labels = [int(one_data[-1]) for one_data in raw_datasets]
#     labels = [int(one_data[-1]) if one_data[-1] != '' else -1 for one_data in raw_datasets]
#     Xtrain, Xtest, Ytrain, Ytest = train_test_split(datasets, labels, test_size=0.3)
#     rfc = RandomForestClassifier(random_state=10)
#     rfc = rfc.fit(Xtrain, Ytrain)
#     score_r = rfc.score(Xtest, Ytest)
#     return rfc, score_r

def get_datasets(dataset, way=None):
    """Clean a raw CSV cell matrix and min-max scale each feature to [0, 1].

    Empty strings are treated as missing values and mapped to 0 before
    scaling. MinMaxScaler was chosen after the author compared several
    scalers (accuracies noted below).

    Parameters
    ----------
    dataset : np.ndarray
        2-D array of raw cell values (string or numeric dtype).
    way : optional
        Unused; kept for backward compatibility with existing callers.

    Returns
    -------
    np.ndarray
        Float array of the same shape, min-max scaled per column.
    """
    # Preprocessing alternatives tried by the author (observed accuracy):
    #   StandardScaler; RobustScaler (0.31) — median/IQR, robust to outliers;
    #   MaxAbsScaler (0.41) — divide by per-feature max absolute value;
    #   PowerTransformer('box-cox') — positive data only;
    #   QuantileTransformer (0.31) — map to uniform/normal distribution;
    #   OneHotEncoder / LabelEncoder for categorical features/labels.
    scaler = MinMaxScaler()  # best observed accuracy: 0.45

    # Work on a copy so the caller's array is never mutated.
    data = np.asarray(dataset)
    if data.dtype.kind in ("U", "S", "O"):
        # Map empty strings to the literal "nan" BEFORE the float cast.
        # NOTE(review): the original assigned np.nan into the string array,
        # which a fixed-width dtype can silently truncate (e.g. "nan" -> "n"
        # for <U1), breaking astype(float).
        data = np.where(data == "", "nan", data)
    # NaN -> 0, then scale every feature into [0, 1].
    data = np.nan_to_num(data.astype(float))
    return scaler.fit_transform(data)

def train_model(dataset_path):
    """Load the CSV at *dataset_path* and train a RandomForest classifier.

    Parameters
    ----------
    dataset_path : str
        Path to a CSV file with a header row, an index column, feature
        columns, and the class label in the last column.

    Returns
    -------
    tuple
        (fitted classifier, test score, training accuracy, test accuracy,
        ridge CV mean score, Xtest, Ytest)
    """
    root = dataset_path
    # NOTE(review): the original loaded the file twice (a genfromtxt call with
    # full-width quote delimiters, immediately overwritten by loadtxt) and
    # used the np.str alias removed in NumPy 1.24 — load once with builtin
    # str. Skip the header row and the index column; cap at 1000 rows.
    raw_datasets = np.loadtxt(root, dtype=str, delimiter=",")[1:, 1:][:1000]

    # Everything but the last column is a feature.
    datasets = get_datasets(raw_datasets[:, :-1])
    # Last column is the label: parsed as int when present, -1 for empty cells.
    labels = [int(one_data[-1]) if one_data[-1] != '' else -1
              for one_data in raw_datasets]

    # Feature selection (author's note: accuracy 0.45 before, 0.41 after).
    # NOTE(review): guard k so SelectKBest never asks for more features than
    # exist, which would raise on narrow datasets.
    k = min(50, datasets.shape[1])
    selector = SelectKBest(score_func=f_classif, k=k)
    datasets_selected = selector.fit_transform(datasets, labels)

    Xtrain, Xtest, Ytrain, Ytest = train_test_split(
        datasets_selected, labels, test_size=0.3)
    rfc = RandomForestClassifier(random_state=60)
    rfc = rfc.fit(Xtrain, Ytrain)
    score_r = rfc.score(Xtest, Ytest)

    # Training-set accuracy, to compare against the test score for
    # over/under-fitting.
    train_accuracy = rfc.score(Xtrain, Ytrain)

    # Overall accuracy on the held-out test split.
    Ypred = rfc.predict(Xtest)
    class_accuracy = accuracy_score(Ytest, Ypred)

    # Ridge cross-validation on the full (unselected) feature matrix.
    # NOTE(review): Ridge is a regressor scored with R^2 although the labels
    # are classes — questionable, but kept so the reported "Ridge Mean Score"
    # stays comparable with earlier runs.
    ridge = Ridge(alpha=0.5)
    cv_scores = cross_val_score(ridge, datasets, labels, cv=5)
    mean_score = cv_scores.mean()

    return rfc, score_r, train_accuracy, class_accuracy, mean_score, Xtest, Ytest

def save_model(model, save_path):
    """Serialize the fitted *model* to *save_path* using joblib."""
    joblib.dump(model, save_path)

虽然很简单,但是经过检验和数据处理,对于标准数据集来说,精度在80%以上。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值