"""Scheduled Co-concentration soft sensor.

Every ten minutes (minutes 1, 11, ..., 51 at second 5) the script pulls three
days of process and QC data from the PI server, cleans / smooths / shifts the
process variables, fits an RFE-selected linear regression, derives an
operating advice value and writes the results back to PI tags.

Memory notes (the reason for the function-based layout):
* one cycle runs inside ``run_pipeline`` so every intermediate DataFrame is a
  local that becomes collectable as soon as the cycle returns;
* ``release_memory`` closes every pyplot figure after each cycle (pyplot keeps
  figures alive until they are closed explicitly) and runs the collector;
* redundant deep copies of the working frame were removed.
"""
import datetime
import gc
import sys
import time

# Directory appended to sys.path before loading the OSIsoft.AFSDK assembly.
AF_SDK_DIR = r'C:\Program Files (x86)\PIPC\AF\PublicAssemblies\4.0'
PI_SERVER_NAME = "LBRTPMSPI"

# Scheduling: run when the wall clock shows one of these minutes at this second.
TRIGGER_MINUTES = (1, 11, 21, 31, 41, 51)
TRIGGER_SECOND = 5
# Idle polling period; short enough that the one-second trigger window is not missed.
POLL_SECONDS = 0.2

# Largest allowed difference between the advice and the current set point.
MAX_STEP = 0.5

# PI tags of the process variables and the column names they are loaded under.
PROCESS_TAGS = [
    'PTA6-CTA-ML.DACA.PV',
    'PTA6-CTA-FIC_50301.PIDA.PV',
    'PTA6-CTA-FIC_50153.PIDA.PV',
    'PTA6-CTA-FIC_441A08.PIDA.PV',
    'PTA6-CTA-FIC_441B08.PIDA.PV',
    'PTA6-CTA-FIC_441C08.PIDA.PV',
    'PTA6-CTA-FIC_441D08.PIDA.PV',
    'PTA6-CTA-FIC_441E08.PIDA.PV',
    'PTA6-CTA-FIC_441F08.PIDA.PV',
    'PTA6-CTA-FIC_50803.PIDA.PV',
    'PTA6-CTA-FIC_57005.PIDA.PV',
    'PTA6-CTA-FRIC_54602.PIDA.PV',
    'PTA6-CTA-FRIC_20705.PIDA.PV',
    'PTA6-CTA-PI_441A04.DACA.PV',
    'PTA6-CTA-PI_441B04.DACA.PV',
    'PTA6-CTA-PI_441C04.DACA.PV',
    'PTA6-CTA-PI_441D04.DACA.PV',
    'PTA6-CTA-PI_441E04.DACA.PV',
    'PTA6-CTA-PI_441F04.DACA.PV',
    'PTA6-CTA-FIC_38502.PIDA.PV',
    'PTA6_CTA_FIC_54602A.PV',
    'PTA6-CTA-FIC_21504.PIDA.PV',
]
PROCESS_TAG_NAMES = [
    '母液總量',
    '母液Purge量',
    'C5016淋洗水流量',
    'RPFA',
    'RPFB',
    'RPFC',
    'RPFD',
    'RPFE',
    'RPFF',
    'V-5076至母液缓存槽流量',
    'V-5706/V-5726回用量',
    '觸媒比',
    '溶劑比',
    'A臺壓力',
    'B臺壓力',
    'C臺壓力',
    'D臺壓力',
    'E臺壓力',
    'F臺壓力',
    'C3856中层淋洗量',
    '触媒流量',
    'PX',
]

# QC (lab) tag used as the regression target.
QC_TAGS = ['PTA6-CTA-V2096_Co.QC']
QC_TAG_NAMES = ['y']

# Explanatory variables kept for modelling, in the column order used downstream.
FEATURE_COLUMNS = [
    '母液總量', '母液Purge量', 'C5016淋洗水流量', 'RPFLL', 'V-5076至母液缓存槽流量',
    'V-5706/V-5726回用量', '觸媒比', '溶劑比', 'C3856中层淋洗量',
]
# Variables included in the coefficient-weight ranking (all but the last feature).
WEIGHT_COLUMNS = FEATURE_COLUMNS[:-1]

# Per variable: [start, end, rule] handed to rm.BestShiftTime.
SHIFT_SEARCH = {
    '母液總量': [0, 30, -1],
    '母液Purge量': [40, 70, -1],
    'C5016淋洗水流量': [40, 70, -1],
    'RPFLL': [40, 70, -1],
    'V-5076至母液缓存槽流量': [0, 30, -1],
    'V-5706/V-5726回用量': [40, 70, 1],
    '觸媒比': [0, 30, 1],
    '溶劑比': [0, 30, -1],
    'C3856中层淋洗量': [0, 200, 2],
}

# Expected sign of the correlation with y for the variables that are checked.
EXPECTED_SIGN = {
    '母液總量': 1,
    'C5016淋洗水流量': -1,
    'RPFLL': -1,
    'V-5076至母液缓存槽流量': 1,
    '觸媒比': 1,
    '溶劑比': 1,
}

# Tags read to compute the operating advice, and their column names.
ADVICE_TAGS = [
    'PTA6-CTA-V2096_CO_center.py',
    'PTA6_CTA_FIC_54602A_6.SP',
    'PTA6-CTA-FIC_21504.PIDA.PV',
    'PTA6-CTA-FRIC_20705.PIDA.PV',
    'PTA6-CTA-FI_21504A.DACA.PV',
    'PTA6_CTA_FIC_54602A.PV',
    'PTA6-CTA-E2096_Co_advise_L.py',
    'PTA6-CTA-E2096_Co_advise_H.py',
]
ADVICE_TAG_NAMES = [
    '中心值',
    'SP值',
    '出口流量',
    '溶剂比',
    'feedmix',
    'PV值',
    'Low',
    'Hight',
]

# Result tags written on every cycle (same order as the values list built in run_pipeline).
OUTPUT_TAGS = [
    'PTA6-CTA-E2096_Co_advise.py',
    'PTA6-CTA-E2096_Co_pred.py',
    'PTA6-CTA-E2096_Co_MAE.py',
    'PTA6-CTA-E2096_Co_R2.py',
    'PTA6-CTA-E2096_Co_num1.py',
    'PTA6-CTA-E2096_Co_num2.py',
    'PTA6-CTA-E2096_Co_num3.py',
    'PTA6-CTA-E2096_Co_num4.py',
    'PTA6-CTA-E2096_Co_num5.py',
]
# Extra tag written only when the write time falls on one of FOUR_HOUR_SLOTS.
FOUR_HOUR_TAG = 'PTA6-CTA-E2096_Co_pred4H.py'
FOUR_HOUR_SLOTS = ['01:00', '05:00', '09:00', '13:00', '17:00', '21:00']


def is_trigger_time(now):
    """Return True when *now* is minute 1/11/21/31/41/51 at second 5."""
    return now.minute in TRIGGER_MINUTES and now.second == TRIGGER_SECOND


def floor_to_ten_minutes(moment):
    """Format *moment* as 'YYYY-MM-DD HH:MM:SS' with the minute floored to a
    multiple of ten and the seconds set to zero."""
    floored = moment.replace(minute=moment.minute - moment.minute % 10,
                             second=0, microsecond=0)
    return floored.strftime("%Y-%m-%d %H:%M:%S")


def limit_advice(advise, sp, low, high, max_step=MAX_STEP):
    """Clamp *advise* to within *max_step* of *sp*, then to [*low*, *high*].

    The step limit is applied first, so the low/high bound wins when the two
    constraints disagree.
    """
    if advise - sp > max_step:
        advise = sp + max_step
    elif sp - advise > max_step:
        advise = sp - max_step
    if advise < low:
        advise = low
    elif advise > high:
        advise = high
    return advise


def release_memory():
    """Close all pyplot figures (if pyplot was loaded) and run the collector.

    pyplot keeps a reference to every figure until it is closed, so the
    figure opened on each cycle would otherwise accumulate for the lifetime
    of the process.
    """
    pyplot = sys.modules.get('matplotlib.pyplot')
    if pyplot is not None:
        pyplot.close('all')
    gc.collect()


def run_pipeline():
    """Run one read -> clean -> model -> advise -> write cycle.

    Raises whatever the PI SDK, pandas or scikit-learn raise; the caller
    (``main``) reports the error and keeps the scheduler alive.
    """
    # Project helpers; imported first, as in the original import order.
    import function.MyFunction as rm
    # pythonnet bridge and the AF SDK assembly.
    import clr
    if AF_SDK_DIR not in sys.path:  # do not grow sys.path on every cycle
        sys.path.append(AF_SDK_DIR)
    clr.AddReference('OSIsoft.AFSDK')
    # Explicit names: "from X import *" is not allowed inside a function.
    from OSIsoft.AF.Asset import AFValue
    from OSIsoft.AF.Data import AFBufferOption, AFUpdateOption
    from OSIsoft.AF.PI import PIPoint, PIServers
    from OSIsoft.AF.Time import AFTime, AFTimeSpan

    import warnings
    warnings.filterwarnings("ignore")

    import pandas as pd
    pd.set_option('display.max_columns', None)  # show every column
    pd.options.display.float_format = '{:.10f}'.format  # no scientific notation
    import matplotlib
    import matplotlib.pyplot as plt
    matplotlib.rc("font", **{'family': 'Microsoft YaHei', 'weight': 'bold'})
    from sklearn.feature_selection import RFE
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import mean_absolute_error
    from sklearn.preprocessing import StandardScaler

    # ------------------------------------------------------------ data read
    span = AFTimeSpan.Parse("10m")
    now = datetime.datetime.now()
    # Both window edges are floored to a ten-minute mark (00, 10, 20, ...).
    start = floor_to_ten_minutes(now + datetime.timedelta(days=-3))
    end = floor_to_ten_minutes(now)
    print('数据时间段:{0}~{1}'.format(start,end))

    piServer = PIServers()[PI_SERVER_NAME]
    pdata = rm.GetAverageData(PROCESS_TAGS, PROCESS_TAG_NAMES, start, end, span, piServer)
    # Move every timestamp one row up and stamp the last row with `end`
    # (presumably relabels each averaged interval by its end time - TODO
    # confirm against rm.GetAverageData).
    pdata['time'] = pdata['time'].shift(-1)
    pdata.loc[len(pdata) - 1, 'time'] = end
    pdata['time'] = pd.to_datetime(pdata['time'])
    pdata.iloc[:, 1:] = pdata.iloc[:, 1:].apply(pd.to_numeric, errors='coerce')

    # Zero the flow of any unit whose pressure is below 4, then total the six flows.
    for unit in 'ABCDEF':
        pdata.loc[pdata[unit + '臺壓力'] < 4, 'RPF' + unit] = 0
    pdata['RPFLL'] = (pdata['RPFA'] + pdata['RPFB'] + pdata['RPFC']
                      + pdata['RPFD'] + pdata['RPFE'] + pdata['RPFF'])
    pdata['觸媒比'] = pdata['触媒流量']

    # QC data; per the original note, the run fails when the window has no QC value.
    qcdata = rm.getQCdata(QC_TAGS, QC_TAG_NAMES, start, end, piServer)

    # Outer-merge process and QC data on time.
    merged = pd.merge(pdata, qcdata, on='time', how='outer').sort_values(by='time')
    # The first merged row tends to have empty explanatory variables; drop it.
    merged = merged.iloc[1:].reset_index(drop=True)
    merged.iloc[:, 1:] = merged.iloc[:, 1:].apply(pd.to_numeric, errors='coerce')
    model_input = merged[['time'] + FEATURE_COLUMNS + ['y']]

    # ------------------------------------- outlier removal + interpolation
    # The last row (the prediction sample) is set aside so it is never altered.
    last_row = model_input.iloc[[-1]]
    history = model_input.iloc[:-1].copy()
    for col in FEATURE_COLUMNS:
        q1 = history[col].quantile(q=0.25)
        q3 = history[col].quantile(q=0.75)
        lower_fence = q1 - 3 * (q3 - q1)
        upper_fence = q3 + 3 * (q3 - q1)
        outside = (history[col] > upper_fence) | (history[col] < lower_fence)
        history.loc[outside, col] = None
    # Linear fill of the removed points (applies to every column of `history`).
    history.interpolate(method='linear', inplace=True)
    smoothed = pd.concat([history, last_row], axis=0, ignore_index=True)

    # ------------------------------------------------------------ smoothing
    # A time-based rolling window needs the timestamps as the index.
    smoothed.rename(index=smoothed['time'], inplace=True)
    # Kept from the original: a figure is opened before the shift search
    # (presumably used by plotting inside rm.BestShiftTime - TODO confirm).
    # It is closed by release_memory() after the cycle.
    plt.figure(figsize=(60, 8), facecolor='white')
    for col in FEATURE_COLUMNS:
        # NOTE(review): the original applied the 30-minute rolling mean twice
        # to each column (its "new column" name equalled the source column).
        # Preserved as-is so model output does not change; confirm intent.
        for _ in range(2):
            smoothed[col] = smoothed[col].rolling(window='30min').mean()
    smoothed.reset_index(drop=True, inplace=True)

    # ---------------------------------------------------- best time shift
    # rm.BestShiftTime receives its own copy so the working frame cannot be
    # modified by the helper.
    shift_input = smoothed.copy(deep=True)
    x_list, shift_list, corr_list = [], [], []
    for name, (lo, hi, rule) in SHIFT_SEARCH.items():
        ranking = rm.BestShiftTime(data=shift_input, x=name, y='y', rule=rule, start=lo, end=hi)
        # Row 0 of the helper's result is taken as the best shift / correlation.
        x_list.append(name)
        shift_list.append(ranking.loc[0]['shift'])
        corr_list.append(ranking.loc[0]['corr'])
    del shift_input
    x_shift = pd.DataFrame({'x': x_list, 'shift': shift_list, 'corr': corr_list})

    # Apply the shifts; a shift value is divided by 10 to get a row count.
    shifted = smoothed
    for name, shift_value in zip(x_list, shift_list):
        shifted[name] = shifted[name].shift(int(shift_value / 10))
    # Drop the first 28 rows (hard-coded in the original).
    shifted = shifted.iloc[28:].reset_index(drop=True)
    x_shift = x_shift.set_index('x')

    # Variables whose correlation sign matches the expected direction
    # (reported only; the model below still uses RFE on all features).
    keep_x = [name for name, sign in EXPECTED_SIGN.items()
              if sign * x_shift.loc[name]['corr'] > 0]
    print('用於建模的變量為',keep_x)

    # ------------------------------------------------------ standardisation
    scaled = pd.DataFrame(StandardScaler().fit_transform(shifted[FEATURE_COLUMNS]),
                          columns=FEATURE_COLUMNS)
    scaled['time'] = shifted['time']
    scaled['y'] = shifted['y']

    # ----------------------------------------------------- train/test split
    # Test sample = the last row; training set = every complete row.
    xtest = scaled.iloc[[-1]].drop(['time', 'y'], axis=1).reset_index(drop=True)
    ytest = scaled.iloc[[-1]][['y']].reset_index(drop=True)
    time_test = str(scaled['time'].iloc[-1])
    train = scaled.dropna(axis=0).reset_index(drop=True)
    X = train.drop(['time', 'y'], axis=1)
    Y = train[['y']]

    # ------------------------------------------------------ RFE model search
    # Try 1..4 selected features and keep the subset with the lowest training MAE.
    # Starts at +inf so a best model always exists (the original sentinel of
    # 100 left the "best" variables unbound when every MAE was >= 100).
    trainMAE_best = float('inf')
    for n in range(1, 5):
        model = LinearRegression()
        rfe_model = RFE(estimator=model, n_features_to_select=n)
        xtrain_rfe = rfe_model.fit_transform(X, Y)
        xtest_rfe = rfe_model.transform(xtest)
        model.fit(xtrain_rfe, Y)
        trainscore = model.score(xtrain_rfe, Y)
        # MAE rounded to two decimals; adjust the precision to the model's scale.
        trainMAE = float(format(mean_absolute_error(Y, model.predict(xtrain_rfe)), '.2f'))
        testpred = model.predict(xtest_rfe)
        if trainMAE < trainMAE_best:
            trainMAE_best = trainMAE
            trainscore_best = trainscore
            testpred_best = testpred
            pick_cols = list(X.columns[rfe_model.support_])
    print(time_test)
    print('最优的特征子集为:',pick_cols)
    print("训练MAE为 %f" % trainMAE_best)
    print("训练R²为 %f" % trainscore_best)
    print("测试值:" ,testpred_best[0][0])
    print('實際值:',ytest.loc[0,'y'])

    # ------------------------------------------------------ operating advice
    adv_start = str(datetime.datetime.strptime(end, "%Y-%m-%d %H:%M:%S")
                    + datetime.timedelta(minutes=-120))
    adv_data = rm.GetInstantaneousData(ADVICE_TAGS, ADVICE_TAG_NAMES, adv_start, end,
                                       AFTimeSpan.Parse("1m"), piServer)
    adv_data.dropna(axis=0, inplace=True)
    adv_data.reset_index(drop=True, inplace=True)
    latest = adv_data.iloc[-1]  # most recent complete row
    pre = testpred_best[0][0]
    sp = latest['SP值']
    advise = sp + (latest['中心值'] - pre) * latest['feedmix'] /0.05/1000*0.06 / 2.5
    advise = limit_advice(advise, sp, latest['Low'], latest['Hight'])
    print('-------------------------------------------------------------------------------------')

    # ------------------------------------------------------- feature weights
    # Linear fit on the standardised data (last row excluded); the share of
    # each absolute coefficient is reported as that variable's weight.
    weight_data = scaled.iloc[:-1].dropna(axis=0)
    lr = LinearRegression()
    lr.fit(weight_data[WEIGHT_COLUMNS], weight_data['y'])
    coef = pd.DataFrame(list(lr.coef_), columns=['係數'])
    coef['name'] = WEIGHT_COLUMNS
    coef['係數abs'] = abs(coef['係數'])
    coef['係數abs百分比'] = (coef['係數abs'] / (coef['係數abs']).sum()) * 100
    coef = coef.sort_values(by='係數abs', ascending=False)
    coef = coef.reset_index(drop=False)
    coef['係數abs百分比'] = coef['係數abs百分比'].astype(float)
    # Text for the five largest weights, e.g. "<name> : 12.3%".
    top_five = [
        coef.loc[rank, 'name'] + str(' : ') + str(round(coef.loc[rank, '係數abs百分比'], 1)) + str('%')
        for rank in range(5)
    ]

    # ------------------------------------------------------------ write to PI
    # NOTE(review): the original comment says the test sample's time should be
    # written here instead of `end`, because outlier removal can leave the
    # test time earlier than `end`. Behaviour kept: `end` is written.
    writetime = str(end)

    def write_value(tag, value):
        """Insert str(value) at `writetime` into the PI point named `tag`."""
        point = PIPoint.FindPIPoint(piServer, tag)
        val = AFValue()
        val.Timestamp = AFTime(writetime)
        val.Value = str(value)
        point.UpdateValue(val, AFUpdateOption.InsertNoCompression, AFBufferOption.BufferIfPossible)

    values = [advise, testpred_best[0][0], trainMAE_best, trainscore_best] + top_five
    for tag, value in zip(OUTPUT_TAGS, values):
        write_value(tag, value)

    # Four-hourly prediction tag. Fixed: the original wrote `testpred`, i.e.
    # the prediction of the last RFE iteration, not the best model's
    # prediction that is written to the regular prediction tag above.
    if writetime[11:16] in FOUR_HOUR_SLOTS:
        write_value(FOUR_HOUR_TAG, testpred_best[0][0])
    print('____________________________________________________________________')


def run_once():
    """Run one cycle and always release figures / garbage afterwards."""
    try:
        run_pipeline()
    finally:
        release_memory()


def main():
    """Poll the clock forever and run one cycle at every trigger time."""
    while True:
        now = datetime.datetime.now()
        if not is_trigger_time(now):
            # Sleep instead of spinning so the idle loop does not burn a core.
            time.sleep(POLL_SECONDS)
            continue
        try:
            run_once()
        except Exception as Argument:
            # Top-level boundary: report and keep the scheduler running.
            print('!!!!!!!!!!!!!!!!!!!!!')
            print(now)
            print('出现异常',Argument)
        else:
            print('---------------------')
            print('无异常出现')
        # Step past the trigger second so a fast failure cannot re-run the
        # cycle repeatedly within the same second.
        time.sleep(1)


if __name__ == "__main__":
    main()
# 这个程序我应该怎么优化内存的释放?
# 最新发布