# Thermal-runaway early warning: PCA anomaly index
import pandas as pd
import numpy as np
from scipy.signal import savgol_filter
from sklearn.preprocessing import RobustScaler
from sklearn.decomposition import PCA


def _cell_voltage_columns(df_data):
    """Return the labels '单体电压1' .. '单体电压N' for *df_data*.

    N is the number of columns whose name contains '单体电压'; the labels are
    rebuilt from that count rather than taken from the frame.
    """
    count = sum(1 for name in df_data.columns if '单体电压' in name)
    return ['单体电压' + str(i) for i in range(1, count + 1)]


# Feature selection
def makedataset1(df_data):
    """Filter the raw records and aggregate them per time bucket.

    Steps:
      1. Drop bookkeeping/status columns and the temperature columns
         (columns that are absent are ignored).
      2. Keep only rows where every cell-voltage column is strictly between
         2200 and 4800 and 'SOC[%]' is above 20.
      3. Group rows by the first 15 characters of '时间戳' and average the
         numeric columns of each group.
      4. Run a Savitzky-Golay filter (window 3, order 2) over every column.

    Args:
        df_data: raw DataFrame; must contain 'SOC[%]', '时间戳' and the
            cell-voltage columns '单体电压1' .. '单体电压N'.

    Returns:
        DataFrame indexed by '时间' holding the per-bucket means.
    """
    df_data = df_data.drop(
        ['Unnamed: 0', '总电流[A]', 'GSM信号', '外电压', 'SOH[%]', '开关状态',
         '充电状态', '故障等级', '故障代码', '绝缘电阻', '上锁状态', '加热状态',
         '单体均衡状态', '总输出状态'],
        axis=1, errors='ignore')
    temperature_columns = (
        ["单体温度" + str(i) for i in range(1, 5)]
        + ["其他温度" + str(i) for i in range(1, 7)]
    )
    df_data = df_data.drop(temperature_columns, axis=1, errors='ignore')

    # One vectorised mask instead of re-filtering the frame once per cell.
    cells = df_data[_cell_voltage_columns(df_data)]
    keep = ((cells > 2200) & (cells < 4800)).all(axis=1) & (df_data['SOC[%]'] > 20)
    # Copy so the column assignment below does not write into a slice of the
    # caller's frame.
    df_data = df_data[keep].copy()

    # NOTE(review): for 'YYYY-MM-DD HH:MM:SS' strings the first 15 characters
    # would be a 10-minute bucket -- confirm the timestamp format.
    df_data['时间'] = df_data['时间戳'].str[0:15]

    # numeric_only=True: the string column '时间戳' is still present here, and
    # pandas >= 2.0 raises instead of silently dropping it.
    data_set = df_data.groupby('时间').mean(numeric_only=True)

    # savgol_filter raises when the series is shorter than its window, so the
    # pass is skipped for fewer than 3 buckets.
    # NOTE(review): a degree-2 fit over a 3-point window reproduces its input,
    # so this pass looks like a numerical no-op -- confirm the intended window.
    if len(data_set) >= 3:
        for k in data_set.columns:
            data_set[k] = savgol_filter(data_set[k], 3, 2)
    return data_set


# Derived statistical features
def makedataset2(df_data):
    """Build the aggregated data set with per-cell voltages replaced by statistics.

    Calls ``makedataset1`` and then adds the row-wise minimum, maximum and
    mean of the cell voltages, the max-min spread ('最大单体压差') and the
    mean-min spread ('低压差'), and drops the individual cell-voltage columns.

    Args:
        df_data: raw DataFrame, same requirements as for ``makedataset1``.

    Returns:
        DataFrame indexed by '时间' with the summary features.
    """
    data_set = makedataset1(df_data)
    voltage_columns = _cell_voltage_columns(df_data)
    cells = data_set[voltage_columns]

    data_set["最低单体电压"] = cells.min(axis=1)
    data_set["最高单体电压"] = cells.max(axis=1)
    data_set["平均单体电压"] = cells.mean(axis=1)
    # Column arithmetic replaces the per-row .loc lookups.
    data_set["最大单体压差"] = data_set["最高单体电压"] - data_set["最低单体电压"]
    data_set["低压差"] = data_set["平均单体电压"] - data_set["最低单体电压"]
    return data_set.drop(voltage_columns, axis=1)


# Standardisation
def process(data_set):
    """Return a copy of *data_set* with every column scaled by a RobustScaler.

    The scaler is fitted on the frame it is given, on every call.
    NOTE(review): prediction data is therefore scaled with its own statistics
    rather than the training ones -- confirm this is intended.
    """
    scaler = RobustScaler(copy=True)
    scaled = scaler.fit_transform(data_set)
    return pd.DataFrame(scaled, index=data_set.index, columns=data_set.columns)


# Anomaly index
def anomalyScores(originalDF, reducedDF):
    """Return the min-max normalised squared reconstruction error per row.

    Args:
        originalDF: DataFrame of the original (scaled) features.
        reducedDF: array-like of the same shape holding the reconstruction.

    Returns:
        Series indexed like *originalDF* with values in [0, 1]. When all
        errors are equal the result is all zeros (the plain formula would
        divide by zero); an empty input yields an empty Series.
    """
    squared_error = np.sum((np.array(originalDF) - np.array(reducedDF)) ** 2, axis=1)
    loss = pd.Series(data=squared_error, index=originalDF.index)
    if loss.empty:
        return loss
    lowest = np.min(loss)
    span = np.max(loss) - lowest
    if span == 0:
        return pd.Series(0.0, index=loss.index)
    return (loss - lowest) / span
# Build the PCA model
def anomalyPCA(x_train_pro, n_components=4, whiten=True, random_state=2):
    """Fit and return a PCA model on the scaled training features.

    Args:
        x_train_pro: scaled training features.
        n_components: number of principal components to keep.
        whiten: passed through to ``PCA``.
        random_state: passed through to ``PCA``.

    Returns:
        The fitted ``PCA`` instance.
    """
    pca = PCA(n_components=n_components, whiten=whiten, random_state=random_state)
    pca.fit(x_train_pro)
    return pca


# Compute the PCA anomaly index
def transform(df_data_pro, model, df_data):
    """Attach the PCA reconstruction-error score to a copy of *df_data*.

    The scaled features are projected with ``model.transform``, mapped back
    with ``model.inverse_transform``, scored with ``anomalyScores`` and then
    smoothed with a Savitzky-Golay filter (window 15, order 3).

    Args:
        df_data_pro: scaled features, indexed like *df_data*.
        model: fitted model exposing ``transform`` and ``inverse_transform``.
        df_data: unscaled features that the score column is added to.

    Returns:
        Copy of *df_data* with one extra column named
        ``'anomalyScores_' + str(model)``.
    """
    # Dimensionality reduction
    projected = model.transform(df_data_pro)
    # Reconstruction
    restored = pd.DataFrame(data=model.inverse_transform(projected),
                            index=df_data_pro.index)
    # Anomaly index
    scores = np.asarray(anomalyScores(df_data_pro, restored))
    # savgol_filter raises when there are fewer samples than the window, so
    # short inputs keep their unsmoothed scores.
    if len(scores) >= 15:
        scores = savgol_filter(scores, 15, 3)
    df_data2 = df_data.copy()
    df_data2['anomalyScores_' + str(model)] = scores
    return df_data2


# Column label produced by transform() for the default anomalyPCA() model
# under the scikit-learn repr this code was written against.
_LEGACY_SCORE_COLUMN = 'anomalyScores_PCA(n_components=4, random_state=2, whiten=True)'


def _score_column(frame):
    """Return the label of the anomaly-score column in *frame*.

    Prefers the historical hard-coded label; otherwise accepts a single
    column whose name starts with 'anomalyScores_'. Raises KeyError when the
    column is missing or ambiguous.
    """
    if _LEGACY_SCORE_COLUMN in frame.columns:
        return _LEGACY_SCORE_COLUMN
    candidates = [c for c in frame.columns if str(c).startswith('anomalyScores_')]
    if len(candidates) == 1:
        return candidates[0]
    raise KeyError(_LEGACY_SCORE_COLUMN)


# Outlier detection
def detect_outliers(data, pred, threshold=3):
    """Return the rows of *pred* whose score is an outlier relative to *data*.

    A row is flagged when its score lies more than *threshold* standard
    deviations from the mean of the scores in *data* and also exceeds the
    maximum score in *data*.

    Args:
        data: reference frame (output of ``transform`` on training data).
        pred: frame to check (output of ``transform`` on new data).
        threshold: z-score limit.

    Returns:
        The flagged rows of *pred*, each at most once and in their original
        order; an empty frame when nothing is flagged.
    """
    reference = data[_score_column(data)].values
    candidate = pred[_score_column(pred)]

    mean_d = np.mean(reference)
    std_d = np.std(reference)
    max_score = np.max(reference)

    z_score = (candidate - mean_d) / std_d
    flagged = (z_score.abs() > threshold) & (candidate > max_score)
    # Boolean mask instead of DataFrame.append, which pandas 2.0 removed.
    return pred[flagged.to_numpy()]


# Model training
def train_model(data_train):
    """Fit one PCA model per feature set and score the training data.

    Returns:
        ``(pca1, pca2, res1, res2)``: the models fitted on the
        ``makedataset1`` and ``makedataset2`` features and the corresponding
        scored training frames.
    """
    x_train1 = makedataset1(data_train)
    x_train2 = makedataset2(data_train)
    x_train_pro1 = process(x_train1)
    x_train_pro2 = process(x_train2)
    pca1 = anomalyPCA(x_train_pro1)
    pca2 = anomalyPCA(x_train_pro2)
    res1 = transform(x_train_pro1, pca1, x_train1)
    res2 = transform(x_train_pro2, pca2, x_train2)
    return pca1, pca2, res1, res2


# Prediction
def prediction(data_test, pca1, pca2):
    """Score new data with the two fitted PCA models.

    Returns:
        ``(pred1, pred2)``: scored frames for the ``makedataset1`` and
        ``makedataset2`` feature sets.
    """
    x_test1 = makedataset1(data_test)
    x_test2 = makedataset2(data_test)
    # NOTE(review): process() fits a fresh scaler on the test data instead of
    # reusing the one fitted during training -- confirm this is intended.
    x_test_pro1 = process(x_test1)
    x_test_pro2 = process(x_test2)
    pred1 = transform(x_test_pro1, pca1, x_test1)
    pred2 = transform(x_test_pro2, pca2, x_test2)
    return pred1, pred2


# Final anomaly decision
def check_anomaly(outliers1, outliers2):
    """Keep the time buckets flagged by both models with 'SOC[%]' above 45.

    The two outlier frames are inner-joined on '时间'; rows whose
    'SOC[%]_x' is not above 45 are discarded and the duplicated '_y' columns
    for total voltage, cell-voltage spread and SOC are dropped.

    Args:
        outliers1: outliers from the first model (``detect_outliers`` output).
        outliers2: outliers from the second model.

    Returns:
        DataFrame of confirmed anomalies; an empty DataFrame when either
        input has no rows.
    """
    if len(outliers1) == 0 or len(outliers2) == 0:
        return pd.DataFrame()
    outliers = pd.merge(outliers1, outliers2, on='时间')
    outliers = outliers[outliers['SOC[%]_x'] > 45]
    return outliers.drop(['总电压[V]_y', '单体压差_y', 'SOC[%]_y'], axis=1)