import datetime
import random
from random import shuffle

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from keras.layers import Activation, Dense, Dropout, GRU, Input
from keras.models import Model
from keras.optimizers import Adam


#### Process1 - Prediction - Model1 + Model2 ###

# Step1 Features
# Model1: keep pack-level signals plus per-row max/min cell voltage.
def features1(dataset2):
    dataset2 = dataset2.drop(['GSM信号', '故障等级', '故障代码', '开关状态', '绝缘电阻', '外电压',
                              '总输出状态', '上锁状态', '加热状态', '单体均衡状态', '充电状态',
                              'SOH[%]', 'SOC[%]', '总电流[A]'], axis=1, errors='ignore')
    cellvolt_list = [s for s in list(dataset2) if '单体电压' in s]
    celltemp_name = [s for s in list(dataset2) if '温度' in s]
    dataset2 = dataset2.drop(celltemp_name, axis=1)
    dataset2['volt_max'] = dataset2[cellvolt_list].max(axis=1)
    dataset2['volt_min'] = dataset2[cellvolt_list].min(axis=1)
    dataset2 = dataset2.drop(cellvolt_list, axis=1)
    dataset2.reset_index(drop=True, inplace=True)
    return dataset2


# Model2: keep per-row max/min/spread of the cell temperatures.
def features2(dataset2):
    dataset2 = dataset2.drop(['GSM信号', '故障等级', '故障代码', '开关状态', '绝缘电阻', '外电压',
                              '总输出状态', '上锁状态', '加热状态', '单体均衡状态', '充电状态',
                              'SOH[%]', 'SOC[%]', '单体压差', '总电压[V]'], axis=1, errors='ignore')
    cellvolt_list = [s for s in list(dataset2) if '单体电压' in s]
    celltemp_name = [s for s in list(dataset2) if '单体温度' in s]
    celltemp_name2 = [s for s in list(dataset2) if '其他温度' in s]
    dataset2 = dataset2.drop(cellvolt_list + celltemp_name2, axis=1)
    dataset2['temp_max'] = dataset2[celltemp_name].max(axis=1)
    dataset2['temp_min'] = dataset2[celltemp_name].min(axis=1)
    dataset2['temp_diff'] = dataset2['temp_max'] - dataset2['temp_min']
    dataset2 = dataset2.drop(celltemp_name, axis=1)
    dataset2.reset_index(drop=True, inplace=True)
    return dataset2


# Step2 Splits: start a new segment whenever the gap between consecutive rows
# exceeds 10 minutes or the serial number ('sn') changes.
def split(df_bms_tot):
    df_bms_tot['split'] = 0
    for k in range(1, len(df_bms_tot)):
        timek = datetime.datetime.strptime(df_bms_tot.loc[k, '时间戳'], '%Y-%m-%d %H:%M:%S')
        timek1 = datetime.datetime.strptime(df_bms_tot.loc[k - 1, '时间戳'], '%Y-%m-%d %H:%M:%S')
        deltatime = (timek - timek1).total_seconds()
        if (deltatime > 600) | (df_bms_tot.loc[k, 'sn'] != df_bms_tot.loc[k - 1, 'sn']):
            df_bms_tot.loc[k, 'split'] = df_bms_tot.loc[k - 1, 'split'] + 1
        else:
            df_bms_tot.loc[k, 'split'] = df_bms_tot.loc[k - 1, 'split']
    return df_bms_tot


# Step3 MakeDataset: resample every segment onto a regular time grid
# (one row per second, then thinned to one row per 10 seconds).
def makedataset(dataset):
    df_bms = pd.DataFrame()
    for seg in list(set(dataset['split'])):
        set2 = dataset[dataset['split'] == seg]
        set2.reset_index(drop=True, inplace=True)
        data_set = pd.DataFrame()
        start = set2.loc[0, '时间戳']
        end = set2.loc[len(set2) - 1, '时间戳']
        data_set['Time'] = pd.date_range(start=start, end=end, freq='S')  # one row per second
        data_set['Time'] = list(map(str, list(data_set['Time'])))
        dfbms = pd.merge(data_set, set2, left_on='Time', right_on='时间戳', how='left')
        dfbms = dfbms.ffill().bfill()
        dfbms = dfbms.drop(['时间戳'], axis=1)
        # Truncate timestamps to 10-second resolution and keep the last row of each bucket.
        dfbms['Time'] = list(map(lambda x: x[:18] + '0', list(dfbms['Time'])))
        dfbms.drop_duplicates(subset='Time', keep='last', inplace=True)
        df_bms = pd.concat([df_bms, dfbms])
    df_bms.reset_index(drop=True, inplace=True)
    return df_bms


# Step4 Scaler: apply the scaler fitted at training time to the prediction features.
def scaler_pred(df_bms, scaler):
    Xtest = df_bms.drop(['Time', 'sn', 'split'], axis=1)
    Xsc_colnames = list(Xtest.columns)
    Xtsc = scaler.transform(np.array(Xtest))
    Xtsc = pd.DataFrame(Xtsc)
    Xtsc.columns = Xsc_colnames
    return Xtsc


# Step5 MakeIndex: first row index of every segment plus the total length,
# so consecutive pairs bound each segment.
def make_index(train):
    indextr = []
    for i in list(set(train['split'])):
        tr = train[train['split'] == i].index.tolist()
        indextr.append(min(tr))
    indextr = sorted(indextr)
    indextr.append(len(train))
    return indextr


# Step5 CreateWindows
def create_win_pred(X2, Xtest, index, time_steps=12):
    """Slide a window of `time_steps` rows over each segment; also return the
    un-scaled rows aligned with each window for later reporting."""
    conf = pd.DataFrame()
    a = []
    for k in range(1, len(index)):
        dataset = X2[index[k - 1]:index[k]].reset_index(drop=True)
        dataset2 = Xtest[index[k - 1]:index[k]].reset_index(drop=True)
        if len(dataset) > time_steps:
            dataX = []
            for i in range(len(dataset) - time_steps):
                v1 = dataset.iloc[i:(i + time_steps)].values
                dataX.append(v1)
            test = dataset2.iloc[:len(dataset) - time_steps]
            dataX2 = np.array(dataX, dtype='float32')
            conf = pd.concat([conf, test])
            a.append(dataX2)
    if len(a) > 0:
        aa = np.vstack(a)
    else:
        aa = []
    conf.reset_index(drop=True, inplace=True)
    return aa, conf


# Step6 Prediction: argmax over the class probabilities gives the predicted label.
def prediction(model, cc, conf, col):
    predict_dd = model.predict(cc)
    df_pred = pd.DataFrame(predict_dd)
    df_pred.columns = col
    conf['pred'] = df_pred.idxmax(axis=1)
    return conf


# Step7 Output: one row per predicted fault class, with start/end/update times.
def makeres(res, end_time):
    df_res = pd.DataFrame(columns=['product_id', 'start_time', 'end_time', 'fault_class', 'update_time'])
    result_faults = res[res['pred'] != '正常']  # '正常' = normal (no fault)
    list_faults = list(set(list(result_faults['pred'])))
    for fault in list_faults:
        res_faults = result_faults[result_faults['pred'] == fault]
        res_faults.reset_index(drop=True, inplace=True)
        update_time = str(res_faults.loc[len(res_faults) - 1, 'Time'])
        end = datetime.datetime.strptime(str(res_faults.loc[len(res_faults) - 1, 'Time']), '%Y-%m-%d %H:%M:%S')
        end_dt = datetime.datetime.strptime(str(end_time), '%Y-%m-%d %H:%M:%S')
        # A fault still active within 15 minutes of the batch end gets the
        # sentinel end time '0000-00-00 00:00:00' (still ongoing).
        if (end_dt - end).total_seconds() < 900:
            res_faults.loc[len(res_faults) - 1, 'Time'] = '0000-00-00 00:00:00'
        df_res = pd.concat([df_res, pd.DataFrame({'product_id': [res_faults.loc[0, 'sn']],
                                                  'start_time': [str(res_faults.loc[0, 'Time'])],
                                                  'end_time': [str(res_faults.loc[len(res_faults) - 1, 'Time'])],
                                                  'fault_class': [res_faults.loc[0, 'pred']],
                                                  'update_time': [update_time]})])
    return df_res


# Step7 Merge: reconcile the faults found in this batch (`result`) with the
# faults already stored (`result_final`).
def arrange(result, result_final, start_time):
    result.reset_index(drop=True, inplace=True)
    result_final.reset_index(drop=True, inplace=True)
    list_faults = list(set(list(result_final['fault_class'])))
    res_update = pd.DataFrame()
    res_new = result.copy()
    start_time = datetime.datetime.strptime(str(start_time), '%Y-%m-%d %H:%M:%S')
    for fault in list_faults:
        result0 = result_final[result_final['fault_class'] == fault]
        result1 = result[result['fault_class'] == fault]
        if len(result1) > 0:
            st = datetime.datetime.strptime(str(result.loc[0, 'start_time']), '%Y-%m-%d %H:%M:%S')
            if (start_time - st).total_seconds() < 900:
                # Stored fault continues: extend it with the new end/update times.
                # (.values avoids index misalignment between the two frames.)
                result0['end_time'] = result1['end_time'].values
                result0['update_time'] = result1['update_time'].values
                res_update = pd.concat([res_update, result0])
                res_new.drop(result1.index, inplace=True)
            else:
                # Stored fault ended: close it at its last update time.
                result0['end_time'] = result0['update_time']
                res_update = pd.concat([res_update, result0])
                res_new.drop(result1.index, inplace=True)
        else:
            result0['end_time'] = result0['update_time']
            res_update = pd.concat([res_update, result0])
    return res_new, res_update


def arrange2(dataorg, df_res, start_time, fault_name):
    res_new = df_res.copy()
    res_update = pd.DataFrame()
    if len(dataorg) > 0:
        dataorg = dataorg[dataorg['fault_class'] == fault_name]
        res_new, res_update = arrange(df_res, dataorg, start_time)
    return res_new, res_update


# Step8 Process: full prediction pipeline for one batch of feature data.
def pred(data_fea, model, scaler, col, end_time, time_steps):
    df_res = pd.DataFrame()
    fea = split(data_fea)
    f = makedataset(fea)
    sc = scaler_pred(f, scaler)
    index = make_index(f)
    dataX, conf = create_win_pred(sc, f, index, time_steps=time_steps)
    if len(dataX) > 0:
        res = prediction(model, dataX, conf, col)
        df_res = makeres(res, end_time)
    return df_res
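# Usage sketch for the prediction pipeline above. Everything here is an
# assumption for illustration, not from the original source: the artifact
# paths ('model1.h5', 'scaler1.pkl'), the input CSV, the class names in `col`
# (order must match the training one-hot columns), and the batch end time.
def _demo_pred():
    import pickle
    from keras.models import load_model
    model1 = load_model('model1.h5')  # hypothetical trained-model path
    with open('scaler1.pkl', 'rb') as fh:  # hypothetical pickled StandardScaler
        scaler1 = pickle.load(fh)
    raw = pd.read_csv('bms_batch.csv')  # hypothetical batch of raw BMS rows
    col = ['正常', 'fault_A', 'fault_B']  # hypothetical classes; '正常' = normal
    data_fea = features1(raw)
    return pred(data_fea, model1, scaler1, col,
                end_time='2020-01-01 12:00:00', time_steps=12)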
#################################################################################################################################

#### Process1 - New Model ###

# Step1 Features Filtre: pack-level signals plus voltage/temperature statistics.
def features_filtre(dataset2, cols):
    dataset2 = dataset2.drop(['GSM信号', '故障等级', '故障代码', '开关状态', '绝缘电阻', '外电压',
                              '总输出状态', '上锁状态', '加热状态', '单体均衡状态', '充电状态',
                              'SOH[%]'], axis=1, errors='ignore')
    cellvolt_list = [s for s in list(dataset2) if '单体电压' in s]
    celltemp_name = [s for s in list(dataset2) if '单体温度' in s]
    celltemp_name2 = [s for s in list(dataset2) if '其他温度' in s]
    dataset2['volt_max'] = dataset2[cellvolt_list].max(axis=1)
    dataset2['volt_min'] = dataset2[cellvolt_list].min(axis=1)
    dataset2['volt_mean'] = round(dataset2[cellvolt_list].mean(axis=1), 3)  # row-wise mean
    dataset2['volt_sigma'] = list(dataset2[cellvolt_list].apply(lambda x: np.std(x.values), axis=1))
    cell_volt_max = list(dataset2[cellvolt_list].apply(lambda x: np.argmax(x.values) + 1, axis=1))
    cell_volt_min = list(dataset2[cellvolt_list].apply(lambda x: np.argmin(x.values) + 1, axis=1))
    # 1 if the max-voltage and min-voltage cells are physically adjacent
    # (including the first/last wrap-around), else 0.
    dataset2['mm_volt_cont'] = list(np.array(cell_volt_max) - np.array(cell_volt_min))
    dataset2['mm_volt_cont'] = list(map(lambda x: 1 if (abs(x) == 1) | (abs(x) == len(cellvolt_list) - 1) else 0,
                                        list(dataset2['mm_volt_cont'])))
    dataset2 = dataset2.drop(cellvolt_list + celltemp_name2, axis=1)
    dataset2['temp_max'] = dataset2[celltemp_name].max(axis=1)
    dataset2['temp_min'] = dataset2[celltemp_name].min(axis=1)
    dataset2['temp_diff'] = dataset2['temp_max'] - dataset2['temp_min']
    dataset2 = dataset2.drop(celltemp_name, axis=1)
    datatest3 = dataset2[cols]
    datatest3.reset_index(drop=True, inplace=True)
    return datatest3


# Step2 Data Filtre: keep rows where col_key ==, > or < threshold
# (compare = 0, 1, anything else respectively).
def data_filtre(datatest3, col_key, compare, threshold):
    if compare == 0:
        datatest4 = datatest3[datatest3[col_key] == threshold]
    elif compare == 1:
        datatest4 = datatest3[datatest3[col_key] > threshold]
    else:
        datatest4 = datatest3[datatest3[col_key] < threshold]
    datatest4.reset_index(drop=True, inplace=True)
    return datatest4


# Step3 Resample: balance normal ('nor') and fault ('df_bms') data by sampling
# whole segments. The second branch was lost to extraction; it is a minimal
# reconstruction (assumption) mirroring the surviving first branch.
def resample(nor, df_bms):
    if len(nor) > 2 * len(df_bms):
        sp = list(set(list(nor['split'])))
        sp_ran = random.sample(sp, k=int(len(sp) * (len(df_bms) / len(nor))))
        nor = nor[nor['split'].isin(sp_ran)]
        nor.reset_index(drop=True, inplace=True)
    if 2 * len(nor) < len(df_bms):  # reconstructed: downsample the fault segments instead
        sp = list(set(list(df_bms['split'])))
        sp_ran = random.sample(sp, k=int(len(sp) * (len(nor) / len(df_bms))))
        df_bms = df_bms[df_bms['split'].isin(sp_ran)]
        df_bms.reset_index(drop=True, inplace=True)
    return nor, df_bms


# Steps 4-8 (reconstructed): the original bodies of shuffle_data, shuffle_data2,
# xy, scaler_train and scaler_test were lost to extraction. The sketches below
# are inferred only from how pre_model calls them (signatures and return
# shapes); the exact original logic may differ.

def shuffle_data(nor, df_bms):
    """Pool normal + fault segments and split into train/test sets
    (80/20 by segment is an assumption)."""
    data = pd.concat([nor, df_bms])
    data.reset_index(drop=True, inplace=True)
    sp = list(set(data['split']))
    shuffle(sp)
    cut = int(len(sp) * 0.8)
    newtrain = data[data['split'].isin(sp[:cut])]
    newtest = data[data['split'].isin(sp[cut:])]
    newtrain.reset_index(drop=True, inplace=True)
    newtest.reset_index(drop=True, inplace=True)
    return newtrain, newtest


def shuffle_data2(data):
    """Shuffle the order of the segments while keeping each segment contiguous."""
    sp = list(set(data['split']))
    shuffle(sp)
    out = pd.concat([data[data['split'] == s] for s in sp])
    out.reset_index(drop=True, inplace=True)
    return out


def xy(data):
    """Split into features, label column, and one-hot label frame.
    The label column name 'fault_class' is an assumption."""
    Y = data['fault_class']
    X = data.drop(['Time', 'sn', 'split', 'fault_class'], axis=1, errors='ignore')
    Y2 = pd.get_dummies(Y)  # one column per class, row-wise one-hot
    return X, Y, Y2


def scaler_train(Xtrain):
    """Fit a StandardScaler on the training features."""
    scaler = StandardScaler()
    Xsc = pd.DataFrame(scaler.fit_transform(np.array(Xtrain)), columns=list(Xtrain.columns))
    return Xsc, scaler


def scaler_test(Xtest, scaler):
    """Apply the fitted scaler to the test features."""
    Xtsc = pd.DataFrame(scaler.transform(np.array(Xtest)), columns=list(Xtest.columns))
    return Xtsc


# Step9 CreateWindows: the head of create_win_train was lost to extraction and
# is reconstructed from its surviving loop body and from create_win_test below.
def create_win_train(X2, Y2, index, time_steps=12):
    a, b = [], []
    for k in range(1, len(index)):
        dataset = X2[index[k - 1]:index[k]].reset_index(drop=True)
        datay = Y2[index[k - 1]:index[k]].reset_index(drop=True)
        if len(dataset) > time_steps:
            dataX, dataY = [], []
            for i in range(len(dataset) - time_steps):
                v1 = dataset.iloc[i:(i + time_steps)].values
                v2 = datay.iloc[i].values
                dataX.append(v1)
                dataY.append(v2)
            dataX2 = np.array(dataX, dtype='float32')
            dataY2 = np.array(dataY)
        else:
            continue
        a.append(dataX2)
        b.append(dataY2)
    aa = np.vstack(a)
    bb = np.vstack(b)
    return aa, bb


def create_win_test(X2, Y2, Xtest, index, time_steps=12):
    a, b = [], []
    conf = pd.DataFrame()
    for k in range(1, len(index)):
        dataset = X2[index[k - 1]:index[k]].reset_index(drop=True)
        datay = Y2[index[k - 1]:index[k]].reset_index(drop=True)
        dataset2 = Xtest[index[k - 1]:index[k]].reset_index(drop=True)
        if len(dataset) > time_steps:
            dataX, dataY = [], []
            win_step = []
            for i in range(len(dataset) - time_steps):
                win_step.append(i)
                v1 = dataset.iloc[i:(i + time_steps)].values
                v2 = datay.iloc[i].values
                dataX.append(v1)
                dataY.append(v2)
            test = dataset2.iloc[:len(dataset) - time_steps].copy()
            test['win'] = win_step
            test = pd.merge(test, datay, left_index=True, right_index=True)
            dataX2 = np.array(dataX, dtype='float32')
            dataY2 = np.array(dataY)
        else:
            continue
        a.append(dataX2)
        b.append(dataY2)
        conf = pd.concat([conf, test])
    aa = np.vstack(a)
    bb = np.vstack(b)
    conf.reset_index(drop=True, inplace=True)
    return aa, bb, conf


# Step10 Create Model
def modelGRU(time_steps, nbr_features, nbr_neurons, nbr_class, Xwin, Ywin, Xtwin, Ytwin,
             batch_size, epochs, dropout, lr, activation, loss, metrics):
    # Single GRU layer over the window, then a dense classification head.
    inputs = Input(shape=[time_steps, nbr_features])
    x = GRU(nbr_neurons, return_sequences=False, return_state=False)(inputs)
    x = Dropout(dropout)(x)
    x = Dense(nbr_class)(x)
    x = Dropout(dropout)(x)
    x = Activation(activation)(x)
    model = Model(inputs, x)
    model.compile(loss=loss, optimizer=Adam(learning_rate=lr), metrics=[metrics])
    model.fit(Xwin, Ywin, epochs=epochs, validation_data=(Xtwin, Ytwin),
              batch_size=batch_size, verbose=1, shuffle=True)
    return model


# Step11 Process: full training pipeline, from raw segments to a fitted scaler,
# a trained GRU and its test accuracy.
def pre_model(nor, df_bms, time_steps, nbr_features, nbr_neurons, nbr_class,
              batch_size, epochs, dropout, lr, activation, loss):
    nor, df_bms = resample(nor, df_bms)
    newtrain, newtest = shuffle_data(nor, df_bms)
    train_sh = shuffle_data2(newtrain)
    test_sh = shuffle_data2(newtest)
    Xtrain, Ytrain, Ytrain2 = xy(train_sh)
    Xtest, Ytest, Ytest2 = xy(test_sh)
    Xsc, scaler = scaler_train(Xtrain)
    Xtsc = scaler_test(Xtest, scaler)
    indextr = make_index(train_sh)
    indexte = make_index(test_sh)
    Xwin, Ywin = create_win_train(Xsc, Ytrain2, indextr, time_steps=time_steps)
    Xtwin, Ytwin, conf = create_win_test(Xtsc, Ytest2, test_sh, indexte, time_steps=time_steps)
    model = modelGRU(time_steps=time_steps, nbr_features=nbr_features, nbr_neurons=nbr_neurons,
                     nbr_class=nbr_class, Xwin=Xwin, Ywin=Ywin, Xtwin=Xtwin, Ytwin=Ytwin,
                     batch_size=batch_size, epochs=epochs, dropout=dropout, lr=lr,
                     activation=activation, loss=loss, metrics='accuracy')
    loss, acc = model.evaluate(Xtwin, Ytwin)
    return scaler, model, acc
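# Usage sketch for the training pipeline above. All hyperparameter values are
# assumptions for illustration, not values from the original source; `nor` and
# `df_bms` are assumed to be segmented, labeled frames produced upstream.
def _demo_train(nor, df_bms):
    # nor: normal segments, df_bms: fault segments, both already passed through
    # split()/makedataset() and carrying a label column (see xy above).
    scaler, model, acc = pre_model(nor, df_bms,
                                   time_steps=12,    # window length, matches create_win_*
                                   nbr_features=10,  # hypothetical feature count
                                   nbr_neurons=64,   # hypothetical GRU width
                                   nbr_class=3,      # hypothetical number of classes
                                   batch_size=128, epochs=10, dropout=0.2, lr=1e-3,
                                   activation='softmax', loss='categorical_crossentropy')
    print('test accuracy:', acc)
    return scaler, model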