# faultclass.py

import datetime
import random
from random import shuffle

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from keras.layers import Activation, Dense, Dropout, GRU, Input
from keras.models import Model
from keras.optimizers import adam_v2
#### Process1 - Prediction - Model1+Model2 ###
# Step1 Features
# Model1: drop status/diagnostic columns and all temperatures, keep pack-level
# voltage statistics (max/min cell voltage).
def features1(dataset2):
    dataset2 = dataset2.drop(['GSM信号','故障等级','故障代码','开关状态','绝缘电阻','外电压','总输出状态','上锁状态','加热状态','单体均衡状态','充电状态','SOH[%]','SOC[%]','总电流[A]'], axis=1, errors='ignore')
    cellvolt_list = [s for s in list(dataset2) if '单体电压' in s]   # per-cell voltage columns
    celltemp_name = [s for s in list(dataset2) if '温度' in s]       # all temperature columns
    dataset2 = dataset2.drop(celltemp_name, axis=1)
    dataset2['volt_max'] = dataset2[cellvolt_list].max(axis=1)
    dataset2['volt_min'] = dataset2[cellvolt_list].min(axis=1)
    dataset2 = dataset2.drop(cellvolt_list, axis=1)
    dataset2.reset_index(drop=True, inplace=True)
    return dataset2
# Model2: drop status/diagnostic columns and per-cell voltages, keep pack-level
# temperature statistics (max/min cell temperature and their difference).
def features2(dataset2):
    dataset2 = dataset2.drop(['GSM信号','故障等级','故障代码','开关状态','绝缘电阻','外电压','总输出状态','上锁状态','加热状态','单体均衡状态','充电状态','SOH[%]','SOC[%]','单体压差','总电压[V]'], axis=1, errors='ignore')
    cellvolt_list = [s for s in list(dataset2) if '单体电压' in s]    # per-cell voltage columns
    celltemp_name = [s for s in list(dataset2) if '单体温度' in s]    # per-cell temperature columns
    celltemp_name2 = [s for s in list(dataset2) if '其他温度' in s]   # other temperature probes
    dataset2 = dataset2.drop(cellvolt_list + celltemp_name2, axis=1)
    dataset2['temp_max'] = dataset2[celltemp_name].max(axis=1)
    dataset2['temp_min'] = dataset2[celltemp_name].min(axis=1)
    dataset2['temp_diff'] = dataset2['temp_max'] - dataset2['temp_min']
    dataset2 = dataset2.drop(celltemp_name, axis=1)
    dataset2.reset_index(drop=True, inplace=True)
    return dataset2
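
# Usage sketch for the two feature builders (hypothetical; 'bms_log.csv' and the
# variable names are illustrative only, assuming a raw BMS export whose columns
# match the names dropped above):
#   raw = pd.read_csv('bms_log.csv')
#   fea1 = features1(raw.copy())   # keeps 时间戳/sn/pack columns + volt_max, volt_min
#   fea2 = features2(raw.copy())   # keeps 时间戳/sn/pack columns + temp_max, temp_min, temp_diff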
# Step2 Splits: cut the stream into contiguous segments. A new segment starts
# whenever the gap between consecutive rows exceeds 600 s or the serial number
# ('sn') changes.
def split(df_bms_tot):
    df_bms_tot['split'] = 0
    for k in range(1, len(df_bms_tot)):
        timek = datetime.datetime.strptime(df_bms_tot.loc[k, '时间戳'], '%Y-%m-%d %H:%M:%S')
        timek1 = datetime.datetime.strptime(df_bms_tot.loc[k - 1, '时间戳'], '%Y-%m-%d %H:%M:%S')
        deltatime = (timek - timek1).total_seconds()
        if (deltatime > 600) or (df_bms_tot.loc[k, 'sn'] != df_bms_tot.loc[k - 1, 'sn']):
            df_bms_tot.loc[k, 'split'] = df_bms_tot.loc[k - 1, 'split'] + 1
        else:
            df_bms_tot.loc[k, 'split'] = df_bms_tot.loc[k - 1, 'split']
    return df_bms_tot
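
# Minimal sketch of the segmentation rule (toy data, for illustration):
#   df = pd.DataFrame({'时间戳': ['2021-01-01 00:00:00', '2021-01-01 00:00:10',
#                                '2021-01-01 01:00:00'],
#                      'sn': ['A', 'A', 'A']})
#   split(df)['split'].tolist()   # -> [0, 0, 1]  (the one-hour gap opens segment 1)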
# Step3 MakeDataset: resample every segment onto a regular time grid.
def makedataset(dataset):
    frames = []
    for split_id in sorted(set(dataset['split'])):   # avoid shadowing split() above
        set2 = dataset[dataset['split'] == split_id]
        set2.reset_index(drop=True, inplace=True)
        data_set = pd.DataFrame()
        start = set2.loc[0, '时间戳']
        end = set2.loc[len(set2) - 1, '时间戳']
        data_set['Time'] = pd.date_range(start=start, end=end, freq='S')  # one row per second
        data_set['Time'] = [str(x) for x in data_set['Time']]
        dfbms = pd.merge(data_set, set2, left_on='Time', right_on='时间戳', how='left')
        dfbms = dfbms.ffill().bfill()
        dfbms = dfbms.drop(['时间戳'], axis=1)
        # Truncate 'YYYY-MM-DD HH:MM:SS' to a 10 s key and keep the last row per
        # key -> one record every 10 s.
        dfbms['Time'] = [x[:18] + '0' for x in dfbms['Time']]
        dfbms.drop_duplicates(subset='Time', keep='last', inplace=True)
        frames.append(dfbms)
    df_bms = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    return df_bms
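
# Worked example of the 10 s grid: the 1 s grid is forward/backward filled, then
# '2021-01-01 00:00:17' is truncated to the key '2021-01-01 00:00:10', and
# keep='last' retains the latest second inside each 10 s bucket.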
# Step4 Scaler
def scaler_pred(df_bms, scaler):
    Xtest = df_bms.drop(['Time', 'sn', 'split'], axis=1)
    Xsc_colnames = list(Xtest.columns)
    Xtsc = pd.DataFrame(scaler.transform(np.array(Xtest)))
    Xtsc.columns = Xsc_colnames
    return Xtsc
# Step5 MakeIndex
def make_index(train):
    indextr = []
    for i in list(set(train['split'])):
        tr = train[train['split'] == i].index.tolist()
        indextr.append(min(tr))
    indextr = sorted(indextr)
    indextr.append(len(train))
    return indextr
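
# Example: for a 'split' column [0, 0, 1, 1, 1], make_index returns [0, 2, 5]:
# the first row index of each segment plus the total length, so each pair
# index[k-1]:index[k] slices out one segment.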
# Step6 CreateWindows (prediction: sliding windows, no labels)
def create_win_pred(X2, Xtest, index, time_steps=12):
    conf_parts = []
    a = []
    for k in range(1, len(index)):
        dataset = X2[index[k - 1]:index[k]].reset_index(drop=True)      # scaled features
        dataset2 = Xtest[index[k - 1]:index[k]].reset_index(drop=True)  # raw rows, for traceability
        if len(dataset) > time_steps:
            dataX = []
            for i in range(len(dataset) - time_steps):
                dataX.append(dataset.iloc[i:(i + time_steps)].values)
            conf_parts.append(dataset2.iloc[:len(dataset) - time_steps])
            a.append(np.array(dataX, dtype='float32'))
    aa = np.vstack(a) if a else []
    conf = pd.concat(conf_parts, ignore_index=True) if conf_parts else pd.DataFrame()
    return aa, conf
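
# Shape sketch (assumed numbers, for illustration): one segment of 100 rows with
# F scaled features and time_steps=12 yields aa of shape (88, 12, F), and conf
# keeps the first 88 unscaled rows so each window maps back to its start time.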
# Step7 Prediction: argmax over the class scores gives the predicted label.
def prediction(model, cc, conf, col):
    predict_dd = model.predict(cc)
    df_pred = pd.DataFrame(predict_dd, columns=col)
    conf['pred'] = df_pred.idxmax(axis=1)
    return conf
# Step8 Output: one summary row per detected fault class.
def makeres(res, end_time):
    rows = []
    result_faults = res[res['pred'] != '正常']
    for fault in set(result_faults['pred']):
        res_faults = result_faults[result_faults['pred'] == fault]
        res_faults.reset_index(drop=True, inplace=True)
        update_time = str(res_faults.loc[len(res_faults) - 1, 'Time'])
        end = datetime.datetime.strptime(update_time, '%Y-%m-%d %H:%M:%S')
        end_dt = datetime.datetime.strptime(str(end_time), '%Y-%m-%d %H:%M:%S')
        # Fault seen within 900 s of the end of the data: treat it as still open.
        if (end_dt - end).total_seconds() < 900:
            res_faults.loc[len(res_faults) - 1, 'Time'] = '0000-00-00 00:00:00'
        rows.append({'product_id': res_faults.loc[0, 'sn'],
                     'start_time': str(res_faults.loc[0, 'Time']),
                     'end_time': str(res_faults.loc[len(res_faults) - 1, 'Time']),
                     'fault_class': res_faults.loc[0, 'pred'],
                     'update_time': update_time})
    df_res = pd.DataFrame(rows, columns=['product_id', 'start_time', 'end_time', 'fault_class', 'update_time'])
    return df_res
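
# Convention note: an end_time of '0000-00-00 00:00:00' is a sentinel meaning
# "fault still open" -- the last faulty window lies within 900 s of the end of
# the data pull, so the fault cannot yet be declared closed.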
# Step9 Merge: reconcile the new detections ('result') against the previously
# stored ones ('result_final').
def arrange(result, result_final, start_time):
    result.reset_index(drop=True, inplace=True)
    result_final.reset_index(drop=True, inplace=True)
    update_parts = []
    res_new = result.copy()
    start_dt = datetime.datetime.strptime(str(start_time), '%Y-%m-%d %H:%M:%S')
    for fault in set(result_final['fault_class']):
        result0 = result_final[result_final['fault_class'] == fault].copy()
        result1 = result[result['fault_class'] == fault]
        if len(result1) > 0:
            st = datetime.datetime.strptime(str(result.loc[0, 'start_time']), '%Y-%m-%d %H:%M:%S')
            if (start_dt - st).total_seconds() < 900:
                # Same fault continues: carry the new end/update times over.
                # (.values sidesteps index alignment; assumes one row per fault class.)
                result0['end_time'] = result1['end_time'].values
                result0['update_time'] = result1['update_time'].values
            else:
                # The stored fault has ended: close it at its last update.
                result0['end_time'] = result0['update_time']
            update_parts.append(result0)
            res_new.drop(result1.index, inplace=True)
        else:
            result0['end_time'] = result0['update_time']
            update_parts.append(result0)
    res_update = pd.concat(update_parts, ignore_index=True) if update_parts else pd.DataFrame()
    return res_new, res_update
# Step10 Process: full prediction pipeline for one feature set.
def pred(data_fea, model, scaler, col, end_time, time_steps):
    df_res = pd.DataFrame()
    fea = split(data_fea)
    f = makedataset(fea)
    sc = scaler_pred(f, scaler)
    index = make_index(f)
    dataX, conf = create_win_pred(sc, f, index, time_steps=time_steps)
    if len(dataX) > 0:
        res = prediction(model, dataX, conf, col)
        df_res = makeres(res, end_time)
    return df_res
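
# End-to-end usage sketch (all names hypothetical; model1/scaler1 would come from
# the training process at the bottom of this file, and '故障A' is an illustrative
# class label):
#   fea = features1(raw.copy())                     # or features2 for Model2
#   res = pred(fea, model1, scaler1, col=['正常', '故障A'],
#              end_time='2021-01-01 12:00:00', time_steps=12)
#   # res: one row per detected fault class with start/end/update times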
#################################################################################################################################
#### Process1 - New Model ###
# Step1 Features Filter
def features_filtre(dataset2, cols):
    dataset2 = dataset2.drop(['GSM信号','故障等级','故障代码','开关状态','绝缘电阻','外电压','总输出状态','上锁状态','加热状态','单体均衡状态','充电状态','SOH[%]'], axis=1, errors='ignore')
    cellvolt_list = [s for s in list(dataset2) if '单体电压' in s]
    celltemp_name = [s for s in list(dataset2) if '单体温度' in s]
    celltemp_name2 = [s for s in list(dataset2) if '其他温度' in s]
    dataset2['volt_max'] = dataset2[cellvolt_list].max(axis=1)
    dataset2['volt_min'] = dataset2[cellvolt_list].min(axis=1)
    dataset2['volt_mean'] = round(dataset2[cellvolt_list].mean(axis=1), 3)   # per-row mean
    dataset2['volt_sigma'] = dataset2[cellvolt_list].std(axis=1, ddof=0)     # population std, as np.std
    cell_volt_max = dataset2[cellvolt_list].values.argmax(axis=1) + 1        # 1-based index of max cell
    cell_volt_min = dataset2[cellvolt_list].values.argmin(axis=1) + 1        # 1-based index of min cell
    # 1 if the max and min cells are physically adjacent (including the
    # wrap-around pair cell 1 / cell N), else 0.
    dataset2['mm_volt_cont'] = cell_volt_max - cell_volt_min
    dataset2['mm_volt_cont'] = [1 if (abs(x) == 1) or (abs(x) == len(cellvolt_list) - 1) else 0
                                for x in dataset2['mm_volt_cont']]
    dataset2 = dataset2.drop(cellvolt_list + celltemp_name2, axis=1)
    dataset2['temp_max'] = dataset2[celltemp_name].max(axis=1)
    dataset2['temp_min'] = dataset2[celltemp_name].min(axis=1)
    dataset2['temp_diff'] = dataset2['temp_max'] - dataset2['temp_min']
    dataset2 = dataset2.drop(celltemp_name, axis=1)
    datatest3 = dataset2[cols]
    datatest3.reset_index(drop=True, inplace=True)
    return datatest3
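
# mm_volt_cont example (96 cells assumed, for illustration): max at cell 5 and
# min at cell 4 gives |5-4| = 1 -> flag 1; max at cell 1 and min at cell 96 gives
# |1-96| = 95 = N-1 -> flag 1 (adjacent across the wrap-around); otherwise 0.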
# Step2 Data Filter: compare==0 keeps rows equal to the threshold, compare==1
# keeps rows above it, anything else keeps rows below it.
def data_filtre(datatest3, col_key, compare, threshold):
    if compare == 0:
        datatest4 = datatest3[datatest3[col_key] == threshold]
    elif compare == 1:
        datatest4 = datatest3[datatest3[col_key] > threshold]
    else:
        datatest4 = datatest3[datatest3[col_key] < threshold]
    datatest4.reset_index(drop=True, inplace=True)
    return datatest4
# Step3 Faults Pre-processing
def make_fault_set(dataset, cols, col_key, compare, threshold_filtre, fault_name):
    datatest3 = features_filtre(dataset, cols)
    datatest4 = data_filtre(datatest3, col_key, compare, threshold_filtre)
    df_tot = split(datatest4)
    df_bms = makedataset(df_tot)
    df_bms['fault_class'] = fault_name
    return df_bms
# Step4 Normal Pre-processing
def normalset(df_bms, cols):
    df_bms.drop(['Unnamed: 0'], axis=1, inplace=True, errors='ignore')  # stray CSV index column
    nor_fea1 = features_filtre(df_bms, cols)
    norfea1 = split(nor_fea1)
    normalf1 = makedataset(norfea1)
    normalf1['fault_class'] = '正常'
    return normalf1

def normalset2(df_bms1, df_bms2, df_bms3, df_bms4, df_bms5, df_bms6, cols):
    frames = [normalset(df, cols) for df in (df_bms1, df_bms2, df_bms3, df_bms4, df_bms5, df_bms6)]
    nor = pd.concat(frames)
    nor.reset_index(drop=True, inplace=True)
    return nor
# Step5 Resample: crudely rebalance the classes by sampling whole segments from
# whichever side is more than twice the size of the other.
def resample(nor, df_bms):
    if len(nor) > 2 * len(df_bms):
        sp = list(set(nor['split']))
        sp_ran = random.sample(sp, k=int(len(sp) * (len(df_bms) / len(nor))))
        nor = nor[nor['split'].isin(sp_ran)]
        nor.reset_index(drop=True, inplace=True)
    if 2 * len(nor) < len(df_bms):
        sp = list(set(df_bms['split']))
        sp_ran = random.sample(sp, k=int(len(sp) * (len(nor) / len(df_bms))))
        df_bms = df_bms[df_bms['split'].isin(sp_ran)]
        df_bms.reset_index(drop=True, inplace=True)
    return nor, df_bms
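
# Example (hypothetical sizes): with 10 000 normal rows and 2 000 fault rows, the
# first branch keeps about 2 000/10 000 = 20% of the normal segments, so the two
# classes end up of comparable size. Sampling whole 'split' segments rather than
# individual rows preserves the contiguous series the windowing steps rely on.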
# Step6 Shuffle Data: 80/20 train/test split at the serial-number level, so all
# rows of one battery land on the same side of the split.
def shuffle_data(nor, dataset_faults):
    sn_nor = list(set(nor['sn']))
    sn_fau = list(set(dataset_faults['sn']))
    shuffle(sn_nor)
    shuffle(sn_fau)
    cut_nor = int(0.8 * len(sn_nor))
    cut_fau = int(0.8 * len(sn_fau))
    train_parts = [nor[nor['sn'] == s] for s in sn_nor[:cut_nor]]
    test_parts = [nor[nor['sn'] == s] for s in sn_nor[cut_nor:]]
    train_parts += [dataset_faults[dataset_faults['sn'] == s] for s in sn_fau[:cut_fau]]
    test_parts += [dataset_faults[dataset_faults['sn'] == s] for s in sn_fau[cut_fau:]]
    newtrain = pd.concat(train_parts, ignore_index=True)
    newtest = pd.concat(test_parts, ignore_index=True)
    return newtrain, newtest

def shuffle_data2(dftrain):
    sp = list(set(dftrain['sn']))
    shuffle(sp)
    newtrain = pd.concat([dftrain[dftrain['sn'] == s] for s in sp], ignore_index=True)
    return newtrain
# Step7 X & Y
def xy(train):
    Xtrain = train.drop(['fault_class', 'Time', 'sn', 'split'], axis=1)
    Ytrain = train[['fault_class']]
    Ytrain2 = pd.get_dummies(Ytrain, columns=['fault_class'], prefix_sep='_')  # one-hot labels
    return Xtrain, Ytrain, Ytrain2
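
# Example: a fault_class column holding ['正常', '热失控'] ('热失控' is an
# illustrative label) becomes one-hot columns fault_class_正常 / fault_class_热失控
# via pd.get_dummies; that Ytrain2 layout is what create_win_train/create_win_test
# and the softmax head below expect.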
# Step8 Scaler
def scaler_train(Xtrain):
    Xsc_colnames = list(Xtrain.columns)
    scaler = StandardScaler()
    scaler.fit(Xtrain)  # store the training-set mean and standard deviation
    Xsc = pd.DataFrame(scaler.transform(np.array(Xtrain)))
    Xsc.columns = Xsc_colnames
    return Xsc, scaler

def scaler_test(Xtest, scaler):
    Xsc_colnames = list(Xtest.columns)
    Xtsc = pd.DataFrame(scaler.transform(np.array(Xtest)))
    Xtsc.columns = Xsc_colnames
    return Xtsc
# Step9 Create windows
def create_win_train(X2, Y2, index, time_steps=6):
    a, b = [], []
    for k in range(1, len(index)):
        dataset = X2[index[k - 1]:index[k]].reset_index(drop=True)
        datay = Y2[index[k - 1]:index[k]].reset_index(drop=True)
        if len(dataset) <= time_steps:
            continue  # segment too short to hold a single window
        dataX, dataY = [], []
        for i in range(len(dataset) - time_steps):
            dataX.append(dataset.iloc[i:(i + time_steps)].values)
            dataY.append(datay.iloc[i].values)  # label taken at the window start
        a.append(np.array(dataX, dtype='float32'))
        b.append(np.array(dataY))
    aa = np.vstack(a)
    bb = np.vstack(b)
    return aa, bb
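
# Window/label alignment: the label for window [i, i+time_steps) is the one at
# the window start, datay.iloc[i]. Shape sketch (assumed numbers): one segment of
# 50 rows with F features and time_steps=6 -> aa: (44, 6, F), bb: (44, nbr_class).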
def create_win_test(X2, Y2, Xtest, index, time_steps=12):
    a, b = [], []
    conf_parts = []
    for k in range(1, len(index)):
        dataset = X2[index[k - 1]:index[k]].reset_index(drop=True)
        datay = Y2[index[k - 1]:index[k]].reset_index(drop=True)
        dataset2 = Xtest[index[k - 1]:index[k]].reset_index(drop=True)
        if len(dataset) <= time_steps:
            continue
        dataX, dataY, win_step = [], [], []
        for i in range(len(dataset) - time_steps):
            win_step.append(i)
            dataX.append(dataset.iloc[i:(i + time_steps)].values)
            dataY.append(datay.iloc[i].values)
        test = dataset2.iloc[:len(dataset) - time_steps].copy()
        test['win'] = win_step
        test = pd.merge(test, datay, left_index=True, right_index=True)
        a.append(np.array(dataX, dtype='float32'))
        b.append(np.array(dataY))
        conf_parts.append(test)
    aa = np.vstack(a)
    bb = np.vstack(b)
    conf = pd.concat(conf_parts, ignore_index=True)
    return aa, bb, conf
# Step10 Create Model: a single GRU layer followed by a dropout-regularised
# dense classification head.
def modelGRU(time_steps, nbr_features, nbr_neurons, nbr_class, Xwin, Ywin, Xtwin, Ytwin,
             batch_size, epochs, dropout, lr, activation, loss, metrics):
    inputs = Input(shape=[time_steps, nbr_features])
    x = GRU(nbr_neurons, return_sequences=False, return_state=False)(inputs)
    x = Dropout(dropout)(x)
    x = Dense(nbr_class)(x)
    x = Dropout(dropout)(x)
    x = Activation(activation)(x)
    model = Model(inputs, x)
    model.compile(loss=loss, optimizer=adam_v2.Adam(lr), metrics=[metrics])
    model.fit(Xwin, Ywin, epochs=epochs, validation_data=(Xtwin, Ytwin),
              batch_size=batch_size, verbose=1, shuffle=True)
    return model
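
# Minimal call sketch (the hyper-parameter values are assumptions, for
# illustration only; the shapes come from the windowing functions above):
#   model = modelGRU(time_steps=6, nbr_features=Xwin.shape[2], nbr_neurons=64,
#                    nbr_class=Ywin.shape[1], Xwin=Xwin, Ywin=Ywin, Xtwin=Xtwin,
#                    Ytwin=Ytwin, batch_size=512, epochs=10, dropout=0.3, lr=1e-3,
#                    activation='softmax', loss='categorical_crossentropy',
#                    metrics='accuracy')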
# Step11 Process: full training pipeline, from class rebalancing to a fitted model.
def pre_model(nor, df_bms, time_steps, nbr_features, nbr_neurons, nbr_class, batch_size,
              epochs, dropout, lr, activation, loss):
    nor, df_bms = resample(nor, df_bms)
    newtrain, newtest = shuffle_data(nor, df_bms)
    train_sh = shuffle_data2(newtrain)
    test_sh = shuffle_data2(newtest)
    Xtrain, Ytrain, Ytrain2 = xy(train_sh)
    Xtest, Ytest, Ytest2 = xy(test_sh)
    Xsc, scaler = scaler_train(Xtrain)
    Xtsc = scaler_test(Xtest, scaler)
    indextr = make_index(train_sh)
    indexte = make_index(test_sh)
    Xwin, Ywin = create_win_train(Xsc, Ytrain2, indextr, time_steps=time_steps)
    Xtwin, Ytwin, conf = create_win_test(Xtsc, Ytest2, test_sh, indexte, time_steps=time_steps)
    model = modelGRU(time_steps=time_steps, nbr_features=nbr_features, nbr_neurons=nbr_neurons,
                     nbr_class=nbr_class, Xwin=Xwin, Ywin=Ywin, Xtwin=Xtwin, Ytwin=Ytwin,
                     batch_size=batch_size, epochs=epochs, dropout=dropout, lr=lr,
                     activation=activation, loss=loss, metrics='accuracy')
    test_loss, acc = model.evaluate(Xtwin, Ytwin)  # avoid shadowing the 'loss' argument
    return scaler, model, acc
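
# Training entry-point sketch (hypothetical values; nor and df_bms come from
# normalset2/make_fault_set above, and xy() drops 4 non-feature columns, hence
# nbr_features = nor.shape[1] - 4):
#   scaler, model, acc = pre_model(nor, df_bms, time_steps=6,
#                                  nbr_features=nor.shape[1] - 4, nbr_neurons=64,
#                                  nbr_class=2, batch_size=512, epochs=10,
#                                  dropout=0.3, lr=1e-3, activation='softmax',
#                                  loss='categorical_crossentropy')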