# faultclass.py

import datetime
import random
from random import shuffle

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
# GRU now lives in keras.layers (keras.layers.recurrent was removed);
# Adam replaces the old adam_v2 module path.
from keras.layers import GRU, Activation, Dense, Dropout, Input
from keras.models import Model
from keras.optimizers import Adam
#### Process 1 - Prediction - Model1 + Model2 ####
# Step 1: Features
# Model 1
def features1(dataset2):
    # Drop status/diagnostic columns that are not used as model features.
    dataset2 = dataset2.drop(['GSM信号','故障等级','故障代码','开关状态','绝缘电阻','外电压','总输出状态','上锁状态',
                              '加热状态','单体均衡状态','充电状态','SOH[%]','SOC[%]','总电流[A]'],
                             axis=1, errors='ignore')
    cellvolt_list = [s for s in list(dataset2) if '单体电压' in s]   # cell-voltage columns
    celltemp_name = [s for s in list(dataset2) if '温度' in s]       # temperature columns
    dataset2 = dataset2.drop(celltemp_name, axis=1)
    # Keep only the per-row max/min of the cell voltages.
    dataset2['volt_max'] = dataset2[cellvolt_list].max(axis=1)
    dataset2['volt_min'] = dataset2[cellvolt_list].min(axis=1)
    dataset2 = dataset2.drop(cellvolt_list, axis=1)
    dataset2.reset_index(drop=True, inplace=True)
    return dataset2
# Model 2
def features2(dataset2):
    dataset2 = dataset2.drop(['GSM信号','故障等级','故障代码','开关状态','绝缘电阻','外电压','总输出状态','上锁状态',
                              '加热状态','单体均衡状态','充电状态','SOH[%]','SOC[%]','单体压差','总电压[V]'],
                             axis=1, errors='ignore')
    cellvolt_list = [s for s in list(dataset2) if '单体电压' in s]    # cell-voltage columns
    celltemp_name = [s for s in list(dataset2) if '单体温度' in s]    # cell-temperature columns
    celltemp_name2 = [s for s in list(dataset2) if '其他温度' in s]   # other-temperature columns
    dataset2 = dataset2.drop(cellvolt_list + celltemp_name2, axis=1)
    # Keep max/min/spread of the cell temperatures.
    dataset2['temp_max'] = dataset2[celltemp_name].max(axis=1)
    dataset2['temp_min'] = dataset2[celltemp_name].min(axis=1)
    dataset2['temp_diff'] = dataset2['temp_max'] - dataset2['temp_min']
    dataset2 = dataset2.drop(celltemp_name, axis=1)
    dataset2.reset_index(drop=True, inplace=True)
    return dataset2
# Step 2: Split into continuous segments
def split(df_bms_tot):
    # A new segment starts whenever the gap between consecutive rows exceeds
    # 600 s or the device serial number ('sn') changes.
    df_bms_tot['split'] = 0
    for k in range(1, len(df_bms_tot)):
        timek = datetime.datetime.strptime(df_bms_tot.loc[k, '时间戳'], '%Y-%m-%d %H:%M:%S')
        timek1 = datetime.datetime.strptime(df_bms_tot.loc[k - 1, '时间戳'], '%Y-%m-%d %H:%M:%S')
        deltatime = (timek - timek1).total_seconds()
        if (deltatime > 600) or (df_bms_tot.loc[k, 'sn'] != df_bms_tot.loc[k - 1, 'sn']):
            df_bms_tot.loc[k, 'split'] = df_bms_tot.loc[k - 1, 'split'] + 1
        else:
            df_bms_tot.loc[k, 'split'] = df_bms_tot.loc[k - 1, 'split']
    return df_bms_tot
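# Illustrative sketch (not part of the pipeline): how split() numbers
# segments on a tiny made-up frame. The column names match what split()
# expects; the values are hypothetical.
def _demo_split():
    df = pd.DataFrame({'时间戳': ['2021-01-01 00:00:00',
                                  '2021-01-01 00:00:10',
                                  '2021-01-01 01:00:00'],
                       'sn': ['A1', 'A1', 'A1']})
    # The third row is more than 600 s after the second, so it starts
    # a new segment: expected [0, 0, 1].
    return split(df)['split'].tolist()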
# Step 3: Make dataset: resample each segment to a regular time series
def makedataset(dataset):
    df_bms = pd.DataFrame()
    for split_id in list(set(dataset['split'])):
        set2 = dataset[dataset['split'] == split_id].copy()
        set2.reset_index(drop=True, inplace=True)
        data_set = pd.DataFrame()
        start = set2.loc[0, '时间戳']
        end = set2.loc[len(set2) - 1, '时间戳']
        data_set['Time'] = pd.date_range(start=start, end=end, freq='s')  # one row per second
        data_set['Time'] = list(map(str, list(data_set['Time'])))
        dfbms = pd.merge(data_set, set2, left_on='Time', right_on='时间戳', how='left')
        dfbms = dfbms.ffill().bfill()
        dfbms = dfbms.drop(['时间戳'], axis=1)
        # Truncate the seconds to the tens digit, then deduplicate:
        # keeps one row per 10-second bucket.
        dfbms['Time'] = list(map(lambda x: x[:18] + '0', list(dfbms['Time'])))
        dfbms.drop_duplicates(subset='Time', keep='last', inplace=True)
        df_bms = pd.concat([df_bms, dfbms])
    df_bms.reset_index(drop=True, inplace=True)
    return df_bms
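# Illustrative sketch: two readings 30 s apart become four rows, one per
# 10-second bucket, after second-level interpolation and deduplication.
# The frame is hypothetical; 'volt_max' stands in for any feature column.
def _demo_makedataset():
    df = pd.DataFrame({'时间戳': ['2021-01-01 00:00:00', '2021-01-01 00:00:30'],
                       'sn': ['A1', 'A1'],
                       'volt_max': [4.1, 4.2],
                       'split': [0, 0]})
    # Expected Time values: 00:00:00, 00:00:10, 00:00:20, 00:00:30.
    return makedataset(df)['Time'].tolist()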
# Step 4: Scaler
def scaler_pred(df_bms, scaler):
    # Apply the scaler fitted at training time; column order must match training.
    Xtest = df_bms.drop(['Time', 'sn', 'split'], axis=1)
    Xsc_colnames = list(Xtest.columns)
    Xtsc = pd.DataFrame(scaler.transform(np.array(Xtest)), columns=Xsc_colnames)
    return Xtsc
# Step 5: Make index
def make_index(train):
    # Start offset of every segment, plus the total length as a sentinel,
    # so index[k-1]:index[k] slices out segment k.
    indextr = []
    for i in list(set(train['split'])):
        tr = train[train['split'] == i].index.tolist()
        indextr.append(min(tr))
    indextr = sorted(indextr)
    indextr.append(len(train))
    return indextr
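# Illustrative sketch: segments of 3 and 2 rows yield [0, 3, 5], so
# index[k-1]:index[k] recovers each segment.
def _demo_make_index():
    df = pd.DataFrame({'split': [0, 0, 0, 1, 1]})
    return make_index(df)   # expected [0, 3, 5]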
# Step 6: Create windows for prediction
def create_win_pred(X2, Xtest, index, time_steps=12):
    conf = pd.DataFrame()
    a = []
    for k in range(1, len(index)):
        dataset = X2[index[k - 1]:index[k]].reset_index(drop=True)
        dataset2 = Xtest[index[k - 1]:index[k]].reset_index(drop=True)
        # Segments shorter than time_steps cannot produce a full window.
        if len(dataset) > time_steps:
            dataX = []
            for i in range(len(dataset) - time_steps):
                v1 = dataset.iloc[i:(i + time_steps)].values
                dataX.append(v1)
            # Keep the metadata rows that have a full window ahead of them.
            test = dataset2.iloc[:len(dataset) - time_steps]
            dataX2 = np.array(dataX, dtype='float32')
            conf = pd.concat([conf, test])
            a.append(dataX2)
    if len(a) > 0:
        aa = np.vstack(a)
    else:
        aa = []
    conf.reset_index(drop=True, inplace=True)
    return aa, conf
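# Illustrative sketch of the window shapes, using random stand-in data:
# 20 rows in one segment with time_steps=12 give 8 windows of 12 rows each,
# plus 8 matching metadata rows.
def _demo_create_win_pred():
    X2 = pd.DataFrame(np.random.rand(20, 3), columns=['a', 'b', 'c'])
    index = [0, 20]   # a single segment spanning all rows
    aa, conf = create_win_pred(X2, X2.copy(), index, time_steps=12)
    return aa.shape, len(conf)   # expected ((8, 12, 3), 8)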
# Step 7: Prediction
def prediction(model, cc, conf, col):
    predict_dd = model.predict(cc)
    df_pred = pd.DataFrame(predict_dd, columns=col)
    # The predicted class is the column with the highest probability.
    conf['pred'] = df_pred.idxmax(axis=1)
    return conf
# Step 8: Output
def makeres(res, end_time):
    df_res = pd.DataFrame(columns=['product_id', 'start_time', 'end_time', 'fault_class', 'update_time'])
    result_faults = res[res['pred'] != '正常']                 # drop rows predicted as normal
    list_faults = list(set(list(result_faults['pred'])))
    end_time = datetime.datetime.strptime(str(end_time), '%Y-%m-%d %H:%M:%S')
    for fault in list_faults:
        res_faults = result_faults[result_faults['pred'] == fault].copy()
        res_faults.reset_index(drop=True, inplace=True)
        update_time = str(res_faults.loc[len(res_faults) - 1, 'Time'])
        end = datetime.datetime.strptime(update_time, '%Y-%m-%d %H:%M:%S')
        # A fault last seen within 15 minutes of the batch end is still
        # ongoing: mark it with a sentinel end time.
        if (end_time - end).total_seconds() < 900:
            res_faults.loc[len(res_faults) - 1, 'Time'] = '0000-00-00 00:00:00'
        df_res = pd.concat([df_res, pd.DataFrame({
            'product_id': [res_faults.loc[0, 'sn']],
            'start_time': [str(res_faults.loc[0, 'Time'])],
            'end_time': [str(res_faults.loc[len(res_faults) - 1, 'Time'])],
            'fault_class': [res_faults.loc[0, 'pred']],
            'update_time': [update_time]})])
    return df_res
# Step 9: Merge with previously reported faults
def arrange(result, result_final, start_time):
    # result: faults from the current batch; result_final: open faults
    # reported earlier. A previous fault is extended when the new batch
    # starts within 15 minutes of it, otherwise it is closed.
    result.reset_index(drop=True, inplace=True)
    result_final.reset_index(drop=True, inplace=True)
    list_faults = list(set(list(result_final['fault_class'])))
    res_update = pd.DataFrame()
    res_new = result.copy()
    start_time = datetime.datetime.strptime(str(start_time), '%Y-%m-%d %H:%M:%S')
    for fault in list_faults:
        result0 = result_final[result_final['fault_class'] == fault].copy()
        result1 = result[result['fault_class'] == fault]
        st = datetime.datetime.strptime(str(result.loc[0, 'start_time']), '%Y-%m-%d %H:%M:%S')
        if len(result1) > 0:
            if (start_time - st).total_seconds() < 900:
                # Same fault continuing: carry the new end/update times over.
                # Broadcast the first new row; the original assignment relied
                # on index alignment and could silently produce NaN.
                result0['end_time'] = result1['end_time'].iloc[0]
                result0['update_time'] = result1['update_time'].iloc[0]
            else:
                # Previous fault ended: close it at its last update time.
                result0['end_time'] = result0['update_time']
            res_new.drop(result1.index, inplace=True)
        else:
            result0['end_time'] = result0['update_time']
        res_update = pd.concat([res_update, result0])
    return res_new, res_update
def arrange2(dataorg, df_res, start_time, fault_name):
    # Same merge as arrange(), restricted to a single fault class.
    res_new = df_res.copy()
    res_update = pd.DataFrame()
    if len(dataorg) > 0:
        dataorg = dataorg[dataorg['fault_class'] == fault_name]
        res_new, res_update = arrange(df_res, dataorg, start_time)
    return res_new, res_update
# Step 10: Full prediction pipeline
def pred(data_fea, model, scaler, col, end_time, time_steps):
    df_res = pd.DataFrame()
    fea = split(data_fea)
    f = makedataset(fea)
    sc = scaler_pred(f, scaler)
    index = make_index(f)
    # `conf` renamed from `pred` to stop shadowing this function's name.
    dataX, conf = create_win_pred(sc, f, index, time_steps=time_steps)
    if len(dataX) > 0:
        res = prediction(model, dataX, conf, col)
        df_res = makeres(res, end_time)
    return df_res
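# Usage sketch for Process 1, assuming `model`, `scaler`, and the class list
# `col` come from an earlier training run; all names and values here are
# hypothetical:
#   df_res = pred(data_fea=raw_bms_frame, model=model, scaler=scaler,
#                 col=['fault_A', '正常'], end_time='2021-01-01 12:00:00',
#                 time_steps=12)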
#################################################################################################################################
#### Process 1 - New Model ####
# Step 1: Feature filter
def features_filtre(dataset2, cols):
    dataset2 = dataset2.drop(['GSM信号','故障等级','故障代码','开关状态','绝缘电阻','外电压','总输出状态','上锁状态',
                              '加热状态','单体均衡状态','充电状态','SOH[%]'],
                             axis=1, errors='ignore')
    cellvolt_list = [s for s in list(dataset2) if '单体电压' in s]    # cell-voltage columns
    celltemp_name = [s for s in list(dataset2) if '单体温度' in s]    # cell-temperature columns
    celltemp_name2 = [s for s in list(dataset2) if '其他温度' in s]   # other-temperature columns
    dataset2['volt_max'] = dataset2[cellvolt_list].max(axis=1)
    dataset2['volt_min'] = dataset2[cellvolt_list].min(axis=1)
    dataset2['volt_mean'] = round(dataset2[cellvolt_list].mean(axis=1), 3)  # row-wise mean
    dataset2['volt_sigma'] = list(dataset2[cellvolt_list].apply(lambda x: np.std(x.values), axis=1))
    # 1-based indices of the cells with the max/min voltage.
    cell_volt_max = list(dataset2[cellvolt_list].apply(lambda x: np.argmax(x.values) + 1, axis=1))
    cell_volt_min = list(dataset2[cellvolt_list].apply(lambda x: np.argmin(x.values) + 1, axis=1))
    dataset2['mm_volt_cont'] = list(np.array(cell_volt_max) - np.array(cell_volt_min))
    # 1 if the max- and min-voltage cells are adjacent (including the wrap-around pair), else 0.
    dataset2['mm_volt_cont'] = list(map(lambda x: 1 if (abs(x) == 1) or (abs(x) == len(cellvolt_list) - 1) else 0,
                                        list(dataset2['mm_volt_cont'])))
    dataset2 = dataset2.drop(cellvolt_list + celltemp_name2, axis=1)
    dataset2['temp_max'] = dataset2[celltemp_name].max(axis=1)
    dataset2['temp_min'] = dataset2[celltemp_name].min(axis=1)
    dataset2['temp_diff'] = dataset2['temp_max'] - dataset2['temp_min']
    dataset2 = dataset2.drop(celltemp_name, axis=1)
    datatest3 = dataset2[cols]
    datatest3.reset_index(drop=True, inplace=True)
    return datatest3
# Step 2: Data filter
def data_filtre(datatest3, col_key, compare, threshold):
    # compare: 0 -> equal to, 1 -> greater than, otherwise -> less than.
    if compare == 0:
        datatest4 = datatest3[datatest3[col_key] == threshold]
    elif compare == 1:
        datatest4 = datatest3[datatest3[col_key] > threshold]
    else:
        datatest4 = datatest3[datatest3[col_key] < threshold]
    datatest4 = datatest4.reset_index(drop=True)
    return datatest4
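# Illustrative sketch: compare=1 keeps rows strictly above the threshold.
# The column and values are made up.
def _demo_data_filtre():
    df = pd.DataFrame({'volt_max': [4.0, 4.3, 4.5]})
    return data_filtre(df, 'volt_max', 1, 4.2)['volt_max'].tolist()   # expected [4.3, 4.5]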
# Step 3: Fault pre-processing
def make_fault_set(dataset, cols, col_key, compare, threshold_filtre, fault_name):
    # Build features, keep only rows matching the fault condition,
    # segment and resample them, then label every row with the fault class.
    datatest3 = features_filtre(dataset, cols)
    datatest4 = data_filtre(datatest3, col_key, compare, threshold_filtre)
    df_tot = split(datatest4)
    df_bms = makedataset(df_tot)
    df_bms['fault_class'] = fault_name
    return df_bms
# Step 4: Normal-data pre-processing
def normalset(df_bms, cols):
    # Drop the CSV index column if present, then label everything as normal.
    df_bms.drop(['Unnamed: 0'], axis=1, inplace=True, errors='ignore')
    nor_fea1 = features_filtre(df_bms, cols)
    norfea1 = split(nor_fea1)
    normalf1 = makedataset(norfea1)
    normalf1['fault_class'] = '正常'
    return normalf1

def normalset2(df_bms1, df_bms2, df_bms3, df_bms4, df_bms5, df_bms6, cols):
    nor = pd.concat([normalset(df_bms1, cols), normalset(df_bms2, cols),
                     normalset(df_bms3, cols), normalset(df_bms4, cols),
                     normalset(df_bms5, cols), normalset(df_bms6, cols)])
    nor.reset_index(drop=True, inplace=True)
    return nor
# Step 5: Resample
def resample(nor, df_bms):
    # Balance the classes by dropping whole segments from whichever side
    # has more than twice as many rows as the other.
    if len(nor) > 2 * len(df_bms):
        sp = list(set(list(nor['split'])))
        sp_ran = random.sample(sp, k=int(len(sp) * (len(df_bms) / len(nor))))
        nor = nor[nor['split'].isin(sp_ran)]
        nor.reset_index(drop=True, inplace=True)
    if 2 * len(nor) < len(df_bms):
        sp = list(set(list(df_bms['split'])))
        sp_ran = random.sample(sp, k=int(len(sp) * (len(nor) / len(df_bms))))
        df_bms = df_bms[df_bms['split'].isin(sp_ran)]
        df_bms.reset_index(drop=True, inplace=True)
    return nor, df_bms
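# Illustrative sketch with made-up sizes: 100 normal rows in 10 segments vs
# 40 fault rows triggers the first branch, keeping int(10 * 40/100) = 4 of
# the 10 normal segments.
def _demo_resample():
    nor = pd.DataFrame({'split': np.repeat(range(10), 10), 'v': range(100)})
    fau = pd.DataFrame({'split': np.repeat(range(4), 10), 'v': range(40)})
    nor2, fau2 = resample(nor, fau)
    return len(nor2), len(fau2)   # expected (40, 40)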
# Step 6: Shuffle data
def shuffle_data(nor, dataset_faults):
    # 80/20 train/test split at device ('sn') level, so no device appears
    # in both sets.
    sn_nor = list(set(nor['sn']))
    sn_fau = list(set(dataset_faults['sn']))
    shuffle(sn_nor)
    shuffle(sn_fau)
    train_parts, test_parts = [], []
    for s1 in sn_nor[:int(0.8 * len(sn_nor))]:
        train_parts.append(nor[nor['sn'] == s1])
    for s2 in sn_nor[int(0.8 * len(sn_nor)):]:
        test_parts.append(nor[nor['sn'] == s2])
    for s3 in sn_fau[:int(0.8 * len(sn_fau))]:
        train_parts.append(dataset_faults[dataset_faults['sn'] == s3])
    for s4 in sn_fau[int(0.8 * len(sn_fau)):]:
        test_parts.append(dataset_faults[dataset_faults['sn'] == s4])
    newtrain = pd.concat(train_parts).reset_index(drop=True)
    newtest = pd.concat(test_parts).reset_index(drop=True)
    return newtrain, newtest

def shuffle_data2(dftrain):
    # Shuffle at device level, keeping each device's rows contiguous.
    sp = list(set(dftrain['sn']))
    shuffle(sp)
    newtrain = pd.concat([dftrain[dftrain['sn'] == s] for s in sp])
    newtrain.reset_index(drop=True, inplace=True)
    return newtrain
# Step 7: X & Y
def xy(train):
    Xtrain = train.drop(['fault_class', 'Time', 'sn', 'split'], axis=1)
    Ytrain = train[['fault_class']]
    # One-hot encode the labels for the softmax output.
    Ytrain2 = pd.get_dummies(Ytrain, columns=['fault_class'], prefix_sep='_')
    return Xtrain, Ytrain, Ytrain2
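# Illustrative sketch: the one-hot label columns are named
# 'fault_class_<label>' by pd.get_dummies; the frame contents are hypothetical.
def _demo_xy():
    df = pd.DataFrame({'fault_class': ['正常', 'fault_A'],
                       'Time': ['t0', 't1'], 'sn': ['A1', 'A1'],
                       'split': [0, 0], 'volt_max': [4.1, 4.4]})
    Xtrain, Ytrain, Ytrain2 = xy(df)
    return list(Xtrain.columns), list(Ytrain2.columns)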
# Step 8: Scaler
def scaler_train(Xtrain):
    Xsc_colnames = list(Xtrain.columns)
    scaler = StandardScaler()
    scaler.fit(Xtrain)   # stores the training set's mean and standard deviation
    Xsc = pd.DataFrame(scaler.transform(np.array(Xtrain)), columns=Xsc_colnames)
    return Xsc, scaler

def scaler_test(Xtest, scaler):
    Xsc_colnames = list(Xtest.columns)
    Xtsc = pd.DataFrame(scaler.transform(np.array(Xtest)), columns=Xsc_colnames)
    return Xtsc
# Step 9: Create windows
def create_win_train(X2, Y2, index, time_steps=6):
    # Sliding windows of time_steps rows; the label is taken from the
    # window's first row. Segments shorter than time_steps are skipped.
    a, b = [], []
    for k in range(1, len(index)):
        dataset = X2[index[k - 1]:index[k]].reset_index(drop=True)
        datay = Y2[index[k - 1]:index[k]].reset_index(drop=True)
        if len(dataset) > time_steps:
            dataX, dataY = [], []
            for i in range(len(dataset) - time_steps):
                dataX.append(dataset.iloc[i:(i + time_steps)].values)
                dataY.append(datay.iloc[i].values)
            a.append(np.array(dataX, dtype='float32'))
            b.append(np.array(dataY))
    aa = np.vstack(a)
    bb = np.vstack(b)
    return aa, bb
def create_win_test(X2, Y2, Xtest, index, time_steps=12):
    a, b = [], []
    conf = pd.DataFrame()
    for k in range(1, len(index)):
        dataset = X2[index[k - 1]:index[k]].reset_index(drop=True)
        datay = Y2[index[k - 1]:index[k]].reset_index(drop=True)
        dataset2 = Xtest[index[k - 1]:index[k]].reset_index(drop=True)
        if len(dataset) > time_steps:
            dataX, dataY = [], []
            win_step = []
            for i in range(len(dataset) - time_steps):
                win_step.append(i)
                dataX.append(dataset.iloc[i:(i + time_steps)].values)
                dataY.append(datay.iloc[i].values)
            # Keep the unscaled rows alongside their window id and label
            # so predictions can be inspected afterwards.
            test = dataset2.iloc[:len(dataset) - time_steps].copy()
            test['win'] = win_step
            test = pd.merge(test, datay, left_index=True, right_index=True)
            a.append(np.array(dataX, dtype='float32'))
            b.append(np.array(dataY))
            conf = pd.concat([conf, test])
    aa = np.vstack(a)
    bb = np.vstack(b)
    conf.reset_index(drop=True, inplace=True)
    return aa, bb, conf
# Step 10: Create model
def modelGRU(time_steps, nbr_features, nbr_neurons, nbr_class, Xwin, Ywin, Xtwin, Ytwin,
             batch_size, epochs, dropout, lr, activation, loss, metrics):
    # A single GRU layer followed by a dense classifier; the shape is fixed
    # by the Input layer, so GRU needs no input_shape argument.
    inputs = Input(shape=[time_steps, nbr_features])
    x = GRU(nbr_neurons, return_sequences=False, return_state=False)(inputs)
    x = Dropout(dropout)(x)
    x = Dense(nbr_class)(x)
    x = Dropout(dropout)(x)
    x = Activation(activation)(x)
    model = Model(inputs, x)
    model.compile(loss=loss, optimizer=Adam(learning_rate=lr), metrics=[metrics])
    model.fit(Xwin, Ywin, epochs=epochs, validation_data=(Xtwin, Ytwin),
              batch_size=batch_size, verbose=1, shuffle=True)
    return model
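# Usage sketch with hypothetical hyperparameters (the windows Xwin/Ywin/
# Xtwin/Ytwin come from create_win_train/create_win_test):
#   model = modelGRU(time_steps=12, nbr_features=8, nbr_neurons=64,
#                    nbr_class=3, Xwin=Xwin, Ywin=Ywin, Xtwin=Xtwin,
#                    Ytwin=Ytwin, batch_size=128, epochs=10, dropout=0.2,
#                    lr=1e-3, activation='softmax',
#                    loss='categorical_crossentropy', metrics='accuracy')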
# Step 11: Full training pipeline
def pre_model(nor, df_bms, time_steps, nbr_features, nbr_neurons, nbr_class, batch_size, epochs, dropout, lr, activation, loss):
    nor, df_bms = resample(nor, df_bms)
    newtrain, newtest = shuffle_data(nor, df_bms)
    train_sh = shuffle_data2(newtrain)
    test_sh = shuffle_data2(newtest)
    Xtrain, Ytrain, Ytrain2 = xy(train_sh)
    Xtest, Ytest, Ytest2 = xy(test_sh)
    Xsc, scaler = scaler_train(Xtrain)
    Xtsc = scaler_test(Xtest, scaler)
    indextr = make_index(train_sh)
    indexte = make_index(test_sh)
    Xwin, Ywin = create_win_train(Xsc, Ytrain2, indextr, time_steps=time_steps)
    Xtwin, Ytwin, conf = create_win_test(Xtsc, Ytest2, test_sh, indexte, time_steps=time_steps)
    model = modelGRU(time_steps=time_steps, nbr_features=nbr_features, nbr_neurons=nbr_neurons,
                     nbr_class=nbr_class, Xwin=Xwin, Ywin=Ywin, Xtwin=Xtwin, Ytwin=Ytwin,
                     batch_size=batch_size, epochs=epochs, dropout=dropout, lr=lr,
                     activation=activation, loss=loss, metrics='accuracy')
    eval_loss, acc = model.evaluate(Xtwin, Ytwin)   # renamed to avoid shadowing the `loss` argument
    return scaler, model, acc
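# End-to-end training sketch, assuming `nor` comes from normalset()/
# normalset2() and `df_bms` from make_fault_set(); every value below is a
# hypothetical example, not a tuned setting:
#   scaler, model, acc = pre_model(nor, df_bms, time_steps=12,
#                                  nbr_features=len(nor.columns) - 4,
#                                  nbr_neurons=64, nbr_class=2,
#                                  batch_size=128, epochs=10, dropout=0.2,
#                                  lr=1e-3, activation='softmax',
#                                  loss='categorical_crossentropy')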