# aelstm.py — LSTM-autoencoder anomaly-detection pipeline for battery BMS data.
  1. import pandas as pd
  2. import numpy as np
  3. import datetime
  4. from sklearn.preprocessing import StandardScaler
  5. import tensorflow.keras as keras
  6. from LIB.BACKEND import DataPreProcess
  7. def data_groups(data_bms,sn,start_time,end_time):
  8. data_bms=data_bms.drop(['GSM信号','外电压','开关状态','故障等级','故障代码','绝缘电阻','上锁状态','加热状态','单体均衡状态','总输出状态'],axis=1,errors='ignore')
  9. data_set=pd.DataFrame()
  10. data_set['时间戳'] = pd.date_range(start=start_time, end=end_time, freq='T') #每分钟一条记录
  11. for i in range(len(data_set)):
  12. data_set.loc[i,'时间戳'] = data_set.loc[i,'时间戳'].replace(second=0)
  13. #给数据重建新特征:充放电状态,序列
  14. if len(data_bms['总电流[A]']==0)>0:
  15. if sn[:4] in ['MGMC','UD02']:
  16. data_bms=DataPreProcess.DataPreProcess.data_split_by_status_forMGMCUD02(DataPreProcess, data_bms, drive_interval_threshold=120, charge_interval_threshold=300,drive_stand_threshold=120, charge_stand_threshold=300)
  17. else:
  18. data_bms=DataPreProcess.DataPreProcess.data_split_by_status(DataPreProcess, data_bms, drive_interval_threshold=120, charge_interval_threshold=300,drive_stand_threshold=120, charge_stand_threshold=300)
  19. else:
  20. data_bms['data_split_by_status']=1
  21. data_bms['data_status']='work'
  22. #构建等差时间序列
  23. data_bms['时间戳']=pd.to_datetime(data_bms['时间戳'])
  24. for i in range(len(data_bms)):
  25. data_bms.loc[i,'时间戳'] = data_bms.loc[i,'时间戳'].replace(second=0)
  26. data_bms.drop_duplicates(subset='时间戳',keep='last',inplace=False)
  27. data_bms2=pd.merge(data_set,data_bms,on='时间戳',how='left')
  28. data_bms2=data_bms2.fillna(method='ffill')
  29. data_bms2=data_bms2.fillna(method='bfill')
  30. data_bms2.drop_duplicates(subset='时间戳',keep='last',inplace=True)
  31. data_bms2=data_bms2.reset_index()
  32. #删除无用特征
  33. data_bms2=data_bms2.drop(['Unnamed: 0','level_0','index','Unnamed: 0.1','充电状态','data_split_by_crnt'],axis=1,errors='ignore')
  34. #按状态分表
  35. data_stand=data_bms2[data_bms2['data_status']=='stand']
  36. return data_stand
  37. def split(data0):
  38. data0=data0.reset_index(drop=True)
  39. data0=data0.drop(['Unnamed: 0','Unnamed: 0.1'],axis=1,errors='ignore')
  40. data0['n_split']=np.nan
  41. data1=data0.copy()
  42. data1.drop_duplicates(subset=['data_split_by_status'],keep='first',inplace=True)
  43. data1['n_split']=range(1,len(data1)+1)
  44. data0.loc[data1.index,'n_split']=list(data1['n_split'])
  45. data0['n_split']=list(data0['n_split'].fillna(method='ffill'))
  46. return data0
  47. #特征工程I
  48. def makedataset(data_set):
  49. listT1=[s for s in list(data_set) if '单体温度' in s]
  50. listT2=[s for s in list(data_set) if '其他温度' in s]
  51. data_set=data_set.drop(["单体温度"+str(i) for i in range(1,len(listT1)+1)],axis=1)
  52. data_set=data_set.drop(["其他温度"+str(i) for i in range(1,len(listT2)+1)],axis=1)
  53. data_set=data_set.drop(['单体压差'],axis=1)
  54. return data_set
  55. #特征工程II
  56. def makedataset2(data_set):
  57. listV=[s for s in list(data_set) if '单体电压' in s]
  58. data_set=data_set.drop(["单体电压"+str(i) for i in range(1,len(listV)+1)],axis=1)
  59. data_set=data_set.drop(['总电压[V]','单体压差','SOC[%]','其他温度3'],axis=1,errors='ignore')
  60. return data_set
  61. def makescaler_test(scaler,data_test):
  62. data_test=data_test.reset_index(drop=True)
  63. data_test_pro=data_test.drop(['时间戳','sn'],axis=1)
  64. test_sc=data_test_pro.drop('n_split',axis=1)
  65. test_sc=scaler.transform(np.array(test_sc))
  66. test_sc=pd.DataFrame(test_sc)
  67. test_sc['n_split']=data_test_pro['n_split'].values
  68. return test_sc
  69. #滑窗
  70. def create_dataset(data_set,data_train,time_steps=5): #X为dataframe,y为serie
  71. a,b=[],[]
  72. index=pd.DataFrame()
  73. List_n_split=sorted(list(set(data_set['n_split'])))
  74. for k in List_n_split:
  75. dataset=data_set[data_set['n_split']==k]
  76. datatrain=data_train[data_train['n_split']==k]
  77. if len(dataset)>time_steps:
  78. dataset2=dataset.reset_index(drop=True)
  79. dataset=dataset.drop(['n_split'],axis=1)
  80. dataX, dataY = [], []
  81. index_step=[]
  82. for i in range(len(dataset)-time_steps):
  83. v1 = dataset.iloc[i:(i+time_steps)].values
  84. v2 = dataset.iloc[i+time_steps]
  85. dataX.append(v1)
  86. dataY.append(v2)
  87. index_step.append(i)
  88. dataset3=dataset2.iloc[:len(dataset2)-time_steps]
  89. newdatatrain=datatrain[:len(dataset3)]
  90. newdatatrain2=newdatatrain.copy()
  91. newdatatrain2['window_step']=index_step
  92. dataX2=np.array(dataX)
  93. dataY2=np.array(dataY)
  94. a.append(dataX2)
  95. b.append(dataY2)
  96. index=index.append(newdatatrain2)
  97. aa=np.vstack(a)
  98. bb=np.vstack(b)
  99. return aa,bb,index
  100. def pred(Test,model):
  101. test_pred = model.predict(Test)
  102. test_loss = np.mean(np.abs(test_pred - Test), axis=1)
  103. return test_loss
  104. def ref(test_loss,new_test):
  105. test_loss_sum=test_loss.sum(axis=1)
  106. test_loss_max=test_loss.max(axis=1)
  107. ref_test=new_test[['n_split','window_step']].reset_index(drop=True)
  108. ref_test['test_loss_sum']=list(map(lambda x: round(x,3),test_loss_sum))
  109. ref_test['test_loss_max']=list(map(lambda x: round(x,3),test_loss_max))
  110. return ref_test
  111. def prediction(df_stand,scaler,scaler2,model,model2):
  112. data_set_test=df_stand.drop(['Unnamed: 0','index','总电流[A]','SOH[%]','data_split_by_status','data_status'],axis=1,errors='ignore')
  113. dataset1_test=makedataset(data_set_test)
  114. dataset2_test=makedataset2(data_set_test)
  115. test_sc=makescaler_test(scaler,dataset1_test)
  116. test_sc2=makescaler_test(scaler2,dataset2_test)
  117. data_test_int=create_dataset(test_sc,dataset1_test,5)
  118. Test=data_test_int[0]
  119. data_test_int2=create_dataset(test_sc2,dataset2_test,5)
  120. Test2=data_test_int2[0]
  121. new_test=data_test_int[2]
  122. new_test2=data_test_int2[2]
  123. test_loss1=pred(Test,model)
  124. test_loss2=pred(Test2,model2)
  125. ref_test=ref(test_loss1,new_test)
  126. ref_test2=ref(test_loss2,new_test2)
  127. new_test['test_lossV_sum']=list(ref_test['test_loss_sum'])
  128. new_test['test_lossV_max']=list(ref_test['test_loss_max'])
  129. new_test2['test_lossTemp_sum']=list(ref_test2['test_loss_sum'])
  130. new_test2['test_lossTemp_max']=list(ref_test2['test_loss_max'])
  131. res_test=pd.merge(new_test, new_test2, left_index=True, right_index=True,suffixes=('', '_y'))
  132. res_test=res_test.drop(['sn_y','n_split_y','window_step_y','时间戳_y'],axis=1)
  133. #根据异常指数设置阈值判定异常
  134. res=res_test[(res_test['test_lossTemp_sum']>5) | (res_test['test_lossV_sum']>10) | (res_test['test_lossV_max']>4) | (res_test['test_lossTemp_max']>2)]
  135. return res
  136. def makeres1(res):
  137. df_res=pd.DataFrame(columns=['product_id','n_split','AnoScoreV_sum_max','AnoScoreV_max_max','AnoScoreT_sum_max','AnoScoreT_max_max'])
  138. maxVsum=list(res['test_lossV_sum'].groupby(res['n_split']).max())
  139. maxTsum=list(res['test_lossTemp_sum'].groupby(res['n_split']).max())
  140. maxTmax=list(res['test_lossTemp_max'].groupby(res['n_split']).max())
  141. maxVmax=list(res['test_lossV_max'].groupby(res['n_split']).max())
  142. df_res['n_split']=list(res['test_lossV_sum'].groupby(res['n_split']).max().index)
  143. sn= list(map(lambda x: list(res[res['n_split']==x]['sn'])[0], list(df_res['n_split'].values)))
  144. df_res['product_id']=sn
  145. df_res['AnoScoreV_sum_max']=maxVsum
  146. df_res['AnoScoreV_max_max']=maxVmax
  147. df_res['AnoScoreT_sum_max']=maxTsum
  148. df_res['AnoScoreT_max_max']=maxTmax
  149. listT2=[s for s in list(res) if '其他温度' in s]
  150. if len(listT2)>0:
  151. for k in listT2:
  152. temp=list(res[k].groupby(res['n_split']).max())
  153. df_res[k]=temp
  154. df_res['最大其他温度']= df_res[[k for k in listT2]].max(axis=1)
  155. df_res=df_res.drop([k for k in listT2],axis=1)
  156. return df_res
  157. def makeres2(res):
  158. df_res=pd.DataFrame(columns=['start_time','end_time','product_id','n_split','code','level','SOC[%]','AnoScoreV_sum_start','AnoScoreT_sum_start','AnoScoreV_sum_end','AnoScoreT_sum_end','AnoScoreV_max_start','AnoScoreT_max_start','AnoScoreV_max_end','AnoScoreT_max_end','info','advice'])
  159. res_start=res.drop_duplicates(subset=['n_split'],keep='first',inplace=False)
  160. res_end=res.drop_duplicates(subset=['n_split'],keep='last',inplace=False)
  161. start=list(res_start['时间戳'].values)
  162. end=list(res_end['时间戳'].values)
  163. product_id=list(res_start['sn'].values)
  164. soc=list(res_start['SOC[%]'].values)
  165. AnoScoreV_sum_start=list(res_start['test_lossV_sum'].values)
  166. AnoScoreT_sum_start=list(res_start['test_lossTemp_sum'].values)
  167. AnoScoreV_sum_end=list(res_end['test_lossV_sum'].values)
  168. AnoScoreT_sum_end=list(res_end['test_lossTemp_sum'].values)
  169. AnoScoreV_max_start=list(res_start['test_lossV_max'].values)
  170. AnoScoreT_max_start=list(res_start['test_lossTemp_max'].values)
  171. AnoScoreV_max_end=list(res_end['test_lossV_max'].values)
  172. AnoScoreT_max_end=list(res_end['test_lossTemp_max'].values)
  173. df_res['n_split']=list(res['test_lossV_sum'].groupby(res['n_split']).max().index)
  174. df_res['start_time']=start
  175. df_res['end_time']=end
  176. df_res['product_id']=product_id
  177. df_res['SOC[%]']=soc
  178. df_res['AnoScoreV_sum_start']=AnoScoreV_sum_start
  179. df_res['AnoScoreT_sum_start']=AnoScoreT_sum_start
  180. df_res['AnoScoreV_sum_end']=AnoScoreV_sum_end
  181. df_res['AnoScoreT_sum_end']=AnoScoreT_sum_end
  182. df_res['AnoScoreV_max_start']=AnoScoreV_max_start
  183. df_res['AnoScoreT_max_start']=AnoScoreT_max_start
  184. df_res['AnoScoreV_max_end']=AnoScoreV_max_end
  185. df_res['AnoScoreT_max_end']=AnoScoreT_max_end
  186. return df_res
  187. def difftime(delta):
  188. seconds = delta.total_seconds()
  189. minutes = seconds/60
  190. return minutes
  191. def diffmin(res):
  192. start=list(res['start_time'])
  193. end=list(res['end_time'])
  194. start=list(map(lambda x: datetime.datetime.strptime(str(x),'%Y-%m-%d %H:%M:%S'),start))
  195. end=list(map(lambda x: datetime.datetime.strptime(str(x),'%Y-%m-%d %H:%M:%S'),end))
  196. diff=np.array(end)-np.array(start)
  197. diff_min=list(map(lambda x: difftime(x),diff))
  198. return diff_min
  199. def makeres(res,end_time):
  200. df_res1=makeres1(res)
  201. df_res2=makeres2(res)
  202. df_res=pd.merge(df_res1,df_res2,left_on='n_split', right_on='n_split')
  203. diff_min=diffmin(df_res)
  204. df_res['diff_min']=diff_min
  205. df_res.reset_index(drop=True,inplace=True)
  206. end=datetime.datetime.strptime(str(df_res.loc[len(df_res)-1,'end_time']),'%Y-%m-%d %H:%M:%S')
  207. end_time=datetime.datetime.strptime(str(end_time),'%Y-%m-%d %H:%M:%S')
  208. diff=(end_time-end).total_seconds()
  209. if diff<600:
  210. df_res.loc[len(df_res)-1,'end_time']='0000-00-00 00:00:00'
  211. return df_res,diff
  212. def threshold(res,group,end_time):
  213. df_res,diff=makeres(res,end_time)
  214. #删除SOC过低导致的欠压
  215. df_res=df_res[(df_res['diff_min']>60) | (df_res['SOC[%]']>10) | (df_res['AnoScoreT_sum_max']>5) | (df_res['AnoScoreV_sum_max']>50) | (df_res['AnoScoreV_max_max']>9) | (df_res['AnoScoreT_max_max']>2)]
  216. #删除PK系列其他温度非故障升高
  217. if group in ['PK504','PK502','PK500']:
  218. df_res=df_res[(df_res['diff_min']>20) | (df_res['最大其他温度']>80) |(df_res['AnoScoreT_sum_max']>15) | (df_res['AnoScoreV_sum_max']>10) | (df_res['AnoScoreV_max_max']>4) | (df_res['AnoScoreT_max_max']>8)]
  219. #删除PK504满充导致的过压
  220. if group=='PK504':
  221. df_res=df_res[((df_res['diff_min']>10) & (df_res['AnoScoreV_sum_max']>35)) | (df_res['SOC[%]']<93)| (df_res['AnoScoreT_sum_max']>5) | (df_res['AnoScoreV_max_max']>6) | (df_res['AnoScoreT_max_max']>2)]
  222. df_res=df_res.drop(['n_split','product_id_y','AnoScoreV_sum_start','AnoScoreV_max_start','AnoScoreT_sum_start','AnoScoreT_max_start','AnoScoreV_sum_end','AnoScoreT_sum_end','AnoScoreT_max_end','AnoScoreV_max_end','最大其他温度'],axis=1,errors='ignore')
  223. df_res=df_res.rename(columns = {"product_id_x": "product_id"})
  224. df_res=df_res.rename(columns = {"SOC[%]": "SOC"})
  225. df_res2=df_res[['product_id','start_time','end_time','diff_min','SOC','AnoScoreV_sum_max','AnoScoreV_max_max','AnoScoreT_sum_max','AnoScoreT_max_max']]
  226. df_res2['start_time']=list(map(lambda x:str(x),list(df_res2['start_time'])))
  227. df_res2['end_time']=list(map(lambda x:str(x),list(df_res2['end_time'])))
  228. return df_res2,diff
  229. def arrange(result,result_final,start_time,diff):
  230. result=result.reset_index(drop=True)
  231. start=datetime.datetime.strptime(str(result.loc[0,'start_time']),'%Y-%m-%d %H:%M:%S')
  232. start_time=datetime.datetime.strptime(str(start_time),'%Y-%m-%d %H:%M:%S')
  233. diff_time=(start-start_time).total_seconds()
  234. if diff_time<600:
  235. result_final['end_time']=result.loc[0,'end_time']
  236. diff_min_org=result_final['diff_min']
  237. diff_min_new=result.loc[0,'diff_min']
  238. result_final['diff_min']=diff_min_org+diff_time+diff+diff_min_new
  239. result=result.drop(0)
  240. return result,result_final