Trunaway.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. from sklearn.preprocessing import StandardScaler
  2. import keras
  3. import os
  4. import pandas as pd
  5. import numpy as np
  6. from LIB.BACKEND import DataPreProcess
  7. import datetime
  8. #数据预处理
  9. #删除采样异常点
  10. def delete(data_bms):
  11. listV=[s for s in list(data_bms) if '单体电压' in s]
  12. listT=[s for s in list(data_bms) if '单体温度' in s]
  13. listT2=[s for s in list(data_bms) if '其他温度' in s]
  14. #data_bms2=data_bms.copy()
  15. for i in range(1,len(listV)+1):
  16. data_bms=data_bms[(data_bms['单体电压'+str(i)]>1000) & (data_bms['单体电压'+str(i)]<6000)]
  17. for i in range(1,len(listT)+1):
  18. data_bms=data_bms[(data_bms['单体温度'+str(i)]>-20) & (data_bms['单体温度'+str(i)]<100)]
  19. #for i in range(1,len(listT2)+1):
  20. #data_bms=data_bms[(data_bms['其他温度'+str(1)]>-20) & (data_bms['其他温度'+str(1)]<100)]
  21. #data_outliers=data_bms2.iloc[list(set(list(data_bms2.index)).difference(set(list(data_bms.index))))]
  22. data_bms=data_bms.reset_index(drop=True)
  23. return data_bms
  24. #构建时间序列&选取静置状态
  25. def data_groups(data_bms,sn,start_time,end_time):
  26. data_bms=data_bms.drop(['GSM信号','外电压','开关状态','故障等级','故障代码','绝缘电阻','上锁状态','加热状态','单体均衡状态','总输出状态'],axis=1,errors='ignore')
  27. data_set=pd.DataFrame()
  28. start_time=start_time[:17]+'00'
  29. end_time=end_time[:17]+'00'
  30. data_set['时间戳'] = pd.date_range(start=start_time, end=end_time, freq='T') #每分钟一条记录
  31. #给数据重建新特征:充放电状态,序列
  32. if len(data_bms['总电流[A]']==0)>0:
  33. if sn[:4] in ['MGMC','UD02']:
  34. #data_bms=rest_stscs_v1.cell_statistic.rest_sta(data_bms)
  35. data_bms=DataPreProcess.DataPreProcess.data_split_by_status_forMGMCUD02(DataPreProcess, data_bms, drive_interval_threshold=120, charge_interval_threshold=300,drive_stand_threshold=120, charge_stand_threshold=300)
  36. else:
  37. data_bms=DataPreProcess.DataPreProcess.data_split_by_status(DataPreProcess, data_bms, drive_interval_threshold=120, charge_interval_threshold=300,drive_stand_threshold=120, charge_stand_threshold=300)
  38. else:
  39. data_bms['data_split_by_status']=1
  40. data_bms['data_status']='work'
  41. #构建等差时间序列
  42. data_bms['时间戳']=pd.to_datetime(data_bms['时间戳'])
  43. for i in range(len(data_bms)):
  44. data_bms.loc[i,'时间戳'] = data_bms.loc[i,'时间戳'].replace(second=0)
  45. data_bms.drop_duplicates(subset='时间戳',keep='last',inplace=False)
  46. data_bms2=pd.merge(data_set,data_bms,on='时间戳',how='left')
  47. data_bms2=data_bms2.fillna(method='ffill')
  48. data_bms2=data_bms2.fillna(method='bfill')
  49. data_bms2.drop_duplicates(subset='时间戳',keep='last',inplace=True)
  50. data_bms2=data_bms2.reset_index()
  51. #删除无用特征
  52. data_bms2=data_bms2.drop(['Unnamed: 0','level_0','index','Unnamed: 0.1','充电状态','data_split_by_crnt'],axis=1,errors='ignore')
  53. #按状态分表
  54. data_stand=data_bms2[data_bms2['data_status']=='stand']
  55. return data_stand
  56. #标记时段
  57. def split(data0):
  58. data0=data0.reset_index(drop=True)
  59. data0=data0.drop(['Unnamed: 0','Unnamed: 0.1'],axis=1,errors='ignore')
  60. data0['n_split']=np.nan
  61. data1=data0.copy()
  62. data1.drop_duplicates(subset=['data_split_by_status'],keep='first',inplace=True)
  63. data1['n_split']=range(1,len(data1)+1)
  64. data0.loc[data1.index,'n_split']=list(data1['n_split'])
  65. data0['n_split']=list(data0['n_split'].fillna(method='ffill'))
  66. time=list(map(lambda x: str(x),list(data0['时间戳'])))
  67. data0['时间戳']=time
  68. return data0
  69. ####################################################################################################################
  70. #每10min一条记录:平均
  71. def create_dataset(data_set): #X为dataframe,y为serie
  72. data_set=data_set.drop(['总电流[A]','SOH[%]','data_status','data_split_by_status'],axis=1,errors='ignore')
  73. time=list(map(lambda x: x[:15]+'0'+x[16:],list(data_set['时间戳'])))
  74. data_set['时间戳']=time
  75. List_n_split=sorted(list(set(data_set['n_split'])))
  76. data_set2=pd.DataFrame()
  77. for k in List_n_split:
  78. dataset=data_set[data_set['n_split']==k]
  79. if len(dataset)>10:
  80. dataset=dataset.reset_index(drop=True)
  81. sn=list(dataset['sn'].values)[0]
  82. dataset=dataset.drop(['sn','n_split'],axis=1)
  83. dataset2=dataset.groupby(dataset['时间戳']).mean()
  84. dataset2=dataset2.reset_index()
  85. dataset2['sn']=sn
  86. dataset2['n_split']=k
  87. data_set2=data_set2.append(dataset2)
  88. return data_set2
  89. # 计算各单体电压下降量
  90. def cal_dataset(df_stand): #X为dataframe,y为serie
  91. List_n_split=sorted(list(set(df_stand['n_split'])))
  92. listV=[s for s in list(df_stand) if '单体电压' in s]
  93. listT=[s for s in list(df_stand) if '温度' in s]
  94. newdataset=pd.DataFrame()
  95. for k in List_n_split:
  96. dataset=df_stand[df_stand['n_split']==k]
  97. dataset=dataset.reset_index(drop=True)
  98. dataset2=dataset[listV]
  99. dataset3=dataset2.diff() #periods=1, axis=0
  100. dataset3['最大电压下降']=dataset3[listV].min(axis=1)
  101. dataset3['平均电压下降']=dataset3[listV].mean(axis=1)
  102. dataset3['电压下降低偏']=dataset3[listV].mean(axis=1)-dataset3[listV].min(axis=1)
  103. dataset3=dataset3.drop(listV+['平均电压下降'],axis=1)
  104. dataset4=dataset.drop(listT+listV+['总电压[V]'],axis=1)
  105. dataset5=pd.merge(dataset4,dataset3,left_index=True,right_index=True)
  106. dataset5=dataset5.dropna(axis=0)
  107. newdataset=newdataset.append(dataset5)
  108. return newdataset
  109. #每1hour一条记录:总和
  110. def timeserie(data_set): #X为dataframe,y为serie
  111. List_n_split=sorted(list(set(data_set['n_split'])))
  112. time=list(map(lambda x: x[:14]+'00'+x[16:],list(data_set['时间戳'])))
  113. data_set['时间戳']=time
  114. data_set2=pd.DataFrame()
  115. for k in List_n_split:
  116. dataset=data_set[data_set['n_split']==k]
  117. if len(dataset)>10:
  118. dataset=dataset.reset_index(drop=True)
  119. sn=list(dataset['sn'].values)[0]
  120. soc=list(dataset['SOC[%]'].values)[0]
  121. dataset=dataset.drop(['sn','n_split'],axis=1)
  122. dataset2=dataset.groupby(dataset['时间戳']).sum()
  123. dataset2=dataset2.reset_index()
  124. dataset2['sn']=sn
  125. dataset2['n_split']=k
  126. dataset2['SOC[%]']=soc
  127. data_set2=data_set2.append(dataset2)
  128. return data_set2
  129. def makescaler_test(scaler,data_test):
  130. data_test=data_test.reset_index(drop=True)
  131. data_test_pro=data_test.drop(['n_split','时间戳','sn','SOC[%]'],axis=1)
  132. test_sc=scaler.transform(np.array(data_test_pro))
  133. test_sc=pd.DataFrame(test_sc)
  134. test_sc['n_split']=data_test['n_split'].values
  135. return test_sc
  136. #滑窗
  137. def create_win(data_set,data_train,time_steps=5): #X为dataframe,y为serie
  138. a,b=[],[]
  139. index=pd.DataFrame()
  140. List_n_split=sorted(list(set(data_set['n_split'])))
  141. for k in List_n_split:
  142. dataset=data_set[data_set['n_split']==k]
  143. datatrain=data_train[data_train['n_split']==k]
  144. if len(dataset)>time_steps:
  145. dataset2=dataset.reset_index(drop=True)
  146. dataset=dataset.drop(['n_split'],axis=1)
  147. dataX, dataY = [], []
  148. index_step=[]
  149. for i in range(len(dataset)-time_steps):
  150. v1 = dataset.iloc[i:(i+time_steps)].values
  151. v2 = dataset.iloc[i+time_steps]
  152. dataX.append(v1)
  153. dataY.append(v2)
  154. index_step.append(i)
  155. dataset3=dataset2.iloc[:len(dataset2)-time_steps]
  156. newdatatrain=datatrain[:len(dataset3)]
  157. newdatatrain2=newdatatrain.copy()
  158. newdatatrain2['window_step']=index_step
  159. dataX2=np.array(dataX)
  160. dataY2=np.array(dataY)
  161. a.append(dataX2)
  162. b.append(dataY2)
  163. index=index.append(newdatatrain2)
  164. aa=np.vstack(a)
  165. bb=np.vstack(b)
  166. return aa,bb,index
  167. def pred(Test,model):
  168. test_pred = model.predict(Test)
  169. test_loss = np.mean(np.abs(test_pred - Test), axis=1)
  170. return test_loss
  171. def ref(test_loss,new_test):
  172. test_loss_sum=test_loss.sum(axis=1)
  173. test_loss_max=test_loss.max(axis=1)
  174. ref_test=new_test.reset_index(drop=True)
  175. ref_test['test_loss_sum']=test_loss_sum
  176. ref_test['test_loss_max']=test_loss_max
  177. ref_test['test_loss压差']=test_loss[:,0]
  178. ref_test['test_loss降幅']=test_loss[:,1]
  179. ref_test['test_loss降差']=test_loss[:,2]
  180. return ref_test
  181. def difftime(delta):
  182. seconds = delta.total_seconds()
  183. minutes = seconds/60
  184. return minutes
  185. def diffmin(res):
  186. start=list(res['start_time'])
  187. end=list(res['end_time'])
  188. start=list(map(lambda x: datetime.datetime.strptime(str(x),'%Y-%m-%d %H:%M:%S'),start))
  189. end=list(map(lambda x: datetime.datetime.strptime(str(x),'%Y-%m-%d %H:%M:%S'),end))
  190. diff=np.array(end)-np.array(start)
  191. diff_min=list(map(lambda x: difftime(x),diff))
  192. return diff_min
  193. def res_output(TestOrg,scaler,model,group,end_time):
  194. df_res=pd.DataFrame(columns=['product_id', 'start_time', 'end_time', 'diff_min','soc','loss_sum','loss_max','diffV','downV','diffdownV','window_step'])
  195. diff=0
  196. test2=create_dataset(TestOrg)
  197. test3=cal_dataset(test2)
  198. newtest=timeserie(test3)
  199. if len(newtest)>0:
  200. test_sc=makescaler_test(scaler,newtest)
  201. Test,y_test,win_test=create_win(test_sc,newtest,time_steps=3)
  202. test_loss=pred(Test,model)
  203. ref_test=ref(test_loss,win_test)
  204. ref_test['test_loss_diff']=list(map(lambda x: x[0]-x[1], zip(list(ref_test['test_loss_sum']), list(ref_test['test_loss_max']))))
  205. if group=='MGMCL':
  206. res=ref_test[(ref_test['test_loss_max']>0.04) & (ref_test['SOC[%]']>15) & (ref_test['test_loss_sum']>0.06) & (ref_test['window_step']>0) & (ref_test['最大电压下降']<-3)]
  207. elif group=='PK504':
  208. res=ref_test[(ref_test['test_loss_diff']>0.03) & (ref_test['test_loss_max']>0.03) & (ref_test['SOC[%]']>15) & (ref_test['window_step']>0) & (ref_test['最大电压下降']<-3) &((ref_test['test_loss_sum']>3) | (ref_test['SOC[%]']<90))]
  209. else:
  210. res=ref_test[(ref_test['test_loss_diff']>0.6) & (ref_test['test_loss_max']>0.6) & (ref_test['SOC[%]']>15) & (ref_test['window_step']>0) & (ref_test['电压下降低偏']>3.5) &((ref_test['test_loss_sum']>3) | (ref_test['SOC[%]']<90))]
  211. if len(res)>0:
  212. res=res.reset_index()
  213. for k in range(len(res)):
  214. if res.loc[k,'最大电压下降']<-130:
  215. sn=res.loc[k,'sn']
  216. win=res.loc[k,'window_step']
  217. index = res[(res["sn"]== sn)&(res["window_step"]== win)].index.tolist()[0]
  218. res=res.drop([index-2,index-1,index],errors='ignore')
  219. if len(res)>0:
  220. maxsum=list(res['test_loss_sum'].groupby(res['n_split']).max())
  221. maxmax=list(res['test_loss_max'].groupby(res['n_split']).max())
  222. res_start=res.drop_duplicates(subset=['n_split'],keep='first',inplace=False)
  223. res_end=res.drop_duplicates(subset=['n_split'],keep='last',inplace=False)
  224. start=list(map(lambda x:str(x),list(res_start['时间戳'].values)))
  225. end=list(map(lambda x:str(x),list(res_end['时间戳'].values)))
  226. product_id=list(res_start['sn'].values)
  227. df_res['product_id']=product_id
  228. df_res['start_time']=start
  229. df_res['end_time']=end
  230. df_res['loss_sum']=list(map(lambda x:round(x,3),maxsum))
  231. df_res['loss_max']=list(map(lambda x:round(x,3),maxmax))
  232. soc=list(res_start['SOC[%]'].values)
  233. df_res['SOC']=soc
  234. df_res['diffV']=list(res_start['单体压差'].values)
  235. df_res['downV']=list(res_start['最大电压下降'].values)
  236. df_res['diffdownV']=list(res_start['电压下降低偏'].values)
  237. #df_res['window_step']=list(res_start['window_step'].values)
  238. diff_min=diffmin(df_res)
  239. df_res['diff_min']=diff_min
  240. df_res.reset_index(drop=True,inplace=True)
  241. end=datetime.datetime.strptime(str(df_res.loc[len(df_res)-1,'end_time']),'%Y-%m-%d %H:%M:%S')
  242. end_time=datetime.datetime.strptime(str(end_time),'%Y-%m-%d %H:%M:%S')
  243. diff=(end_time-end).total_seconds()
  244. if diff<600:
  245. df_res.loc[len(df_res)-1,'end_time']='0000-00-00 00:00:00'
  246. return df_res,diff
  247. ##################################################################################################################
  248. def arrange(result,result_final,start_time,diff):
  249. result=result.reset_index(drop=True)
  250. start=datetime.datetime.strptime(str(result.loc[0,'start_time']),'%Y-%m-%d %H:%M:%S')
  251. start_time=datetime.datetime.strptime(str(start_time),'%Y-%m-%d %H:%M:%S')
  252. diff_time=(start-start_time).total_seconds()
  253. if diff_time<600:
  254. result_final['end_time']=result.loc[0,'end_time']
  255. diff_min_org=result_final['diff_min']
  256. diff_min_new=result.loc[0,'diff_min']
  257. result_final['diff_min']=diff_min_org+(diff_time+diff)/60+diff_min_new
  258. result=result.drop(0)
  259. return result,result_final