|
@@ -0,0 +1,260 @@
|
|
|
+import pandas as pd
|
|
|
+import numpy as np
|
|
|
+import datetime
|
|
|
+from sklearn.preprocessing import StandardScaler
|
|
|
+import tensorflow.keras as keras
|
|
|
+from LIB.BACKEND import DataPreProcess
|
|
|
+
|
|
|
def data_groups(data_bms, sn, start_time, end_time):
    """Resample raw BMS records onto a per-minute grid and return 'stand' rows.

    Parameters
    ----------
    data_bms : DataFrame with at least a '时间戳' (timestamp) column and a
        '总电流[A]' (total current) column; irrelevant telemetry columns are dropped.
    sn : str, battery serial number; its 4-character prefix selects the
        status-split routine.
    start_time, end_time : boundaries of the minute-spaced time grid.

    Returns
    -------
    DataFrame containing only the rows whose data_status == 'stand'.
    """
    # Drop telemetry columns irrelevant to the standing-segment analysis.
    data_bms = data_bms.drop(
        ['GSM信号', '外电压', '开关状态', '故障等级', '故障代码', '绝缘电阻',
         '上锁状态', '加热状态', '单体均衡状态', '总输出状态'],
        axis=1, errors='ignore')

    # One record per minute; date_range already yields second == 0, so the
    # original per-row replace(second=0) loop was a no-op and is removed.
    data_set = pd.DataFrame()
    data_set['时间戳'] = pd.date_range(start=start_time, end=end_time, freq='T')

    # Rebuild charge/discharge status features.
    # BUGFIX: the original tested len(series == 0) > 0, which counts *all*
    # rows (the comparison returns a boolean Series); count zero-current rows.
    if (data_bms['总电流[A]'] == 0).sum() > 0:
        if sn[:4] in ['MGMC', 'UD02']:
            data_bms = DataPreProcess.DataPreProcess.data_split_by_status_forMGMCUD02(
                DataPreProcess, data_bms,
                drive_interval_threshold=120, charge_interval_threshold=300,
                drive_stand_threshold=120, charge_stand_threshold=300)
        else:
            data_bms = DataPreProcess.DataPreProcess.data_split_by_status(
                DataPreProcess, data_bms,
                drive_interval_threshold=120, charge_interval_threshold=300,
                drive_stand_threshold=120, charge_stand_threshold=300)
    else:
        # No zero-current samples: treat the whole stretch as one working segment.
        data_bms['data_split_by_status'] = 1
        data_bms['data_status'] = 'work'

    # Align records to whole minutes (truncate seconds only, like the original).
    data_bms['时间戳'] = pd.to_datetime(data_bms['时间戳'])
    data_bms['时间戳'] = data_bms['时间戳'].apply(lambda t: t.replace(second=0))
    # BUGFIX: the original called drop_duplicates with inplace=False and
    # discarded the result (a no-op); keep the last record of each minute.
    data_bms = data_bms.drop_duplicates(subset='时间戳', keep='last')

    # Project onto the minute grid and fill gaps from neighbouring records.
    data_bms2 = pd.merge(data_set, data_bms, on='时间戳', how='left')
    data_bms2 = data_bms2.ffill()
    data_bms2 = data_bms2.bfill()
    data_bms2.drop_duplicates(subset='时间戳', keep='last', inplace=True)
    data_bms2 = data_bms2.reset_index()

    # Remove bookkeeping columns that are useless downstream.
    data_bms2 = data_bms2.drop(
        ['Unnamed: 0', 'level_0', 'index', 'Unnamed: 0.1', '充电状态',
         'data_split_by_crnt'],
        axis=1, errors='ignore')

    # Keep only the standing (resting) segments.
    data_stand = data_bms2[data_bms2['data_status'] == 'stand']
    return data_stand
|
|
|
+
|
|
|
def split(data0):
    """Assign a running segment number ('n_split') to contiguous status blocks.

    The first row of each distinct ``data_split_by_status`` value anchors a
    new segment number; the remaining rows inherit it via forward fill.
    """
    data0 = data0.reset_index(drop=True)
    data0 = data0.drop(['Unnamed: 0', 'Unnamed: 0.1'], axis=1, errors='ignore')
    data0['n_split'] = np.nan

    # Anchor rows: first occurrence of every distinct status value.
    anchors = data0.copy()
    anchors.drop_duplicates(subset=['data_split_by_status'], keep='first', inplace=True)
    anchors['n_split'] = range(1, len(anchors) + 1)

    # Stamp the anchors, then propagate each number forward to its block.
    data0.loc[anchors.index, 'n_split'] = list(anchors['n_split'])
    data0['n_split'] = list(data0['n_split'].ffill())
    return data0
|
|
|
+
|
|
|
# Feature engineering I
def makedataset(data_set):
    """Remove per-cell and auxiliary temperature columns, keeping voltages.

    Drops every '单体温度*' (cell temperature) and '其他温度*' (other
    temperature) column plus '单体压差' (cell voltage spread).
    """
    cell_temps = [s for s in list(data_set) if '单体温度' in s]
    other_temps = [s for s in list(data_set) if '其他温度' in s]
    # BUGFIX: drop the columns actually found instead of reconstructing
    # '单体温度1..n' names, which breaks when numbering is non-contiguous.
    data_set = data_set.drop(cell_temps, axis=1)
    data_set = data_set.drop(other_temps, axis=1)
    # errors='ignore' for consistency with makedataset2 (column may be absent).
    data_set = data_set.drop(['单体压差'], axis=1, errors='ignore')
    return data_set
|
|
|
# Feature engineering II
def makedataset2(data_set):
    """Remove per-cell voltage and pack-level columns, keeping temperatures.

    Drops every '单体电压*' (cell voltage) column plus total voltage,
    voltage spread, SOC and '其他温度3'.
    """
    cell_volts = [s for s in list(data_set) if '单体电压' in s]
    # BUGFIX: drop the columns actually found instead of reconstructing
    # '单体电压1..n' names, which breaks when numbering is non-contiguous.
    data_set = data_set.drop(cell_volts, axis=1)
    data_set = data_set.drop(['总电压[V]', '单体压差', 'SOC[%]', '其他温度3'],
                             axis=1, errors='ignore')
    return data_set
|
|
|
+
|
|
|
def makescaler_test(scaler, data_test):
    """Scale the feature columns of a test frame with a pre-fitted scaler.

    Timestamp and 'sn' columns are dropped; 'n_split' is carried through
    unscaled so downstream windowing can still group by segment.
    """
    data_test = data_test.reset_index(drop=True)
    features = data_test.drop(['时间戳', 'sn'], axis=1)
    scaled = scaler.transform(np.array(features.drop('n_split', axis=1)))
    scaled = pd.DataFrame(scaled)
    scaled['n_split'] = features['n_split'].values
    return scaled
|
|
|
+
|
|
|
# Sliding window
def create_dataset(data_set, data_train, time_steps=5):
    """Build sliding-window samples per 'n_split' segment.

    For every segment longer than ``time_steps``, emits windows of
    ``time_steps`` consecutive rows (X) and the row that follows each
    window (y), plus an index frame mapping each window back to its
    source metadata.

    Parameters
    ----------
    data_set : DataFrame of scaled features including an 'n_split' column.
    data_train : DataFrame aligned with data_set (same segments), used to
        carry metadata for each emitted window.
    time_steps : window length in rows.

    Returns
    -------
    (X, y, index): X has shape (n_windows, time_steps, n_features),
    y has shape (n_windows, n_features), and index holds one metadata row
    per window with its 'window_step' offset within the segment.

    Note: like the original, raises (np.vstack on an empty list) when no
    segment is longer than time_steps.
    """
    xs, ys = [], []
    index_frames = []
    for k in sorted(set(data_set['n_split'])):
        dataset = data_set[data_set['n_split'] == k]
        datatrain = data_train[data_train['n_split'] == k]
        if len(dataset) > time_steps:
            dataset2 = dataset.reset_index(drop=True)
            dataset = dataset.drop(['n_split'], axis=1)
            dataX, dataY, index_step = [], [], []
            for i in range(len(dataset) - time_steps):
                dataX.append(dataset.iloc[i:(i + time_steps)].values)
                dataY.append(dataset.iloc[i + time_steps])
                index_step.append(i)
            # One metadata row per emitted window (window start offsets).
            dataset3 = dataset2.iloc[:len(dataset2) - time_steps]
            newdatatrain = datatrain[:len(dataset3)].copy()
            newdatatrain['window_step'] = index_step
            xs.append(np.array(dataX))
            ys.append(np.array(dataY))
            index_frames.append(newdatatrain)
    # BUGFIX: DataFrame.append was removed in pandas 2.x; concatenate instead.
    index = pd.concat(index_frames) if index_frames else pd.DataFrame()
    return np.vstack(xs), np.vstack(ys), index
|
|
|
+
|
|
|
def pred(Test, model):
    """Score windowed input by reconstruction error.

    Runs ``model.predict`` on ``Test`` and returns the mean absolute error
    averaged over axis 1 (the time axis of each window), giving one row per
    window and one column per feature.
    """
    reconstruction = model.predict(Test)
    return np.mean(np.abs(reconstruction - Test), axis=1)
|
|
|
+
|
|
|
def ref(test_loss, new_test):
    """Attach per-window loss summaries to the window metadata.

    Pairs every ('n_split', 'window_step') identity with the feature-summed
    and feature-max reconstruction loss of that window.
    """
    summary = new_test[['n_split', 'window_step']].reset_index(drop=True)
    summary['test_loss_sum'] = test_loss.sum(axis=1)
    summary['test_loss_max'] = test_loss.max(axis=1)
    return summary
|
|
|
+
|
|
|
def prediction(df_stand, scaler, scaler2, model, model2):
    """Score standing-segment data with two autoencoders and flag anomalies.

    Scaler/model pair 1 works on the voltage feature view (makedataset),
    pair 2 on the temperature view (makedataset2). Windows whose loss
    exceeds any of the fixed thresholds are returned.
    """
    base = df_stand.drop(
        ['Unnamed: 0', 'index', '总电流[A]', 'SOH[%]',
         'data_split_by_status', 'data_status'],
        axis=1, errors='ignore')

    # Two feature views and their scaled counterparts.
    features_v = makedataset(base)
    features_t = makedataset2(base)
    scaled_v = makescaler_test(scaler, features_v)
    scaled_t = makescaler_test(scaler2, features_t)

    # Sliding windows (length 5) per segment for each view.
    windows_v = create_dataset(scaled_v, features_v, 5)
    windows_t = create_dataset(scaled_t, features_t, 5)
    Test, new_test = windows_v[0], windows_v[2]
    Test2, new_test2 = windows_t[0], windows_t[2]

    # Reconstruction losses and their per-window summaries.
    summary_v = ref(pred(Test, model), new_test)
    summary_t = ref(pred(Test2, model2), new_test2)
    new_test['test_lossV_sum'] = list(summary_v['test_loss_sum'])
    new_test['test_lossV_max'] = list(summary_v['test_loss_max'])
    new_test2['test_lossTemp_sum'] = list(summary_t['test_loss_sum'])
    new_test2['test_lossTemp_max'] = list(summary_t['test_loss_max'])

    # Join both views row-wise and drop the duplicated identity columns.
    res_test = pd.merge(new_test, new_test2, left_index=True, right_index=True,
                        suffixes=('', '_y'))
    res_test = res_test.drop(['sn_y', 'n_split_y', 'window_step_y', '时间戳_y'],
                             axis=1)

    # Threshold the anomaly indices to keep suspicious windows only.
    res = res_test[(res_test['test_lossTemp_sum'] > 5)
                   | (res_test['test_lossV_sum'] > 10)
                   | (res_test['test_lossV_max'] > 4)
                   | (res_test['test_lossTemp_max'] > 2)]
    return res
|
|
|
+
|
|
|
def makeres1(res):
    """Aggregate per-segment maxima of the anomaly scores.

    Returns one row per 'n_split' with the maximum sum/max loss scores of
    the voltage (V) and temperature (T) models, plus the owning product id.
    Auxiliary '其他温度*' columns, when present, are folded into a single
    '最大其他温度' maximum.
    """
    df_res = pd.DataFrame(columns=['product_id', 'n_split',
                                   'AnoScoreV_sum_max', 'AnoScoreV_max_max',
                                   'AnoScoreT_sum_max', 'AnoScoreT_max_max'])
    # Hoist the groupby: all aggregates share the same 'n_split' grouping.
    grouped = res.groupby('n_split')
    v_sum_max = grouped['test_lossV_sum'].max()

    df_res['n_split'] = list(v_sum_max.index)
    df_res['product_id'] = [res[res['n_split'] == k]['sn'].iloc[0]
                            for k in df_res['n_split'].values]
    df_res['AnoScoreV_sum_max'] = list(v_sum_max)
    df_res['AnoScoreV_max_max'] = list(grouped['test_lossV_max'].max())
    df_res['AnoScoreT_sum_max'] = list(grouped['test_lossTemp_sum'].max())
    df_res['AnoScoreT_max_max'] = list(grouped['test_lossTemp_max'].max())

    # Fold auxiliary temperature maxima into one column when present.
    other_temp_cols = [s for s in list(res) if '其他温度' in s]
    if len(other_temp_cols) > 0:
        for col in other_temp_cols:
            df_res[col] = list(grouped[col].max())
        df_res['最大其他温度'] = df_res[other_temp_cols].max(axis=1)
        df_res = df_res.drop(other_temp_cols, axis=1)
    return df_res
|
|
|
+
|
|
|
def makeres2(res):
    """Summarise each anomalous segment into one start/end record.

    Takes the first and the last anomalous window of every 'n_split'
    segment and records their timestamps, product id, SOC and anomaly
    scores. The 'code', 'level', 'info' and 'advice' columns are created
    but left unfilled.
    """
    df_res = pd.DataFrame(columns=[
        'start_time', 'end_time', 'product_id', 'n_split', 'code', 'level',
        'SOC[%]',
        'AnoScoreV_sum_start', 'AnoScoreT_sum_start',
        'AnoScoreV_sum_end', 'AnoScoreT_sum_end',
        'AnoScoreV_max_start', 'AnoScoreT_max_start',
        'AnoScoreV_max_end', 'AnoScoreT_max_end',
        'info', 'advice'])

    # First / last anomalous window of each segment.
    first = res.drop_duplicates(subset=['n_split'], keep='first', inplace=False)
    last = res.drop_duplicates(subset=['n_split'], keep='last', inplace=False)

    # n_split is assigned first: it establishes the row index of df_res.
    df_res['n_split'] = list(res['test_lossV_sum'].groupby(res['n_split']).max().index)
    df_res['start_time'] = list(first['时间戳'].values)
    df_res['end_time'] = list(last['时间戳'].values)
    df_res['product_id'] = list(first['sn'].values)
    df_res['SOC[%]'] = list(first['SOC[%]'].values)
    df_res['AnoScoreV_sum_start'] = list(first['test_lossV_sum'].values)
    df_res['AnoScoreT_sum_start'] = list(first['test_lossTemp_sum'].values)
    df_res['AnoScoreV_sum_end'] = list(last['test_lossV_sum'].values)
    df_res['AnoScoreT_sum_end'] = list(last['test_lossTemp_sum'].values)
    df_res['AnoScoreV_max_start'] = list(first['test_lossV_max'].values)
    df_res['AnoScoreT_max_start'] = list(first['test_lossTemp_max'].values)
    df_res['AnoScoreV_max_end'] = list(last['test_lossV_max'].values)
    df_res['AnoScoreT_max_end'] = list(last['test_lossTemp_max'].values)
    return df_res
|
|
|
+
|
|
|
def difftime(delta):
    """Convert a timedelta to minutes (float)."""
    return delta.total_seconds() / 60
|
|
|
+
|
|
|
def diffmin(res):
    """Duration of each row's segment in minutes.

    Parses the 'start_time'/'end_time' string columns with the
    '%Y-%m-%d %H:%M:%S' format and returns end - start per row, in minutes.
    """
    fmt = '%Y-%m-%d %H:%M:%S'
    starts = [datetime.datetime.strptime(str(s), fmt) for s in res['start_time']]
    ends = [datetime.datetime.strptime(str(e), fmt) for e in res['end_time']]
    # Elementwise difference, expressed in minutes (inlines difftime()).
    deltas = np.array(ends) - np.array(starts)
    return [d.total_seconds() / 60 for d in deltas]
|
|
|
+
|
|
|
def makeres(res, end_time):
    """Merge the per-segment summaries and flag a possibly-open last segment.

    Combines makeres1 (score maxima) with makeres2 (start/end records) on
    'n_split', adds each segment's duration in minutes, and — when the last
    segment ends within 600 seconds of ``end_time`` — overwrites its
    end_time with the '0000-00-00 00:00:00' placeholder (segment may still
    be running).

    Returns (df_res, diff) where diff is the seconds between the last
    segment's end and ``end_time``.
    """
    df_res = pd.merge(makeres1(res), makeres2(res),
                      left_on='n_split', right_on='n_split')
    df_res['diff_min'] = diffmin(df_res)
    df_res.reset_index(drop=True, inplace=True)

    fmt = '%Y-%m-%d %H:%M:%S'
    last_end = datetime.datetime.strptime(
        str(df_res.loc[len(df_res) - 1, 'end_time']), fmt)
    end_time = datetime.datetime.strptime(str(end_time), fmt)
    diff = (end_time - last_end).total_seconds()
    if diff < 600:
        # Placeholder end time for a segment that may still be ongoing.
        df_res.loc[len(df_res) - 1, 'end_time'] = '0000-00-00 00:00:00'
    return df_res, diff
|
|
|
+
|
|
|
def threshold(res, group, end_time):
    """Apply business-rule filters to the per-segment anomaly summary.

    Filters out known false positives (low-SOC undervoltage, PK-series
    benign auxiliary-temperature rises, PK504 full-charge overvoltage) and
    returns the cleaned summary plus the open-segment gap from makeres.
    """
    df_res, diff = makeres(res, end_time)

    # Drop undervoltage caused purely by a low SOC.
    keep = ((df_res['diff_min'] > 60) | (df_res['SOC[%]'] > 10)
            | (df_res['AnoScoreT_sum_max'] > 5) | (df_res['AnoScoreV_sum_max'] > 50)
            | (df_res['AnoScoreV_max_max'] > 9) | (df_res['AnoScoreT_max_max'] > 2))
    df_res = df_res[keep]

    # Drop non-fault auxiliary-temperature rises on the PK series.
    if group in ['PK504', 'PK502', 'PK500']:
        keep = ((df_res['diff_min'] > 20) | (df_res['最大其他温度'] > 80)
                | (df_res['AnoScoreT_sum_max'] > 15) | (df_res['AnoScoreV_sum_max'] > 10)
                | (df_res['AnoScoreV_max_max'] > 4) | (df_res['AnoScoreT_max_max'] > 8))
        df_res = df_res[keep]

    # Drop overvoltage caused by a full charge on PK504 packs.
    if group == 'PK504':
        keep = (((df_res['diff_min'] > 10) & (df_res['AnoScoreV_sum_max'] > 35))
                | (df_res['SOC[%]'] < 93) | (df_res['AnoScoreT_sum_max'] > 5)
                | (df_res['AnoScoreV_max_max'] > 6) | (df_res['AnoScoreT_max_max'] > 2))
        df_res = df_res[keep]

    # Keep only the reporting columns, under their public names.
    df_res = df_res.drop(
        ['n_split', 'product_id_y',
         'AnoScoreV_sum_start', 'AnoScoreV_max_start',
         'AnoScoreT_sum_start', 'AnoScoreT_max_start',
         'AnoScoreV_sum_end', 'AnoScoreT_sum_end',
         'AnoScoreT_max_end', 'AnoScoreV_max_end', '最大其他温度'],
        axis=1, errors='ignore')
    df_res = df_res.rename(columns={'product_id_x': 'product_id',
                                    'SOC[%]': 'SOC'})
    df_res2 = df_res[['product_id', 'start_time', 'end_time', 'diff_min', 'SOC',
                      'AnoScoreV_sum_max', 'AnoScoreV_max_max',
                      'AnoScoreT_sum_max', 'AnoScoreT_max_max']]
    return df_res2, diff
|
|
|
+
|
|
|
def arrange(result, result_final, start_time, diff):
    """Merge the previous run's trailing segment with this run's first one.

    When the new batch's first segment starts within 600 seconds of
    ``start_time``, it is treated as a continuation of ``result_final``:
    the carried-over record absorbs the new end time and accumulated
    duration, and the first row is removed from ``result``.

    Returns the (possibly shortened) result and the updated result_final.
    """
    result = result.reset_index(drop=True)
    fmt = '%Y-%m-%d %H:%M:%S'
    first_start = datetime.datetime.strptime(str(result.loc[0, 'start_time']), fmt)
    start_time = datetime.datetime.strptime(str(start_time), fmt)
    gap = (first_start - start_time).total_seconds()
    if gap < 600:
        result_final['end_time'] = result.loc[0, 'end_time']
        # NOTE(review): gap and diff are in *seconds* while diff_min values
        # are in minutes — the original mixed them the same way; confirm
        # the intended units with the callers.
        result_final['diff_min'] = (result_final['diff_min'] + gap + diff
                                    + result.loc[0, 'diff_min'])
        result = result.drop(0)
    return result, result_final
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|