anomalyPCA.py 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475
  1. import pandas as pd
  2. import numpy as np
  3. from scipy.signal import savgol_filter
  4. from sklearn.preprocessing import RobustScaler
  5. from sklearn.decomposition import PCA
  6. import matplotlib.pyplot as plt
  7. def makedataset(df_data):
  8. df_data=df_data.drop(['Unnamed: 0','总电流[A]','GSM信号','外电压','单体压差','SOH[%]','开关状态','充电状态','故障等级','故障代码','绝缘电阻','上锁状态','加热状态','单体均衡状态','总输出状态'],axis=1,errors='ignore')
  9. for i in range(1,21):
  10. df_data=df_data[(df_data['单体电压'+str(i)]>2200) & (df_data['单体电压'+str(i)]<4800)]
  11. df_data=df_data[df_data['SOC[%]']>12]
  12. df_data['时间']=[df_data.loc[i,'时间戳'][0:15] for i in df_data.index]
  13. df_data=df_data.groupby('时间').mean()
  14. for k in df_data.columns:
  15. df_data[k]=savgol_filter(df_data[k],3,2)
  16. return df_data
  17. def process(data_set):
  18. features=data_set.columns
  19. sX=RobustScaler(copy=True)
  20. data_set2=data_set.copy()
  21. data_set2.loc[:,features]=sX.fit_transform(data_set2[features])
  22. return data_set2
  23. def anomalyScores(originalDF,reducedDF):
  24. loss=np.sum((np.array(originalDF)-np.array(reducedDF))**2,axis=1)
  25. loss=pd.Series(data=loss,index=originalDF.index)
  26. loss=(loss-np.min(loss))/(np.max(loss)-np.min(loss))
  27. return loss
  28. def anomalyPCA(x_train_pro):
  29. n_components=4
  30. whiten=True
  31. random_state=2
  32. pca=PCA(n_components=n_components,whiten=whiten,random_state=random_state)
  33. pca.fit(x_train_pro)
  34. return pca
  35. def transform(df_data_pro,model,df_data):
  36. #降维
  37. X_train=model.transform(df_data_pro)
  38. X_train=pd.DataFrame(data=X_train,index=df_data_pro.index)
  39. #还原
  40. X_train_inverse=model.inverse_transform(X_train)
  41. X_train_inverse=pd.DataFrame(data=X_train_inverse,index=df_data_pro.index)
  42. #异常指数
  43. anomalyScoresModel=anomalyScores(df_data_pro,X_train_inverse)
  44. anomalyScoresModel=savgol_filter(anomalyScoresModel,15,3)
  45. df_data2=df_data.copy()
  46. df_data2['anomalyScores_'+str(model)]=anomalyScoresModel
  47. return df_data2
  48. def detect_outliers(data,threshold=3):
  49. anomaly=data['anomalyScores_PCA(n_components=4, random_state=2, whiten=True)']
  50. mean_d=np.mean(anomaly.values)
  51. std_d=np.std(anomaly.values)
  52. outliers=pd.DataFrame()
  53. for k in anomaly.index:
  54. z_score= (anomaly[k]-mean_d)/std_d
  55. if np.abs(z_score) >threshold:
  56. outliers=outliers.append(data[anomaly.values==anomaly[k]])
  57. return outliers
  58. def detect_outliers2(data,pred,threshold=3):
  59. anomaly=data['anomalyScores_PCA(n_components=4, random_state=2, whiten=True)']
  60. anomalypred=pred['anomalyScores_PCA(n_components=4, random_state=2, whiten=True)']
  61. mean_d=np.mean(anomaly.values)
  62. std_d=np.std(anomaly.values)
  63. outliers2=pd.DataFrame()
  64. for k in anomalypred.index:
  65. z_score= (anomalypred[k]-mean_d)/std_d
  66. if np.abs(z_score) >threshold:
  67. outliers2=outliers2.append(pred[anomalypred.values==anomalypred[k]])
  68. return outliers2