|
@@ -0,0 +1,49 @@
|
|
|
+from sklearn.decomposition import PCA
|
|
|
+import pandas as pd
|
|
|
+from sklearn.preprocessing import StandardScaler
|
|
|
+import numpy as np
|
|
|
+import matplotlib.pyplot as plt
|
|
|
+import seaborn as sns
|
|
|
+from sklearn.decomposition import SparsePCA
|
|
|
+import datetime
|
|
|
+from scipy.signal import savgol_filter
|
|
|
+
|
|
|
+#异常指数
|
|
|
def anomalyScores(originalDF, reducedDF):
    """Return a min-max normalised reconstruction error for every row.

    The error of a row is the sum of squared differences between the
    original values and the reconstructed values of that row.  The errors
    are then rescaled so that the smallest becomes 0 and the largest 1.

    Args:
        originalDF: DataFrame holding the original feature values; its
            index is reused as the index of the result.
        reducedDF: Array-like with the same shape as ``originalDF`` holding
            the reconstructed values.

    Returns:
        A ``pandas.Series`` indexed like ``originalDF``.  When every row has
        the same error (for example a perfect reconstruction) all scores are
        0.0 instead of the NaN that a 0/0 normalisation would produce.
    """
    squared_error = (np.asarray(originalDF) - np.asarray(reducedDF)) ** 2
    loss = pd.Series(data=squared_error.sum(axis=1), index=originalDF.index)

    lowest = loss.min()
    spread = loss.max() - lowest
    if spread == 0:
        # Constant error: min-max scaling is undefined (0/0), so report
        # "no row is more anomalous than another".  Subtracting keeps any
        # NaN rows as NaN rather than hiding them.
        return (loss - lowest).astype(float)
    return (loss - lowest) / spread
|
|
|
+
|
|
|
+
|
|
|
+#判定异常
|
|
|
def anomaly(df_data, *, n_components=20, alpha=0.0001, random_state=10001,
            n_jobs=-1, window_length=101, polyorder=3):
    """Attach a smoothed SparsePCA reconstruction-error score to grouped data.

    Rows are grouped by the first 13 characters of the ``'时间戳'``
    (timestamp) column and averaged, the averaged features are standardised,
    projected with SparsePCA and mapped back to feature space.  The
    normalised reconstruction error of each group is smoothed with a
    Savitzky-Golay filter and stored in ``'anomalyScoressparsePCA'``.

    Args:
        df_data: DataFrame with a string ``'时间戳'`` column plus numeric
            feature columns.  NOTE(review): the 13-character prefix
            presumably corresponds to ``YYYY-MM-DD HH`` (hourly buckets) --
            confirm against the data source.
        n_components: Number of sparse components to extract.
        alpha: Sparsity penalty passed to ``SparsePCA``.
        random_state: Seed passed to ``SparsePCA``.
        n_jobs: Parallelism passed to ``SparsePCA``.
        window_length: Desired Savitzky-Golay window.  It is clamped to the
            number of groups and forced to be odd so that short inputs do
            not raise.
        polyorder: Polynomial order of the Savitzky-Golay filter.

    Returns:
        The grouped (per time bucket, mean of numeric columns) DataFrame with
        an extra ``'anomalyScoressparsePCA'`` column.  The caller's frame is
        not modified.
    """
    # Preprocessing: discard the columns that are not used as features.
    unused_columns = [
        'Unnamed: 0', 'GSM信号', '外电压', '单体压差', 'SOH[%]', '开关状态',
        '充电状态', '故障等级', '故障代码', '绝缘电阻', '上锁状态', '加热状态',
        '单体均衡状态', '总输出状态',
    ]
    df_data = df_data.drop(unused_columns, axis=1, errors='ignore')

    # Vectorised prefix extraction; unlike positional ``.loc[i, ...]`` lookups
    # this does not depend on the index being 0..n-1.
    df_data['时间'] = df_data['时间戳'].str[0:13]

    # numeric_only=True keeps the string timestamp column from raising on
    # pandas >= 2.0 (older versions dropped it silently).
    df_data = df_data.groupby('时间').mean(numeric_only=True)

    data_set = df_data.drop(['时间戳', '时间'], axis=1, errors='ignore')
    scaler = StandardScaler(copy=True)
    data_set = pd.DataFrame(
        scaler.fit_transform(data_set),
        index=data_set.index,
        columns=data_set.columns,
    )

    # Sparse PCA: project onto the sparse components ...
    sparse_pca = SparsePCA(
        n_components=n_components,
        alpha=alpha,
        random_state=random_state,
        n_jobs=n_jobs,
    )
    sparse_pca.fit(data_set)
    projected = sparse_pca.transform(data_set)

    # ... and map the projection back to the original feature space.
    reconstructed = (
        np.asarray(projected).dot(sparse_pca.components_)
        + np.asarray(data_set.mean(axis=0))
    )
    reconstructed = pd.DataFrame(data=reconstructed, index=data_set.index)

    # Anomaly score: normalised reconstruction error per time bucket.
    scores = np.asarray(anomalyScores(data_set, reconstructed))

    # savgol_filter needs an odd window that is no longer than the data and
    # strictly greater than polyorder; shrink it for short series and skip
    # smoothing when no valid window exists.
    window = min(window_length, len(scores))
    if window % 2 == 0:
        window -= 1
    if window > polyorder:
        scores = savgol_filter(scores, window, polyorder)

    df_data['anomalyScoressparsePCA'] = scores

    return df_data
|
|
|
+
|
|
|
+
|