Parcourir la source

更新两个程序

zhuxi il y a 2 ans
Parent
commit
273515aa42

+ 12 - 2
LIB/MIDDLE/Anomaly_Detection/V1_0_0/anomalyPCA.py

@@ -109,11 +109,21 @@ def prediction(data_test,pca1,pca2):
     pred2=transform(x_test_pro2,pca2,x_test2)
     return pred1,pred2
 
+def boxplot_fill(res2):
+    col=res2['低压差']
+    # Compute the IQR: the 0.75 quantile of the column minus its 0.25 quantile
+    iqr=col.quantile(0.75)-col.quantile(0.25)
+    # Derive the outlier-decision threshold from the IQR
+    u_th=col.quantile(0.75) + 2*iqr # upper bound: 0.75 quantile + 2*IQR
+    return u_th
+
 # Decide which detected outliers count as anomalies
-def check_anomaly(outliers1,outliers2):
+def check_anomaly(outliers1,outliers2,res2):
     outliers=pd.merge(outliers1,outliers2,on='时间')
-    outliers=outliers[outliers['SOC[%]_x']>45]
+    outliers=outliers[outliers['SOC[%]_x']>50]
     outliers=outliers.drop(['总电压[V]_y','单体压差_y','SOC[%]_y'],axis=1)
+    u_th=boxplot_fill(res2)
+    outliers=outliers[outliers['低压差']>u_th]
     return outliers
     
 

+ 2 - 4
LIB/MIDDLE/Anomaly_Detection/V1_0_0/main_detection.py

@@ -55,7 +55,7 @@ for k in range(l):
         df_data = dbManager.get_data(sn=sn, start_time=start_time, end_time=end_time, data_groups=['bms'])
         data_test = df_data['bms']
         data_test=data_test[data_test['SOC[%]']>20]
-        if len(data_test)>0:
+        if len(data_test)>5:
             pca1 = joblib.load('pca1_'+sn+'.m')  
             pca2 = joblib.load('pca2_'+sn+'.m') 
             res1 = pd.read_csv('res1_'+sn+'.csv',encoding='gbk')
@@ -64,12 +64,10 @@ for k in range(l):
             outliers1=detect_outliers(res1,pred1,threshold=30)
             outliers2=detect_outliers(res2,pred2,threshold=16)
             if (len(outliers1)>0) & (len(outliers2)>0):
-                outliers=check_anomaly(outliers1,outliers2)
+                outliers=check_anomaly(outliers1,outliers2,res2)
                 if len(outliers)>5:
-                    outliers.to_csv('outliers'+sn+'.csv',encoding='gbk')
                     outliers['sn']=sn
                     anomalies=anomalies.append(outliers)
-                    anomalies.to_csv('anomalies.csv',encoding='gbk')
                     if df_diag_ram_sn.empty: 
                         product_id=sn
                         start_time=outliers.loc[0,'时间']