Browse Source

Merge branch 'dev' of http://git.fast-fun.cn:92/lmstack/data_analyze_platform into dev

zhuxi 3 years ago
parent
commit
4f1311e53b

+ 169 - 0
LIB/MIDDLE/OutlierDetection/VoltOutlier/V_1_0_0/sta.py

@@ -0,0 +1,169 @@
+import pandas as pd
+import pdb
+from sklearn.ensemble import IsolationForest
+import numpy as np
+
+# 计算充电过程
+def preprocess(df):
+    # 滤除前后电压存在一增一减的情况(采样异常)
+    pass
+
+# 计算电压的偏离度    
+def cal_volt_uniform(dfin, volt_column, window=10, step=5, threshold=3):
+    
+    df = dfin.copy()
+    time_list = dfin['time'].tolist()
+
+    # 电压滤波
+    df_volt = df[volt_column] 
+    df_volt_rolling = df_volt.rolling(window).mean()[window-1::step]  # 滑动平均值
+    time_list = time_list[window-1::step] 
+
+    # 电压偏离度
+    mean = df_volt_rolling.mean(axis=1)
+    std = df_volt_rolling.std(axis=1)
+    df_volt_rolling_norm = df_volt_rolling.sub(mean, axis=0).div(std,axis=0)
+    df_volt_rolling_norm = df_volt_rolling_norm.reset_index(drop=True)
+    return df_volt_rolling_norm, time_list
+
+
+# 计算电压变化量的偏离度    
+def cal_voltdiff_uniform(dfin, volt_column, window=10, step=5, window2=10, step2=5,threshold=3):
+    
+    df = dfin.copy()
+    time_list = dfin['time'].tolist()
+
+    # 电压滤波
+    df_volt = df[volt_column] 
+    df_volt_rolling = df_volt.rolling(window).mean()[window-1::step]  # 滑动平均值
+    time_list = time_list[window-1::step] 
+
+    # 计算电压变化量的绝对值(# 计算前后的差值的绝对值,  时间列-1)
+    df_volt_diff = abs(df_volt_rolling.diff()[1:])
+    df_volt_diff = df_volt_diff.reset_index(drop=True)
+    time_list = time_list[1:]
+
+    # 压差归一化(偏离度)
+    # mean = df_volt_diff.mean(axis=1)
+    # std = df_volt_diff.std(axis=1)
+    # df_voltdiff_norm = df_volt_diff.sub(mean, axis=0).div(std,axis=0)
+    df_voltdiff_norm = df_volt_diff.copy()
+
+    # 压差偏离度滑动平均滤波
+    df_voltdiff_rolling = df_voltdiff_norm.rolling(window2).mean()[window2-1::step2]  # 滑动平均值
+    time_list = time_list[window2-1::step2] 
+    mean = df_voltdiff_rolling.mean(axis=1)
+    std = df_voltdiff_rolling.std(axis=1)
+    df_voltdiff_rolling_norm = df_voltdiff_rolling.sub(mean, axis=0).div(std,axis=0)
+    df_voltdiff_rolling_norm = df_voltdiff_rolling_norm.reset_index(drop=True)
+    return df_voltdiff_rolling_norm, time_list
+
+
+
+    # this_alarm = {}
+    # df_alarm = df_voltdiff_rolling_norm[abs(df_voltdiff_rolling_norm)>threshold].dropna(how='all')
+    # for index in df_alarm.index:
+    #     df_temp = df_alarm.loc[index, :].dropna(how='all', axis=0)
+    #     this_alarm.update({df_cell_volt.loc[index+1, 'date']:[str(df_temp.keys().tolist()), str([round(x, 2) for x in df_temp.values.tolist()])]})
+    # df_alarm1 = pd.DataFrame(this_alarm)
+
+
+    # return pd.DataFrame(df_alarm1.values.T, index=df_alarm1.columns, columns=df_alarm1.index), pd.DataFrame(df_alarm2.values.T, index=df_alarm2.columns, columns=df_alarm2.index)
+
+    # 孤立森林算法
+    def iso_froest(df):
+
+        #1. 生成训练数据
+        rng = np.random.RandomState(42)
+        X = 0.3 * rng.randn(100, 2) #生成100 行,2 列
+        X_train = np.r_[X + 2, X - 2]
+        print(X_train)
+        # 产生一些异常数据
+        X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
+        iForest= IsolationForest(contamination=0.1)
+        iForest = iForest.fit(X_train)
+        #预测
+        pred = iForest.predict(X_outliers)
+        print(pred)
+        # [-1 1 -1 -1 -1 -1 -1 1 -
+
+# 计算相关系数
+def cal_coff(df):
+    cc_mean = np.mean(df, axis=0)  #axis=0,表示按列求均值 ——— 即第一维
+    cc_std = np.std(df, axis=0)
+    cc_zscore = (df-cc_mean)/cc_std   #标准化
+    cc_zscore = cc_zscore.dropna(axis=0, how='any')
+
+    cc_zscore_corr = cc_zscore.corr(method='spearman')
+    
+    result = []
+    for i in range(len(cc_zscore_corr)):
+        v = abs(np.array((sorted(cc_zscore_corr.iloc[i]))[2:-1])).mean() # 去掉1 和两个最小值后求均值
+        result.append(v)
+    return cc_zscore_corr, result
+
+
+def instorage(sn, df_voltdiff_result, df_volt_result):
+    
+    df_all_result = pd.DataFrame(columns=['sn', 'time', 'cellnum', 'value', 'type'])
+    
+    value_list = []
+    cellnum_list = []
+    time_list = []
+    type_list = []
+    df_result = df_voltdiff_result.copy().drop(columns='time')
+    time_list_1 = df_voltdiff_result['time']
+    df_result = df_result[(df_result>3) | (df_result<-3)].dropna(axis=0, how='all').dropna(axis=1, how='all')
+    for column in df_result.columns:
+        df = df_result[[column]].dropna(axis=0, how='all')
+        value_list.extend(df[column].tolist())
+        cellnum_list.extend([column]*len(df))
+        time_list.extend([time_list_1[x] for x in df.index])
+    length_1 = len(value_list)
+    
+
+
+    df_result = df_volt_result.copy().drop(columns='time')
+    time_list_2 = df_volt_result['time']
+    df_result = df_result[(df_result>3) | (df_result<-3)].dropna(axis=0, how='all').dropna(axis=1, how='all')
+    for column in df_result.columns:
+        df = df_result[[column]].dropna(axis=0, how='all')
+        value_list.extend(df[column].tolist())
+        cellnum_list.extend([column]*len(df))
+        time_list.extend([time_list_2[x] for x in df.index])
+
+    length_2 = len(value_list) - length_1
+    type_list.extend(['电压变化量离群'] * length_1)
+    type_list.extend(['电压离群'] * length_2)
+    df_all_result['sn'] = [sn] * len(value_list)
+    df_all_result['cellnum'] = cellnum_list
+    df_all_result['value'] = value_list
+    df_all_result['time'] = time_list
+    df_all_result['type'] = type_list
+    return df_all_result
+
+# 报警.如果在某个窗口内,有超过ratio个的点,偏离度超过threshold, 则报警
+def alarm(dfin, volt_column, alarm_window=10, alarm_ratio=0.8, alarm_threshold=2.5):
+
+    
+    time_list = dfin['time'].tolist()
+    df_result = dfin[volt_column].copy()
+    alarm_result = pd.DataFrame(columns=['type', 'num', 'alarm_time'])      
+    df_result_1 = df_result.copy()
+    df_result_1[df_result_1<alarm_threshold] = 0
+    df_result_1[df_result_1>alarm_threshold] = 1    
+    df_result_1 = df_result_1.rolling(alarm_window).sum()
+    for column in volt_column:
+        if len(df_result_1[df_result_1[column]>alarm_window * alarm_ratio])>0:
+            alarm_result = alarm_result.append({'type':'1', 'num':column, 'alarm_time':time_list[df_result_1[df_result_1[column]>alarm_window * alarm_ratio].index[0]]}, ignore_index=True)
+
+    # time_list = time_list[window-1::step] 
+
+    df_result_2 = df_result.copy()
+    df_result_2[df_result_2>-alarm_threshold] = 0
+    df_result_2[df_result_2<-alarm_threshold] = 1
+    df_result_2 = df_result_2.rolling(alarm_window).sum()
+    for column in volt_column:
+        if len(df_result_2[df_result_2[column]>alarm_window * alarm_ratio])>0:
+            alarm_result = alarm_result.append({'type':'2', 'num':column, 'alarm_time':time_list[df_result_2[df_result_2[column]>alarm_window * alarm_ratio].index[0]]}, ignore_index=True)
+    return alarm_result

File diff suppressed because it is too large
+ 36 - 0
LIB/MIDDLE/OutlierDetection/VoltOutlier/main.ipynb


+ 0 - 52
LIB/MIDDLE/Stray/VoltageStray/V_1_0_0/VoltageStray.py

@@ -1,52 +0,0 @@
-import pandas as pd
-
-# Flags cells whose voltage strays from the other cells of the same sample.
-class VoltageStray:
-    def __init__(self):  # parameter initialisation (nothing to set up)
-        pass
-
-    # Returns a DataFrame with columns 'alarm_time' and 'alarm_cell', one row
-    # per sample in which at least one cell has |z-score| > 1.
-    # NOTE(review): `alarm_threshold` is never read; the cut-off is the
-    # literal 1 below. When `df` is empty the method falls through and
-    # returns None.
-    def cal_vol_uniform(self, df, roll=1, alarm_threshold=5):
-        volt_column = [x for x in df.columns if "单体电压" in x]
-        all_columns = volt_column.copy()
-        all_columns.append("时间戳")
-        df = df[all_columns]
-        df_filt = df.copy()
-        if len(df_filt) > 0:
-
-            # df_filt = df_t.dropna(how='all')
-            df_filt = df_filt.reset_index(drop=True)
-            volt_count = len(volt_column)-1  # not used afterwards
-            volt_column = volt_column
-            df_cell_volt = df_filt[all_columns]
-            df_cell_volt = df_cell_volt.dropna().reset_index(drop=True)
-            
-
-            # Inspect the cell voltages
-            df_rolling = df_cell_volt.loc[:, volt_column].rolling(roll, center=True, min_periods=1).mean()  # moving average
-
-            # df_temp_change = df_temp_change.dropna().reset_index(drop=True)
-            # df_temp_change_2 = df_temp_change_2.dropna().reset_index(drop=True)
-
-            # Row-wise z-score: each cell against the mean/std of all cells.
-            mean = df_rolling.mean(axis=1)
-            std = df_rolling.std(axis=1)
-            df_rolling_norm = df_rolling.sub(mean, axis=0).div(std,axis=0)
-
-            # (A debug plot of df_rolling_norm, saved as a PNG, used to be
-            # commented out here.)
-
-            all_alarm = {'alarm_time':[], 'alarm_cell':[]}
-            # Keep only the rows that contain at least one |z-score| > 1.
-            df_alarm = df_rolling_norm[abs(df_rolling_norm)>1].dropna(how='all')
-            for index in df_alarm.index:
-                print(index)
-                df_temp = df_alarm.loc[index, :].dropna(how='all', axis=0)
-                all_alarm['alarm_time'].append(df_cell_volt.loc[index, '时间戳'])
-                alarm_cell = df_temp.keys().tolist()
-                # x[4:] drops the first four characters of each column name.
-                all_alarm['alarm_cell'].append(str([x[4:] for x in alarm_cell]))
-            df_alarm = pd.DataFrame(all_alarm)
-            return df_alarm

+ 0 - 15
LIB/MIDDLE/Stray/VoltageStray/main.py

@@ -1,15 +0,0 @@
-# Fetch the data
-import sys
-from LIB.BACKEND import DBManager
-from LIB.MIDDLE.Stray.VoltageStray.V_1_0_0 import VoltageStray
-
-# Serial number and time range of the query.
-sn = "MGMCLN750N215N116"
-st = '2021-09-17 21:33:07'
-et = '2021-09-17 21:35:07'
-
-dbManager = DBManager.DBManager()
-df_data = dbManager.get_data(sn=sn, start_time=st, end_time=et, data_groups=['bms'])
-# Run the stray-voltage check on the 'bms' data group and print the alarms.
-df_bms = df_data['bms']
-voltageStray = VoltageStray.VoltageStray();
-print(voltageStray.cal_vol_uniform(df_bms, 1, 5))

Some files were not shown because too many files changed in this diff