123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170 |
- import pandas as pd
- import pdb
- from sklearn.ensemble import IsolationForest
- import numpy as np
def preprocess(df):
    """Placeholder for a preprocessing step; currently a no-op.

    Args:
        df: Ignored by the current implementation.

    Returns:
        None.
    """
    return None
def cal_volt_uniform(dfin, volt_column, window=10, step=5, threshold=3):
    """Row-wise z-score of smoothed, down-sampled voltage columns.

    The selected columns are smoothed with a rolling mean over ``window``
    rows, then every ``step``-th row is kept, starting at the first row that
    has a full window. Each kept row is normalised by subtracting that row's
    mean across the selected columns and dividing by that row's standard
    deviation (pandas default, i.e. sample std).

    Args:
        dfin: DataFrame containing a ``'time'`` column and the columns named
            in ``volt_column``. It is not modified.
        volt_column: List of column labels to process (a list is needed so
            the selection is a DataFrame with a column axis).
        window: Rolling-mean window length, in rows.
        step: Keep every ``step``-th smoothed row.
        threshold: Unused; kept for interface compatibility.

    Returns:
        Tuple ``(normalised, time_list)``: the normalised DataFrame with a
        fresh 0..n-1 index, and the list of ``'time'`` values for the kept
        rows. A row whose values are all equal has zero std, so its entries
        come out as NaN.
    """
    time_list = dfin['time'].tolist()[window - 1::step]

    # Selecting with a list of labels already returns a new frame, so the
    # caller's data is never touched and no explicit copy is required.
    smoothed = dfin[volt_column].rolling(window).mean()[window - 1::step]

    row_mean = smoothed.mean(axis=1)
    row_std = smoothed.std(axis=1)
    normalised = smoothed.sub(row_mean, axis=0).div(row_std, axis=0)
    return normalised.reset_index(drop=True), time_list
def cal_voltdiff_uniform(dfin, volt_column, window=10, step=5, window2=10, step2=5, threshold=3):
    """Row-wise z-score of the smoothed absolute voltage change per column.

    Pipeline:
      1. Rolling mean over ``window`` rows, keeping every ``step``-th row
         from the first full window onwards.
      2. Absolute difference between consecutive kept rows (the first row
         has no predecessor and is dropped).
      3. Rolling mean of those differences over ``window2`` rows, keeping
         every ``step2``-th row from the first full window onwards.
      4. Each remaining row is normalised by its own mean and standard
         deviation (pandas default, i.e. sample std) across the columns.

    The time list is trimmed in lockstep with every step above.

    Args:
        dfin: DataFrame containing a ``'time'`` column and the columns named
            in ``volt_column``. It is not modified.
        volt_column: List of column labels to process.
        window: Window length of the first rolling mean, in rows.
        step: Down-sampling stride after the first rolling mean.
        window2: Window length of the second rolling mean, in rows.
        step2: Down-sampling stride after the second rolling mean.
        threshold: Unused; kept for interface compatibility.

    Returns:
        Tuple ``(normalised, time_list)``: the normalised DataFrame with a
        fresh 0..n-1 index, and the matching list of ``'time'`` values.
    """
    time_list = dfin['time'].tolist()[window - 1::step]

    # List-based column selection returns a new frame; every later step also
    # produces new objects, so no defensive copies are needed.
    smoothed = dfin[volt_column].rolling(window).mean()[window - 1::step]

    # Absolute change between consecutive down-sampled rows.
    abs_diff = smoothed.diff()[1:].abs().reset_index(drop=True)
    time_list = time_list[1:]

    diff_smoothed = abs_diff.rolling(window2).mean()[window2 - 1::step2]
    time_list = time_list[window2 - 1::step2]

    row_mean = diff_smoothed.mean(axis=1)
    row_std = diff_smoothed.std(axis=1)
    normalised = diff_smoothed.sub(row_mean, axis=0).div(row_std, axis=0)
    return normalised.reset_index(drop=True), time_list
-
-
-
-
-
-
-
-
def iso_froest(df):
    """Demo: fit an IsolationForest on synthetic data and print predictions.

    The ``df`` argument is not used. Two Gaussian clusters (around +2 and
    -2) are generated with a fixed-seed RandomState as training data, and 20
    uniformly drawn points in [-4, 4) are scored. Both the training matrix
    and the predictions are printed; nothing is returned.
    """
    rng = np.random.RandomState(42)
    cluster = 0.3 * rng.randn(100, 2)
    X_train = np.vstack((cluster + 2, cluster - 2))
    print(X_train)

    X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
    model = IsolationForest(contamination=0.1).fit(X_train)

    print(model.predict(X_outliers))
-
def cal_coff(df):
    """Spearman correlation of z-scored columns plus a per-row summary.

    Columns are z-scored with ``np.mean`` / ``np.std`` along axis 0, rows
    containing any NaN are dropped, and the Spearman correlation matrix of
    the remaining data is computed.

    For every row of that matrix the values are sorted ascending, the two
    smallest and the single largest are discarded (the largest is presumably
    the self-correlation of 1.0 -- TODO confirm intent), and the mean of the
    absolute values of what is left is recorded.

    Args:
        df: Numeric DataFrame, one column per series to correlate.

    Returns:
        Tuple ``(corr_matrix, summary)`` where ``summary`` is a list with
        one value per row of ``corr_matrix``.
    """
    col_mean = np.mean(df, axis=0)
    col_std = np.std(df, axis=0)
    zscores = ((df - col_mean) / col_std).dropna(axis=0, how='any')
    corr_matrix = zscores.corr(method='spearman')

    summary = [
        abs(np.array(sorted(row)[2:-1])).mean()
        for _, row in corr_matrix.iterrows()
    ]
    return corr_matrix, summary
def _collect_outliers(df_scores, threshold):
    """Return (times, cell labels, values) for scores beyond +/- threshold."""
    times = df_scores['time']
    # The time column is metadata, not a score: comparing it with the
    # threshold either raises (string/datetime) or reports bogus outliers.
    scores = df_scores.drop(columns='time')

    out_times, out_cells, out_values = [], [], []
    for column in scores.columns:
        series = scores[column]
        is_outlier = (series > threshold) | (series < -threshold)
        hits = series[is_outlier]
        out_values.extend(hits.tolist())
        out_cells.extend([column] * len(hits))
        out_times.extend(times[is_outlier].tolist())
    return out_times, out_cells, out_values


def instorage(sn, df_voltdiff_result, df_volt_result, threshold=3):
    """Flatten outlying scores from both result frames into one long table.

    Every score strictly greater than ``threshold`` or strictly less than
    ``-threshold`` becomes one output row. Rows from ``df_voltdiff_result``
    come first (type ``'电压变化量离群'``), followed by rows from
    ``df_volt_result`` (type ``'电压离群'``). Within each frame, rows are
    ordered column by column, then by row order.

    Args:
        sn: Identifier copied into the ``'sn'`` column of every output row.
        df_voltdiff_result: DataFrame with a ``'time'`` column plus one score
            column per cell.
        df_volt_result: DataFrame with the same layout.
        threshold: Absolute score above which a value is reported.

    Returns:
        DataFrame with columns ``['sn', 'time', 'cellnum', 'value', 'type']``.

    Raises:
        KeyError: If either input frame has no ``'time'`` column.
    """
    diff_times, diff_cells, diff_values = _collect_outliers(df_voltdiff_result, threshold)
    volt_times, volt_cells, volt_values = _collect_outliers(df_volt_result, threshold)

    values = diff_values + volt_values
    types = ['电压变化量离群'] * len(diff_values) + ['电压离群'] * len(volt_values)

    return pd.DataFrame(
        {
            'sn': [sn] * len(values),
            'time': diff_times + volt_times,
            'cellnum': diff_cells + volt_cells,
            'value': values,
            'type': types,
        },
        columns=['sn', 'time', 'cellnum', 'value', 'type'],
    )
def alarm(dfin, volt_column, alarm_window=10, alarm_ratio=0.8, alarm_threshold=2.5):
    """Raise per-column alarms when scores stay beyond a threshold.

    For each column, the number of rows exceeding the threshold is counted
    over a rolling window of ``alarm_window`` rows. An alarm is recorded for
    the first row at which that count is strictly greater than
    ``alarm_window * alarm_ratio``.

    Two alarm kinds are produced, all type ``'1'`` rows first:
      * ``'1'``: score strictly greater than ``alarm_threshold``;
      * ``'2'``: score strictly less than ``-alarm_threshold``.

    A window containing a missing score never alarms, because its rolling
    count is NaN.

    Args:
        dfin: DataFrame with a ``'time'`` column and the score columns.
        volt_column: List of score column labels to check.
        alarm_window: Rolling window length, in rows.
        alarm_ratio: Fraction of the window that must be exceeded.
        alarm_threshold: Absolute score threshold.

    Returns:
        DataFrame with columns ``['type', 'num', 'alarm_time']``, one row per
        (alarm kind, column) that triggered; empty if nothing triggered.
    """
    time_list = dfin['time'].tolist()
    scores = dfin[volt_column]
    min_hits = alarm_window * alarm_ratio
    present = scores.notna()

    exceedances = (
        ('1', scores > alarm_threshold),
        ('2', scores < -alarm_threshold),
    )

    rows = []
    for alarm_type, exceeded in exceedances:
        # Boolean flags give a clean 0/1 count (a score equal to the
        # threshold counts as 0); missing scores are put back as NaN so a
        # window containing one cannot trigger.
        counts = exceeded.astype(float).where(present).rolling(alarm_window).sum()
        for column in volt_column:
            # Positional lookup keeps time_list aligned for any frame index.
            positions = np.flatnonzero((counts[column] > min_hits).to_numpy())
            if positions.size:
                rows.append({
                    'type': alarm_type,
                    'num': column,
                    'alarm_time': time_list[positions[0]],
                })

    # DataFrame.append was removed in pandas 2.0; build the frame in one go.
    return pd.DataFrame(rows, columns=['type', 'num', 'alarm_time'])
|