"""Outlier (deviation) detection helpers for per-cell voltage time series."""

import pdb  # kept: debugging import present in the original file

import numpy as np
import pandas as pd

try:
    from sklearn.ensemble import IsolationForest
except ImportError:  # scikit-learn is only needed by iso_froest()
    IsolationForest = None


def preprocess(df):
    """Placeholder for charging-process preprocessing (not implemented).

    Intended to filter out samples where the voltage goes up and then down
    between neighbouring points (sampling anomalies). Currently a no-op that
    returns ``None``.
    """
    pass


def _smooth_and_downsample(frame, window, step):
    """Return the rolling mean of *frame*, keeping every *step*-th full window."""
    return frame.rolling(window).mean().iloc[window - 1::step]


def _row_zscore(frame):
    """Standardize each row of *frame* by that row's mean and sample std."""
    row_mean = frame.mean(axis=1)
    row_std = frame.std(axis=1)
    return frame.sub(row_mean, axis=0).div(row_std, axis=0)


def cal_volt_uniform(dfin, volt_column, window=10, step=5, threshold=3):
    """Compute how far each voltage column deviates from the others.

    The voltage columns are smoothed with a rolling mean, down-sampled, and
    each remaining row is z-scored across the columns.

    Args:
        dfin: DataFrame with a ``'time'`` column and the voltage columns.
        volt_column: list of voltage column names.
        window: rolling-mean window length, in rows.
        step: keep one smoothed row every ``step`` rows.
        threshold: unused; accepted for interface compatibility.

    Returns:
        ``(deviation, time_list)``: the z-scored DataFrame (index reset) and
        the list of ``'time'`` values aligned with its rows.
    """
    smoothed = _smooth_and_downsample(dfin[volt_column], window, step)
    time_list = dfin['time'].tolist()[window - 1::step]
    deviation = _row_zscore(smoothed).reset_index(drop=True)
    return deviation, time_list


def cal_voltdiff_uniform(dfin, volt_column, window=10, step=5,
                         window2=10, step2=5, threshold=3):
    """Compute how far each column's voltage *change* deviates from the others.

    Pipeline: rolling mean + down-sampling of the voltages, absolute
    difference between consecutive smoothed rows, a second rolling mean +
    down-sampling of those differences, then a row-wise z-score.

    Args:
        dfin: DataFrame with a ``'time'`` column and the voltage columns.
        volt_column: list of voltage column names.
        window, step: smoothing window / stride applied to the voltages.
        window2, step2: smoothing window / stride applied to the differences.
        threshold: unused; accepted for interface compatibility.

    Returns:
        ``(deviation, time_list)``: the z-scored DataFrame (index reset) and
        the list of ``'time'`` values aligned with its rows.
    """
    smoothed = _smooth_and_downsample(dfin[volt_column], window, step)
    time_list = dfin['time'].tolist()[window - 1::step]

    # Absolute change between consecutive smoothed rows; the first row has no
    # predecessor, so it is dropped together with its timestamp.
    abs_diff = smoothed.diff().iloc[1:].abs().reset_index(drop=True)
    time_list = time_list[1:]

    # The raw differences are deliberately not normalized here; only the
    # smoothed differences below are z-scored.
    diff_smoothed = _smooth_and_downsample(abs_diff, window2, step2)
    time_list = time_list[window2 - 1::step2]

    deviation = _row_zscore(diff_smoothed).reset_index(drop=True)
    return deviation, time_list


def iso_froest(df):
    """Run a small Isolation Forest demo on synthetic data.

    NOTE: ``df`` is not used. The function trains on generated points,
    prints the training data and the predictions for 20 uniformly drawn
    points, and returns ``None``.

    Raises:
        ImportError: if scikit-learn is not installed.
    """
    if IsolationForest is None:
        raise ImportError("scikit-learn is required for iso_froest()")

    # 1. Generate training data: two clusters of 100 rows x 2 columns each.
    rng = np.random.RandomState(42)
    X = 0.3 * rng.randn(100, 2)
    X_train = np.r_[X + 2, X - 2]
    print(X_train)

    # 2. Generate some anomalous points.
    X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))

    iForest = IsolationForest(contamination=0.1)
    iForest = iForest.fit(X_train)

    # 3. Predict and print the labels.
    pred = iForest.predict(X_outliers)
    print(pred)


def cal_coff(df):
    """Compute the Spearman correlation matrix and a per-column summary score.

    Columns are standardized (population std, ``ddof=0``), rows containing
    any NaN are dropped, and the Spearman correlation matrix is computed.
    For each row of that matrix the two smallest values and the largest one
    (the self-correlation of 1) are discarded, and the mean absolute value of
    the rest is reported.

    Args:
        df: numeric DataFrame, one column per series.

    Returns:
        ``(corr, result)``: the correlation DataFrame and a list with one
        score per column (NaN when fewer than four columns are present).
    """
    col_mean = df.mean(axis=0)
    col_std = df.std(axis=0, ddof=0)
    zscore = ((df - col_mean) / col_std).dropna(axis=0, how='any')
    corr = zscore.corr(method='spearman')

    result = []
    for _, row in corr.iterrows():
        trimmed = sorted(row)[2:-1]  # drop the two smallest values and the 1
        result.append(np.abs(np.array(trimmed)).mean())
    return corr, result


def _collect_outliers(df_result, threshold):
    """Return (values, column names, times) of entries beyond +/- *threshold*.

    The ``'time'`` column is used only for time lookup and is excluded from
    the comparison. Entries are listed column by column, in row order.
    """
    times = df_result['time']
    cells = df_result.drop(columns='time')
    values, cellnums, stamps = [], [], []
    for column in cells.columns:
        series = cells[column]
        hits = series[(series > threshold) | (series < -threshold)]
        values.extend(hits.tolist())
        cellnums.extend([column] * len(hits))
        stamps.extend(times.loc[label] for label in hits.index)
    return values, cellnums, stamps


def instorage(sn, df_voltdiff_result, df_volt_result, threshold=3):
    """Flatten the out-of-range deviation entries into one record table.

    Args:
        sn: identifier written to the ``'sn'`` column of every record.
        df_voltdiff_result: voltage-change deviation frame with a ``'time'``
            column plus one column per cell.
        df_volt_result: voltage deviation frame with the same layout.
        threshold: an entry is reported when it is ``> threshold`` or
            ``< -threshold`` (default 3, the previously hard-coded value).

    Returns:
        DataFrame with columns ``['sn', 'time', 'cellnum', 'value', 'type']``;
        voltage-change records come first, then voltage records.
    """
    diff_values, diff_cells, diff_times = _collect_outliers(
        df_voltdiff_result, threshold)
    volt_values, volt_cells, volt_times = _collect_outliers(
        df_volt_result, threshold)

    value_list = diff_values + volt_values
    type_list = (['电压变化量离群'] * len(diff_values)
                 + ['电压离群'] * len(volt_values))

    return pd.DataFrame(
        {
            'sn': [sn] * len(value_list),
            'time': diff_times + volt_times,
            'cellnum': diff_cells + volt_cells,
            'value': value_list,
            'type': type_list,
        },
        columns=['sn', 'time', 'cellnum', 'value', 'type'],
    )


def alarm(dfin, volt_column, alarm_window=10, alarm_ratio=0.8,
          alarm_threshold=2.5):
    """Raise an alarm when a column stays beyond the threshold within a window.

    For each column, the number of points beyond the threshold is counted
    over a rolling window of ``alarm_window`` rows. The first row at which
    that count is greater than ``alarm_window * alarm_ratio`` produces one
    alarm record. Values equal to the threshold, and NaN values, do not count
    as exceeding it.

    Args:
        dfin: DataFrame with a ``'time'`` column and the deviation columns.
        volt_column: list of deviation column names to check.
        alarm_window: rolling window length, in rows.
        alarm_ratio: fraction of the window that must be exceeded.
        alarm_threshold: deviation magnitude that counts as an excursion.

    Returns:
        DataFrame with columns ``['type', 'num', 'alarm_time']``. ``type`` is
        ``'1'`` for values above ``+alarm_threshold`` and ``'2'`` for values
        below ``-alarm_threshold``; ``num`` is the column name. All type
        ``'1'`` records precede the type ``'2'`` records.
    """
    time_list = dfin['time'].tolist()
    deviation = dfin[volt_column]
    min_hits = alarm_window * alarm_ratio

    directions = (
        ('1', deviation > alarm_threshold),
        ('2', deviation < -alarm_threshold),
    )

    rows = []
    for alarm_type, exceeded in directions:
        hit_count = exceeded.astype(float).rolling(alarm_window).sum()
        for column in volt_column:
            # Positional offsets keep the lookup aligned with time_list even
            # when dfin does not carry a default 0..n-1 index.
            positions = np.flatnonzero((hit_count[column] > min_hits).to_numpy())
            if positions.size:
                rows.append({
                    'type': alarm_type,
                    'num': column,
                    'alarm_time': time_list[positions[0]],
                })
    return pd.DataFrame(rows, columns=['type', 'num', 'alarm_time'])