sta.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. import pandas as pd
  2. import pdb
  3. from sklearn.ensemble import IsolationForest
  4. import numpy as np
  5. # 计算充电过程
  6. def preprocess(df):
  7. # 滤除前后电压存在一增一减的情况(采样异常)
  8. pass
  9. # 计算电压的偏离度
  10. def cal_volt_uniform(dfin, volt_column, window=10, step=5, threshold=3):
  11. df = dfin.copy()
  12. time_list = dfin['time'].tolist()
  13. # 电压滤波
  14. df_volt = df[volt_column]
  15. df_volt_rolling = df_volt.rolling(window).mean()[window-1::step] # 滑动平均值
  16. time_list = time_list[window-1::step]
  17. # 电压偏离度
  18. mean = df_volt_rolling.mean(axis=1)
  19. std = df_volt_rolling.std(axis=1)
  20. # mean = [np.array(sorted(x)[1:-1]).mean() for x in df_volt_rolling.values]
  21. # std = [np.array(sorted(x)[1:-1]).std() for x in df_volt_rolling.values]
  22. df_volt_rolling_norm = df_volt_rolling.sub(mean, axis=0).div(std,axis=0)
  23. df_volt_rolling_norm = df_volt_rolling_norm.reset_index(drop=True)
  24. return df_volt_rolling_norm, time_list
  25. # 计算电压变化量的偏离度
  26. def cal_voltdiff_uniform(dfin, volt_column, window=10, step=5, window2=10, step2=5,threshold=3):
  27. df = dfin.copy()
  28. time_list = dfin['time'].tolist()
  29. # 电压滤波
  30. df_volt = df[volt_column]
  31. df_volt_rolling = df_volt.rolling(window).mean()[window-1::step] # 滑动平均值
  32. time_list = time_list[window-1::step]
  33. # 计算电压变化量的绝对值(# 计算前后的差值的绝对值, 时间列-1)
  34. df_volt_diff = abs(df_volt_rolling.diff()[1:])
  35. df_volt_diff = df_volt_diff.reset_index(drop=True)
  36. time_list = time_list[1:]
  37. # 压差归一化(偏离度)
  38. # mean = df_volt_diff.mean(axis=1)
  39. # std = df_volt_diff.std(axis=1)
  40. # df_voltdiff_norm = df_volt_diff.sub(mean, axis=0).div(std,axis=0)
  41. df_voltdiff_norm = df_volt_diff.copy()
  42. # 压差偏离度滑动平均滤波
  43. df_voltdiff_rolling = df_voltdiff_norm.rolling(window2).mean()[window2-1::step2] # 滑动平均值
  44. time_list = time_list[window2-1::step2]
  45. mean = df_voltdiff_rolling.mean(axis=1)
  46. std = df_voltdiff_rolling.std(axis=1)
  47. # mean = [np.array(sorted(x)[1:-1]).mean() for x in df_voltdiff_rolling.values]
  48. # std = [np.array(sorted(x)[1:-1]).std() for x in df_voltdiff_rolling.values]
  49. df_voltdiff_rolling_norm = df_voltdiff_rolling.sub(mean, axis=0).div(std,axis=0)
  50. df_voltdiff_rolling_norm = df_voltdiff_rolling_norm.reset_index(drop=True)
  51. return df_voltdiff_rolling_norm, time_list
  52. # this_alarm = {}
  53. # df_alarm = df_voltdiff_rolling_norm[abs(df_voltdiff_rolling_norm)>threshold].dropna(how='all')
  54. # for index in df_alarm.index:
  55. # df_temp = df_alarm.loc[index, :].dropna(how='all', axis=0)
  56. # this_alarm.update({df_cell_volt.loc[index+1, 'date']:[str(df_temp.keys().tolist()), str([round(x, 2) for x in df_temp.values.tolist()])]})
  57. # df_alarm1 = pd.DataFrame(this_alarm)
  58. # return pd.DataFrame(df_alarm1.values.T, index=df_alarm1.columns, columns=df_alarm1.index), pd.DataFrame(df_alarm2.values.T, index=df_alarm2.columns, columns=df_alarm2.index)
  59. # 孤立森林算法
  60. def iso_froest(df):
  61. #1. 生成训练数据
  62. rng = np.random.RandomState(42)
  63. X = 0.3 * rng.randn(100, 2) #生成100 行,2 列
  64. X_train = np.r_[X + 2, X - 2]
  65. print(X_train)
  66. # 产生一些异常数据
  67. X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
  68. iForest= IsolationForest(contamination=0.1)
  69. iForest = iForest.fit(X_train)
  70. #预测
  71. pred = iForest.predict(X_outliers)
  72. print(pred)
  73. # [-1 1 -1 -1 -1 -1 -1 1 -
  74. # 计算相关系数
  75. def cal_coff(df):
  76. cc_mean = np.mean(df, axis=0) #axis=0,表示按列求均值 ——— 即第一维
  77. cc_std = np.std(df, axis=0)
  78. cc_zscore = (df-cc_mean)/cc_std #标准化
  79. cc_zscore = cc_zscore.dropna(axis=0, how='any')
  80. cc_zscore_corr = cc_zscore.corr(method='spearman')
  81. result = []
  82. for i in range(len(cc_zscore_corr)):
  83. v = abs(np.array((sorted(cc_zscore_corr.iloc[i]))[2:-1])).mean() # 去掉1 和两个最小值后求均值
  84. result.append(v)
  85. return cc_zscore_corr, result
  86. def instorage(sn, df_voltdiff_result, df_volt_result):
  87. df_all_result = pd.DataFrame(columns=['sn', 'time', 'cellnum', 'value', 'type'])
  88. value_list = []
  89. cellnum_list = []
  90. time_list = []
  91. type_list = []
  92. df_result = df_voltdiff_result.copy().drop(columns='time')
  93. time_list_1 = df_voltdiff_result['time']
  94. df_result = df_result[(df_result>3) | (df_result<-3)].dropna(axis=0, how='all').dropna(axis=1, how='all')
  95. for column in df_result.columns:
  96. df = df_result[[column]].dropna(axis=0, how='all')
  97. value_list.extend(df[column].tolist())
  98. cellnum_list.extend([column]*len(df))
  99. time_list.extend([time_list_1[x] for x in df.index])
  100. length_1 = len(value_list)
  101. df_result = df_volt_result.copy().drop(columns='time')
  102. time_list_2 = df_volt_result['time']
  103. df_result = df_result[(df_result>3) | (df_result<-3)].dropna(axis=0, how='all').dropna(axis=1, how='all')
  104. for column in df_result.columns:
  105. df = df_result[[column]].dropna(axis=0, how='all')
  106. value_list.extend(df[column].tolist())
  107. cellnum_list.extend([column]*len(df))
  108. time_list.extend([time_list_2[x] for x in df.index])
  109. length_2 = len(value_list) - length_1
  110. type_list.extend(['电压变化量离群'] * length_1)
  111. type_list.extend(['电压离群'] * length_2)
  112. df_all_result['sn'] = [sn] * len(value_list)
  113. df_all_result['cellnum'] = cellnum_list
  114. df_all_result['value'] = value_list
  115. df_all_result['time'] = time_list
  116. df_all_result['type'] = type_list
  117. return df_all_result
  118. # 报警.如果在某个窗口内,有超过ratio个的点,偏离度超过threshold, 则报警
  119. def alarm(dfin, volt_column, alarm_window=10, alarm_ratio=0.8, alarm_threshold=2.5):
  120. time_list = dfin['time'].tolist()
  121. df_result = dfin[volt_column].copy()
  122. alarm_result = pd.DataFrame(columns=['type', 'num', 'alarm_time'])
  123. df_result_1 = df_result.copy()
  124. df_result_1[df_result_1<alarm_threshold] = 0
  125. df_result_1[df_result_1>alarm_threshold] = 1
  126. df_result_1 = df_result_1.rolling(alarm_window).sum()
  127. for column in volt_column:
  128. if len(df_result_1[df_result_1[column]>alarm_window * alarm_ratio])>0:
  129. alarm_result = alarm_result.append({'type':'1', 'num':column, 'alarm_time':time_list[df_result_1[df_result_1[column]>alarm_window * alarm_ratio].index[0]]}, ignore_index=True)
  130. # time_list = time_list[window-1::step]
  131. df_result_2 = df_result.copy()
  132. df_result_2[df_result_2>-alarm_threshold] = 0
  133. df_result_2[df_result_2<-alarm_threshold] = 1
  134. df_result_2 = df_result_2.rolling(alarm_window).sum()
  135. for column in volt_column:
  136. if len(df_result_2[df_result_2[column]>alarm_window * alarm_ratio])>0:
  137. alarm_result = alarm_result.append({'type':'2', 'num':column, 'alarm_time':time_list[df_result_2[df_result_2[column]>alarm_window * alarm_ratio].index[0]]}, ignore_index=True)
  138. return alarm_result