# analysis.py 15 KB
# NOTE: `import datetime` must stay AFTER `from datetime import datetime`.
# The script calls `datetime.datetime.strptime` and `datetime.timedelta`, so
# the name `datetime` has to end up bound to the module, not to the class.
from datetime import datetime
import datetime
import json
import re

import numpy as np
import pandas as pd
from scipy.interpolate import interp1d
from tqdm import tqdm

from ZlwlAlgosCommon.utils.ProUtils import *
from ZlwlAlgosCommon.service.iotp.IotpAlgoService import IotpAlgoService
from ZlwlAlgosCommon.service.iotp.Beans import DataField
from ZlwlAlgosCommon.orm.models import *
from Data_Cleaning_oop_thr import DataCleaning
  12. input_file_list = '/home/limingze/zlwl-algos/USER/limingze/Inconsistency/algo_soh.csv'
  13. output_path = '/home/limingze/zlwl-algos/USER/limingze/Inconsistency/Data/'
  14. t_device_path = '/home/limingze/zlwl-algos/USER/limingze/Inconsistency/t_device.csv'
  15. pack_param_path = '/home/limingze/zlwl-algos/USER/limingze/Inconsistency/algo_pack_param.csv'
  16. history_file = '/home/limingze/zlwl-algos/USER/limingze/Inconsistency/Data/update.csv'
  17. df_in_list = pd.read_csv(input_file_list)
  18. t_device = pd.read_csv(t_device_path)
  19. pack_param = pd.read_csv(pack_param_path)
  20. history_df = pd.read_csv(history_file)
  21. CellVoltNums_list = []
  22. for a in tqdm(range(len(df_in_list))):
  23. sn = df_in_list['sn'][a]
  24. pack_model_a = t_device.loc[t_device['sn'] == sn, 'pack_model'].values[0]
  25. pack_param_a = pack_param.loc[pack_param['pack_code'] == pack_model_a, 'param'].values[0]
  26. CellVoltNums_a = eval(json.loads(pack_param_a)['CellVoltTotalCount'])
  27. CellVoltNums_list.append(CellVoltNums_a)
  28. CellVoltNums = max(CellVoltNums_list)
  29. df_cell_soh = df_in_list['cellsoh'].apply(lambda x : pd.Series(eval(x)[:CellVoltNums]))
  30. cellsoh_name=['cell_soh'+str(x) for x in range(1, CellVoltNums+1)]
  31. df_soh_mmm_name = ['cell_soh_max','cell_soh_min','cell_soh_diff','cell_soh_mean']
  32. soh_name = cellsoh_name + df_soh_mmm_name
  33. df_soh = pd.DataFrame(columns=soh_name, index=df_in_list.index)
  34. cell_soc_seg_begin_chr_name = ['cell_soc_begin_chr'+str(x) for x in range(1, CellVoltNums+1)]
  35. cell_soc_seg_stop_chr_name = ['cell_soc_stop_chr'+str(x) for x in range(1, CellVoltNums+1)]
  36. cell_soc_seg_begin_dis_name = ['cell_soc_begin_dis'+str(x) for x in range(1, CellVoltNums+1)]
  37. cell_soc_seg_stop_dis_name = ['cell_soc_stop_dis'+str(x) for x in range(1, CellVoltNums+1)]
  38. pack_soc_seg_name = ['pack_soc_begin_chr','pack_soc_stop_chr','pack_soc_begin_dis','pack_soc_stop_dis']
  39. cell_soc_mmm_name = ['cell_soc_begin_chr_max','cell_soc_begin_chr_min','cell_soc_begin_chr_diff','cell_soc_begin_chr_mean',
  40. 'cell_soc_begin_dis_max','cell_soc_begin_dis_min','cell_soc_begin_dis_diff','cell_soc_begin_dis_mean']
  41. soc_name = cell_soc_seg_begin_chr_name + cell_soc_seg_stop_chr_name + cell_soc_seg_begin_dis_name + cell_soc_seg_stop_dis_name + pack_soc_seg_name + cell_soc_mmm_name
  42. soc_name.append('dod')
  43. df_soc_seg = pd.DataFrame(columns=soc_name, index=df_in_list.index)
  44. cell_delta_energy_chr_name = ['cell_delta_energy_chr'+str(x) for x in range(1, CellVoltNums+1)]
  45. cell_delta_energy_dis_name = ['cell_delta_energy_dis'+str(x) for x in range(1, CellVoltNums+1)]
  46. delta_energy_name = cell_delta_energy_chr_name + cell_delta_energy_dis_name + ['pack_delta_energy_chr'] + ['pack_delta_energy_dis']
  47. df_energy = pd.DataFrame(columns=delta_energy_name, index=df_in_list.index)
  48. cell_res_name = ['cell_res'+str(x) for x in range(1, CellVoltNums+1)]
  49. res_name = cell_res_name + ['cell_res_mean'] + ['cell_res_diff']
  50. df_res = pd.DataFrame(columns=res_name, index=df_in_list.index)
  51. for i in tqdm(range(len(df_in_list))):
  52. try:
  53. sn = df_in_list['sn'][i]
  54. begin_time = datetime.datetime.strptime(df_in_list['time_st'][i], '%Y-%m-%d %H:%M:%S').strftime('%Y-%m-%d %H:%M:%S')
  55. stop_time = datetime.datetime.strptime(df_in_list['time_sp'][i], '%Y-%m-%d %H:%M:%S').strftime('%Y-%m-%d %H:%M:%S')
  56. start_time = datetime.datetime.strptime(df_in_list['time_st'][i].split()[0] + " 00:00:00", '%Y-%m-%d %H:%M:%S').strftime('%Y-%m-%d %H:%M:%S')
  57. end_time = datetime.datetime.strptime(df_in_list['time_sp'][i].split()[0] + " 23:59:59", '%Y-%m-%d %H:%M:%S').strftime('%Y-%m-%d %H:%M:%S')
  58. cur_env = 'dev' # 设置运行环境
  59. app_path = "/home/limingze/zlwl-algos/" # 设置app绝对路径
  60. sysUtils = SysUtils(cur_env, app_path)
  61. hbase_params = sysUtils.get_cf_param('hbase-datafactory')
  62. iotp_service = IotpAlgoService(hbase_params=hbase_params)
  63. columns = [ DataField.time, DataField.sn, DataField.pack_crnt, DataField.pack_volt, DataField.pack_soc,
  64. DataField.cell_voltage, DataField.cell_temp, DataField.bms_sta, DataField.cell_voltage_count]
  65. df_data = iotp_service.get_data(sn_list=[sn], columns=columns, start_time=start_time, end_time=end_time)
  66. data_clean = DataCleaning(df_data)
  67. data_clean = data_clean.revise_status_codes()
  68. data_clean['time'] = pd.to_datetime(data_clean['time'])
  69. df_begin_charge_timestamp = data_clean[data_clean['time'] > stop_time]
  70. df_begin_charge_timestamp['bms_sta_change'] = df_begin_charge_timestamp['bms_sta'].shift(-1) - df_begin_charge_timestamp['bms_sta']
  71. first_change_index = df_begin_charge_timestamp[(df_begin_charge_timestamp['bms_sta_change'] == 18) | (df_begin_charge_timestamp['bms_sta_change'] == 21)].index.min()
  72. # data_clean.to_csv(output_path + str(sn) + '_' + str(start_time).split()[0] + '_' + str(end_time).split()[0] + '_all.csv', encoding="utf_8_sig", index=False)
  73. j = 1
  74. while pd.isna(first_change_index) & (j <= 6):
  75. print("当前时间段内没有找到放电截止时刻,正在扩大数据范围.........")
  76. end_time = (datetime.datetime.strptime(df_in_list['time_sp'][i].split()[0], '%Y-%m-%d') + datetime.timedelta(days=j)).strftime('%Y-%m-%d') + " 23:59:59"
  77. df_data = iotp_service.get_data(sn_list=[sn], columns=columns, start_time=start_time, end_time=end_time)
  78. data_clean = DataCleaning(df_data)
  79. data_clean = data_clean.revise_status_codes()
  80. data_clean['time'] = pd.to_datetime(data_clean['time'])
  81. df_begin_charge_timestamp = data_clean[data_clean['time'] > stop_time]
  82. df_begin_charge_timestamp['bms_sta_change'] = df_begin_charge_timestamp['bms_sta'].shift(-1) - df_begin_charge_timestamp['bms_sta']
  83. first_change_index = df_begin_charge_timestamp[(df_begin_charge_timestamp['bms_sta_change'] == 18) | (df_begin_charge_timestamp['bms_sta_change'] == 21)].index.min()
  84. j += 1
  85. result_df = data_clean[data_clean['time'].between(begin_time, df_begin_charge_timestamp.loc[first_change_index, 'time'])]
  86. result_df = result_df.reset_index(drop=True)
  87. # result_df.to_csv(output_path + str(sn) + '_' + str(start_time).split()[0] + '_' + str(end_time).split()[0] + '.csv', encoding="utf_8_sig", index=False)
  88. if j <= 6:
  89. print('找到完整充放电片段')
  90. # 提取充放电片段信息
  91. pack_model_i = t_device.loc[t_device['sn'] == sn, 'pack_model'].values[0]
  92. pack_param_i = pack_param.loc[pack_param['pack_code'] == pack_model_i, 'param'].values[0]
  93. charge_ocv_v = re.findall(r'\d+(?:\.\d+)?', pack_param_i.split("[")[1])
  94. charge_ocv_v_list = [float(n) for n in charge_ocv_v]
  95. charge_ocv_soc = re.findall(r'\d+(?:\.\d+)?', pack_param_i.split("[")[2])
  96. charge_ocv_soc_list = [float(n) for n in charge_ocv_soc]
  97. cap = eval(json.loads(pack_param_i)['capacity'])
  98. cellvolt_name=['cell_voltage'+str(x) for x in range(1, CellVoltNums+1)]
  99. df_volt = result_df['cell_voltage'].apply(lambda x : pd.Series(list(x)[:CellVoltNums]))
  100. df_volt.columns = cellvolt_name[0:df_volt.shape[1]]
  101. df_volt = df_volt.astype('float')
  102. cellvoltmax = df_volt.max(axis=1)
  103. cellvoltmin = df_volt.min(axis=1)
  104. len_cell_volt = df_volt.shape[1]
  105. for p in range(1,CellVoltNums+1):
  106. if not ('cell_voltage' + str(p)) in df_volt:
  107. df_volt[('cell_voltage' + str(p))] = None
  108. result_df = pd.concat([result_df, df_volt],axis=1)
  109. df_volt[['cell_volt_max','cell_volt_min']] = pd.concat([cellvoltmax,cellvoltmin], axis=1)
  110. cellsoc_name = ['cell_soc'+str(x) for x in range(1, CellVoltNums+1)]
  111. df_soc = pd.DataFrame(columns=cellsoc_name, index=df_volt.index)
  112. interp_func = interp1d(charge_ocv_v_list, charge_ocv_soc_list)
  113. for ii in range(len(result_df)):
  114. for j in range(len_cell_volt):
  115. mask1 = (result_df['cell_voltage'+str(j)].values >= charge_ocv_v_list[0]) & (result_df['cell_voltage'+str(j)].values <= charge_ocv_v_list[-1])
  116. mask2 = result_df['cell_voltage'+str(j)].values < charge_ocv_v_list[0]
  117. mask3 = result_df['cell_voltage'+str(j)].values > charge_ocv_v_list[-1]
  118. result_df.loc[mask1, 'cell_soc'+str(j)] = interp_func(result_df['cell_volt'+str(j)].values[mask1])
  119. result_df.loc[mask2, 'cell_soc'+str(j)] = charge_ocv_soc_list[0]
  120. result_df.loc[mask3, 'cell_soc'+str(j)] = charge_ocv_soc_list[-1]
  121. # interp_func = interp1d(charge_ocv_v_list, charge_ocv_soc_list)
  122. # if (result_df['cell_voltage'+str(j + 1)][ii] >= charge_ocv_v_list[0]) & (result_df['cell_voltage'+str(j + 1)][ii] <= charge_ocv_v_list[-1]):
  123. # df_soc.iloc[ii,j] = interp_func(result_df['cell_voltage'+str(j + 1)][ii])
  124. # elif result_df['cell_voltage'+str(j + 1)][ii] < charge_ocv_v_list[0]:
  125. # df_soc.iloc[ii,j] = charge_ocv_soc_list[0]
  126. # elif result_df['cell_voltage'+str(j + 1)][ii] > charge_ocv_v_list[-1]:
  127. # df_soc.iloc[ii,j] = charge_ocv_soc_list[-1]
  128. cellsocmax = df_soc.max(axis=1)
  129. cellsocmin = df_soc.min(axis=1)
  130. cellsocmean = df_soc.mean(axis=1)
  131. cellsocdiff = cellsocmax - cellsocmin
  132. df_soc[['cell_soc_max','cell_soc_min','cell_soc_diff','cell_soc_mean']] = pd.concat([cellsocmax,cellsocmin,cellsocmean,cellsocdiff], axis=1)
  133. result_df = pd.concat([result_df, df_soc],axis=1)
  134. # result_df.to_csv(output_path + str(sn) + '_' + str(start_time).split()[0] + '_' + str(end_time).split()[0] + '.csv', encoding="utf_8_sig", index=False)
  135. # 计算能量变化
  136. # 记录soh不一致性指标
  137. df_soh.loc[i,cellsoh_name] = df_cell_soh.iloc[i].values
  138. df_soh.loc[i,'cell_soh_max'] = df_cell_soh.iloc[i].max()
  139. df_soh.loc[i,'cell_soh_min'] = df_cell_soh.iloc[i].min()
  140. df_soh.loc[i,'cell_soh_mean'] = df_cell_soh.iloc[i].mean()
  141. df_soh.loc[i,'cell_soh_diff'] = df_cell_soh.iloc[i].max() - df_cell_soh.iloc[i].min()
  142. # 记录soc不一致性指标
  143. df_soc_seg.loc[i,cell_soc_seg_begin_chr_name] = result_df[result_df['time'] == df_in_list['time_st'][i]][cellsoc_name].values[0]
  144. df_soc_seg.loc[i,cell_soc_seg_stop_chr_name] = result_df[result_df['time'] == df_in_list['time_sp'][i]][cellsoc_name].values[0]
  145. df_soc_seg.loc[i,cell_soc_seg_begin_dis_name] = result_df[result_df['time'] == df_in_list['time_sp'][i]][cellsoc_name].values[0]
  146. df_soc_seg.loc[i,cell_soc_seg_stop_dis_name] = result_df[cellsoc_name].iloc[-1].values[0]
  147. df_soc_seg.loc[i,'pack_soc_begin_chr'] = result_df[result_df['time'] == df_in_list['time_st'][i]]['pack_soc'].values[0]
  148. df_soc_seg.loc[i,'pack_soc_stop_chr'] = result_df[result_df['time'] == df_in_list['time_sp'][i]]['pack_soc'].values[0]
  149. df_soc_seg.loc[i,'pack_soc_begin_dis'] = result_df[result_df['time'] == df_in_list['time_sp'][i]]['pack_soc'].values[0]
  150. df_soc_seg.loc[i,'pack_soc_stop_dis'] = result_df['pack_soc'].iloc[-1]
  151. df_soc_seg.loc[i,'cell_soc_begin_chr_max'] = df_soc_seg.iloc[i,0:CellVoltNums].max()
  152. df_soc_seg.loc[i,'cell_soc_begin_chr_min'] = df_soc_seg.iloc[i,0:CellVoltNums].min()
  153. df_soc_seg.loc[i,'cell_soc_begin_chr_diff'] = df_soc_seg['cell_soc_begin_chr_max'][i] - df_soc_seg['cell_soc_begin_chr_min'][i]
  154. df_soc_seg.loc[i,'cell_soc_begin_chr_mean'] = df_soc_seg.iloc[i,0:CellVoltNums].mean()
  155. df_soc_seg.loc[i,'cell_soc_begin_dis_max'] = df_soc_seg.iloc[i,CellVoltNums:2*CellVoltNums].max()
  156. df_soc_seg.loc[i,'cell_soc_begin_dis_min'] = df_soc_seg.iloc[i,CellVoltNums:2*CellVoltNums].min()
  157. df_soc_seg.loc[i,'cell_soc_begin_dis_diff'] = df_soc_seg['cell_soc_begin_dis_max'][i] - df_soc_seg['cell_soc_begin_dis_min'][i]
  158. df_soc_seg.loc[i,'cell_soc_begin_dis_mean'] = df_soc_seg.iloc[i,CellVoltNums:2*CellVoltNums].mean()
  159. df_soc_seg.loc[i,'dod'] = df_soc_seg['pack_soc_begin_dis'][i] - df_soc_seg['pack_soc_stop_dis'][i]
  160. df_energy.loc[i,cell_delta_energy_chr_name] = (((df_soc_seg.loc[i,cell_soc_seg_stop_chr_name].reset_index(drop=True)- df_soc_seg.loc[i,cell_soc_seg_begin_chr_name].reset_index(drop=True)) * cap) / CellVoltNums).values
  161. df_energy.loc[i,cell_delta_energy_dis_name] = (((df_soc_seg.loc[i,cell_soc_seg_begin_dis_name].reset_index(drop=True) - df_soc_seg.loc[i,cell_soc_seg_stop_dis_name].reset_index(drop=True)) * cap) / CellVoltNums).values
  162. df_energy.loc[i,'pack_delta_energy_chr'] = (df_soc_seg.loc[i,'pack_soc_stop_chr'] - df_soc_seg.loc[i,'pack_soc_begin_chr']) * cap
  163. df_energy.loc[i,'pack_delta_energy_dis'] = (df_soc_seg.loc[i,'pack_soc_stop_dis'] - df_soc_seg.loc[i,'pack_soc_begin_dis']) * cap
  164. #记录内阻不一致指标
  165. df_dis = result_df[result_df['time'].between(stop_time, df_begin_charge_timestamp.loc[first_change_index, 'time'])]
  166. df_dis = df_dis[df_dis['bms_sta'] == 3]
  167. df_dis = df_dis.reset_index(drop=True)
  168. df_fenduan = [df_dis[i:i+30] for i in range(0, len(df_dis), 30)]
  169. df_res.loc[i,cell_res_name] = np.mean(np.array([(df_fenduan[i][cellvolt_name].iloc[0]-df_fenduan[i][cellvolt_name].iloc[-1]).div(df_fenduan[i]['pack_crnt'].mean()).values for i in range(len(df_fenduan))]), axis=0)
  170. # df_res.loc[i,cell_res_name] = (df_dis[cellvolt_name].iloc[0]-df_dis[cellvolt_name].iloc[-1]).div(df_dis['pack_crnt'].mean()).values
  171. df_res.loc[i,'cell_res_mean'] = df_res.loc[i,cell_res_name].mean()
  172. df_res.loc[i,'cell_res_diff'] = df_res.loc[i,cell_res_name].max() - df_res.loc[i,cell_res_name].min()
  173. df_final = pd.concat([df_in_list, df_soh, df_soc_seg, df_energy, df_res],axis=1)
  174. df_final.to_csv(output_path + 'update.csv', encoding="utf_8_sig", index=False)
  175. except Exception as e:
  176. continue
  177. df_final = pd.concat([df_in_list, df_soh, df_soc_seg, df_energy, df_res],axis=1)
  178. df_final.to_csv(output_path + '_all.csv', encoding="utf_8_sig", index=False)