123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203 |
# NOTE: `import datetime` must stay AFTER `from datetime import datetime`.
# The script calls `datetime.datetime.strptime(...)`, so the name `datetime`
# has to end up bound to the module, not to the class.
from datetime import datetime
import datetime
# `json` and `re` are used by the script; presumably they were only reachable
# through the star imports below, so import them explicitly.
import json
import re

import numpy as np
import pandas as pd
from scipy.interpolate import interp1d
from tqdm import tqdm

# Project imports are kept in their original relative order because the star
# imports may rebind names and the last binding wins.
from ZlwlAlgosCommon.utils.ProUtils import *
from ZlwlAlgosCommon.service.iotp.IotpAlgoService import IotpAlgoService
from ZlwlAlgosCommon.service.iotp.Beans import DataField
from ZlwlAlgosCommon.orm.models import *
from Data_Cleaning_oop_thr import DataCleaning
# ---------------------------------------------------------------------------
# Input tables. Each absolute path is declared next to the frame loaded from it.
# ---------------------------------------------------------------------------

# Records to process; the script reads the columns `sn`, `cellsoh`,
# `time_st` and `time_sp` from this table.
input_file_list = '/home/limingze/zlwl-algos/USER/limingze/Inconsistency/algo_soh.csv'
df_in_list = pd.read_csv(input_file_list)

# Device table: maps `sn` to `pack_model`.
t_device_path = '/home/limingze/zlwl-algos/USER/limingze/Inconsistency/t_device.csv'
t_device = pd.read_csv(t_device_path)

# Pack parameter table: maps `pack_code` to a JSON `param` string.
pack_param_path = '/home/limingze/zlwl-algos/USER/limingze/Inconsistency/algo_pack_param.csv'
pack_param = pd.read_csv(pack_param_path)

# Directory prefix that every result file name is appended to.
output_path = '/home/limingze/zlwl-algos/USER/limingze/Inconsistency/Data/'

# Result of a previous run. NOTE(review): not referenced again in the visible
# part of the script -- confirm whether it is still needed.
history_file = '/home/limingze/zlwl-algos/USER/limingze/Inconsistency/Data/update.csv'
history_df = pd.read_csv(history_file)
# ---------------------------------------------------------------------------
# Find the largest cell count among the packs referenced by the input records.
# All per-cell column lists built later are sized with this value.
# ---------------------------------------------------------------------------
CellVoltNums_list = []
for sn in tqdm(df_in_list['sn']):
    # sn -> pack model -> JSON parameter string of that pack model.
    device_rows = t_device[t_device['sn'] == sn]
    pack_model_a = device_rows['pack_model'].values[0]
    param_rows = pack_param[pack_param['pack_code'] == pack_model_a]
    pack_param_a = param_rows['param'].values[0]
    # NOTE(review): eval() runs text taken from a CSV file; replace it with a
    # plain numeric conversion if the field is always a number.
    CellVoltNums_a = eval(json.loads(pack_param_a)['CellVoltTotalCount'])
    CellVoltNums_list.append(CellVoltNums_a)
CellVoltNums = max(CellVoltNums_list)
# ---------------------------------------------------------------------------
# Column layouts and empty result tables (one row per input record).
# ---------------------------------------------------------------------------
def _numbered(prefix):
    """Return [prefix1, prefix2, ..., prefix<CellVoltNums>]."""
    return [f'{prefix}{cell}' for cell in range(1, CellVoltNums + 1)]


def _parse_cell_soh(raw):
    """Turn one `cellsoh` string into a Series of at most CellVoltNums values."""
    # NOTE(review): eval() runs text taken from a CSV file.
    return pd.Series(eval(raw)[:CellVoltNums])


# Per-cell SOH values parsed from the input table.
df_cell_soh = df_in_list['cellsoh'].apply(_parse_cell_soh)

# --- SOH table ---
cellsoh_name = _numbered('cell_soh')
df_soh_mmm_name = ['cell_soh_max', 'cell_soh_min', 'cell_soh_diff', 'cell_soh_mean']
soh_name = cellsoh_name + df_soh_mmm_name
df_soh = pd.DataFrame(index=df_in_list.index, columns=soh_name)

# --- SOC table: per-cell SOC at the begin/stop of charge and discharge ---
cell_soc_seg_begin_chr_name = _numbered('cell_soc_begin_chr')
cell_soc_seg_stop_chr_name = _numbered('cell_soc_stop_chr')
cell_soc_seg_begin_dis_name = _numbered('cell_soc_begin_dis')
cell_soc_seg_stop_dis_name = _numbered('cell_soc_stop_dis')
pack_soc_seg_name = [
    'pack_soc_begin_chr',
    'pack_soc_stop_chr',
    'pack_soc_begin_dis',
    'pack_soc_stop_dis',
]
cell_soc_mmm_name = [
    'cell_soc_begin_chr_max',
    'cell_soc_begin_chr_min',
    'cell_soc_begin_chr_diff',
    'cell_soc_begin_chr_mean',
    'cell_soc_begin_dis_max',
    'cell_soc_begin_dis_min',
    'cell_soc_begin_dis_diff',
    'cell_soc_begin_dis_mean',
]
soc_name = (
    cell_soc_seg_begin_chr_name
    + cell_soc_seg_stop_chr_name
    + cell_soc_seg_begin_dis_name
    + cell_soc_seg_stop_dis_name
    + pack_soc_seg_name
    + cell_soc_mmm_name
    + ['dod']
)
df_soc_seg = pd.DataFrame(index=df_in_list.index, columns=soc_name)

# --- Energy table ---
cell_delta_energy_chr_name = _numbered('cell_delta_energy_chr')
cell_delta_energy_dis_name = _numbered('cell_delta_energy_dis')
delta_energy_name = (
    cell_delta_energy_chr_name
    + cell_delta_energy_dis_name
    + ['pack_delta_energy_chr', 'pack_delta_energy_dis']
)
df_energy = pd.DataFrame(index=df_in_list.index, columns=delta_energy_name)

# --- Resistance table ---
cell_res_name = _numbered('cell_res')
res_name = cell_res_name + ['cell_res_mean', 'cell_res_diff']
df_res = pd.DataFrame(index=df_in_list.index, columns=res_name)
# ---------------------------------------------------------------------------
# Per-record feature extraction.
#
# For every row of df_in_list the loop
#   1. loads the device data for the days spanned by time_st .. time_sp,
#   2. looks for the first bms_sta jump of 18 or 21 after time_sp, widening
#      the query window by one day at a time (at most MAX_EXTRA_DAYS times),
#   3. converts the cell voltages to SOC through the pack's OCV table,
#   4. fills row i of df_soh / df_soc_seg / df_energy / df_res,
#   5. writes a checkpoint CSV.
# A record that fails is reported and skipped; the run continues.
# ---------------------------------------------------------------------------
MAX_EXTRA_DAYS = 6       # maximum number of one-day extensions of the query window
RES_SEGMENT_ROWS = 30    # rows per discharge segment used for the resistance estimate
TIME_FORMAT = '%Y-%m-%d %H:%M:%S'


def _load_clean_data(service, sn, columns, start_time, end_time):
    """Fetch one device's data for a time window and pass it through DataCleaning."""
    raw = service.get_data(sn_list=[sn], columns=columns, start_time=start_time, end_time=end_time)
    cleaned = DataCleaning(raw).revise_status_codes()
    cleaned['time'] = pd.to_datetime(cleaned['time'])
    return cleaned


def _find_discharge_end(data_clean, stop_time):
    """Return (rows after stop_time, index label of the first 18/21 bms_sta jump).

    The label is NaN when no such jump exists in the loaded window.
    """
    # .copy() so that adding a column does not write into a view of data_clean.
    after_stop = data_clean[data_clean['time'] > stop_time].copy()
    after_stop['bms_sta_change'] = after_stop['bms_sta'].shift(-1) - after_stop['bms_sta']
    # NOTE(review): 18 and 21 are taken as-is from the original script;
    # presumably they encode the status transition that ends the discharge.
    is_jump = (after_stop['bms_sta_change'] == 18) | (after_stop['bms_sta_change'] == 21)
    return after_stop, after_stop[is_jump].index.min()


def _voltage_to_soc(volts, ocv_volts, ocv_socs):
    """Map voltages to SOC by linear interpolation on the OCV table.

    Voltages below the first table entry get the first SOC, voltages above
    the last entry get the last SOC, and NaN voltages stay NaN.
    """
    interp_func = interp1d(ocv_volts, ocv_socs)
    volts = np.asarray(volts, dtype=float)
    soc = np.full(volts.shape, np.nan)
    in_range = (volts >= ocv_volts[0]) & (volts <= ocv_volts[-1])
    soc[in_range] = interp_func(volts[in_range])
    soc[volts < ocv_volts[0]] = ocv_socs[0]
    soc[volts > ocv_volts[-1]] = ocv_socs[-1]
    return soc


# Loop-invariant setup: service handle, queried fields, per-cell column names.
cur_env = 'dev'  # runtime environment
app_path = "/home/limingze/zlwl-algos/"  # absolute application path
sysUtils = SysUtils(cur_env, app_path)
hbase_params = sysUtils.get_cf_param('hbase-datafactory')
iotp_service = IotpAlgoService(hbase_params=hbase_params)
columns = [DataField.time, DataField.sn, DataField.pack_crnt, DataField.pack_volt, DataField.pack_soc,
           DataField.cell_voltage, DataField.cell_temp, DataField.bms_sta, DataField.cell_voltage_count]
cellvolt_name = ['cell_voltage' + str(x) for x in range(1, CellVoltNums + 1)]
cellsoc_name = ['cell_soc' + str(x) for x in range(1, CellVoltNums + 1)]

for i in tqdm(range(len(df_in_list))):
    sn = df_in_list['sn'][i]
    try:
        time_st = df_in_list['time_st'][i]
        time_sp = df_in_list['time_sp'][i]
        # Exact segment boundaries (parsed and re-formatted to validate them).
        begin_time = datetime.datetime.strptime(time_st, TIME_FORMAT).strftime(TIME_FORMAT)
        stop_time = datetime.datetime.strptime(time_sp, TIME_FORMAT).strftime(TIME_FORMAT)
        # Query window: whole days covering the segment.
        start_time = datetime.datetime.strptime(time_st.split()[0] + " 00:00:00", TIME_FORMAT).strftime(TIME_FORMAT)
        end_time = datetime.datetime.strptime(time_sp.split()[0] + " 23:59:59", TIME_FORMAT).strftime(TIME_FORMAT)

        data_clean = _load_clean_data(iotp_service, sn, columns, start_time, end_time)
        df_begin_charge_timestamp, first_change_index = _find_discharge_end(data_clean, stop_time)

        # Widen the window one day at a time until the jump is found.
        for extra_days in range(1, MAX_EXTRA_DAYS + 1):
            if not pd.isna(first_change_index):
                break
            print("当前时间段内没有找到放电截止时刻,正在扩大数据范围.........")
            end_time = (datetime.datetime.strptime(time_sp.split()[0], '%Y-%m-%d')
                        + datetime.timedelta(days=extra_days)).strftime('%Y-%m-%d') + " 23:59:59"
            data_clean = _load_clean_data(iotp_service, sn, columns, start_time, end_time)
            df_begin_charge_timestamp, first_change_index = _find_discharge_end(data_clean, stop_time)

        # Decide on the search result itself, not on a retry counter: a hit on
        # the last retry is valid, and a miss must not reach .loc[NaN].
        if pd.isna(first_change_index):
            print(f"record {i} (sn={sn}): no discharge end found after "
                  f"{MAX_EXTRA_DAYS} window extensions, skipped")
            continue

        discharge_end_time = df_begin_charge_timestamp.loc[first_change_index, 'time']
        result_df = data_clean[data_clean['time'].between(begin_time, discharge_end_time)]
        result_df = result_df.reset_index(drop=True)
        print('找到完整充放电片段')

        # ---- pack parameters: OCV table and capacity ----
        pack_model_i = t_device.loc[t_device['sn'] == sn, 'pack_model'].values[0]
        pack_param_i = pack_param.loc[pack_param['pack_code'] == pack_model_i, 'param'].values[0]
        # NOTE(review): the OCV table is read positionally, as the numbers after
        # the 1st and 2nd '[' of the raw JSON text -- fragile if the JSON layout
        # changes.
        charge_ocv_v_list = [float(n) for n in re.findall(r'\d+(?:\.\d+)?', pack_param_i.split("[")[1])]
        charge_ocv_soc_list = [float(n) for n in re.findall(r'\d+(?:\.\d+)?', pack_param_i.split("[")[2])]
        # NOTE(review): eval() runs text taken from a CSV file.
        cap = eval(json.loads(pack_param_i)['capacity'])

        # ---- per-cell voltages ----
        df_volt = result_df['cell_voltage'].apply(lambda x: pd.Series(list(x)[:CellVoltNums]))
        df_volt.columns = cellvolt_name[0:df_volt.shape[1]]
        df_volt = df_volt.astype('float')
        cellvoltmax = df_volt.max(axis=1)
        cellvoltmin = df_volt.min(axis=1)
        # Pad packs with fewer cells using NaN (not None) so the columns stay
        # float and the arithmetic below keeps working.
        df_volt = df_volt.reindex(columns=cellvolt_name)
        result_df = pd.concat([result_df, df_volt], axis=1)

        # ---- per-cell SOC from the OCV table (vectorised over rows and cells) ----
        df_soc = pd.DataFrame(
            _voltage_to_soc(df_volt.to_numpy(dtype=float), charge_ocv_v_list, charge_ocv_soc_list),
            index=df_volt.index,
            columns=cellsoc_name,
        )
        df_volt['cell_volt_max'] = cellvoltmax
        df_volt['cell_volt_min'] = cellvoltmin
        cellsocmax = df_soc.max(axis=1)
        cellsocmin = df_soc.min(axis=1)
        cellsocmean = df_soc.mean(axis=1)
        cellsocdiff = cellsocmax - cellsocmin
        # Assigned one by one so each name is paired with the right statistic.
        df_soc['cell_soc_max'] = cellsocmax
        df_soc['cell_soc_min'] = cellsocmin
        df_soc['cell_soc_diff'] = cellsocdiff
        df_soc['cell_soc_mean'] = cellsocmean
        result_df = pd.concat([result_df, df_soc], axis=1)

        # ---- SOH inconsistency indicators ----
        # Columns of df_cell_soh are 0..k-1; pad to CellVoltNums if k is smaller.
        soh_row = df_cell_soh.iloc[i].reindex(range(CellVoltNums))
        df_soh.loc[i, cellsoh_name] = soh_row.values
        df_soh.loc[i, 'cell_soh_max'] = soh_row.max()
        df_soh.loc[i, 'cell_soh_min'] = soh_row.min()
        df_soh.loc[i, 'cell_soh_mean'] = soh_row.mean()
        df_soh.loc[i, 'cell_soh_diff'] = soh_row.max() - soh_row.min()

        # ---- SOC inconsistency indicators ----
        # Rows at the charge start, at the charge end (also used as the
        # discharge start) and at the end of the extracted window.
        start_row = result_df[result_df['time'] == time_st].iloc[0]
        stop_row = result_df[result_df['time'] == time_sp].iloc[0]
        last_row = result_df.iloc[-1]
        soc_chr_begin = start_row[cellsoc_name].astype(float)
        soc_chr_end = stop_row[cellsoc_name].astype(float)
        soc_dis_end = last_row[cellsoc_name].astype(float)

        df_soc_seg.loc[i, cell_soc_seg_begin_chr_name] = soc_chr_begin.values
        df_soc_seg.loc[i, cell_soc_seg_stop_chr_name] = soc_chr_end.values
        df_soc_seg.loc[i, cell_soc_seg_begin_dis_name] = soc_chr_end.values
        # Every cell gets its own final SOC (not cell 1's value for all of them).
        df_soc_seg.loc[i, cell_soc_seg_stop_dis_name] = soc_dis_end.values
        df_soc_seg.loc[i, 'pack_soc_begin_chr'] = start_row['pack_soc']
        df_soc_seg.loc[i, 'pack_soc_stop_chr'] = stop_row['pack_soc']
        df_soc_seg.loc[i, 'pack_soc_begin_dis'] = stop_row['pack_soc']
        df_soc_seg.loc[i, 'pack_soc_stop_dis'] = last_row['pack_soc']
        df_soc_seg.loc[i, 'cell_soc_begin_chr_max'] = soc_chr_begin.max()
        df_soc_seg.loc[i, 'cell_soc_begin_chr_min'] = soc_chr_begin.min()
        df_soc_seg.loc[i, 'cell_soc_begin_chr_diff'] = soc_chr_begin.max() - soc_chr_begin.min()
        df_soc_seg.loc[i, 'cell_soc_begin_chr_mean'] = soc_chr_begin.mean()
        df_soc_seg.loc[i, 'cell_soc_begin_dis_max'] = soc_chr_end.max()
        df_soc_seg.loc[i, 'cell_soc_begin_dis_min'] = soc_chr_end.min()
        df_soc_seg.loc[i, 'cell_soc_begin_dis_diff'] = soc_chr_end.max() - soc_chr_end.min()
        df_soc_seg.loc[i, 'cell_soc_begin_dis_mean'] = soc_chr_end.mean()
        df_soc_seg.loc[i, 'dod'] = stop_row['pack_soc'] - last_row['pack_soc']

        # ---- energy change ----
        df_energy.loc[i, cell_delta_energy_chr_name] = (((soc_chr_end - soc_chr_begin) * cap) / CellVoltNums).values
        df_energy.loc[i, cell_delta_energy_dis_name] = (((soc_chr_end - soc_dis_end) * cap) / CellVoltNums).values
        df_energy.loc[i, 'pack_delta_energy_chr'] = (stop_row['pack_soc'] - start_row['pack_soc']) * cap
        # NOTE(review): the pack-level discharge delta is stop - begin while the
        # cell-level one is begin - stop; the sign is kept as in the original.
        df_energy.loc[i, 'pack_delta_energy_dis'] = (last_row['pack_soc'] - stop_row['pack_soc']) * cap

        # ---- resistance inconsistency indicators ----
        # Rows with bms_sta == 3 between the charge end and the status jump
        # (presumably the discharge phase), cut into RES_SEGMENT_ROWS-row
        # segments. Per segment: (first - last cell voltage) / mean pack current.
        df_dis = result_df[result_df['time'].between(stop_time, discharge_end_time)]
        df_dis = df_dis[df_dis['bms_sta'] == 3].reset_index(drop=True)
        segments = [df_dis[k:k + RES_SEGMENT_ROWS] for k in range(0, len(df_dis), RES_SEGMENT_ROWS)]
        segment_res = [
            (seg[cellvolt_name].iloc[0] - seg[cellvolt_name].iloc[-1]).div(seg['pack_crnt'].mean()).values
            for seg in segments
        ]
        if segment_res:  # with no discharge rows the cells simply stay NaN
            df_res.loc[i, cell_res_name] = np.mean(np.array(segment_res), axis=0)
        cell_res = df_res.loc[i, cell_res_name].astype(float)
        df_res.loc[i, 'cell_res_mean'] = cell_res.mean()
        df_res.loc[i, 'cell_res_diff'] = cell_res.max() - cell_res.min()

        # Checkpoint after every successful record so partial results survive.
        df_final = pd.concat([df_in_list, df_soh, df_soc_seg, df_energy, df_res], axis=1)
        df_final.to_csv(output_path + 'update.csv', encoding="utf_8_sig", index=False)
    except Exception as e:
        # Best effort per record, but never silent: report and move on.
        print(f"record {i} (sn={sn}) failed and was skipped: {e!r}")
        continue
# Final export: input records side by side with the four result tables.
# The file name is output_path followed by '_all.csv'.
result_frames = [df_in_list, df_soh, df_soc_seg, df_energy, df_res]
df_final = pd.concat(result_frames, axis=1)
final_csv_path = output_path + '_all.csv'
df_final.to_csv(final_csv_path, encoding="utf_8_sig", index=False)
|