"""Derive per-cell inconsistency metrics for charge/discharge segments.

For every record of the input CSV (columns used here: ``sn``, ``time_st``,
``time_sp``) the script

1. downloads the device time series through ``IotpAlgoService`` for the days
   covered by the record, widening the window by up to ``MAX_EXTRA_DAYS`` days
   until a ``bms_sta`` jump of 18 or 21 is found after ``time_sp``;
2. converts every cell voltage to a cell SOC with the OCV table parsed from the
   pack parameter string;
3. stores SOC values at the segment boundaries, SOC-derived energy deltas and a
   voltage-drop / mean-current ratio per cell in three result frames;
4. writes the concatenated result to ``update_zk.csv`` and ``_all_zk.csv``.
"""
import json
import re

import pandas as pd
import numpy as np
from datetime import datetime
# NOTE: this rebinds `datetime` to the module; the code below relies on
# `datetime.datetime` and `datetime.timedelta`.
import datetime
from tqdm import tqdm
from scipy.interpolate import interp1d
from ZlwlAlgosCommon.utils.ProUtils import *
from ZlwlAlgosCommon.service.iotp.IotpAlgoService import IotpAlgoService
from ZlwlAlgosCommon.service.iotp.Beans import DataField
from ZlwlAlgosCommon.orm.models import *
from Data_Cleaning_oop_thr import DataCleaning

input_file_list = '/home/limingze/zlwl-algos/USER/limingze/Inconsistency/algo_soh_zk.csv'
output_path = '/home/limingze/zlwl-algos/USER/limingze/Inconsistency/Data/'
t_device_path = '/home/limingze/zlwl-algos/USER/limingze/Inconsistency/t_device_zk.csv'
pack_param_path = '/home/limingze/zlwl-algos/USER/limingze/Inconsistency/algo_pack_param.csv'
history_file = '/home/limingze/zlwl-algos/USER/limingze/Inconsistency/Data/update.csv'

TIME_FMT = '%Y-%m-%d %H:%M:%S'
DATE_FMT = '%Y-%m-%d'
MAX_EXTRA_DAYS = 6       # how many times the query window may be widened by one day
RES_WINDOW = 30          # rows per window in the resistance estimate
NUMBER_RE = re.compile(r'\d+(?:\.\d+)?')

cur_env = 'dev'  # runtime environment
app_path = "/home/limingze/zlwl-algos/"  # absolute path of the app


def _numbered(prefix, count):
    """Return ``[prefix + '1', ..., prefix + str(count)]``."""
    return [prefix + str(k) for k in range(1, count + 1)]


def _pack_param_for(sn):
    """Return the raw ``param`` string of the pack model registered for ``sn``.

    Raises ``IndexError`` when the device or its pack model is not listed.
    """
    pack_model = t_device.loc[t_device['sn'] == sn, 'pack_model'].values[0]
    return pack_param.loc[pack_param['pack_code'] == pack_model, 'param'].values[0]


def _load_clean_data(service, sn, fields, start_time, end_time):
    """Fetch the time series of ``sn`` and return it cleaned, with ``time`` parsed."""
    raw = service.get_data(sn_list=[sn], columns=fields,
                           start_time=start_time, end_time=end_time)
    cleaned = DataCleaning(raw).revise_status_codes()
    cleaned['time'] = pd.to_datetime(cleaned['time'])
    return cleaned


def _find_state_jump_after(cleaned, stop_time):
    """Locate the first row after ``stop_time`` whose next ``bms_sta`` is 18 or 21 higher.

    Returns ``(rows_after_stop, index_label)``; the label is NaN when no such
    row exists.
    """
    # .copy() so that adding a column does not write into a slice of `cleaned`.
    after_stop = cleaned[cleaned['time'] > stop_time].copy()
    after_stop['bms_sta_change'] = after_stop['bms_sta'].shift(-1) - after_stop['bms_sta']
    change = after_stop['bms_sta_change']
    hits = after_stop[(change == 18) | (change == 21)]
    return after_stop, hits.index.min()


def _ocv_soc(volts, ocv_v, ocv_soc, interp_func):
    """Map voltages to SOC with ``interp_func``, clamping outside the OCV table.

    Voltages below ``ocv_v[0]`` map to ``ocv_soc[0]``, voltages above
    ``ocv_v[-1]`` map to ``ocv_soc[-1]``; NaN voltages stay NaN.
    """
    volts = np.asarray(volts, dtype=float)
    soc = np.full(volts.shape, np.nan)
    inside = (volts >= ocv_v[0]) & (volts <= ocv_v[-1])
    soc[inside] = interp_func(volts[inside])
    soc[volts < ocv_v[0]] = ocv_soc[0]
    soc[volts > ocv_v[-1]] = ocv_soc[-1]
    return soc


def _build_final():
    """Concatenate the input records with all metric frames column-wise."""
    return pd.concat([df_in_list, df_soc_seg, df_energy, df_res], axis=1)


df_in_list = pd.read_csv(input_file_list)
t_device = pd.read_csv(t_device_path)
pack_param = pd.read_csv(pack_param_path)
history_df = pd.read_csv(history_file)  # NOTE(review): loaded but not used below

# The widest pack among all records defines the number of per-cell columns.
CellVoltNums_list = []
for sn in tqdm(df_in_list['sn']):
    # NOTE(security): eval() runs text read from a CSV file; if the field is a
    # plain number, int()/float() or ast.literal_eval would be safer.
    CellVoltNums_list.append(eval(json.loads(_pack_param_for(sn))['CellVoltTotalCount']))
CellVoltNums = max(CellVoltNums_list)

# NOTE: recording of SOH inconsistency metrics (cell_soh*) is currently disabled.

# SOC at the segment boundaries
cell_soc_seg_begin_chr_name = _numbered('cell_soc_begin_chr', CellVoltNums)
cell_soc_seg_stop_chr_name = _numbered('cell_soc_stop_chr', CellVoltNums)
cell_soc_seg_begin_dis_name = _numbered('cell_soc_begin_dis', CellVoltNums)
cell_soc_seg_stop_dis_name = _numbered('cell_soc_stop_dis', CellVoltNums)
pack_soc_seg_name = ['pack_soc_begin_chr', 'pack_soc_stop_chr',
                     'pack_soc_begin_dis', 'pack_soc_stop_dis']
cell_soc_mmm_name = ['cell_soc_begin_chr_max', 'cell_soc_begin_chr_min',
                     'cell_soc_begin_chr_diff', 'cell_soc_begin_chr_mean',
                     'cell_soc_begin_dis_max', 'cell_soc_begin_dis_min',
                     'cell_soc_begin_dis_diff', 'cell_soc_begin_dis_mean']
soc_name = (cell_soc_seg_begin_chr_name + cell_soc_seg_stop_chr_name
            + cell_soc_seg_begin_dis_name + cell_soc_seg_stop_dis_name
            + pack_soc_seg_name + cell_soc_mmm_name)
soc_name.append('dod')
df_soc_seg = pd.DataFrame(columns=soc_name, index=df_in_list.index)

# Energy deltas
cell_delta_energy_chr_name = _numbered('cell_delta_energy_chr', CellVoltNums)
cell_delta_energy_dis_name = _numbered('cell_delta_energy_dis', CellVoltNums)
delta_energy_name = (cell_delta_energy_chr_name + cell_delta_energy_dis_name
                     + ['pack_delta_energy_chr'] + ['pack_delta_energy_dis'])
df_energy = pd.DataFrame(columns=delta_energy_name, index=df_in_list.index)

# Resistance
cell_res_name = _numbered('cell_res', CellVoltNums)
res_name = cell_res_name + ['cell_res_mean'] + ['cell_res_diff']
df_res = pd.DataFrame(columns=res_name, index=df_in_list.index)

cellvolt_name = _numbered('cell_voltage', CellVoltNums)
cellsoc_name = _numbered('cell_soc', CellVoltNums)

# The service handle and the column list do not depend on the record, so they
# are created once instead of once per loop iteration.
sysUtils = SysUtils(cur_env, app_path)
hbase_params = sysUtils.get_cf_param('hbase-datafactory')
iotp_service = IotpAlgoService(hbase_params=hbase_params)
columns = [DataField.time, DataField.sn, DataField.pack_crnt, DataField.pack_volt,
           DataField.pack_soc, DataField.cell_voltage, DataField.cell_temp,
           DataField.bms_sta, DataField.cell_voltage_count]

for i in tqdm(df_in_list.index):
    sn = df_in_list.loc[i, 'sn']
    seg_begin = datetime.datetime.strptime(df_in_list.loc[i, 'time_st'], TIME_FMT)
    seg_stop = datetime.datetime.strptime(df_in_list.loc[i, 'time_sp'], TIME_FMT)
    begin_time = seg_begin.strftime(TIME_FMT)
    stop_time = seg_stop.strftime(TIME_FMT)
    # Query whole days: from 00:00:00 of the first day to 23:59:59 of the last.
    start_time = seg_begin.strftime(DATE_FMT) + ' 00:00:00'
    end_time = seg_stop.strftime(DATE_FMT) + ' 23:59:59'

    data_clean = _load_clean_data(iotp_service, sn, columns, start_time, end_time)
    df_begin_charge_timestamp, first_change_index = _find_state_jump_after(data_clean, stop_time)

    # Widen the window one day at a time until the state jump shows up.
    for extra_days in range(1, MAX_EXTRA_DAYS + 1):
        if not pd.isna(first_change_index):
            break
        print("当前时间段内没有找到放电截止时刻,正在扩大数据范围.........")
        end_time = (seg_stop + datetime.timedelta(days=extra_days)).strftime(DATE_FMT) + ' 23:59:59'
        data_clean = _load_clean_data(iotp_service, sn, columns, start_time, end_time)
        df_begin_charge_timestamp, first_change_index = _find_state_jump_after(data_clean, stop_time)

    # Decide on the search result itself. A retry counter would wrongly discard
    # a boundary that is only found on the last allowed widening.
    if pd.isna(first_change_index):
        continue

    segment_end = df_begin_charge_timestamp.loc[first_change_index, 'time']
    result_df = data_clean[data_clean['time'].between(begin_time, segment_end)]
    result_df = result_df.reset_index(drop=True)
    print('找到完整充放电片段')

    # --- pack parameters ---------------------------------------------------
    pack_param_i = _pack_param_for(sn)
    # The first two bracketed lists of the parameter string are read as the OCV
    # voltage and SOC tables (assumes that ordering in the string — TODO confirm).
    bracket_parts = pack_param_i.split("[")
    charge_ocv_v_list = [float(n) for n in NUMBER_RE.findall(bracket_parts[1])]
    charge_ocv_soc_list = [float(n) for n in NUMBER_RE.findall(bracket_parts[2])]
    # NOTE(security): eval() on CSV content, see the note near CellVoltTotalCount.
    cap = eval(json.loads(pack_param_i)['capacity'])

    # --- per-cell voltages -------------------------------------------------
    df_volt = result_df['cell_voltage'].apply(lambda x: pd.Series(list(x)[:CellVoltNums]))
    df_volt.columns = cellvolt_name[:df_volt.shape[1]]
    df_volt = df_volt.astype('float')
    len_cell_volt = df_volt.shape[1]
    # Packs with fewer cells get NaN columns so every record has the same width.
    df_volt = df_volt.reindex(columns=cellvolt_name)
    result_df = pd.concat([result_df, df_volt], axis=1)

    # --- per-cell SOC from the OCV table -----------------------------------
    interp_func = interp1d(charge_ocv_v_list, charge_ocv_soc_list)
    df_soc = pd.DataFrame(np.nan, index=df_volt.index, columns=cellsoc_name, dtype=float)
    for k in range(1, len_cell_volt + 1):
        df_soc['cell_soc' + str(k)] = _ocv_soc(
            result_df['cell_voltage' + str(k)].values,
            charge_ocv_v_list, charge_ocv_soc_list, interp_func)
    cellsocmax = df_soc.max(axis=1)
    cellsocmin = df_soc.min(axis=1)
    cellsocmean = df_soc.mean(axis=1)
    # Assign by name: a positional multi-column assignment previously swapped
    # the contents of cell_soc_diff and cell_soc_mean.
    df_soc['cell_soc_max'] = cellsocmax
    df_soc['cell_soc_min'] = cellsocmin
    df_soc['cell_soc_diff'] = cellsocmax - cellsocmin
    df_soc['cell_soc_mean'] = cellsocmean
    result_df = pd.concat([result_df, df_soc], axis=1)

    # --- SOC at the segment boundaries -------------------------------------
    chr_begin_row = result_df[result_df['time'] >= begin_time].iloc[0]
    chr_stop_row = result_df[result_df['time'] <= stop_time].iloc[-1]
    dis_stop_row = result_df.iloc[-1]

    df_soc_seg.loc[i, cell_soc_seg_begin_chr_name] = chr_begin_row[cellsoc_name].values
    df_soc_seg.loc[i, cell_soc_seg_stop_chr_name] = chr_stop_row[cellsoc_name].values
    # The *_begin_dis values are read from the same row as the *_stop_chr values.
    df_soc_seg.loc[i, cell_soc_seg_begin_dis_name] = chr_stop_row[cellsoc_name].values
    df_soc_seg.loc[i, cell_soc_seg_stop_dis_name] = dis_stop_row[cellsoc_name].values
    df_soc_seg.loc[i, 'pack_soc_begin_chr'] = chr_begin_row['pack_soc']
    df_soc_seg.loc[i, 'pack_soc_stop_chr'] = chr_stop_row['pack_soc']
    df_soc_seg.loc[i, 'pack_soc_begin_dis'] = chr_stop_row['pack_soc']
    df_soc_seg.loc[i, 'pack_soc_stop_dis'] = dis_stop_row['pack_soc']

    begin_chr = df_soc_seg.loc[i, cell_soc_seg_begin_chr_name].astype(float)
    df_soc_seg.loc[i, 'cell_soc_begin_chr_max'] = begin_chr.max()
    df_soc_seg.loc[i, 'cell_soc_begin_chr_min'] = begin_chr.min()
    df_soc_seg.loc[i, 'cell_soc_begin_chr_diff'] = begin_chr.max() - begin_chr.min()
    df_soc_seg.loc[i, 'cell_soc_begin_chr_mean'] = begin_chr.mean()

    # Select the begin_dis columns by name rather than by a positional slice.
    begin_dis = df_soc_seg.loc[i, cell_soc_seg_begin_dis_name].astype(float)
    df_soc_seg.loc[i, 'cell_soc_begin_dis_max'] = begin_dis.max()
    df_soc_seg.loc[i, 'cell_soc_begin_dis_min'] = begin_dis.min()
    df_soc_seg.loc[i, 'cell_soc_begin_dis_diff'] = begin_dis.max() - begin_dis.min()
    df_soc_seg.loc[i, 'cell_soc_begin_dis_mean'] = begin_dis.mean()

    df_soc_seg.loc[i, 'dod'] = (df_soc_seg.loc[i, 'pack_soc_begin_dis']
                                - df_soc_seg.loc[i, 'pack_soc_stop_dis'])

    # --- energy deltas (SOC delta * capacity) -------------------------------
    soc_begin_chr = begin_chr.to_numpy(dtype=float)
    soc_stop_chr = df_soc_seg.loc[i, cell_soc_seg_stop_chr_name].to_numpy(dtype=float)
    soc_begin_dis = begin_dis.to_numpy(dtype=float)
    soc_stop_dis = df_soc_seg.loc[i, cell_soc_seg_stop_dis_name].to_numpy(dtype=float)
    df_energy.loc[i, cell_delta_energy_chr_name] = ((soc_stop_chr - soc_begin_chr) * cap) / CellVoltNums
    df_energy.loc[i, cell_delta_energy_dis_name] = ((soc_begin_dis - soc_stop_dis) * cap) / CellVoltNums
    df_energy.loc[i, 'pack_delta_energy_chr'] = (
        df_soc_seg.loc[i, 'pack_soc_stop_chr'] - df_soc_seg.loc[i, 'pack_soc_begin_chr']) * cap
    # NOTE(review): sign is stop - begin here, while the per-cell discharge
    # energy and `dod` use begin - stop; kept as-is, confirm which is intended.
    df_energy.loc[i, 'pack_delta_energy_dis'] = (
        df_soc_seg.loc[i, 'pack_soc_stop_dis'] - df_soc_seg.loc[i, 'pack_soc_begin_dis']) * cap

    # --- resistance inconsistency -------------------------------------------
    df_dis = result_df[result_df['time'].between(stop_time, segment_end)]
    df_dis = df_dis[df_dis['bms_sta'] == 3].reset_index(drop=True)
    # Per window of RES_WINDOW rows: (first voltage - last voltage) / mean
    # current, then averaged over all windows.
    # NOTE(review): a short final window (e.g. a single row) contributes a
    # near-zero voltage drop to the average.
    windows = [df_dis.iloc[k:k + RES_WINDOW] for k in range(0, len(df_dis), RES_WINDOW)]
    if windows:  # with no matching rows the cell_res columns simply stay NaN
        per_window = [
            (w[cellvolt_name].iloc[0] - w[cellvolt_name].iloc[-1]).div(w['pack_crnt'].mean()).values
            for w in windows
        ]
        df_res.loc[i, cell_res_name] = np.mean(np.array(per_window, dtype=float), axis=0)
    cell_res = df_res.loc[i, cell_res_name].astype(float)
    df_res.loc[i, 'cell_res_mean'] = cell_res.mean()
    df_res.loc[i, 'cell_res_diff'] = cell_res.max() - cell_res.min()

    # Checkpoint after every processed record so a later failure keeps the
    # results computed so far.
    df_final = _build_final()
    df_final.to_csv(output_path + 'update_zk.csv', encoding="utf_8_sig", index=False)

# Final dump; update_zk.csv is rewritten too so both files exist and agree even
# when no record produced a segment.
df_final = _build_final()
df_final.to_csv(output_path + 'update_zk.csv', encoding="utf_8_sig", index=False)
df_final.to_csv(output_path + '_all_zk.csv', encoding="utf_8_sig", index=False)