|
@@ -174,6 +174,142 @@ class DataPreProcess:
|
|
|
df['data_split_by_status'] = status_id
|
|
|
df['data_status'] = status_list
|
|
|
return df
|
|
|
def data_split_by_status_forMGMCUD02(self, dfin, drive_interval_threshold=120, charge_interval_threshold=300,
                                     drive_stand_threshold=120, charge_stand_threshold=300):
    '''
    Split raw records into 'charge', 'drive', 'stand' and 'none' segments.

    The rows are processed in their existing order and purely by position, so
    the index of ``dfin`` may be anything (it is preserved in the result).

    Segmentation rules
    ------------------
    1. Rows are first grouped into consecutive runs of zero / non-zero
       total current (column '总电流[A]').
    2. Every non-zero run is split again wherever the condition
       ``充电状态 == 3`` toggles. Each resulting sub-run is labelled:
         * 'charge' - first row's state is 2 or 3 and no current is > 0
         * 'drive'  - first row's state is 2 or 3 and some current is > 0
         * 'none'   - anything else
       A 'drive' / 'charge' sub-run reuses the id of the segment right before
       it when that segment has the same status and the time gap between the
       first row of the non-zero run and the row preceding it is below
       ``drive_interval_threshold`` / ``charge_interval_threshold``.
       Sub-runs of the very first run of the data are never merged.
    3. A zero-current run is labelled:
         * 'stand' when it is the very first run of the data;
         * the previous status ('charge' / 'drive') when its duration is
           below ``charge_stand_threshold`` / ``drive_stand_threshold``, in
           which case it also reuses the previous segment id;
         * 'stand' otherwise.

    Parameters
    ----------
    dfin : DataFrame with the columns '时间戳', '总电流[A]' and '充电状态'.
        It is not modified; a copy is returned.
    drive_interval_threshold : maximum gap (seconds) for joining two drive
        segments.
    charge_interval_threshold : maximum gap (seconds) for joining two charge
        segments.
    drive_stand_threshold : maximum duration (seconds) of a zero-current run
        that is absorbed into the preceding drive segment.
    charge_stand_threshold : maximum duration (seconds) of a zero-current run
        that is absorbed into the preceding charge segment.

    Returns
    -------
    A copy of ``dfin`` with '时间戳' converted by ``pd.to_datetime`` and three
    extra columns appended:
        data_split_by_crnt   : 1-based number of the zero / non-zero run
        data_split_by_status : segment id after the status based merge
        data_status          : 'charge', 'drive', 'stand' or 'none'
    An empty input yields an empty frame carrying the three extra columns.
    '''

    def _runs(flags):
        # Half-open (start, stop) positions of consecutive runs of equal flags.
        bounds = []
        run_start = 0
        for pos in range(1, len(flags)):
            if flags[pos] != flags[pos - 1]:
                bounds.append((run_start, pos))
                run_start = pos
        if flags:
            bounds.append((run_start, len(flags)))
        return bounds

    df = dfin.copy()
    df['时间戳'] = pd.to_datetime(df['时间戳'])

    # Plain positional lists: independent of the frame's index labels and
    # avoids re-filtering the whole frame for every run.
    current = df['总电流[A]'].tolist()
    bms_state = df['充电状态'].tolist()
    stamps = df['时间戳'].tolist()
    is_zero = (df['总电流[A]'] == 0).tolist()

    interval_limit = {'drive': drive_interval_threshold, 'charge': charge_interval_threshold}
    stand_limit = {'drive': drive_stand_threshold, 'charge': charge_stand_threshold}

    crnt_ids = []
    status_id = []
    status_list = []
    next_id = 1       # id handed to the next segment that is not merged
    last_status = ""  # status of the last row labelled so far

    for crnt_no, (start, stop) in enumerate(_runs(is_zero), start=1):
        run_len = stop - start
        crnt_ids.extend([crnt_no] * run_len)
        offset = 0  # id offset of the current sub-run relative to next_id

        if not is_zero[start]:
            # Non-zero current: 'charge', 'drive' or 'none', decided per sub-run.
            # NOTE(review): the merge gap is measured once, at the start of the
            # whole non-zero run, and reused for all of its sub-runs - this
            # mirrors the original behaviour.
            gap = None
            if start != 0:
                gap = (stamps[start] - stamps[start - 1]).total_seconds()

            prev_status = last_status
            state_is_3 = [state == 3 for state in bms_state[start:stop]]
            for rel_start, rel_stop in _runs(state_is_3):
                sub_start = start + rel_start
                sub_stop = start + rel_stop
                sub_len = sub_stop - sub_start

                if bms_state[sub_start] in [2, 3]:
                    has_positive = any(value > 0 for value in current[sub_start:sub_stop])
                    cur_status = 'drive' if has_positive else 'charge'
                else:
                    cur_status = 'none'
                status_list.extend([cur_status] * sub_len)

                # Same drive/charge status as the previous segment and a short
                # enough gap: step back so the previous id is reused.
                limit = interval_limit.get(cur_status)
                if gap is not None and limit is not None and cur_status == prev_status and gap < limit:
                    offset -= 1
                status_id.extend([next_id + offset] * sub_len)
                offset += 1
                prev_status = cur_status
            # offset now counts the extra ids consumed beyond the first one.
            offset -= 1
        else:
            # Zero current: 'stand', or absorbed into the preceding charge/drive.
            cur_status = 'stand'
            if start != 0:
                duration = (stamps[stop - 1] - stamps[start]).total_seconds()
                limit = stand_limit.get(last_status)
                if limit is not None and duration < limit:
                    cur_status = last_status
                    next_id -= 1
            status_id.extend([next_id] * run_len)
            status_list.extend([cur_status] * run_len)

        next_id += offset + 1
        last_status = cur_status

    df['data_split_by_crnt'] = crnt_ids
    df['data_split_by_status'] = status_id
    df['data_status'] = status_list
    return df
|
|
|
+
|
|
|
def data_split_by_time(self, dfin, default_time_threshold = 300, drive_time_threshold=300, charge_time_threshold=300,
|
|
|
stand_time_threshold = 1800):
|
|
|
'''
|
|
@@ -488,6 +624,8 @@ class DataPreProcess:
|
|
|
res_record['charge'] = (res_record['charge'])/len(set(df_bms[df_bms['data_status_after_combine']=='charge']['data_split_by_status_after_combine']))
|
|
|
return df_bms, df_gps, res_record
|
|
|
|
|
|
+
|
|
|
+
|
|
|
'''
|
|
|
为故障数据打标签
|
|
|
sn: 电池编码
|