# DataPreProcessMGMC.py
  1. from os import defpath
  2. import pandas as pd
  3. import numpy as np
  4. import pdb
  5. from numba import jit
  6. from LIB.BACKEND import Tools
  7. class DataPreProcess:
  def __init__(self):
      """Create the preprocessor and attach a ``Tools.Tools`` helper as ``self.tools``."""
      self.tools = Tools.Tools()
  11. # def data_split(self, dfin, drive_interval_threshold=120, charge_interval_threshold=300,
  12. # drive_stand_threshold=120, charge_stand_threshold=300,
  13. # default_time_threshold = 300, drive_time_threshold=300, charge_time_threshold=300,
  14. # stand_time_threshold = 1800):
  15. # '''
  16. # 数据分段函数,会调用_data_split_by_status和_data_split_by_time函数。
  17. # 其中_data_split_by_status 将数据分为charge、drive、stand、和none段;
  18. # _data_split_by_time 将每个段内的数据,根据时间跳变继续分段。
  19. # '''
  def time_filter(self, df_bms, df_gps):
      """Remove rows with a repeated '时间戳' value from the BMS and GPS frames.

      For each frame the first row of every duplicated timestamp is kept.
      The duplicates are dropped in place, so the frames passed in by the
      caller are modified (their index is left as-is).

      Returns:
          (df_bms, df_gps): re-indexed (0..n-1) copies of the de-duplicated frames.
      """
      cleaned = []
      for frame in (df_bms, df_gps):
          # In-place drop: the caller's object loses the duplicate rows too.
          frame.drop_duplicates(subset=['时间戳'], keep='first', inplace=True)
          cleaned.append(frame.reset_index(drop=True))
      bms_clean, gps_clean = cleaned
      return bms_clean, gps_clean
  def data_split_by_status(self, dfin, drive_interval_threshold=120, charge_interval_threshold=300,
                           drive_stand_threshold=120, charge_stand_threshold=300):
      '''
      Split raw BMS rows into segments labelled 'charge', 'drive', 'stand' or 'none'.

      How a label is chosen (as implemented below):
        1. Rows are first grouped into consecutive runs by whether '总电流[A]' == 0.
        2. A non-zero-current run is sub-split wherever ('充电状态' == 3) flips.
           Each sub-run whose first '充电状态' is 2 or 3 is labelled
             - 'charge' if it contains no current < 0,
             - 'drive'  if it contains at least one current < 0;
           any other sub-run is labelled 'none'.
        3. A zero-current run is labelled 'stand' when it is the first run of the
           data, when it follows a 'none' run, or when it lasts at least the
           relevant *_stand_threshold; otherwise it is absorbed into the
           preceding 'charge' / 'drive' segment.

      NOTE(review): the previous docstring described drive as "current > 0 exists";
      the code tests for current < 0. Confirm the sign convention of '总电流[A]'.

      --------------Parameters-------------:
      dfin: raw BMS frame with columns '时间戳', '总电流[A]' and '充电状态'.
            Assumes a default 0..n-1 index (rows are looked up by label 0 and
            by label - 1) — TODO confirm with callers.
      drive_interval_threshold: a drive sub-run is merged into a preceding drive
            segment when the time gap (seconds) to the previous row is below this.
      charge_interval_threshold: same, for charge.
      drive_stand_threshold: a zero-current run following drive that lasts less
            than this many seconds is merged into that drive segment.
      charge_stand_threshold: same, for charge.
      --------------Returns-----------------:
      A copy of dfin ('时间戳' converted with pd.to_datetime) with three extra columns:
      data_split_by_crnt: run number (from 1) of the zero / non-zero current split
      data_split_by_status: segment number after the status split and merging
      data_status: the status label of each row
      '''
      # Step 1: number consecutive runs of rows by whether the current is exactly zero.
      df = dfin.copy()
      df['时间戳'] = pd.to_datetime(df['时间戳'])
      crnt_zero_or_not = df['总电流[A]']==0
      last_crnt_flag = crnt_zero_or_not[0]
      temp = 1
      group_id = [temp]
      for cur_crnt_flag in crnt_zero_or_not[1:]:
          # XOR is true exactly when the zero / non-zero flag changes.
          if last_crnt_flag ^ cur_crnt_flag:
              temp = temp + 1
          last_crnt_flag = cur_crnt_flag
          group_id.append(temp)
      df['data_split_by_crnt'] = group_id
      # Step 2: decide the status of every run from the charge state and, for
      # zero-current runs, from how long the run lasts.
      temp = 1              # next unused segment id
      last_status = ""      # status of the last row labelled so far
      status_id = []
      status_list = []
      data_number_list = sorted(list(set(df['data_split_by_crnt'])))
      for data_number in data_number_list:
          df_sel = df[df['data_split_by_crnt'] == data_number]
          origin_index = list(df_sel.index)
          df_sel = df_sel.reset_index(drop=True)
          temp_2 = 0
          # Non-zero current: the run may contain charge, drive or none sub-runs.
          if df_sel.loc[0,'总电流[A]'] != 0:
              # The BMS state can change inside a current run, so split the run
              # again wherever ('充电状态' == 3) flips.
              status_drive_or_not = df_sel['充电状态']==3
              last_status_flag = status_drive_or_not[0]
              temp_2 = 0
              group_id_2 = [temp_2]
              for cur_status_flag in status_drive_or_not[1:]:
                  if last_status_flag ^ cur_status_flag:
                      temp_2 = temp_2 + 1
                  last_status_flag = cur_status_flag
                  group_id_2.append(temp_2)
              # Walk the sub-runs; temp_2 is the id offset relative to temp.
              temp_2 = 0
              last_status_2 = last_status
              df_sel['index'] = group_id_2
              data_number_list_2 = sorted(list(set(group_id_2)))
              for data_number_2 in data_number_list_2:
                  df_sel_2 = df_sel[df_sel['index'] == data_number_2]
                  df_sel_2 = df_sel_2.reset_index(drop=True)
                  # Choose charge / drive from the BMS state and the sign of the current.
                  # State 2 or 3 and no negative current: charge.
                  if df_sel_2.loc[0, '充电状态'] in [2, 3] and len(df_sel_2[df_sel_2['总电流[A]'] < 0]) == 0:
                      cur_status = 'charge'
                  # State 2 or 3 and at least one negative current: drive.
                  elif df_sel_2.loc[0, '充电状态'] in [2, 3] and len(df_sel_2[df_sel_2['总电流[A]'] < 0]) > 0:
                      cur_status = 'drive'
                  # Anything else: none.
                  else:
                      cur_status = 'none'
                  status_list.extend([cur_status] * len(df_sel_2))
                  # Decide whether this sub-run continues the previous segment.
                  if origin_index[0] == 0: # very first run of the data: assign the id directly
                      status_id.extend([temp + temp_2]*len(df_sel_2))
                  else: # check whether to merge with the previous segment
                      # Gap between the first row of this current run and the row before it.
                      deltaT = (df.loc[origin_index[0], '时间戳'] - df.loc[origin_index[0]-1, '时间戳']).total_seconds()
                      # Same status (drive) and gap below the threshold: merge.
                      if last_status_2 == 'drive' and cur_status == last_status_2 and deltaT < drive_interval_threshold:
                          temp_2 = temp_2 - 1
                          status_id.extend([temp + temp_2]*len(df_sel_2))
                      # Same status (charge) and gap below the threshold: merge.
                      elif last_status_2 == 'charge' and cur_status == last_status_2 and deltaT < charge_interval_threshold:
                          temp_2 = temp_2 - 1
                          status_id.extend([temp + temp_2]*len(df_sel_2))
                      else:
                          status_id.extend([temp + temp_2]*len(df_sel_2))
                  temp_2 = temp_2 + 1
                  last_status_2 = status_list[-1]
              # Undo the increment made after the last sub-run.
              temp_2 = temp_2 - 1
          else:
              # Zero current: the run is stand, or a continuation of charge / drive.
              if origin_index[0] == 0: # first run of the data is always stand, whatever its length
                  status_id.extend([temp]*len(df_sel))
                  status_list.extend(['stand'] * len(df_sel))
              else: # not the first run
                  # Duration of this zero-current run in seconds.
                  cur_deltaT = (df.loc[origin_index[-1], '时间戳'] - df.loc[origin_index[0], '时间戳']).total_seconds()
                  if last_status == 'charge': # previous status was charge
                      if cur_deltaT < charge_stand_threshold: # short pause: merge into the charge segment
                          status_list.extend(['charge'] * len(df_sel))
                          temp = temp - 1
                          status_id.extend([temp]*len(df_sel))
                      else: # pause reached the threshold: stand
                          status_id.extend([temp]*len(df_sel))
                          status_list.extend(['stand'] * len(df_sel))
                  elif last_status == 'drive': # previous status was drive
                      if cur_deltaT < drive_stand_threshold: # short pause: merge into the drive segment
                          status_list.extend(['drive'] * len(df_sel))
                          temp = temp - 1
                          status_id.extend([temp]*len(df_sel))
                      else: # pause reached the threshold: stand
                          status_id.extend([temp]*len(df_sel))
                          status_list.extend(['stand'] * len(df_sel))
                  elif last_status == 'none': # previous status was unknown
                      status_id.extend([temp] * len(df_sel))
                      status_list.extend(['stand'] * len(df_sel))
                  # NOTE(review): there is no branch for any other last_status; presumably
                  # unreachable because zero and non-zero runs alternate — confirm.
          # Advance to the next unused id (a merge above is cancelled out here).
          temp = temp + temp_2 + 1
          last_status = status_list[-1] # status of the run just processed
      df['data_split_by_status'] = status_id
      df['data_status'] = status_list
      return df
  def data_split_by_time(self, dfin, default_time_threshold = 300, drive_time_threshold=300, charge_time_threshold=300,
                         stand_time_threshold = 1800):
      '''
      Split status segments further wherever the timestamp jumps (data loss).

      Inside every 'data_split_by_status' segment, a new sub-segment starts
      after each pair of consecutive rows whose time difference exceeds the
      threshold that applies to the segment's status.

      --------------Parameters-------------:
      dfin: frame produced by data_split_by_status (needs the columns
            'data_split_by_status', 'data_status' and '时间戳').
            Rows are expected to be ordered so that each status segment is a
            contiguous run in ascending id order, because the new ids are
            generated per segment and then assigned column-wise.
      default_time_threshold: gap limit in seconds for any other status
      drive_time_threshold: gap limit in seconds for 'drive' segments
      charge_time_threshold: gap limit in seconds for 'charge' segments
      stand_time_threshold: gap limit in seconds for 'stand' segments
      --------------Returns-----------------:
      dfin itself (modified in place) with one extra column:
      data_split_by_status_time: segment number (from 1) after the time split
      '''
      limits = {
          'drive': drive_time_threshold,
          'charge': charge_time_threshold,
          'stand': stand_time_threshold,
      }
      data_id = []
      next_id = 1
      # groupby visits the segments in ascending id order, one filter pass in total.
      for _, segment in dfin.groupby('data_split_by_status'):
          limit = limits.get(segment['data_status'].iloc[0], default_time_threshold)
          stamps = pd.to_datetime(segment['时间戳']).to_numpy()
          # Dividing by a one-second timedelta gives seconds for any datetime
          # resolution (the old int64 / 1e9 conversion assumed nanoseconds).
          gaps = np.diff(stamps) / np.timedelta64(1, 's')
          # Offset of each row's sub-segment: number of over-limit gaps before it.
          offsets = np.concatenate(([0], np.cumsum(gaps > limit)))
          data_id.extend((next_id + offsets).tolist())
          next_id += int(offsets[-1]) + 1
      dfin['data_split_by_status_time'] = data_id
      return dfin
  def combine_drive_stand(self, dfin):
      '''
      Merge everything between two charges into one segment.

      Consecutive segments are collapsed into two kinds only: 'charge' and
      'not charge'. A new combined segment starts each time the data switches
      between a charge segment and a non-charge segment.
      ---------------Parameters----------
      dfin: BMS frame carrying 'data_split_by_status_time' and 'data_status'
      ---------------Returns----------
      A copy of dfin with two extra columns:
      data_split_by_status_after_combine: combined segment number (from 1)
      data_status_after_combine: 'charge' or 'not charge'
      '''
      df = dfin.copy()
      segment_col = df['data_split_by_status_time']
      combined_ids = []
      combined_status = []
      combined_no = 1
      prev_is_charge = None  # None until the first segment has been seen
      for segment_id in sorted(set(segment_col)):
          n_rows = int((segment_col == segment_id).sum())
          first_status = df.loc[segment_col == segment_id, 'data_status'].iloc[0]
          is_charge = bool(first_status == 'charge')
          # Only a charge <-> non-charge transition opens a new combined segment.
          if prev_is_charge is not None and is_charge != prev_is_charge:
              combined_no += 1
          combined_ids.extend([combined_no] * n_rows)
          label = 'charge' if is_charge else 'not charge'
          combined_status.extend([label] * n_rows)
          prev_is_charge = is_charge
      df['data_split_by_status_after_combine'] = combined_ids
      df['data_status_after_combine'] = combined_status
      return df
  def cal_stand_time(self, dfin):
      '''
      Record how long the vehicle stood before and after each drive / charge.

      For every 'charge' or 'drive' segment with at least two rows:
        - its first row receives the duration (minutes) of the preceding
          segment if that segment is 'stand', otherwise 0; the first segment
          of the data has nothing before it and receives None;
        - its last row receives the duration (minutes) of the following
          segment if that segment is 'stand', otherwise 0; the last segment
          of the data receives None.
      Every other row (including all rows of stand / none segments and of
      single-row segments) receives None.

      "First segment" is decided by position. Previously a flag was only
      cleared once a segment with two or more rows had been seen, so a
      single-row first segment caused the next segment to lose its leading
      stand time.
      ----------------Parameters---------
      dfin: BMS frame carrying 'data_split_by_status_time', 'data_status' and
            '时间戳' (timestamps must support subtraction / total_seconds()).
      ----------------Returns----------
      A copy of dfin with one extra column:
      stand_time: stand duration in minutes at the first / last row of each
                  drive or charge segment, as described above
      '''
      df = dfin.copy()

      # One pass over the data: (status, row count, duration in minutes) per
      # segment, in ascending segment-id order.
      segments = []
      for _, seg in df.groupby('data_split_by_status_time'):
          stamps = seg['时间戳']
          minutes = (stamps.iloc[-1] - stamps.iloc[0]).total_seconds() / 60.0
          segments.append((seg['data_status'].iloc[0], len(seg), minutes))

      def stand_minutes(pos):
          # Duration of segment `pos` when it is a stand segment, else 0.
          status, _, minutes = segments[pos]
          return minutes if status == 'stand' else 0

      stand_time = []
      last_pos = len(segments) - 1
      for pos, (status, n_rows, _) in enumerate(segments):
          if n_rows < 2 or status not in ('charge', 'drive'):
              stand_time.extend([None] * n_rows)
              continue
          before = stand_minutes(pos - 1) if pos > 0 else None
          after = stand_minutes(pos + 1) if pos < last_pos else None
          stand_time.append(before)
          stand_time.extend([None] * (n_rows - 2))
          stand_time.append(after)
      df['stand_time'] = stand_time
      return df
  294. # 输入GPS数据,返回本段数据的累积里程,及平均时速(如果两点之间)
  295. @jit
  296. def _cal_odo_speed(self, lat_list, long_list, time_list):
  297. '''
  298. 输入:经度列表, 纬度列表, 时间列表;
  299. 输出:每两个经纬度坐标之间的距离,以及速度 的数组
  300. '''
  301. dis_array = []
  302. speed_array = []
  303. for i in range(len(lat_list)-1):
  304. dis = self.tools.cal_distance(lat_list[i],long_list[i], lat_list[i+1],long_list[i+1])
  305. dis_array.append(dis)
  306. deltaT = abs(time_list[i] - time_list[i+1]).total_seconds()
  307. speed_array.append(dis * 3600.0/deltaT)
  308. return np.array(dis_array), np.array(speed_array)
  def gps_data_judge(self, df_bms, df_gps, time_diff_thre=300, odo_sum_thre=200, drive_spd_thre=80, parking_spd_thre=2):
      '''
      Judge, per 'data_split_by_status_time' segment, whether the GPS data is reliable.

      For each BMS segment the GPS rows whose '时间戳' lies inside the segment's
      first..last timestamp are selected and checked in this order:
        -1: fewer than two GPS rows fall inside the segment
        -2: the first or last selected GPS timestamp differs from the segment's
            first / last timestamp by more than time_diff_thre seconds
        -3: total distance > odo_sum_thre, or average speed > drive_spd_thre,
            or any point-to-point speed > drive_spd_thre (checked for every
            status, not only drive)
        -4: the segment is 'charge' or 'stand' and the average speed exceeds
            parking_spd_thre
         1: none of the above (reliable)
      --------------Parameters--------------:
      df_bms: BMS frame with 'data_split_by_status_time', 'data_status', '时间戳'
      df_gps: GPS frame with '时间戳', '纬度', '经度'
      time_diff_thre: allowed start / end time mismatch in seconds
      odo_sum_thre: limit on the summed distance of a segment
      drive_spd_thre: limit on average and point-to-point speed
      parking_spd_thre: limit on average speed while charging or standing
      (distance / speed units are those of _cal_odo_speed — presumably km and km/h)
      --------------Returns--------------:
      (df_bms, df_gps, res_record); both frames are modified in place.
      df_bms gains 'gps_rely' with the code above for every row.
      df_gps gains 'odo' and 'speed': distance / speed from the previous fix,
          filled only inside reliable segments ('时间戳' is converted with
          pd.to_datetime).
      res_record: per status, unreliable segments / segments of that status;
          'total' is all unreliable segments / the largest segment id.
      '''
      df_gps['时间戳'] = pd.to_datetime(df_gps['时间戳'])
      res_record = {'drive':0, 'charge':0, 'stand':0, 'none':0, 'total':0}
      rely_list = []
      df_gps['odo'] = [None] * len(df_gps)
      df_gps['speed'] = [None] * len(df_gps)

      segment_col = df_bms['data_split_by_status_time']
      for segment_id in sorted(set(segment_col)):
          bms_seg = df_bms[segment_col == segment_id].reset_index(drop=True)
          n_rows = len(bms_seg)
          status_key = str(bms_seg.loc[0, 'data_status'])
          seg_start = bms_seg.loc[0, '时间戳']
          seg_end = bms_seg.loc[n_rows - 1, '时间戳']

          # GPS fixes that fall inside the BMS segment's time window.
          in_window = (df_gps['时间戳'] >= seg_start) & (df_gps['时间戳'] <= seg_end)
          gps_seg = df_gps[in_window]
          gps_labels = list(gps_seg.index)   # original labels, needed to write back
          gps_seg = gps_seg.reset_index(drop=True)

          verdict = 1
          if len(gps_seg) <= 1:
              verdict = -1
          else:
              gps_start = gps_seg.loc[0, '时间戳']
              gps_end = gps_seg.loc[len(gps_seg) - 1, '时间戳']
              if abs(gps_start - seg_start).total_seconds() > time_diff_thre or \
                      abs(gps_end - seg_end).total_seconds() > time_diff_thre:
                  verdict = -2
              else:
                  dis_array, speed_array = self._cal_odo_speed(gps_seg['纬度'], gps_seg['经度'], gps_seg['时间戳'])
                  total_odo = np.sum(dis_array)
                  avg_speed = total_odo * 3600.0 / abs(gps_start - gps_end).total_seconds()
                  if total_odo > odo_sum_thre or avg_speed > drive_spd_thre or (speed_array > drive_spd_thre).any():
                      verdict = -3
                  elif status_key in ('charge', 'stand') and avg_speed > parking_spd_thre:
                      verdict = -4

          rely_list.extend([verdict] * n_rows)
          if verdict != 1:
              res_record[status_key] = res_record[status_key] + 1
              continue
          # Reliable: the first fix has no predecessor, so values start at the second.
          df_gps.loc[gps_labels[1:], 'odo'] = dis_array
          df_gps.loc[gps_labels[1:], 'speed'] = speed_array

      df_bms['gps_rely'] = rely_list
      status_keys = ('drive', 'charge', 'stand', 'none')
      # 'total' must be computed from the raw counts, before they become ratios.
      res_record['total'] = sum(res_record[key] for key in status_keys) / df_bms['data_split_by_status_time'].max()
      for key in status_keys:
          n_segments = len(set(df_bms[df_bms['data_status'] == key]['data_split_by_status_time']))
          if n_segments > 0:
              res_record[key] = res_record[key] / n_segments
      return df_bms, df_gps, res_record
  def data_gps_judge_after_combine(self, df_bms, df_gps, time_diff_thre=600, odo_sum_thre=200, drive_spd_thre=80, parking_spd_thre=2):
      '''
      GPS reliability check on the combined ('charge' / 'not charge') segments.

      Same checks and codes as gps_data_judge, with two differences:
        - the GPS time window is taken from the segment's 'drive' rows when it
          has any (GPS data may be missing while standing, which would distort
          the distance); otherwise from the whole segment;
        - code -4 applies only to 'charge' segments.
      Codes: -1 fewer than two GPS rows, -2 start / end time mismatch above
      time_diff_thre seconds, -3 distance or speed above the limits,
      -4 moving while charging, 1 reliable.
      --------------Parameters--------------:
      df_bms: BMS frame with 'data_split_by_status_after_combine',
              'data_status_after_combine', 'data_status', '时间戳'
      df_gps: GPS frame with '时间戳', '纬度', '经度'
      --------------Returns--------------:
      (df_bms, df_gps, res_record); both frames are modified in place.
      df_bms gains 'gps_rely_after_combine'.
      df_gps gains 'odo_after_combine' and 'speed_after_combine', filled only
          inside reliable segments.
      res_record: share of unreliable segments for 'not charge' and 'charge';
          'total' is all unreliable segments / the largest combined segment id.
      '''
      df_gps['时间戳'] = pd.to_datetime(df_gps['时间戳'])
      res_record = {'not charge':0, 'charge':0, 'total':0} # becomes the unreliable share
      rely_list = []
      df_gps['odo_after_combine'] = [None] * len(df_gps)
      df_gps['speed_after_combine'] = [None] * len(df_gps)

      segment_col = df_bms['data_split_by_status_after_combine']
      for segment_id in sorted(set(segment_col)):
          bms_seg = df_bms[segment_col == segment_id].reset_index(drop=True)
          n_rows = len(bms_seg)
          status_key = str(bms_seg.loc[0, 'data_status_after_combine'])

          # Prefer the drive rows for the time window; fall back to the whole segment.
          drive_rows = bms_seg[bms_seg['data_status'] == 'drive'].reset_index(drop=True)
          window = bms_seg if drive_rows.empty else drive_rows
          win_start = window.loc[0, '时间戳']
          win_end = window.loc[len(window) - 1, '时间戳']

          in_window = (df_gps['时间戳'] >= win_start) & (df_gps['时间戳'] <= win_end)
          gps_seg = df_gps[in_window]
          gps_labels = list(gps_seg.index)   # original labels, needed to write back
          gps_seg = gps_seg.reset_index(drop=True)

          verdict = 1
          if len(gps_seg) <= 1:
              verdict = -1
          else:
              gps_start = gps_seg.loc[0, '时间戳']
              gps_end = gps_seg.loc[len(gps_seg) - 1, '时间戳']
              if abs(gps_start - win_start).total_seconds() > time_diff_thre or \
                      abs(gps_end - win_end).total_seconds() > time_diff_thre:
                  verdict = -2
              else:
                  dis_array, speed_array = self._cal_odo_speed(gps_seg['纬度'], gps_seg['经度'], gps_seg['时间戳'])
                  total_odo = np.sum(dis_array)
                  avg_speed = total_odo * 3600.0 / abs(gps_start - gps_end).total_seconds()
                  if total_odo > odo_sum_thre or avg_speed > drive_spd_thre or (speed_array > drive_spd_thre).any():
                      verdict = -3
                  elif status_key == 'charge' and avg_speed > parking_spd_thre:
                      verdict = -4

          rely_list.extend([verdict] * n_rows)
          if verdict != 1:
              res_record[status_key] = res_record[status_key] + 1
              continue
          # Reliable: the first fix has no predecessor, so values start at the second.
          df_gps.loc[gps_labels[1:], 'odo_after_combine'] = dis_array
          df_gps.loc[gps_labels[1:], 'speed_after_combine'] = speed_array

      df_bms['gps_rely_after_combine'] = rely_list
      status_keys = ('not charge', 'charge')
      # 'total' must be computed from the raw counts, before they become ratios.
      res_record['total'] = sum(res_record[key] for key in status_keys) / df_bms['data_split_by_status_after_combine'].max()
      for key in status_keys:
          n_segments = len(set(df_bms[df_bms['data_status_after_combine'] == key]['data_split_by_status_after_combine']))
          if n_segments > 0:
              res_record[key] = res_record[key] / n_segments
      return df_bms, df_gps, res_record