ana.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. import datetime
  2. import tools.tools
  3. import pdb
  4. import pandas as pd
  5. import numpy as np
  6. from sklearn.cluster import DBSCAN
  7. # 数据分析
  8. # 按月统计 停车(静置+充电)发送数据最长的地点
  9. def sta_stop_position(df_bms, df_gps, days=30):
  10. df_res = pd.DataFrame(columns=['time', 'lat', 'long', 'max_duration'])
  11. # 从静置+充电数据段,且GPS可靠的数据中进行统计
  12. start_time = df_bms.loc[0, '时间戳']
  13. timeDelta = datetime.timedelta(days=days)
  14. end_time = start_time + timeDelta
  15. while end_time < df_bms.loc[len(df_bms)-1, '时间戳']:
  16. df_res_temp = pd.DataFrame(columns=['time', 'lat', 'long', 'duration'])
  17. df_sel = df_bms[(df_bms['时间戳']>start_time) & (df_bms['时间戳']<=end_time)]
  18. data_number_list = sorted(list(set(df_sel[(df_sel['data_status'].isin(['charge', 'stand'])) & (df_sel['gps_rely']==1)
  19. ]['data_split_by_status'])))
  20. # 每段数据的经纬度求均值后记录下来
  21. for data_number in data_number_list[:]:
  22. df_sel_bms = df_sel[df_sel['data_split_by_status'] == data_number]
  23. df_sel_bms = df_sel_bms.reset_index(drop=True)
  24. df_sel_gps = df_gps[(df_gps['时间戳']>df_sel_bms.loc[0,'时间戳']) & (df_gps['时间戳']<df_sel_bms.loc[len(df_sel_bms)-1,'时间戳'])]
  25. df_sel_gps = df_sel_gps.reset_index(drop=True)
  26. deltaT = abs(df_sel_gps.loc[0,'时间戳'] - df_sel_gps.loc[len(df_sel_gps)-1,'时间戳']).total_seconds()
  27. df_res_temp = df_res_temp.append({'time': df_sel_gps.loc[0,'时间戳'],
  28. 'lat':np.mean(df_sel_gps['纬度']),
  29. 'long':np.mean(df_sel_gps['经度']),
  30. 'duration':deltaT}, ignore_index=True)
  31. # 利用聚类算法,将靠近的停车地点合并
  32. # 计算每次停车的GPS地点之间的距离
  33. count = len(df_res_temp)
  34. dis_mat_half = np.full((count,count), 0.0)
  35. for i in range(count):
  36. for j in range(i,count):
  37. dis_mat_half[i][j] = tools.tools.cal_distance(df_res_temp.loc[i,'lat'],df_res_temp.loc[i,'long'],
  38. df_res_temp.loc[j,'lat'],df_res_temp.loc[j,'long'])
  39. dis_mat=np.array(dis_mat_half)+np.transpose(dis_mat_half)
  40. # 执行聚类算法,聚类参数:距离50,类内最少样本数10
  41. dbscan = DBSCAN(eps=0.05, min_samples=10, metric='precomputed').fit(dis_mat)
  42. # 将对应的类内的GPS合并
  43. class_label = list(sorted(set(dbscan.labels_)))
  44. if -1 in class_label:
  45. class_label.remove(-1)
  46. for label in class_label:
  47. index = sorted(np.where(dbscan.labels_ == label))[0]
  48. min_index = index[0]
  49. gps_lat = df_res_temp.loc[min_index,'lat']
  50. gps_long = df_res_temp.loc[min_index,'long']
  51. temp_duration = df_res_temp.loc[min_index,'duration']
  52. for i in index[1:]:
  53. gps_lat = gps_lat + df_res_temp.loc[i, 'lat']
  54. gps_long = gps_long + df_res_temp.loc[i, 'long']
  55. temp_duration = temp_duration + df_res_temp.loc[i, 'duration']
  56. df_res_temp.drop(index=i, inplace=True)
  57. df_res_temp.loc[min_index, 'lat'] = gps_lat/len(index)
  58. df_res_temp.loc[min_index, 'long'] = gps_long/len(index)
  59. df_res_temp.loc[min_index, 'duration'] = temp_duration
  60. df_res = df_res.append({'time': start_time[0:10],
  61. 'lat':np.mean(df_sel_gps['纬度']),
  62. 'long':np.mean(df_sel_gps['经度']),
  63. 'max_duration':df_res_temp['duration'].max()/3600.0}, ignore_index=True)
  64. start_time = end_time
  65. end_time = end_time + timeDelta
  66. return df_res
  67. # 统计单位时间内的累积行车时长、soc累积使用量以及累积行驶里程(若GPS可信)
  68. # 计算单位时间内行车时长占比, 单位时间内行车soc平均变化量,单位时间内平均里程数。
  69. # 输入
  70. # time_window: 统计时间长度
  71. # step: 时间窗口滑动步进值
  72. def sta_one_drive_cycle(df_bms, df_gps, prepro_record, time_window=3600, step=3600, start_time="00:00:00"):
  73. st = datetime.datetime.strptime(str(df_bms.loc[0, '时间戳'])[0:10] + ' ' + start_time, '%Y-%m-%d %H:%M:%S')
  74. et = st + datetime.timedelta(seconds=time_window)
  75. time_list = []
  76. driveT_list = []
  77. driveSoc_list = []
  78. driveOdo_list = []
  79. driveOdoRevise_list = []
  80. while (et < df_bms.loc[len(df_bms)-1, '时间戳']):
  81. df_t = df_bms[(df_bms['时间戳'] > st ) & (df_bms['时间戳'] < et )]
  82. df_t = df_t.reset_index(drop=True)
  83. driveT = 0
  84. driveSoc = 0
  85. driveOdo = 0
  86. driveOdoRevise = 0
  87. if not df_t.empty:
  88. deltaT = (df_t.loc[len(df_t)-1, '时间戳'] - df_t.loc[0, '时间戳']).total_seconds()
  89. df_drive = df_t[df_t['data_status']=='drive']
  90. df_drive = df_drive.reset_index(drop=True)
  91. data_number_list = sorted(list(set(df_drive['data_split_by_status'])))
  92. for data_number in data_number_list[:]:
  93. df_d = df_drive[df_drive['data_split_by_status'] == data_number]
  94. df_d = df_d.reset_index(drop=True)
  95. driveT = driveT + (df_d.loc[len(df_d)-1, '时间戳'] - df_d.loc[0, '时间戳']).total_seconds()
  96. driveSoc = driveSoc + (df_d.loc[0, 'SOC[%]'] - df_d.loc[len(df_d)-1, 'SOC[%]'])
  97. if df_d.loc[0, 'gps_rely'] == 1 and driveOdo != None:
  98. df_sel_gps = df_gps[(df_gps['时间戳']>=df_d.loc[0,'时间戳']) & (df_gps['时间戳']<=df_d.loc[len(df_d)-1,'时间戳'])]
  99. df_sel_gps = df_sel_gps.reset_index(drop=True)
  100. if len(df_sel_gps) > 0:
  101. driveOdo = driveOdo + (df_sel_gps.loc[len(df_sel_gps)-1, 'odo'] - df_sel_gps.loc[0, 'odo'])
  102. else:
  103. driveOdo = None
  104. else:
  105. driveOdo = None
  106. time_list.append(st)
  107. driveT_list.append(driveT)
  108. driveSoc_list.append(driveSoc)
  109. driveOdo_list.append(driveOdo)
  110. st = st + datetime.timedelta(seconds=step)
  111. et = st + datetime.timedelta(seconds=time_window)
  112. if prepro_record['drive']<0.8 and sum(driveSoc_list) > 0:
  113. # 计算能耗
  114. sum_odo = 0
  115. sum_soc = 0
  116. for i,odo in enumerate(driveOdo_list):
  117. if odo !=0 and not pd.isnull(odo):
  118. sum_odo += odo
  119. sum_soc += driveSoc_list[i]
  120. ene_consump = sum_odo/sum_soc
  121. st = datetime.datetime.strptime(str(df_bms.loc[0, '时间戳'])[0:10] + ' ' + start_time, '%Y-%m-%d %H:%M:%S')
  122. et = st + datetime.timedelta(seconds=time_window)
  123. driveOdoRevise_list = []
  124. while (et < df_bms.loc[len(df_bms)-1, '时间戳']):
  125. df_t = df_bms[(df_bms['时间戳'] > st ) & (df_bms['时间戳'] < et )]
  126. df_t = df_t.reset_index(drop=True)
  127. driveOdoRevise = 0
  128. if not df_t.empty:
  129. deltaT = (df_t.loc[len(df_t)-1, '时间戳'] - df_t.loc[0, '时间戳']).total_seconds()
  130. df_drive = df_t[df_t['data_status']=='drive']
  131. df_drive = df_drive.reset_index(drop=True)
  132. data_number_list = sorted(list(set(df_drive['data_split_by_status'])))
  133. for data_number in data_number_list[:]:
  134. df_d = df_drive[df_drive['data_split_by_status'] == data_number]
  135. df_d = df_d.reset_index(drop=True)
  136. if df_d.loc[0, 'gps_rely'] == 1 and driveOdo != None:
  137. df_sel_gps = df_gps[(df_gps['时间戳']>=df_d.loc[0,'时间戳']) & (df_gps['时间戳']<=df_d.loc[len(df_d)-1,'时间戳'])]
  138. df_sel_gps = df_sel_gps.reset_index(drop=True)
  139. if len(df_sel_gps) > 0:
  140. driveOdoRevise = driveOdoRevise + (df_sel_gps.loc[len(df_sel_gps)-1, 'odo'] - df_sel_gps.loc[0, 'odo'])
  141. else:
  142. driveOdoRevise = driveOdoRevise + (df_d.loc[0, 'SOC[%]'] - df_d.loc[len(df_d)-1, 'SOC[%]']) * ene_consump
  143. else:
  144. driveOdoRevise = driveOdoRevise + (df_d.loc[0, 'SOC[%]'] - df_d.loc[len(df_d)-1, 'SOC[%]']) * ene_consump
  145. driveOdoRevise_list.append(driveOdoRevise)
  146. st = st + datetime.timedelta(seconds=step)
  147. et = st + datetime.timedelta(seconds=time_window)
  148. else:
  149. driveOdoRevise_list = [None] * len(driveSoc_list)
  150. df_res = pd.DataFrame({'time':time_list, 'driveT':driveT_list, 'driveSoc':driveSoc_list, 'driveOdo':driveOdo_list, 'driveOdoRevise':driveOdoRevise_list})
  151. return df_res
  152. # 统计充电前的GPS海拔与充电时的GPS海拔差(若GPS可信)
  153. def sta_charge_height(df_bms, df_gps):
  154. data_number_list = sorted(list(set(df_bms['data_split_by_status'])))
  155. df_sel_bms_last = df_bms[df_bms['data_split_by_status'] == 1]
  156. df_sel_bms_last = df_sel_bms_last.reset_index(drop=True)
  157. time_list = []
  158. last_height_list = []
  159. height_list = []
  160. last_status_list = []
  161. for data_number in data_number_list[1:]:
  162. df_sel_bms = df_bms[df_bms['data_split_by_status'] == data_number]
  163. df_sel_bms = df_sel_bms.reset_index(drop=True)
  164. if df_sel_bms_last.loc[0, 'data_status'] != 'charge' and df_sel_bms.loc[0, 'data_status'] == 'charge' and\
  165. df_sel_bms_last.loc[0, 'gps_rely'] == 1 and df_sel_bms.loc[0, 'gps_rely'] == 1:
  166. df_sel_gps_last = df_gps[(df_gps['时间戳']>=df_sel_bms_last.loc[0,'时间戳']) & (df_gps['时间戳']<=df_sel_bms_last.loc[len(df_sel_bms_last)-1,'时间戳'])]
  167. df_sel_gps_last = df_sel_gps_last.reset_index(drop=True)
  168. df_sel_gps = df_gps[(df_gps['时间戳']>=df_sel_bms.loc[0,'时间戳']) & (df_gps['时间戳']<=df_sel_bms.loc[len(df_sel_bms)-1,'时间戳'])]
  169. df_sel_gps = df_sel_gps.reset_index(drop=True)
  170. if (df_sel_bms_last.loc[0, 'data_status'] == 'stand'):
  171. last_height = df_sel_gps_last['海拔m'].mean()
  172. else:
  173. last_height = df_sel_gps_last.loc[len(df_sel_gps_last)-1, '海拔m']
  174. cur_height = df_sel_gps['海拔m'].mean()
  175. time_list.append(df_sel_bms.loc[0, '时间戳'])
  176. last_height_list.append(last_height)
  177. height_list.append(cur_height)
  178. last_status_list.append(df_sel_bms_last.loc[0, 'data_status'])
  179. df_sel_bms_last = df_sel_bms.copy()
  180. df_res = pd.DataFrame({'time':time_list, 'last_status':last_status_list, 'last_height':last_height_list, 'cur_height':height_list, 'diff':np.array(height_list)-np.array(last_height_list)})
  181. return df_res