123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199 |
- import datetime
- import tools.tools
- import pdb
- import pandas as pd
- import numpy as np
- from sklearn.cluster import DBSCAN
# Data analysis
- # 按月统计 停车(静置+充电)发送数据最长的地点
- def sta_stop_position(df_bms, df_gps, days=30):
- df_res = pd.DataFrame(columns=['time', 'lat', 'long', 'max_duration'])
- # 从静置+充电数据段,且GPS可靠的数据中进行统计
- start_time = df_bms.loc[0, '时间戳']
- timeDelta = datetime.timedelta(days=days)
- end_time = start_time + timeDelta
- while end_time < df_bms.loc[len(df_bms)-1, '时间戳']:
- df_res_temp = pd.DataFrame(columns=['time', 'lat', 'long', 'duration'])
- df_sel = df_bms[(df_bms['时间戳']>start_time) & (df_bms['时间戳']<=end_time)]
- data_number_list = sorted(list(set(df_sel[(df_sel['data_status'].isin(['charge', 'stand'])) & (df_sel['gps_rely']==1)
- ]['data_split_by_status'])))
- # 每段数据的经纬度求均值后记录下来
- for data_number in data_number_list[:]:
- df_sel_bms = df_sel[df_sel['data_split_by_status'] == data_number]
- df_sel_bms = df_sel_bms.reset_index(drop=True)
- df_sel_gps = df_gps[(df_gps['时间戳']>df_sel_bms.loc[0,'时间戳']) & (df_gps['时间戳']<df_sel_bms.loc[len(df_sel_bms)-1,'时间戳'])]
- df_sel_gps = df_sel_gps.reset_index(drop=True)
- deltaT = abs(df_sel_gps.loc[0,'时间戳'] - df_sel_gps.loc[len(df_sel_gps)-1,'时间戳']).total_seconds()
- df_res_temp = df_res_temp.append({'time': df_sel_gps.loc[0,'时间戳'],
- 'lat':np.mean(df_sel_gps['纬度']),
- 'long':np.mean(df_sel_gps['经度']),
- 'duration':deltaT}, ignore_index=True)
- # 利用聚类算法,将靠近的停车地点合并
- # 计算每次停车的GPS地点之间的距离
- count = len(df_res_temp)
- dis_mat_half = np.full((count,count), 0.0)
- for i in range(count):
- for j in range(i,count):
- dis_mat_half[i][j] = tools.tools.cal_distance(df_res_temp.loc[i,'lat'],df_res_temp.loc[i,'long'],
- df_res_temp.loc[j,'lat'],df_res_temp.loc[j,'long'])
- dis_mat=np.array(dis_mat_half)+np.transpose(dis_mat_half)
- # 执行聚类算法,聚类参数:距离50,类内最少样本数10
- dbscan = DBSCAN(eps=0.05, min_samples=10, metric='precomputed').fit(dis_mat)
- # 将对应的类内的GPS合并
- class_label = list(sorted(set(dbscan.labels_)))
- if -1 in class_label:
- class_label.remove(-1)
- for label in class_label:
- index = sorted(np.where(dbscan.labels_ == label))[0]
- min_index = index[0]
- gps_lat = df_res_temp.loc[min_index,'lat']
- gps_long = df_res_temp.loc[min_index,'long']
- temp_duration = df_res_temp.loc[min_index,'duration']
- for i in index[1:]:
- gps_lat = gps_lat + df_res_temp.loc[i, 'lat']
- gps_long = gps_long + df_res_temp.loc[i, 'long']
- temp_duration = temp_duration + df_res_temp.loc[i, 'duration']
- df_res_temp.drop(index=i, inplace=True)
- df_res_temp.loc[min_index, 'lat'] = gps_lat/len(index)
- df_res_temp.loc[min_index, 'long'] = gps_long/len(index)
- df_res_temp.loc[min_index, 'duration'] = temp_duration
- df_res = df_res.append({'time': start_time[0:10],
- 'lat':np.mean(df_sel_gps['纬度']),
- 'long':np.mean(df_sel_gps['经度']),
- 'max_duration':df_res_temp['duration'].max()/3600.0}, ignore_index=True)
- start_time = end_time
- end_time = end_time + timeDelta
- return df_res
- # 统计单位时间内的累积行车时长、soc累积使用量以及累积行驶里程(若GPS可信)
- # 计算单位时间内行车时长占比, 单位时间内行车soc平均变化量,单位时间内平均里程数。
- # 输入
- # time_window: 统计时间长度
- # step: 时间窗口滑动步进值
- def sta_one_drive_cycle(df_bms, df_gps, prepro_record, time_window=3600, step=3600, start_time="00:00:00"):
- st = datetime.datetime.strptime(str(df_bms.loc[0, '时间戳'])[0:10] + ' ' + start_time, '%Y-%m-%d %H:%M:%S')
- et = st + datetime.timedelta(seconds=time_window)
- time_list = []
- driveT_list = []
- driveSoc_list = []
- driveOdo_list = []
- driveOdoRevise_list = []
- while (et < df_bms.loc[len(df_bms)-1, '时间戳']):
- df_t = df_bms[(df_bms['时间戳'] > st ) & (df_bms['时间戳'] < et )]
- df_t = df_t.reset_index(drop=True)
- driveT = 0
- driveSoc = 0
- driveOdo = 0
- driveOdoRevise = 0
- if not df_t.empty:
- deltaT = (df_t.loc[len(df_t)-1, '时间戳'] - df_t.loc[0, '时间戳']).total_seconds()
- df_drive = df_t[df_t['data_status']=='drive']
- df_drive = df_drive.reset_index(drop=True)
- data_number_list = sorted(list(set(df_drive['data_split_by_status'])))
- for data_number in data_number_list[:]:
- df_d = df_drive[df_drive['data_split_by_status'] == data_number]
- df_d = df_d.reset_index(drop=True)
- driveT = driveT + (df_d.loc[len(df_d)-1, '时间戳'] - df_d.loc[0, '时间戳']).total_seconds()
- driveSoc = driveSoc + (df_d.loc[0, 'SOC[%]'] - df_d.loc[len(df_d)-1, 'SOC[%]'])
- if df_d.loc[0, 'gps_rely'] == 1 and driveOdo != None:
- df_sel_gps = df_gps[(df_gps['时间戳']>=df_d.loc[0,'时间戳']) & (df_gps['时间戳']<=df_d.loc[len(df_d)-1,'时间戳'])]
- df_sel_gps = df_sel_gps.reset_index(drop=True)
- if len(df_sel_gps) > 0:
- driveOdo = driveOdo + (df_sel_gps.loc[len(df_sel_gps)-1, 'odo'] - df_sel_gps.loc[0, 'odo'])
-
- else:
- driveOdo = None
- else:
- driveOdo = None
- time_list.append(st)
- driveT_list.append(driveT)
- driveSoc_list.append(driveSoc)
- driveOdo_list.append(driveOdo)
- st = st + datetime.timedelta(seconds=step)
- et = st + datetime.timedelta(seconds=time_window)
- if prepro_record['drive']<0.8 and sum(driveSoc_list) > 0:
- # 计算能耗
- sum_odo = 0
- sum_soc = 0
- for i,odo in enumerate(driveOdo_list):
- if odo !=0 and not pd.isnull(odo):
- sum_odo += odo
- sum_soc += driveSoc_list[i]
- ene_consump = sum_odo/sum_soc
- st = datetime.datetime.strptime(str(df_bms.loc[0, '时间戳'])[0:10] + ' ' + start_time, '%Y-%m-%d %H:%M:%S')
- et = st + datetime.timedelta(seconds=time_window)
- driveOdoRevise_list = []
- while (et < df_bms.loc[len(df_bms)-1, '时间戳']):
- df_t = df_bms[(df_bms['时间戳'] > st ) & (df_bms['时间戳'] < et )]
- df_t = df_t.reset_index(drop=True)
- driveOdoRevise = 0
- if not df_t.empty:
- deltaT = (df_t.loc[len(df_t)-1, '时间戳'] - df_t.loc[0, '时间戳']).total_seconds()
- df_drive = df_t[df_t['data_status']=='drive']
- df_drive = df_drive.reset_index(drop=True)
- data_number_list = sorted(list(set(df_drive['data_split_by_status'])))
- for data_number in data_number_list[:]:
- df_d = df_drive[df_drive['data_split_by_status'] == data_number]
- df_d = df_d.reset_index(drop=True)
- if df_d.loc[0, 'gps_rely'] == 1 and driveOdo != None:
- df_sel_gps = df_gps[(df_gps['时间戳']>=df_d.loc[0,'时间戳']) & (df_gps['时间戳']<=df_d.loc[len(df_d)-1,'时间戳'])]
- df_sel_gps = df_sel_gps.reset_index(drop=True)
- if len(df_sel_gps) > 0:
- driveOdoRevise = driveOdoRevise + (df_sel_gps.loc[len(df_sel_gps)-1, 'odo'] - df_sel_gps.loc[0, 'odo'])
- else:
- driveOdoRevise = driveOdoRevise + (df_d.loc[0, 'SOC[%]'] - df_d.loc[len(df_d)-1, 'SOC[%]']) * ene_consump
- else:
- driveOdoRevise = driveOdoRevise + (df_d.loc[0, 'SOC[%]'] - df_d.loc[len(df_d)-1, 'SOC[%]']) * ene_consump
- driveOdoRevise_list.append(driveOdoRevise)
- st = st + datetime.timedelta(seconds=step)
- et = st + datetime.timedelta(seconds=time_window)
- else:
- driveOdoRevise_list = [None] * len(driveSoc_list)
- df_res = pd.DataFrame({'time':time_list, 'driveT':driveT_list, 'driveSoc':driveSoc_list, 'driveOdo':driveOdo_list, 'driveOdoRevise':driveOdoRevise_list})
- return df_res
- # 统计充电前的GPS海拔与充电时的GPS海拔差(若GPS可信)
- def sta_charge_height(df_bms, df_gps):
- data_number_list = sorted(list(set(df_bms['data_split_by_status'])))
- df_sel_bms_last = df_bms[df_bms['data_split_by_status'] == 1]
- df_sel_bms_last = df_sel_bms_last.reset_index(drop=True)
- time_list = []
- last_height_list = []
- height_list = []
- last_status_list = []
- for data_number in data_number_list[1:]:
- df_sel_bms = df_bms[df_bms['data_split_by_status'] == data_number]
- df_sel_bms = df_sel_bms.reset_index(drop=True)
- if df_sel_bms_last.loc[0, 'data_status'] != 'charge' and df_sel_bms.loc[0, 'data_status'] == 'charge' and\
- df_sel_bms_last.loc[0, 'gps_rely'] == 1 and df_sel_bms.loc[0, 'gps_rely'] == 1:
- df_sel_gps_last = df_gps[(df_gps['时间戳']>=df_sel_bms_last.loc[0,'时间戳']) & (df_gps['时间戳']<=df_sel_bms_last.loc[len(df_sel_bms_last)-1,'时间戳'])]
- df_sel_gps_last = df_sel_gps_last.reset_index(drop=True)
- df_sel_gps = df_gps[(df_gps['时间戳']>=df_sel_bms.loc[0,'时间戳']) & (df_gps['时间戳']<=df_sel_bms.loc[len(df_sel_bms)-1,'时间戳'])]
- df_sel_gps = df_sel_gps.reset_index(drop=True)
- if (df_sel_bms_last.loc[0, 'data_status'] == 'stand'):
- last_height = df_sel_gps_last['海拔m'].mean()
- else:
- last_height = df_sel_gps_last.loc[len(df_sel_gps_last)-1, '海拔m']
- cur_height = df_sel_gps['海拔m'].mean()
- time_list.append(df_sel_bms.loc[0, '时间戳'])
- last_height_list.append(last_height)
- height_list.append(cur_height)
- last_status_list.append(df_sel_bms_last.loc[0, 'data_status'])
- df_sel_bms_last = df_sel_bms.copy()
- df_res = pd.DataFrame({'time':time_list, 'last_status':last_status_list, 'last_height':last_height_list, 'cur_height':height_list, 'diff':np.array(height_list)-np.array(last_height_list)})
- return df_res
|