"""GPS track preprocessing helpers.

Loads / cleans a GPS log into a DataFrame with ``distance``, ``deltatime``
and ``avgspeed`` columns, and estimates the mileage that was not recorded
while the GPS signal was lost (using BMS data via ``cal_deltasoc``).
"""
# NOTE: the import order below is intentionally kept exactly as in the
# original file. ``from ProcessDfBms import *`` may re-export names, and
# whichever import comes later wins, so reordering could change bindings.
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta
from ProcessDfBms import *
from math import radians, cos, sin, asin, sqrt

# Raw (Chinese) CSV headers -> internal column names.
_GPS_COLUMN_MAP = {"时间戳": "time", "纬度": "lat", "经度": "lng",
                   "卫星数": "sat_num", "海拔m": "height",
                   "速度[km/h]": "speed"}

# A jump of this many metres (or more) between consecutive fixes is treated
# as GPS drift / a bad fix and the row is dropped.
_MAX_GPS_JUMP_M = 20000

# Mean earth radius in kilometres, used by the haversine formula.
_EARTH_RADIUS_KM = 6371


def cal_unrecorded_gps(df_in, df_bms):
    """Estimate the total mileage missed while the GPS signal was lost.

    A row is considered the first fix after a signal loss when either

    * ``deltatime`` > 180 (more than 3 minutes since the previous fix), or
    * ``deltatime`` > 90 and ``distance`` > 1000.

    For every such row the time window (previous row's ``time``, this row's
    ``time``) is passed to ``cal_deltasoc`` (imported from ``ProcessDfBms``)
    and the results are summed.

    Args:
        df_in: GPS DataFrame with ``time``, ``deltatime`` and ``distance``
            columns (``deltatime`` presumably in seconds, ``distance`` in
            metres as produced by ``df_add_distance``).
        df_bms: BMS DataFrame, forwarded untouched to ``cal_deltasoc``.

    Returns:
        The accumulated result of ``cal_deltasoc`` over all gaps, or ``0``
        when no gap is found.
    """
    offline = df_in['deltatime'] > 60 * 3
    jumped = (df_in['deltatime'] > 90 * 1) & (df_in['distance'] > 1000)

    # Work with row positions rather than index labels so the "previous row"
    # lookup stays correct even if the index is not a clean 0..n-1 range.
    gap_positions = np.flatnonzero((offline | jumped).to_numpy())

    times = df_in['time']
    accum_unrecorded_odo = 0
    for pos in gap_positions:
        if pos == 0:
            # The first row has no predecessor to pair with.
            continue
        last_end_time = times.iloc[pos - 1]
        this_start_time = times.iloc[pos]
        accum_unrecorded_odo += cal_deltasoc(
            df_bms, last_end_time, this_start_time)
    return accum_unrecorded_odo


def df_add_avgspeed(df_in):
    """Add an ``avgspeed`` column computed from ``distance`` and ``deltatime``.

    ``avgspeed = (distance / 1000) / (deltatime / 3600)``, i.e. km/h when
    ``distance`` is in metres and ``deltatime`` in seconds. The first row is
    always set to 0. Rows with ``deltatime == 0`` yield ``inf``/``NaN`` (no
    exception), so callers should filter those out beforehand.

    Args:
        df_in: DataFrame with ``deltatime`` and ``distance`` columns.

    Returns:
        The same DataFrame object, modified in place.
    """
    avgspeed = (df_in['distance'] / 1000) / (df_in['deltatime'] / 3600)
    if len(avgspeed) > 0:
        # Positional, so it works regardless of the index labels.
        avgspeed.iloc[0] = 0
    df_in['avgspeed'] = avgspeed
    return df_in


def _drop_gps_drift(df_gps):
    """Drop rows that jump >= _MAX_GPS_JUMP_M from the previous fix.

    Returns a re-indexed copy whose ``distance`` column has been recomputed
    against the surviving neighbours.
    """
    df_gps = df_add_distance(df_gps)
    df_gps = df_gps.loc[df_gps['distance'] < _MAX_GPS_JUMP_M, :].copy()
    df_gps = df_gps.reset_index(drop=True)
    # Distances must be recomputed because each row's predecessor may have
    # changed after the drift rows were removed.
    return df_add_distance(df_gps)


def read_df_gps(path):
    """Read a GBK-encoded GPS CSV file and return the preprocessed DataFrame.

    Steps: rename the raw headers, parse ``time``, drop drifted fixes, then
    add ``distance``, ``deltatime`` and ``avgspeed`` columns.

    Unlike ``preprocess_Df_Gps`` this does not drop empty rows, duplicated
    timestamps or zero ``deltatime`` rows.

    Args:
        path: Path of the CSV file (passed to ``pandas.read_csv``).

    Returns:
        The cleaned GPS DataFrame with a fresh 0..n-1 index.
    """
    df_gps = pd.read_csv(path, encoding='gbk')
    df_gps.rename(columns=_GPS_COLUMN_MAP, inplace=True)
    df_gps['time'] = pd.to_datetime(df_gps['time'])

    df_gps = _drop_gps_drift(df_gps)
    df_gps = df_add_deltatime(df_gps)
    df_gps = df_add_avgspeed(df_gps)
    return df_gps


def preprocess_Df_Gps(df_gps):
    """Clean an already-loaded GPS DataFrame.

    Steps: rename the raw headers (in place on the caller's frame, as
    before), drop rows with missing ``time``/``lat``/``lng``, drop repeated
    timestamps (keeping the first), parse ``time``, drop drifted fixes, add
    ``distance`` and ``deltatime``, drop rows whose ``deltatime`` is not
    greater than 0.01 (speed cannot be computed for them), and finally add
    ``avgspeed``.

    The index is reset after every row-dropping step so that the returned
    frame always has a contiguous 0..n-1 index.

    Args:
        df_gps: Raw GPS DataFrame using either the raw or the renamed
            column headers.

    Returns:
        A new, cleaned DataFrame.
    """
    df_gps.rename(columns=_GPS_COLUMN_MAP, inplace=True)

    df_gps = df_gps.dropna(subset=['time', 'lat', 'lng'])
    df_gps = df_gps.drop_duplicates(subset=['time'], keep='first')
    # reset_index returns a fresh frame: no gaps in the index for the
    # helpers below, and no SettingWithCopyWarning on the assignment.
    df_gps = df_gps.reset_index(drop=True)
    df_gps['time'] = pd.to_datetime(df_gps['time'])

    df_gps = _drop_gps_drift(df_gps)
    df_gps = df_add_deltatime(df_gps)

    # Identical timestamps give deltatime == 0 and no usable speed.
    df_gps = df_gps.loc[df_gps['deltatime'] > 0.01, :].copy()
    df_gps = df_gps.reset_index(drop=True)

    df_gps = df_add_avgspeed(df_gps)
    return df_gps


def df_add_distance(df_in):
    """Add a ``distance`` column: metres travelled since the previous row.

    The first row is 0; every other row holds the haversine distance between
    its (``lng``, ``lat``) and those of the row before it. Rows are paired by
    position, not by index label.

    Args:
        df_in: DataFrame with numeric ``lng`` and ``lat`` columns in decimal
            degrees.

    Returns:
        The same DataFrame object, modified in place.
    """
    lng = df_in['lng'].to_numpy(dtype=float)
    lat = df_in['lat'].to_numpy(dtype=float)

    distance = np.zeros(len(df_in), dtype=float)
    if len(df_in) > 1:
        distance[1:] = haversine(lng[:-1], lat[:-1], lng[1:], lat[1:])
    df_in['distance'] = distance
    return df_in


def haversine(lon1, lat1, lon2, lat2):
    """Return the great-circle distance in metres between two points.

    Coordinates are given in decimal degrees. Scalars and equally shaped
    array-likes are both accepted; scalar input returns a plain ``float``,
    array input returns a ``numpy.ndarray``.
    """
    lon1, lat1, lon2, lat2 = (
        np.radians(np.asarray(v, dtype=float))
        for v in (lon1, lat1, lon2, lat2)
    )

    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = np.sin(dlat / 2) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2) ** 2
    # Rounding can push ``a`` marginally outside [0, 1] for (near-)antipodal
    # points, which would make the arcsine undefined.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arcsin(np.sqrt(a))

    metres = c * _EARTH_RADIUS_KM * 1000
    return float(metres) if np.ndim(metres) == 0 else metres