123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139 |
- import pandas as pd
- import numpy as np
- from datetime import datetime
- from datetime import timedelta
- from ProcessDfBms import *
- from math import radians, cos, sin, asin, sqrt
def cal_unrecorded_gps(df_in, df_bms):
    """Sum the mileage that was driven while the GPS signal was lost.

    A row of ``df_in`` is treated as the first sample after a signal loss
    when either

    * its ``deltatime`` exceeds 3 minutes (180 s), or
    * its ``deltatime`` exceeds 90 s and its ``distance`` exceeds 1000.

    For every such row, the ``time`` of the previous row (last sample before
    the loss) and the ``time`` of the row itself (first sample after the
    loss) are passed to ``cal_deltasoc`` together with ``df_bms``; the
    returned values are accumulated.

    Args:
        df_in: GPS DataFrame with ``deltatime``, ``distance`` and ``time``
            columns. NOTE(review): the previous row is looked up as
            ``label - 1``, so a default 0..n-1 index is assumed.
        df_bms: BMS DataFrame, forwarded unchanged to ``cal_deltasoc``.

    Returns:
        The accumulated value returned by ``cal_deltasoc`` over all gaps
        (per the original comments, an estimated mileage), or ``0`` when no
        gap was detected.
    """
    # Condition 1: a long pause on its own means the logger went offline.
    long_pause = df_in['deltatime'] > 60 * 3
    # Condition 2: a shorter pause combined with a large position jump.
    pause_with_jump = (df_in['deltatime'] > 90 * 1) & (df_in['distance'] > 1000)
    signal_start_list = df_in.loc[long_pause | pause_with_jump, :].index.to_list()

    # Row 0 has no predecessor to pair with, so it can never close a gap.
    signal_start_list = [label for label in signal_start_list if label != 0]

    accum_unrecorded_odo = 0
    for start_time_index in signal_start_list:
        last_end_time = df_in.loc[start_time_index - 1, 'time']
        this_start_time = df_in.loc[start_time_index, 'time']
        accum_unrecorded_odo += cal_deltasoc(df_bms, last_end_time, this_start_time)
    return accum_unrecorded_odo
def df_add_avgspeed(df_in):
    """Add an ``avgspeed`` column computed from ``distance`` and ``deltatime``.

    The speed of each row is ``(distance / 1000) / (deltatime / 3600)``,
    i.e. km/h when ``distance`` is in metres and ``deltatime`` in seconds.
    The first row has no predecessor and is always set to 0.

    The computation is positional, so it works for any index (for example a
    frame that was filtered without ``reset_index``) and for an empty frame.

    Args:
        df_in: DataFrame with numeric ``deltatime`` and ``distance`` columns.
            It is modified in place.

    Returns:
        The same ``df_in`` object, now carrying the ``avgspeed`` column.
    """
    speed = (df_in['distance'] / 1000) / (df_in['deltatime'] / 3600)
    # Work on a private float copy so the first-row override cannot touch
    # the source columns.
    values = speed.to_numpy(dtype=float, copy=True)
    if len(values) > 0:
        values[0] = 0
    df_in['avgspeed'] = values
    return df_in
def read_df_gps(path):
    """Load a GBK-encoded GPS CSV file and return the enriched DataFrame.

    Steps: rename the Chinese headers to English names, parse ``time`` as
    datetimes, drop rows whose step distance is 20000 or more (treated as
    GPS drift), re-index, then recompute ``distance`` and add the
    ``deltatime`` and ``avgspeed`` columns.

    Args:
        path: Location of the CSV file, passed to ``pandas.read_csv``.

    Returns:
        The cleaned DataFrame with a fresh 0..n-1 index.
    """
    header_map = {
        "时间戳": "time",
        "纬度": "lat",
        "经度": "lng",
        "卫星数": "sat_num",
        "海拔m": "height",
        "速度[km/h]": "speed",
    }
    frame = pd.read_csv(path, encoding='gbk')
    frame.rename(columns=header_map, inplace=True)
    frame['time'] = pd.to_datetime(frame['time'])

    # First pass: the step distance is only needed to spot drifting fixes.
    frame = df_add_distance(frame)
    plausible = frame['distance'] < 20000
    frame = frame.loc[plausible, :].copy()
    frame = frame.reset_index(drop=True)

    # Second pass: derive the final columns on the cleaned rows.
    for add_column in (df_add_distance, df_add_deltatime, df_add_avgspeed):
        frame = add_column(frame)
    return frame
def preprocess_Df_Gps(df_gps):
    """Clean an already-loaded GPS DataFrame and add the derived columns.

    Steps:
      1. Rename the Chinese headers to English names (done in place, so the
         caller's frame sees the renamed columns too).
      2. Drop rows with missing ``time``/``lat``/``lng`` and rows whose
         ``time`` duplicates an earlier row.
      3. Parse ``time`` as datetimes.
      4. Drop rows whose step distance is 20000 or more (treated as GPS
         drift), then recompute ``distance`` and add ``deltatime``.
      5. Drop rows with ``deltatime`` <= 0.01 (speed is undefined for equal
         timestamps) and add ``avgspeed``.

    The index is reset after every row-dropping step because the
    ``df_add_*`` helpers address rows by the labels 0..n-1; without the
    reset they would read the wrong rows or raise ``KeyError``.

    Args:
        df_gps: Raw GPS DataFrame using either the Chinese or the English
            column names.

    Returns:
        A new, cleaned DataFrame with a contiguous 0..n-1 index.
    """
    df_gps.rename(columns={"时间戳": "time", "纬度": "lat", "经度": "lng",
                           "卫星数": "sat_num", "海拔m": "height",
                           "速度[km/h]": "speed"}, inplace=True)
    # Remove incomplete rows, then keep the first row for every timestamp.
    df_gps = df_gps.dropna(subset=['time', 'lat', 'lng'])
    df_gps = df_gps.drop_duplicates(subset=['time'], keep='first')
    # Rows were dropped: restore contiguous labels for the helpers below.
    df_gps = df_gps.reset_index(drop=True)
    df_gps['time'] = pd.to_datetime(df_gps['time'])

    # First pass: the step distance is only needed to spot drifting fixes.
    df_gps = df_add_distance(df_gps)
    condition = df_gps['distance'] < 20000
    df_gps = df_gps.loc[condition, :].copy()
    df_gps = df_gps.reset_index(drop=True)

    # Second pass: derive the final columns on the cleaned rows.
    df_gps = df_add_distance(df_gps)
    df_gps = df_add_deltatime(df_gps)
    # Equal timestamps give deltatime == 0, for which no speed can be computed.
    df_gps = df_gps.loc[df_gps['deltatime'] > 0.01, :].copy()
    df_gps = df_gps.reset_index(drop=True)
    df_gps = df_add_avgspeed(df_gps)
    return df_gps
def df_add_distance(df_in):
    """Add a ``distance`` column: haversine distance to the previous row.

    The first row has no predecessor and gets 0. Every other row gets
    ``haversine(lng[i-1], lat[i-1], lng[i], lat[i])``.

    Rows are paired by position, so the result is correct for any index
    (for example a frame that was filtered without ``reset_index``), and an
    empty frame simply receives an empty ``distance`` column.

    Args:
        df_in: DataFrame with ``lng`` and ``lat`` columns. It is modified
            in place.

    Returns:
        The same ``df_in`` object, now carrying the ``distance`` column.
    """
    lngs = df_in['lng'].to_numpy()
    lats = df_in['lat'].to_numpy()
    # Preallocate with zeros: position 0 keeps the default of 0.
    distances = np.zeros(len(df_in), dtype=float)
    for pos in range(1, len(df_in)):
        distances[pos] = haversine(lngs[pos - 1], lats[pos - 1],
                                   lngs[pos], lats[pos])
    df_in['distance'] = distances
    return df_in
def haversine(lon1, lat1, lon2, lat2):
    """Return the great-circle distance in metres between two points.

    Args:
        lon1: Longitude of the first point, in decimal degrees.
        lat1: Latitude of the first point, in decimal degrees.
        lon2: Longitude of the second point, in decimal degrees.
        lat2: Latitude of the second point, in decimal degrees.

    Returns:
        The distance along a sphere of radius 6371 km, converted to metres.
        NaN inputs yield NaN.
    """
    # Convert decimal degrees to radians.
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # Haversine formula.
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    # For (nearly) antipodal points rounding can push `a` marginally above 1,
    # which would make asin() raise ValueError. The comparison is False for
    # NaN, so NaN still propagates to the result.
    if a > 1.0:
        a = 1.0
    c = 2 * asin(sqrt(a))
    r = 6371  # mean Earth radius in kilometres
    return c * r * 1000
|