123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139 |
- import pandas as pd
- import numpy as np
- from datetime import datetime
- from datetime import timedelta
- from ProcessDfBms import *
- from math import radians, cos, sin, asin, sqrt
def cal_unrecorded_gps(df_in, df_bms):
    """Accumulate the BMS-derived value for every gap in the GPS log.

    A row is treated as the first sample after a gap when either

    * its ``deltatime`` exceeds ``60 * 3``, or
    * its ``deltatime`` exceeds ``90 * 1`` and its ``distance`` exceeds
      ``1000``.

    For each such row, ``cal_deltasoc(df_bms, last_end_time, this_start_time)``
    is called with the ``time`` of the previous row and of the flagged row,
    and the results are summed.

    Args:
        df_in: The GPS table (``df_gps``). Must contain ``deltatime`` and
            ``distance`` columns, plus ``time`` when at least one gap exists.
            NOTE(review): the thresholds look like seconds and metres --
            confirm against ``df_add_deltatime``.
        df_bms: The BMS table, passed through untouched to ``cal_deltasoc``.

    Returns:
        The sum of the ``cal_deltasoc`` results, or ``0`` when no gap is
        found. NOTE(review): ``cal_deltasoc`` is defined outside this file
        section; the meaning/unit of its result should be confirmed there.
    """
    long_gap = df_in['deltatime'] > 60 * 3
    gap_with_jump = (df_in['deltatime'] > 90 * 1) & (df_in['distance'] > 1000)
    is_gap_start = long_gap | gap_with_jump

    # Work with row positions rather than index labels so the "previous row"
    # lookup stays correct even if the frame does not carry a 0..n-1 index.
    # The very first row has no predecessor, so it can never close a gap.
    gap_positions = [
        pos for pos, flagged in enumerate(is_gap_start) if flagged and pos > 0
    ]
    if not gap_positions:
        return 0

    times = df_in['time']
    accum_unrecorded_odo = 0
    for pos in gap_positions:
        last_end_time = times.iloc[pos - 1]
        this_start_time = times.iloc[pos]
        accum_unrecorded_odo += cal_deltasoc(df_bms, last_end_time, this_start_time)

    return accum_unrecorded_odo
def df_add_avgspeed(df_in):
    """Add an ``avgspeed`` column computed from ``distance`` and ``deltatime``.

    Each row gets ``(distance / 1000) / (deltatime / 3600)``; the first row is
    always set to ``0``. The computation is positional, so it works on frames
    whose index is not a contiguous ``0..n-1`` range (the previous label-based
    loop appended a spurious row or raised ``KeyError`` on such frames).

    Args:
        df_in: DataFrame with numeric ``deltatime`` and ``distance`` columns.
            It is modified in place.
            NOTE(review): the formula yields km/h if ``distance`` is in metres
            and ``deltatime`` in seconds -- confirm the ``deltatime`` unit.

    Returns:
        The same ``df_in`` object, with the ``avgspeed`` column added or
        overwritten. Rows with ``deltatime == 0`` yield ``inf``/``NaN``.
    """
    speeds = (df_in['distance'] / 1000) / (df_in['deltatime'] / 3600)
    if len(speeds) > 0:
        # The first sample has no predecessor, so its speed is defined as 0.
        speeds.iloc[0] = 0
    # Assign raw values so the result does not depend on index alignment.
    df_in['avgspeed'] = speeds.to_numpy()
    return df_in
def read_df_gps(path):
    """Load a GBK-encoded GPS CSV and derive distance, deltatime and avgspeed.

    The Chinese column headers are renamed to short English names, ``time`` is
    parsed to datetimes, and rows whose ``distance`` (as computed by
    ``df_add_distance``) is 20000 or more are dropped before the derived
    columns are recomputed on the remaining rows.

    Args:
        path: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The cleaned DataFrame with a fresh 0-based index and the ``distance``,
        ``deltatime`` and ``avgspeed`` columns added by the helper functions.
    """
    column_names = {
        "时间戳": "time",
        "纬度": "lat",
        "经度": "lng",
        "卫星数": "sat_num",
        "海拔m": "height",
        "速度[km/h]": "speed",
    }

    gps = pd.read_csv(path, encoding='gbk')
    gps.rename(columns=column_names, inplace=True)
    gps['time'] = pd.to_datetime(gps['time'])

    # First pass: distances are only needed to discard implausible jumps.
    gps = df_add_distance(gps)
    plausible = gps['distance'] < 20000
    gps = gps.loc[plausible, :].copy().reset_index(drop=True)

    # Second pass: recompute on the filtered rows, then derive the rest.
    gps = df_add_distance(gps)
    gps = df_add_deltatime(gps)
    return df_add_avgspeed(gps)
def preprocess_Df_Gps(df_gps):
    """Clean a raw GPS DataFrame and derive distance, deltatime and avgspeed.

    Steps, in order:

    1. Rename the Chinese column headers to short English names (this rename
       is applied in place to the frame passed in).
    2. Drop rows missing ``time``, ``lat`` or ``lng`` and keep only the first
       row for each duplicated ``time``.
    3. Parse ``time`` to datetimes.
    4. Compute ``distance`` and drop rows whose value is 20000 or more, then
       recompute ``distance`` on the remaining rows.
    5. Compute ``deltatime`` and keep only rows where it exceeds ``0.01``.
    6. Compute ``avgspeed``.

    The index is reset after every row filter because the helper functions
    address rows by ``0..n-1``; running them on a frame with index gaps either
    raises ``KeyError`` or silently appends rows.

    Args:
        df_gps: Raw GPS DataFrame using either the original Chinese headers or
            the already-renamed ``time``/``lat``/``lng`` columns.

    Returns:
        A new DataFrame with a contiguous 0-based index and the ``distance``,
        ``deltatime`` and ``avgspeed`` columns added.
    """
    df_gps.rename(
        columns={
            "时间戳": "time",
            "纬度": "lat",
            "经度": "lng",
            "卫星数": "sat_num",
            "海拔m": "height",
            "速度[km/h]": "speed",
        },
        inplace=True,
    )

    df_gps = df_gps.dropna(subset=['time', 'lat', 'lng'])
    df_gps = df_gps.drop_duplicates(subset=['time'], keep='first')
    # Fresh frame with a contiguous index: avoids chained-assignment warnings
    # on the next line and gives the helpers the 0..n-1 index they expect.
    df_gps = df_gps.reset_index(drop=True)

    df_gps['time'] = pd.to_datetime(df_gps['time'])

    # First distance pass is only used to discard implausible position jumps.
    df_gps = df_add_distance(df_gps)
    df_gps = df_gps.loc[df_gps['distance'] < 20000, :].copy()
    df_gps = df_gps.reset_index(drop=True)

    df_gps = df_add_distance(df_gps)
    df_gps = df_add_deltatime(df_gps)
    df_gps = df_gps.loc[df_gps['deltatime'] > 0.01, :].copy()
    # Re-index again so df_add_avgspeed sees a contiguous index.
    df_gps = df_gps.reset_index(drop=True)
    df_gps = df_add_avgspeed(df_gps)

    return df_gps
def df_add_distance(df_in):
    """Add a ``distance`` column: metres travelled since the previous row.

    The first row is set to ``0``; every other row holds the ``haversine``
    distance between the previous row's (``lng``, ``lat``) and its own.
    Rows are paired by position, so the result is correct regardless of the
    frame's index labels, and the column is written in a single assignment
    instead of one ``.loc`` write per cell.

    Args:
        df_in: DataFrame with ``lng`` and ``lat`` columns in decimal degrees.
            It is modified in place.

    Returns:
        The same ``df_in`` object, with the ``distance`` column added or
        overwritten.
    """
    lngs = df_in['lng'].tolist()
    lats = df_in['lat'].tolist()

    # The first sample has no predecessor, so its distance is defined as 0.
    distances = [0.0] if lngs else []
    distances.extend(
        haversine(prev_lng, prev_lat, cur_lng, cur_lat)
        for prev_lng, prev_lat, cur_lng, cur_lat in zip(lngs, lats, lngs[1:], lats[1:])
    )

    df_in['distance'] = distances
    return df_in
def haversine(lon1, lat1, lon2, lat2):
    """Return the great-circle distance in metres between two points.

    Args:
        lon1: Longitude of the first point, in decimal degrees.
        lat1: Latitude of the first point, in decimal degrees.
        lon2: Longitude of the second point, in decimal degrees.
        lat2: Latitude of the second point, in decimal degrees.

    Returns:
        The haversine distance on a sphere of radius 6371 km, expressed in
        metres. ``NaN`` inputs produce a ``NaN`` result.
    """
    lon1, lat1, lon2, lat2 = map(radians, (lon1, lat1, lon2, lat2))

    half_dlon = (lon2 - lon1) / 2
    half_dlat = (lat2 - lat1) / 2
    a = sin(half_dlat) ** 2 + cos(lat1) * cos(lat2) * sin(half_dlon) ** 2

    root = sqrt(a)
    # Floating-point rounding can push the root marginally above 1 for
    # (near-)antipodal points, which would make asin raise ValueError.
    # The comparison is False for NaN, so NaN still propagates unchanged.
    if root > 1.0:
        root = 1.0
    central_angle = 2 * asin(root)

    earth_radius_km = 6371
    return central_angle * earth_radius_km * 1000
|