import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta
from ProcessDfBms import *
from math import radians, cos, sin, asin, sqrt

def cal_unrecorded_gps(df_in,df_bms):
    '''Sum the mileage estimated for every GPS signal gap found in ``df_in``.

    A row of ``df_in`` (the GPS table) is treated as the first sample after a
    signal loss when either

    * its ``deltatime`` is greater than 180, or
    * its ``deltatime`` is greater than 90 and its ``distance`` is greater
      than 1000.

    (Presumably seconds and metres -- TODO confirm against the code that
    fills these columns.)

    For each such row ``cal_deltasoc(df_bms, previous_time, current_time)`` is
    called, where the two times are the ``time`` values of the preceding row
    and of the flagged row, and the results are accumulated.

    Rows are addressed by position, so the frame does not need a contiguous
    0..n-1 index. The first row is never treated as a gap because it has no
    predecessor.

    Args:
        df_in: GPS DataFrame with ``deltatime``, ``distance`` and ``time``
            columns.
        df_bms: BMS data, passed through unchanged to ``cal_deltasoc``.

    Returns:
        The accumulated value returned by ``cal_deltasoc`` over all gaps, or
        0 when no gap is found.
    '''
    deltatime=df_in['deltatime']
    # Long silence on its own means the logger was offline.
    long_silence=deltatime>60*3
    # A shorter silence combined with a large position jump also counts.
    silence_with_jump=(deltatime>90*1)&(df_in['distance']>1000)
    gap_mask=(long_silence|silence_with_jump).to_numpy()

    accum_unrecorded_odo=0
    for pos in np.flatnonzero(gap_mask):
        if pos==0:
            # No previous sample exists to delimit the start of the gap.
            continue
        last_end_time=df_in['time'].iloc[pos-1]
        this_start_time=df_in['time'].iloc[pos]
        # cal_deltasoc estimates the mileage between the two timestamps.
        accum_unrecorded_odo+=cal_deltasoc(df_bms,last_end_time,this_start_time)

    return accum_unrecorded_odo


def df_add_avgspeed(df_in):
    '''Add an ``avgspeed`` column computed from ``distance`` and ``deltatime``.

    For every row except the first, the value is
    ``(distance / 1000) / (deltatime / 3600)``, i.e. km/h if ``distance`` is
    in metres and ``deltatime`` in seconds (TODO confirm units). The first
    row is always set to 0.

    Rows are processed by position, so the frame may carry any index. The
    frame is modified in place and also returned. An empty frame still gets
    an (empty) ``avgspeed`` column.

    Args:
        df_in: DataFrame with numeric ``deltatime`` and ``distance`` columns.

    Returns:
        ``df_in`` with the ``avgspeed`` column added.
    '''
    row_count=len(df_in)
    # The first row keeps the default speed of 0.
    avgspeed=np.zeros(row_count,dtype=float)
    if row_count>1:
        distance_km=df_in['distance'].to_numpy(dtype=float)[1:]/1000
        deltatime_h=df_in['deltatime'].to_numpy(dtype=float)[1:]/3600
        # A zero deltatime yields inf/nan here rather than raising.
        avgspeed[1:]=distance_km/deltatime_h
    df_in['avgspeed']=avgspeed
    return df_in


def read_df_gps(path):
    '''Load a GBK-encoded GPS CSV file and return it with derived columns.

    The Chinese column headers are renamed to ``time``, ``lat``, ``lng``,
    ``sat_num``, ``height`` and ``speed``; ``time`` is parsed to datetimes.
    Rows whose distance to the preceding row is 20000 or more are dropped as
    probable GPS drift, after which ``distance``, ``deltatime`` and
    ``avgspeed`` are computed on the cleaned, re-indexed frame.
    '''
    header_map = {"时间戳": "time", "纬度":"lat", "经度":"lng",
                  "卫星数":"sat_num", "海拔m":"height","速度[km/h]":"speed"}
    gps = pd.read_csv(path, encoding='gbk').rename(columns=header_map)
    gps['time'] = pd.to_datetime(gps['time'])

    # First pass: distances are only needed to spot drifted points.
    gps = df_add_distance(gps)
    plausible = gps['distance'] < 20000
    gps = gps[plausible].copy().reset_index(drop=True)

    # Second pass: derive the final columns on the cleaned rows.
    for add_column in (df_add_distance, df_add_deltatime, df_add_avgspeed):
        gps = add_column(gps)
    return gps

def preprocess_Df_Gps(df_gps):
    '''Clean a raw GPS DataFrame and add distance, deltatime and avgspeed.

    Steps, in order:

    1. Rename the Chinese column headers to ``time``, ``lat``, ``lng``,
       ``sat_num``, ``height`` and ``speed``. This rename is done in place
       on the frame passed in by the caller.
    2. Drop rows with a missing ``time``, ``lat`` or ``lng`` and rows whose
       ``time`` repeats an earlier row (the first occurrence is kept).
    3. Parse ``time`` to datetimes.
    4. Compute ``distance`` and drop rows whose distance to the preceding
       row is 20000 or more (probable GPS drift).
    5. Recompute ``distance``, add ``deltatime``, and drop rows whose
       ``deltatime`` is not greater than 0.01 (no speed can be derived).
    6. Add ``avgspeed``.

    The index is reset to 0..n-1 after every step that removes rows, because
    the column helpers and downstream code address rows as 0..n-1.

    Args:
        df_gps: Raw GPS DataFrame with the original Chinese headers.

    Returns:
        A new, cleaned DataFrame with a contiguous 0..n-1 index.
    '''
    # Rename the headers (in place on the caller's frame).
    df_gps.rename(columns = {"时间戳": "time", "纬度":"lat", "经度":"lng", 
                             "卫星数":"sat_num", "海拔m":"height","速度[km/h]":"speed"},  inplace=True)
    # Drop rows with missing key fields.
    df_gps=df_gps.dropna(subset=['time','lat','lng'])
    # Drop rows with a repeated timestamp, keeping the first occurrence.
    df_gps=df_gps.drop_duplicates(subset=['time'],keep='first')
    # Rows were removed above: restore a contiguous index before any helper
    # that walks the frame row by row.
    df_gps=df_gps.reset_index(drop=True)
    # Parse the timestamps.
    df_gps['time']=pd.to_datetime(df_gps['time'])

    # First distance pass, used only to detect drifted points.
    df_gps=df_add_distance(df_gps)
    condition=df_gps['distance']<20000
    df_gps=df_gps.loc[condition,:].copy()
    df_gps=df_gps.reset_index(drop=True)

    # Second pass on the cleaned rows.
    df_gps=df_add_distance(df_gps)
    df_gps=df_add_deltatime(df_gps)
    # Rows with (near-)zero deltatime cannot yield a speed.
    df_gps=df_gps.loc[df_gps['deltatime']>0.01,:].copy()
    # The filter above leaves holes in the index; close them so that
    # df_add_avgspeed and later consumers see rows 0..n-1.
    df_gps=df_gps.reset_index(drop=True)
    df_gps=df_add_avgspeed(df_gps)

    return df_gps


def df_add_distance(df_in):
    '''Add a ``distance`` column holding the distance to the preceding row.

    For every row except the first, the value is
    ``haversine(lng_prev, lat_prev, lng, lat)`` computed from the ``lng`` and
    ``lat`` columns of the preceding and current rows. The first row is set
    to 0.

    Rows are processed by position, so the frame may carry any index. The
    frame is modified in place and also returned. An empty frame still gets
    an (empty) ``distance`` column.

    Args:
        df_in: DataFrame with ``lng`` and ``lat`` columns.

    Returns:
        ``df_in`` with the ``distance`` column added.
    '''
    lngs=df_in['lng'].tolist()
    lats=df_in['lat'].tolist()
    # The first row has no predecessor and keeps a distance of 0.
    distances=[0.0]*len(lngs)
    for pos in range(1,len(lngs)):
        distances[pos]=haversine(lngs[pos-1],lats[pos-1],lngs[pos],lats[pos])
    # One column assignment instead of one .loc write per row.
    df_in['distance']=distances
    return df_in


def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees).

    Uses the haversine formula with a mean earth radius of 6371 km.

    Args:
        lon1, lat1: Longitude and latitude of the first point, in degrees.
        lon2, lat2: Longitude and latitude of the second point, in degrees.

    Returns:
        The distance in metres.
    """
    # Convert decimal degrees to radians.
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # Haversine formula.
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    # Rounding can push `a` marginally above 1 for (near-)antipodal points,
    # which would make asin() raise a math domain error.
    if a > 1.0:
        a = 1.0
    c = 2 * asin(sqrt(a))
    r = 6371  # Mean earth radius in kilometres.
    return c * r * 1000