import datetime
import tools.tools
import pdb
import pandas as pd
import numpy as np
from sklearn.cluster import DBSCAN

# Data analysis
def _collect_stop_segments(df_window, df_gps):
    """Summarise every parked (charge/stand) segment with reliable GPS in a window.

    Returns a DataFrame with columns ``time``, ``lat``, ``long``, ``duration``
    (seconds), one row per segment. Segments without any GPS sample strictly
    inside their time span are skipped.
    """
    parked = df_window[df_window['data_status'].isin(['charge', 'stand'])
                       & (df_window['gps_rely'] == 1)]
    stops = []
    for data_number in sorted(set(parked['data_split_by_status'])):
        seg_times = df_window.loc[df_window['data_split_by_status'] == data_number, '时间戳']
        seg_gps = df_gps[(df_gps['时间戳'] > seg_times.iloc[0])
                         & (df_gps['时间戳'] < seg_times.iloc[-1])]
        if seg_gps.empty:
            # No GPS fix inside this segment: nothing to locate.
            continue
        gps_times = seg_gps['时间戳']
        # The position of a segment is the mean of its GPS samples.
        stops.append({'time': gps_times.iloc[0],
                      'lat': np.mean(seg_gps['纬度']),
                      'long': np.mean(seg_gps['经度']),
                      'duration': abs(gps_times.iloc[0] - gps_times.iloc[-1]).total_seconds()})
    return pd.DataFrame(stops, columns=['time', 'lat', 'long', 'duration'])


def _merge_nearby_stops(df_stops, eps, min_samples):
    """Merge stops that DBSCAN puts in the same cluster into a single row.

    A merged row keeps the index/time of the first member, the mean lat/long of
    all members and the sum of their durations. Stops labelled as noise are
    left untouched. ``df_stops`` must have a 0..n-1 index.
    """
    count = len(df_stops)
    if count < min_samples:
        # DBSCAN needs at least ``min_samples`` points to form any cluster, so
        # every stop would be labelled noise; skip the distance matrix entirely.
        return df_stops

    # Symmetric pairwise distance matrix; the diagonal (distance to self) is 0.
    dist = np.zeros((count, count))
    for i in range(count):
        for j in range(i + 1, count):
            d = tools.tools.cal_distance(df_stops.loc[i, 'lat'], df_stops.loc[i, 'long'],
                                         df_stops.loc[j, 'lat'], df_stops.loc[j, 'long'])
            dist[i, j] = d
            dist[j, i] = d

    labels = DBSCAN(eps=eps, min_samples=min_samples, metric='precomputed').fit(dist).labels_

    df_merged = df_stops.copy()
    for label in sorted(set(labels) - {-1}):  # -1 marks noise points
        members = np.where(labels == label)[0]
        first = members[0]
        df_merged.loc[first, 'lat'] = df_stops.loc[members, 'lat'].mean()
        df_merged.loc[first, 'long'] = df_stops.loc[members, 'long'].mean()
        df_merged.loc[first, 'duration'] = df_stops.loc[members, 'duration'].sum()
        df_merged = df_merged.drop(index=members[1:])
    return df_merged


# Per-period statistics: the parking location (standing + charging) with the
# longest accumulated data duration.
def sta_stop_position(df_bms, df_gps, days=30, eps=0.05, min_samples=10):
    """Find, for each ``days``-long period, the parking place with the longest duration.

    Only charge/stand segments whose ``gps_rely`` flag is 1 are considered.
    Each segment is reduced to its mean GPS position and GPS time span; nearby
    stops are merged with DBSCAN, and the (merged) stop with the largest
    accumulated duration is reported.

    Args:
        df_bms: BMS frame with columns '时间戳', 'data_status', 'gps_rely' and
            'data_split_by_status'.
        df_gps: GPS frame with columns '时间戳', '纬度' (latitude) and
            '经度' (longitude).
        days: Length of one statistics period in days.
        eps: DBSCAN neighbourhood radius, in the unit returned by
            ``tools.tools.cal_distance`` (the original note mentions a distance
            of 50, so presumably 0.05 km - TODO confirm).
        min_samples: DBSCAN minimum number of stops per cluster.

    Returns:
        DataFrame with columns ``time`` (first 10 characters of the period
        start, i.e. its date), ``lat``, ``long`` and ``max_duration`` (hours).
        A period without any usable stop yields a row of NaN values.
    """
    columns = ['time', 'lat', 'long', 'max_duration']
    if df_bms.empty:
        return pd.DataFrame(columns=columns)

    timestamps = df_bms['时间戳']
    last_time = timestamps.iloc[-1]
    period = datetime.timedelta(days=days)
    start_time = timestamps.iloc[0]
    end_time = start_time + period

    rows = []
    # Only complete periods are evaluated; a trailing partial period is ignored.
    while end_time < last_time:
        df_window = df_bms[(timestamps > start_time) & (timestamps <= end_time)]
        df_stops = _merge_nearby_stops(_collect_stop_segments(df_window, df_gps),
                                       eps, min_samples)
        if df_stops.empty:
            best_lat = best_long = max_hours = np.nan
        else:
            # Report the position of the stop that actually has the longest
            # duration, not of whichever segment happened to be processed last.
            best = df_stops.loc[df_stops['duration'].idxmax()]
            best_lat = best['lat']
            best_long = best['long']
            max_hours = best['duration'] / 3600.0
        rows.append({'time': str(start_time)[0:10],
                     'lat': best_lat,
                     'long': best_long,
                     'max_duration': max_hours})

        start_time = end_time
        end_time = end_time + period
    return pd.DataFrame(rows, columns=columns)

def _drive_segments(df_window, df_gps):
    """Summarise each drive segment inside one time window.

    Returns a list of ``(duration_s, soc_drop, gps_odo)`` tuples, one per
    distinct 'data_split_by_status' value among the 'drive' rows. ``gps_odo``
    is the odometer difference between the last and first GPS row within the
    segment's time span, or ``None`` when the segment's first row does not
    have ``gps_rely == 1`` or no GPS row falls inside the span.
    """
    df_drive = df_window[df_window['data_status'] == 'drive']
    segments = []
    for data_number in sorted(set(df_drive['data_split_by_status'])):
        df_d = df_drive[df_drive['data_split_by_status'] == data_number]
        times = df_d['时间戳']
        soc = df_d['SOC[%]']
        duration = (times.iloc[-1] - times.iloc[0]).total_seconds()
        soc_drop = soc.iloc[0] - soc.iloc[-1]

        gps_odo = None
        if df_d['gps_rely'].iloc[0] == 1:
            in_span = (df_gps['时间戳'] >= times.iloc[0]) & (df_gps['时间戳'] <= times.iloc[-1])
            odo = df_gps.loc[in_span, 'odo']
            if len(odo) > 0:
                gps_odo = odo.iloc[-1] - odo.iloc[0]
        segments.append((duration, soc_drop, gps_odo))
    return segments


# Per time window: accumulated driving time, accumulated SOC usage and
# accumulated mileage (the latter only when GPS is reliable).
# These feed the driving-time ratio, mean SOC change and mean mileage per unit time.
# Inputs
# time_window: length of one statistics window (seconds)
# step: sliding step of the window (seconds)
def sta_one_drive_cycle(df_bms, df_gps, prepro_record, time_window=3600, step=3600, start_time="00:00:00"):
    """Compute per-window driving statistics over a sliding time window.

    Windows start at ``start_time`` on the date of the first BMS row and slide
    by ``step`` seconds; a window is evaluated only while its end lies before
    the last BMS timestamp. Rows strictly inside the window are used.

    Args:
        df_bms: BMS frame with columns '时间戳', 'data_status',
            'data_split_by_status', 'SOC[%]' and 'gps_rely'.
        df_gps: GPS frame with columns '时间戳' and 'odo'.
        prepro_record: Mapping with a 'drive' entry; the revised mileage is
            only computed when ``prepro_record['drive'] < 0.8`` (meaning of the
            value is defined by the caller - TODO confirm).
        time_window: Window length in seconds.
        step: Sliding step in seconds.
        start_time: Time of day ("HH:MM:SS") at which the first window starts.

    Returns:
        DataFrame with one row per window and columns:
        ``time`` (window start), ``driveT`` (seconds of driving),
        ``driveSoc`` (sum of SOC drops of the drive segments),
        ``driveOdo`` (sum of GPS odometer deltas, or None if any drive segment
        in the window lacks reliable GPS odometer data) and
        ``driveOdoRevise`` (like ``driveOdo`` but with missing segments
        estimated as SOC drop times the overall distance-per-SOC ratio; None
        for every window when that ratio cannot be computed).
    """
    if df_bms.empty:
        return pd.DataFrame({'time': [], 'driveT': [], 'driveSoc': [],
                             'driveOdo': [], 'driveOdoRevise': []})

    timestamps = df_bms['时间戳']
    last_time = timestamps.iloc[-1]
    window = datetime.timedelta(seconds=time_window)
    stride = datetime.timedelta(seconds=step)
    st = datetime.datetime.strptime(str(timestamps.iloc[0])[0:10] + ' ' + start_time,
                                    '%Y-%m-%d %H:%M:%S')

    # Single pass over the windows: keep the per-segment summaries so that the
    # revised mileage can be derived later without re-scanning the data.
    time_list = []
    windows = []
    while st + window < last_time:
        et = st + window
        df_t = df_bms[(timestamps > st) & (timestamps < et)]
        time_list.append(st)
        windows.append(_drive_segments(df_t, df_gps))
        st = st + stride

    driveT_list = [sum(seg[0] for seg in segs) for segs in windows]
    driveSoc_list = [sum(seg[1] for seg in segs) for segs in windows]
    # A window's GPS mileage is only trusted when every drive segment has one.
    driveOdo_list = [None if any(seg[2] is None for seg in segs)
                     else sum(seg[2] for seg in segs)
                     for segs in windows]

    driveOdoRevise_list = [None] * len(windows)
    if prepro_record['drive'] < 0.8 and sum(driveSoc_list) > 0:
        # Distance travelled per unit of SOC, from windows with usable mileage.
        sum_odo = 0
        sum_soc = 0
        for odo, soc in zip(driveOdo_list, driveSoc_list):
            if odo is not None and not pd.isnull(odo) and odo != 0:
                sum_odo += odo
                sum_soc += soc
        # Without any usable window the ratio is undefined; leave the revised
        # mileage as None instead of dividing by zero.
        if sum_soc != 0:
            ene_consump = sum_odo / sum_soc
            # Each segment decides on its own whether GPS mileage is available;
            # otherwise its mileage is estimated from the SOC drop.
            driveOdoRevise_list = [
                sum(seg[2] if seg[2] is not None else seg[1] * ene_consump for seg in segs)
                for segs in windows
            ]

    df_res = pd.DataFrame({'time': time_list, 'driveT': driveT_list, 'driveSoc': driveSoc_list,
                           'driveOdo': driveOdo_list, 'driveOdoRevise': driveOdoRevise_list})
    return df_res


# Altitude difference between the GPS altitude right before charging and the
# GPS altitude while charging (only when GPS is reliable).
def sta_charge_height(df_bms, df_gps):
    """Compare GPS altitude before and during each charging segment.

    Segments are the distinct 'data_split_by_status' values in ascending
    order. A row is produced for every charge segment that directly follows a
    non-charge segment, provided the first row of both segments has
    ``gps_rely == 1``.

    Args:
        df_bms: BMS frame with columns '时间戳', 'data_status', 'gps_rely' and
            'data_split_by_status'.
        df_gps: GPS frame with columns '时间戳' and '海拔m' (altitude).

    Returns:
        DataFrame with columns ``time`` (first timestamp of the charge
        segment), ``last_status`` (status of the preceding segment),
        ``last_height`` (mean GPS altitude of the preceding segment if it was
        'stand', otherwise its last GPS altitude), ``cur_height`` (mean GPS
        altitude during charging) and ``diff`` (cur_height - last_height).
        Altitudes are NaN when no GPS row falls inside the segment.
    """
    def heights_between(start, end):
        # GPS altitudes whose timestamp lies within [start, end].
        in_span = (df_gps['时间戳'] >= start) & (df_gps['时间戳'] <= end)
        return df_gps.loc[in_span, '海拔m']

    time_list = []
    last_status_list = []
    last_height_list = []
    height_list = []

    previous = None
    # Start from the smallest segment number actually present instead of
    # assuming that numbering begins at 1.
    for data_number in sorted(set(df_bms['data_split_by_status'])):
        current = df_bms[df_bms['data_split_by_status'] == data_number]
        if previous is not None:
            prev_status = previous['data_status'].iloc[0]
            entering_charge = (prev_status != 'charge'
                               and current['data_status'].iloc[0] == 'charge')
            gps_ok = (previous['gps_rely'].iloc[0] == 1
                      and current['gps_rely'].iloc[0] == 1)
            if entering_charge and gps_ok:
                prev_heights = heights_between(previous['时间戳'].iloc[0],
                                               previous['时间戳'].iloc[-1])
                cur_heights = heights_between(current['时间戳'].iloc[0],
                                              current['时间戳'].iloc[-1])
                if prev_status == 'stand':
                    # Vehicle was stationary: average out GPS altitude noise.
                    last_height = prev_heights.mean()
                elif len(prev_heights) > 0:
                    # Vehicle was moving: only the final position is relevant.
                    last_height = prev_heights.iloc[-1]
                else:
                    last_height = np.nan
                time_list.append(current['时间戳'].iloc[0])
                last_status_list.append(prev_status)
                last_height_list.append(last_height)
                height_list.append(cur_heights.mean())
        previous = current

    df_res = pd.DataFrame({'time': time_list,
                           'last_status': last_status_list,
                           'last_height': last_height_list,
                           'cur_height': height_list,
                           'diff': np.array(height_list) - np.array(last_height_list)})
    return df_res