Parcourir la source

Merge branch 'dev' of http://git.fast-fun.cn:92/lmstack/data_analyze_platform into dev

qingfeng il y a 2 ans
Parent
commit
80257351af

+ 129 - 0
LIB/MIDDLE/Anomaly_Detection/V1_0_0/anomalyPCA.py

@@ -0,0 +1,129 @@
+#热失控预警:PCA异常指数
+
+import pandas as pd
+import numpy as np
+from scipy.signal import savgol_filter
+from sklearn.preprocessing import RobustScaler
+from sklearn.decomposition import PCA
+
+#筛选特征
+def makedataset1(df_data):
+    """Build the per-time-bucket feature table fed to the first PCA model.
+
+    Steps: drop status/diagnostic and temperature columns, keep only rows
+    whose every cell-voltage column lies strictly between 2200 and 4800 and
+    whose SOC is above 20, bucket rows by the first 15 characters of the
+    timestamp, average each bucket and smooth every column with a
+    Savitzky-Golay filter (window 3, order 2).
+
+    Args:
+        df_data: raw BMS frame; must contain '时间戳' and 'SOC[%]'.
+
+    Returns:
+        DataFrame indexed by the truncated timestamp ('时间').
+
+    Raises:
+        ValueError: from savgol_filter when fewer than 3 buckets remain.
+    """
+    unused=['Unnamed: 0','总电流[A]','GSM信号','外电压','SOH[%]','开关状态','充电状态','故障等级','故障代码','绝缘电阻','上锁状态','加热状态','单体均衡状态','总输出状态']
+    unused+=["单体温度"+str(i) for i in range(1,5)]
+    unused+=["其他温度"+str(i) for i in range(1,7)]
+    df_data=df_data.drop(unused,axis=1,errors='ignore')
+    # Filter on the cell-voltage columns that actually exist instead of
+    # assuming they are numbered 1..N without gaps.
+    cell_cols=[s for s in list(df_data) if '单体电压' in s]
+    if cell_cols:
+        cells=df_data[cell_cols]
+        df_data=df_data[((cells>2200) & (cells<4800)).all(axis=1)]
+    # .copy() so the column assignment below never targets a slice.
+    df_data=df_data[df_data['SOC[%]']>20].copy()
+    # Vectorised truncation: independent of index labels (the previous
+    # per-row .loc lookup failed on duplicate labels).
+    df_data['时间']=df_data['时间戳'].astype(str).str[0:15]
+    df_data=df_data.drop('时间戳',axis=1)
+    data_set=df_data.groupby('时间').mean(numeric_only=False)
+    for k in data_set.columns:
+        data_set[k]=savgol_filter(data_set[k],3,2)
+    return data_set
+
+#新建统计特征
+def makedataset2(df_data):
+    """Build the statistical feature table fed to the second PCA model.
+
+    Starts from makedataset1(df_data), replaces the individual cell-voltage
+    columns by their row-wise minimum, maximum and mean, and adds two
+    spreads: max-min ('最大单体压差') and mean-min ('低压差').
+
+    Args:
+        df_data: raw BMS frame accepted by makedataset1.
+
+    Returns:
+        DataFrame indexed like the makedataset1 result, without the
+        per-cell voltage columns.
+    """
+    data_set=makedataset1(df_data)
+    # Collect the cell columns before adding the aggregates, whose names
+    # also contain '单体电压'.
+    cell_cols=[s for s in list(data_set) if '单体电压' in s]
+    cells=data_set[cell_cols]
+    data_set["最低单体电压"]=cells.min(axis=1)
+    data_set["最高单体电压"]=cells.max(axis=1)
+    data_set["平均单体电压"]=cells.mean(axis=1)
+    # Column arithmetic instead of one .loc lookup per row.
+    data_set["最大单体压差"]=data_set["最高单体电压"]-data_set["最低单体电压"]
+    data_set["低压差"]=data_set["平均单体电压"]-data_set["最低单体电压"]
+    data_set=data_set.drop(cell_cols,axis=1)
+    return data_set
+
+#标准化
+def process(data_set):
+    """Return a copy of data_set with every column passed through RobustScaler.
+
+    A new scaler is fitted on each call; the input frame is left untouched.
+    """
+    scaled=data_set.copy()
+    columns=scaled.columns
+    scaler=RobustScaler(copy=True)
+    scaled.loc[:,columns]=scaler.fit_transform(scaled[columns])
+    return scaled
+
+#异常指数函数
+def anomalyScores(originalDF,reducedDF):
+    """Min-max normalised squared reconstruction error per row.
+
+    Args:
+        originalDF: frame of scaled features (its index is reused).
+        reducedDF: reconstruction of the same shape.
+
+    Returns:
+        Series indexed like originalDF with values in [0, 1]. When every
+        row has the same error (including the single-row case) the result
+        is all zeros instead of the NaN produced by a 0/0 division.
+    """
+    residual=np.asarray(originalDF)-np.asarray(reducedDF)
+    loss=pd.Series(data=np.sum(residual**2,axis=1),index=originalDF.index)
+    lowest=loss.min()
+    span=loss.max()-lowest
+    # `not span>0` also covers empty/NaN input, which is passed through.
+    if not span>0:
+        return loss-lowest
+    return (loss-lowest)/span
+
+#建立PCA模型
+def anomalyPCA(x_train_pro):
+    """Fit and return a whitened 4-component PCA (random_state=2) on x_train_pro."""
+    settings={'n_components':4,'whiten':True,'random_state':2}
+    model=PCA(**settings)
+    model.fit(x_train_pro)
+    return model
+
+#判断PCA异常指数
+def transform(df_data_pro,model,df_data):
+    """Attach the smoothed PCA reconstruction-error score to a copy of df_data.
+
+    Args:
+        df_data_pro: scaled features; projected with model and reconstructed.
+        model: fitted object exposing transform() and inverse_transform().
+        df_data: unscaled frame with the same rows; it is copied, not mutated.
+
+    Returns:
+        Copy of df_data with an extra column named 'anomalyScores_' + str(model).
+    """
+    row_index=df_data_pro.index
+    # Project into the component space, then map back to feature space.
+    projected=pd.DataFrame(data=model.transform(df_data_pro),index=row_index)
+    restored=pd.DataFrame(data=model.inverse_transform(projected),index=row_index)
+    # Reconstruction error, smoothed with a window-15, order-3 filter.
+    scores=savgol_filter(anomalyScores(df_data_pro,restored),15,3)
+    result=df_data.copy()
+    result['anomalyScores_'+str(model)]=scores
+    return result
+
+#判断离群
+def detect_outliers(data,pred,threshold=3):
+    """Select rows of pred whose anomaly score is an outlier relative to data.
+
+    A row is kept when its score deviates from the mean of the reference
+    scores by more than `threshold` standard deviations AND exceeds the
+    largest reference score.
+
+    Args:
+        data: reference frame (training results) with an anomaly-score column.
+        pred: frame to screen, with the same anomaly-score column.
+        threshold: z-score limit.
+
+    Returns:
+        The matching rows of pred (each row once, original order); a
+        zero-row frame when nothing matches.
+
+    Raises:
+        KeyError: when no unique anomaly-score column can be found.
+    """
+    def _score_column(frame):
+        # Historical column name, built from str(model) in transform().
+        # That repr can differ between scikit-learn versions, so fall back
+        # to the single column carrying the 'anomalyScores_' prefix.
+        legacy='anomalyScores_PCA(n_components=4, random_state=2, whiten=True)'
+        if legacy in frame.columns:
+            return legacy
+        matches=[c for c in frame.columns if str(c).startswith('anomalyScores_')]
+        if len(matches)!=1:
+            raise KeyError(legacy)
+        return matches[0]
+
+    anomaly=data[_score_column(data)]
+    anomalypred=pred[_score_column(pred)]
+    mean_d=np.mean(anomaly.values)
+    std_d=np.std(anomaly.values)
+    max_score=np.max(anomaly.values)
+    # |x-mean| > threshold*std is the z-score test without dividing by a
+    # possibly-zero std. One boolean mask replaces the per-row
+    # DataFrame.append loop (removed in pandas 2.0), which also duplicated
+    # rows that shared the same score.
+    flagged=((anomalypred-mean_d).abs()>threshold*std_d) & (anomalypred>max_score)
+    return pred[flagged].copy()
+
+#训练模型
+def train_model(data_train):
+    """Fit both PCA models on data_train and score the training data.
+
+    Returns:
+        (pca1, pca2, res1, res2): the two fitted models followed by the
+        makedataset1 / makedataset2 feature tables with their score column.
+    """
+    # Same stage order as before: build features, scale, fit, score.
+    raw_sets=[build(data_train) for build in (makedataset1,makedataset2)]
+    scaled_sets=[process(raw) for raw in raw_sets]
+    models=[anomalyPCA(scaled) for scaled in scaled_sets]
+    results=[transform(scaled,model,raw) for scaled,model,raw in zip(scaled_sets,models,raw_sets)]
+    return models[0],models[1],results[0],results[1]
+
+#预测
+def prediction(data_test,pca1,pca2):
+    """Score data_test with the two previously fitted PCA models.
+
+    Returns:
+        (pred1, pred2): makedataset1 / makedataset2 feature tables of
+        data_test, each with the anomaly-score column of its model.
+    """
+    # NOTE(review): process() fits a fresh scaler on the test data, so the
+    # scaling seen by the models here is not the one used at training
+    # time -- confirm this is intended.
+    raw_sets=[build(data_test) for build in (makedataset1,makedataset2)]
+    scaled_sets=[process(raw) for raw in raw_sets]
+    pred1,pred2=[transform(scaled,model,raw) for scaled,model,raw in zip(scaled_sets,(pca1,pca2),raw_sets)]
+    return pred1,pred2
+
+def boxplot_fill(res2):
+    """Upper outlier bound of the '低压差' column: Q3 + 2 * (Q3 - Q1)."""
+    low_gap=res2['低压差']
+    q1=low_gap.quantile(0.25)
+    q3=low_gap.quantile(0.75)
+    # The inter-quartile range scaled by 2 is added on top of Q3.
+    return q3+2*(q3-q1)
+
+#判定异常
+def check_anomaly(outliers1,outliers2,res2):
+    """Keep time buckets flagged by both models that also pass the SOC and
+    low-voltage-gap checks.
+
+    Args:
+        outliers1: outliers of the first model (keyed by '时间').
+        outliers2: outliers of the second model (keyed by '时间').
+        res2: training result of the second model; supplies the '低压差' bound.
+
+    Returns:
+        Merged rows with 'SOC[%]_x' above 50 and '低压差' above the bound
+        computed by boxplot_fill(res2).
+    """
+    merged=pd.merge(outliers1,outliers2,on='时间')
+    merged=merged[merged['SOC[%]_x']>50]
+    # Columns duplicated by the merge are removed from the right-hand side.
+    merged=merged.drop(['总电压[V]_y','单体压差_y','SOC[%]_y'],axis=1)
+    upper_bound=boxplot_fill(res2)
+    return merged[merged['低压差']>upper_bound]
+    
+

+ 42 - 0
LIB/MIDDLE/Anomaly_Detection/V1_0_0/main_anomalyPCA.py

@@ -0,0 +1,42 @@
+#热失控预警:PCA异常指数
+#训练模型
+
+from LIB.BACKEND import DBManager
+dbManager = DBManager.DBManager()
+from LIB.MIDDLE.CellStateEstimation.Common import log
+import pandas as pd
+from anomalyPCA import *
+import joblib
+import datetime
+
+# Serial numbers to train on come from the 'sn' column of the workbook.
+dataSOH = pd.read_excel('sn-20210903.xlsx',sheet_name='sn-20210903')
+fileNames = list(dataSOH['sn'])
+l = len(fileNames)
+
+# Training window: the last 365 days, truncated to whole seconds.
+now_time=datetime.datetime.now().replace(microsecond=0)
+start_time=str(now_time-datetime.timedelta(days=365))
+end_time=str(now_time)
+
+mylog=log.Mylog('log.txt','error')
+mylog.logcfg()
+
+for sn in fileNames:
+    try:
+        df_data = dbManager.get_data(sn=sn, start_time=start_time, end_time=end_time, data_groups=['bms'])
+        data_train = df_data['bms']
+
+        if len(data_train)>0:
+            pca1,pca2,res1,res2=train_model(data_train)
+            # Persist both models and their training scores per serial number.
+            joblib.dump(pca1,'pca1_'+sn+'.m')
+            joblib.dump(pca2,'pca2_'+sn+'.m')
+            res1.to_csv('res1_'+sn+'.csv',encoding='gbk')
+            res2.to_csv('res2_'+sn+'.csv',encoding='gbk')
+
+    except Exception as e:
+        # Best effort: report the failure and continue with the next pack.
+        print(repr(e))
+        mylog.logopt(sn,e)

+ 98 - 0
LIB/MIDDLE/Anomaly_Detection/V1_0_0/main_detection.py

@@ -0,0 +1,98 @@
+#热失控预警:PCA异常指数
+#预测及异常预警
+
+from LIB.BACKEND import DBManager
+
+dbManager = DBManager.DBManager()
+import datetime
+
+import joblib
+import pandas as pd
+import pymysql
+from LIB.MIDDLE.CellStateEstimation.Common import log
+
+from anomalyPCA import *
+
+# Serial numbers to screen come from the 'sn' column of the workbook.
+dataSOH = pd.read_excel('sn-20210903.xlsx',sheet_name='sn-20210903')
+fileNames = list(dataSOH['sn'])
+l = len(fileNames)
+
+# Query window: the last three hours, truncated to whole seconds.
+# These two strings are shared by every serial number and must not be
+# reassigned inside the loop below.
+now_time=datetime.datetime.now().replace(microsecond=0)
+start_time=str(now_time-datetime.timedelta(hours=3))
+end_time=str(now_time)
+
+mylog=log.Mylog('log.txt','error')
+mylog.logcfg()
+
+# Database configuration.
+# SECURITY NOTE(review): host and credentials are hard-coded in source;
+# move them to configuration/secret storage and rotate the password.
+host='rm-bp10j10qy42bzy0q77o.mysql.rds.aliyuncs.com'
+port=3306
+user='qx_algo_readonly'
+password = 'qx@123456'
+
+# Load the faults with code 'C493' that are still open
+# (end_time == '0000-00-00 00:00:00') from the fault result table.
+db='safety_platform'
+param='start_time,end_time,product_id,code,level,info,advice'
+tablename='all_fault_info'
+sql =  "select %s from %s where code='C493' and end_time='0000-00-00 00:00:00'" %(param,tablename)
+mysql = pymysql.connect (host=host, port=port, user=user, password=password, database=db)
+try:
+    cursor = mysql.cursor()
+    try:
+        cursor.execute(sql)
+        res = cursor.fetchall()
+    finally:
+        cursor.close()
+finally:
+    # Always release the connection, even when the query fails.
+    mysql.close()
+df_diag_ram= pd.DataFrame(res,columns=param.split(','))
+
+open_end_time='0000-00-00 00:00:00'
+alarm_file=r'D:\Platform\platform_python\data_analyze_platform\USER\spf\01qixiang\06BatSafetyAlarm\热失控报警.txt'
+
+anomalies=pd.DataFrame()
+df_res=pd.DataFrame(columns=['start_time','end_time','product_id','code','level','info','advice'])
+for sn in fileNames:
+    try:
+        df_diag_ram_sn=df_diag_ram[df_diag_ram['product_id']==sn].copy()
+        df_data = dbManager.get_data(sn=sn, start_time=start_time, end_time=end_time, data_groups=['bms'])
+        data_test = df_data['bms']
+        data_test=data_test[data_test['SOC[%]']>20]
+        if len(data_test)<=5:
+            continue
+        pca1 = joblib.load('pca1_'+sn+'.m')
+        pca2 = joblib.load('pca2_'+sn+'.m')
+        res1 = pd.read_csv('res1_'+sn+'.csv',encoding='gbk')
+        res2 = pd.read_csv('res2_'+sn+'.csv',encoding='gbk')
+        pred1,pred2=prediction(data_test,pca1,pca2)
+        outliers1=detect_outliers(res1,pred1,threshold=30)
+        outliers2=detect_outliers(res2,pred2,threshold=16)
+        if len(outliers1)==0 or len(outliers2)==0:
+            continue
+        outliers=check_anomaly(outliers1,outliers2,res2).copy()
+        if len(outliers)<=5:
+            continue
+        outliers['sn']=sn
+        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
+        anomalies=pd.concat([anomalies,outliers])
+
+        # '时间' may be a column or the index depending on how the frames
+        # were merged/grouped; read it positionally either way. (The
+        # previous .loc[0, ...] / .loc[-1, ...] lookups used labels that
+        # do not exist and always raised.)
+        outlier_times=list(outliers['时间']) if '时间' in outliers.columns else list(outliers.index)
+        pred_times=list(pred1['时间']) if '时间' in pred1.columns else list(pred1.index)
+        # NOTE(review): these are the 15-character time-bucket labels
+        # produced by makedataset1, not full timestamps -- confirm the
+        # fault table accepts them.
+        alarm_start=outlier_times[0]
+        # The fault is still ongoing when the newest outlier is the
+        # newest bucket of the window.
+        if outlier_times[-1]==pred_times[-1]:
+            alarm_end=open_end_time
+        else:
+            alarm_end=outlier_times[-1]
+
+        if df_diag_ram_sn.empty:
+            # No open fault for this pack yet: record a new one as one row.
+            code='C493'
+            level=4
+            info='热失控预警'
+            advice='建议返厂维修'
+            df_res.loc[len(df_res)]=[alarm_start,alarm_end,sn,code,level,info,advice]
+            with open(alarm_file,'a') as file:
+                file.write(str(tuple(df_res.iloc[-1]))+'\n')
+        elif alarm_end!=open_end_time:
+            # An open fault exists and has now ended: close it.
+            df_diag_ram_sn['end_time']=alarm_end
+            with open(alarm_file,'a') as file:
+                file.write(str(tuple(df_diag_ram_sn.iloc[-1]))+'\n')
+
+    except Exception as e:
+        # Best effort: report the failure and continue with the next pack.
+        print(repr(e))
+        mylog.logopt(sn,e)

BIN
LIB/MIDDLE/Anomaly_Detection/V1_0_0/sn-20210903.xlsx


+ 51 - 0
LIB/MIDDLE/SaftyCenter/DataDiag_Static/BMSuploaderrortest.py

@@ -0,0 +1,51 @@
+
+__author__ = 'lmstack'
+#coding=utf-8
+import os
+import datetime
+import pandas as pd
+from LIB.BACKEND import DBManager, Log
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
+import time, datetime
+import dateutil.relativedelta
+import traceback
+from LIB.MIDDLE.CellStateEstimation.Common import log
+from LIB.MIDDLE.CellStateEstimation.Common.V1_0_1 import DBDownload as DBDownload
+import time, datetime
+from pandas.core.frame import DataFrame
+from apscheduler.schedulers.blocking import BlockingScheduler
+from LIB.MIDDLE.SaftyCenter.DataDiag_Static.DataStatistics import DataSta
+from LIB.MIDDLE.SaftyCenter.DataDiag_Static.SC_CtrlSafty import CtrlSafty
+from LIB.MIDDLE.SaftyCenter.DataDiag_Static.DiagDataMerge import DiagDataMerge
+from LIB.MIDDLE.SaftyCenter.DataDiag_Static.SC_SamplingSafty import SamplingSafty
+from LIB.MIDDLE.SaftyCenter.DataDiag_Static.SC_BMSUploadError import BMSReportError
+from LIB.MIDDLE.SaftyCenter.DataDiag_Static import CBMSBatDiag
+from LIB.MIDDLE.SaftyCenter.Common import DBDownload as DBDw
+from LIB.MIDDLE.CellStateEstimation.Common.V1_0_1 import BatParam as QX_BatteryParam
+from urllib import parse
+import pymysql
+
+# Fault-code mapping table.
+# NOTE(review): host and credentials are hard-coded in source.
+host='rm-bp10j10qy42bzy0q77o.mysql.rds.aliyuncs.com'
+port=3306
+db='algo_dict'
+user='qx_algo_rw'
+password=parse.quote_plus('qx@123456')
+connection_url="mysql+pymysql://{}:{}@{}:{}/{}?charset=utf8".format(
+    user, password, host, port, db
+)
+db_engine = create_engine(connection_url)
+
+errorcode_map = pd.read_sql("select * from faultcode_map", db_engine)
+db_engine.dispose()
+
+# Fixed sample: one pack over a 15-minute window.
+sn = 'PK504B10100004349'
+start_time = '2021-12-30 18:00:04'
+end_time = '2021-12-30 18:15:04'
+dbManager = DBManager.DBManager()
+df_data = dbManager.get_data(sn=sn, start_time=start_time, end_time=end_time, data_groups=['bms'])
+df_bms = df_data['bms']
+# Rows without a total-current reading are discarded, then renumbered.
+df_bms=df_bms.dropna(subset=['总电流[A]'])
+df_bms=df_bms.reset_index(drop=True)
+df_Diag_Batdiag_update=BMSReportError.main(sn,df_bms,pd.DataFrame(),1,errorcode_map)
+print(df_Diag_Batdiag_update)

+ 4 - 5
LIB/MIDDLE/SaftyCenter/DataDiag_Static/SC_BMSUploadError.py

@@ -52,10 +52,8 @@ class BMSReportError:
                                 nCode=int(math.pow(2,power))
                                 newCode.append(nCode)
                                 code=code-nCode
-                                newCode_total.append(nCode)
                         elif code>0:
                             newCode=newCode.append(code)
-                            newCode_total.append(code)
                         else:
                             newCode=[]
                     else:
@@ -63,7 +61,6 @@ class BMSReportError:
                 else:
                     newCode=df_bms.loc[i,'故障代码']
                     if not pd.isnull(newCode) and newCode!=0:
-                        newCode_total.append(newCode)
                         newCode=[newCode]
                     else:
                         newCode=[]                        
@@ -82,7 +79,7 @@ class BMSReportError:
                         # DbSession = sessionmaker(bind=db_engine)
 
                         # errorcode_map = pd.read_sql("select * from faultcode_map", db_engine)
-                        # if 'k50' in sn:
+                        # if 'K50' in sn:
                         #     FactoryType=1
                         # elif 'MGMCL' in sn or 'UD' in sn:
                         #     FactoryType=2
@@ -92,10 +89,12 @@ class BMSReportError:
                              
                         code = newCode[j] # 终端故障码
                         platform_code = errorcode_map[(errorcode_map['protocol']==FactoryType)&(errorcode_map['end_errorcode']==str(code))]['platform_errorcode']                   
+
                         # db_engine.dispose()
                         if len(platform_code) == 0:
                             pass
-                        else:
+                        else:                       
+                            newCode_total.append(platform_code.values[0])
                             newCode[j]=platform_code.values[0]
                             if not platform_code.values[0] in df_Diag_Ram_BMS['code'].values.tolist():
                                 df_Diag_Ram_BMS.loc[len(df_Diag_Ram_BMS)]=[df_bms.loc[i,'时间戳'],'0000-00-00 00:00:00',sn,platform_code.values[0],df_bms.loc[i,'故障等级'],'','']

+ 356 - 0
LIB/MIDDLE/odo/DailyMileageEstimation/V1_0_4_SimpleVehicle/cal_mileage.py

@@ -0,0 +1,356 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sat Sep 25 23:26:27 2021
+
+@author: admin
+"""
+import pandas as pd
+import numpy as np
+from LIB.BACKEND.DataPreProcess import DataPreProcess
+import os
+import math
+import datetime
+import time
+
+#将时间戳由 "%Y-%m-%d %H:%M:%S" 切换为 sec
+def timeconvert(df_in,column_name):
+    """Add '相对时间[s]': whole seconds elapsed since the first row's timestamp.
+
+    Args:
+        df_in: frame holding "%Y-%m-%d %H:%M:%S" timestamps in column_name.
+        column_name: name of the timestamp column.
+
+    Returns:
+        Copy of df_in re-indexed 0..n-1 with the extra column.
+    """
+    stamp_format="%Y-%m-%d %H:%M:%S"
+    df=df_in.copy()
+    df.index=range(len(df))
+    stamps=df[column_name]
+    origin=datetime.datetime.strptime(str(stamps[0]),stamp_format)
+    offsets=[]
+    for raw in stamps:
+        elapsed=datetime.datetime.strptime(str(raw),stamp_format)-origin
+        offsets.append(elapsed.days*86400+elapsed.seconds)
+    df.loc[:,'相对时间[s]']=pd.DataFrame(offsets,columns=['相对时间[s]'])
+    return df
+
+#计算累积能量
+def cal_accumKwh(df_in):
+    """Add '累积能量[Kwh]': trapezoidal running integral of current x voltage.
+
+    Rows missing current, voltage or relative time are left out of the
+    integration and receive NaN in the new column.
+
+    Args:
+        df_in: frame with '总电流[A]', '总电压[V]' and '相对时间[s]'.
+
+    Returns:
+        Copy of df_in left-joined (on index) with the cumulative energy.
+    """
+    frame=df_in.copy()
+    valid=frame[['总电流[A]','总电压[V]','相对时间[s]']].dropna(axis=0,how='any')
+    current=valid['总电流[A]'].values
+    voltage=valid['总电压[V]'].values
+    seconds=valid['相对时间[s]'].values
+    energy=[0.0]
+    for k in range(1,len(current)):
+        # dt * mean current (A per hour-second) * mean voltage / 1000.
+        step=(seconds[k]-seconds[k-1])*((current[k]+current[k-1])/(2*3600))*(voltage[k]+voltage[k-1])/2/1000
+        energy.append(energy[-1]+step)
+    valid.loc[:,'累积能量[Kwh]']=energy
+    return pd.merge(frame,valid[['累积能量[Kwh]']],how='left',left_index=True, right_index=True)
+
+#将时间格式化为整数
+def str_data_to_num(str_data):
+    """Convert a "%Y-%m-%d %H:%M:%S" string to integer milliseconds.
+
+    The value comes from time.mktime, i.e. the string is read as local time.
+    """
+    parsed=time.strptime(str_data,"%Y-%m-%d %H:%M:%S")
+    seconds=time.mktime(parsed)
+    return int(seconds*1000)
+
+def df_date_To_int(df_in):
+    """Add '绝对时间[ms]' holding each '时间戳' converted by str_data_to_num.
+
+    Args:
+        df_in: frame with a '时间戳' column of "%Y-%m-%d %H:%M:%S" strings.
+
+    Returns:
+        Copy of df_in with the extra float column; works for any index
+        (the previous loop used a positional counter as an index label and
+        failed unless the index was exactly 0..n-1).
+    """
+    df=df_in.copy()
+    # Kept as float, the dtype the former row-by-row assignment produced.
+    df['绝对时间[ms]']=[float(str_data_to_num(stamp)) for stamp in df['时间戳']]
+    return df
+
+#根据经纬度获取两点之间的距离
+def cal_dis_meters(radius,latitude1, longitude1,latitude2, longitude2):
+    """Great-circle (haversine) distance between two points given in degrees.
+
+    The result is in the unit of `radius`; despite the function name it is
+    only metres if the radius is passed in metres.
+    """
+    deg_to_rad=math.pi/180
+    phi1=deg_to_rad*latitude1
+    phi2=deg_to_rad*latitude2
+    lam1=deg_to_rad*longitude1
+    lam2=deg_to_rad*longitude2
+    sin_half_dlat=math.sin((phi1-phi2)/2.0)
+    sin_half_dlng=math.sin((lam1-lam2)/2.0)
+    haversine=math.pow(sin_half_dlat,2)+math.cos(phi1)*math.cos(phi2)*math.pow(sin_half_dlng,2)
+    return 2*math.asin(math.sqrt(haversine))*radius
+
+def cal_gps_distance(df_in):
+    """Compute per-fix GPS distance increments for the rows with a valid fix.
+
+    Two columns are added to the rows whose latitude is > 0:
+      * '△距离1': great-circle distance to the previous fix (0 for the first).
+      * '△距离2': same distance, but only when the two fixes are less than
+        60 s apart; then it is kept for 'data_status' of 'drive' or 'none'
+        and set to 0 otherwise. It stays NaN when the gap is 60 s or more.
+
+    Args:
+        df_in: frame with '纬度', '经度', '相对时间[s]' and 'data_status'.
+
+    Returns:
+        New frame holding only the GPS rows (empty, with both columns, when
+        there is no valid fix). df_in is not modified.
+    """
+    # .copy() so the new columns are never written into a slice of df_in.
+    df_gpsOnly=df_in[df_in['纬度']>0].copy()
+    count=len(df_gpsOnly)
+    pos_list=df_gpsOnly[['纬度','经度']].values
+    # Times are taken from the GPS rows themselves, so positions line up
+    # for any index (labels were previously used as array positions).
+    time_list=df_gpsOnly['相对时间[s]'].values
+    dist1=[0.0]*count
+    dist2=[np.nan]*count
+    if count:
+        dist2[0]=0.0
+    for k in range(1,count):
+        dlt_odo=cal_dis_meters(6378.137,pos_list[k-1][0],pos_list[k-1][1],pos_list[k][0],pos_list[k][1])
+        dist1[k]=dlt_odo
+        if time_list[k]-time_list[k-1]<60:
+            moving=df_gpsOnly['data_status'].iloc[k] in ('drive','none')
+            dist2[k]=dlt_odo if moving else 0
+    df_gpsOnly['△距离1']=dist1
+    df_gpsOnly['△距离2']=dist2
+    return(df_gpsOnly)
+
+
+def real_odo(df_in,avg_cost):
+    """Estimate per-row distance increments and the cumulative mileage.
+
+    Relative time and cumulative energy are computed first. When more than
+    two rows have a latitude > 0 the data is split into three parts around
+    the first and last such row: the outer parts use energy * avg_cost, the
+    middle part mixes GPS distance and energy. Otherwise the whole frame is
+    treated with the energy-only rule.
+
+    Args:
+        df_in: frame with '时间戳', '总电流[A]', '总电压[V]', '纬度', '经度'.
+        avg_cost: factor multiplying an energy increment to give a distance
+            (the caller's comments call it km/kWh).
+
+    Returns:
+        Frame re-indexed 0..n-1 with '△距离', '方法' (code of the rule that
+        produced the increment) and '累积里程[km]'.
+    """
+    # df_handle=df_in.copy()
+    df=timeconvert(df_in,"时间戳")# compute relative time
+    df=cal_accumKwh(df)# compute cumulative energy
+
+    positive_lat_index=df[df['纬度']>0].index
+    if len(positive_lat_index)>2:
+        first_index=positive_lat_index[0]
+        end_index=positive_lat_index[-1]
+        # Split the data into the rows up to the first GPS fix (part1),
+        # the rows from the last GPS fix onwards (part3) and
+        # the rows between the first and the last GPS fix (part2).
+        part1=df[0:first_index+1]
+        part2=df[first_index:end_index+1]
+        # NOTE(review): the slice stops at -1, so the final row of df is
+        # not part of part3 -- confirm this is intended.
+        part3=df[end_index:-1]
+        
+        # Distance for part 1: energy-based only.
+        part1.index=[i for i in range(len(part1))]
+        part1.loc[0,'△距离']=0
+        AccumEnergy1=part1['累积能量[Kwh]'].values
+        for k1 in range(1,len(part1)):
+            if (AccumEnergy1[k1]-AccumEnergy1[k1-1])>0:
+                # Method 4: positive energy increment times avg_cost.
+                part1.loc[k1,'△距离']=(AccumEnergy1[k1]-AccumEnergy1[k1-1])*avg_cost
+                part1.loc[k1,'方法']=4
+            else:
+                # Method 5: no positive energy increment, no distance.
+                part1.loc[k1,'△距离']=0
+                part1.loc[k1,'方法']=5
+        
+        # Distance for part 2: only the rows with a GPS fix are kept.
+        part2_gps=part2[part2['纬度']>0]
+        part2_gps.index=[i for i in range(len(part2_gps))]
+        times_list=part2_gps['相对时间[s]'].values
+        lat=part2_gps['纬度'].values
+        lng=part2_gps['经度'].values
+        AccumEnergy2=part2_gps['累积能量[Kwh]'].values
+        part2_gps.loc[0,'△距离']=0
+        for k2 in range(1,len(part2_gps)):
+            delta_energy=AccumEnergy2[k2]-AccumEnergy2[k2-1]
+            delta_span=cal_dis_meters(6378.137,lat[k2], lng[k2],lat[k2-1], lng[k2-1])
+            # Speed implied by the two fixes (distance per hour).
+            # NOTE(review): two fixes with the same relative time give a
+            # zero divisor here.
+            v_spd=3600*delta_span/(times_list[k2]-times_list[k2-1])        
+            if times_list[k2]-times_list[k2-1]<60 and not delta_energy<=0:
+                # Fixes less than 60 s apart with a positive energy increment.
+                # The literals 1_1 / 1_2 below are the integers 11 and 12
+                # (underscores are digit separators).
+                if v_spd>50 :
+                    # Implied speed above 50: use energy instead of GPS.
+                    part2_gps.loc[k2,'△距离']=delta_energy*avg_cost
+                    part2_gps.loc[k2,'方法']=1_1
+                else:
+                    part2_gps.loc[k2,'△距离']=delta_span
+                    part2_gps.loc[k2,'方法']=1_2
+            else:            
+                if delta_energy<=0:
+                    # Method 2: no positive energy increment, no distance.
+                    part2_gps.loc[k2,'△距离']=0
+                    part2_gps.loc[k2,'方法']=2
+                else:
+                    # Gap of 60 s or more; 3_1 / 3_2 are the integers 31 and 32.
+                    if v_spd>50:
+                        part2_gps.loc[k2,'△距离']=delta_energy*avg_cost
+                        part2_gps.loc[k2,'方法']=3_1
+                    else:
+                        # Take the larger of the GPS and energy estimates.
+                        part2_gps.loc[k2,'△距离']=max(delta_span,delta_energy*avg_cost)
+                        part2_gps.loc[k2,'方法']=3_2
+    
+                    
+        # Distance for part 3: energy-based only, same rule as part 1.
+        part3.index=[i for i in range(len(part3))]
+        part3.loc[0,'△距离']=0
+        AccumEnergy3=part3['累积能量[Kwh]'].values
+        for k3 in range(1,len(part3)):
+            if (AccumEnergy3[k3]-AccumEnergy3[k3-1])>0:
+                part3.loc[k3,'△距离']=(AccumEnergy3[k3]-AccumEnergy3[k3-1])*avg_cost
+                part3.loc[k3,'方法']=4
+            else:
+                part3.loc[k3,'△距离']=0
+                part3.loc[k3,'方法']=5
+        # The first row of part2_gps and part3 repeats the boundary row of
+        # the preceding part, so it is skipped when stitching.
+        df_out=pd.concat([part1,part2_gps[1:],part3[1:]])
+    else:
+        # Two or fewer GPS rows: treat everything with the energy rule.
+        part1=df
+        part2=df[0:0]
+        part3=df[0:0]
+        
+        # Distance for part 1: energy-based only.
+        part1.index=[i for i in range(len(part1))]
+        part1.loc[0,'△距离']=0
+        AccumEnergy1=part1['累积能量[Kwh]'].values
+        for k1 in range(1,len(part1)):
+            if (AccumEnergy1[k1]-AccumEnergy1[k1-1])>0:
+                part1.loc[k1,'△距离']=(AccumEnergy1[k1]-AccumEnergy1[k1-1])*avg_cost
+                part1.loc[k1,'方法']=4
+            else:
+                part1.loc[k1,'△距离']=0
+                part1.loc[k1,'方法']=5
+        df_out=part1.copy()
+    
+    # Cumulative mileage: running sum of the distance increments.
+    df_out.loc[0,'累积里程[km]']=0
+    df_out.index=[i for i in range(len(df_out))]
+
+    for k in range(1,len(df_out)):
+        df_out.loc[k,'累积里程[km]']=df_out.loc[k-1,'累积里程[km]']+df_out.loc[k,'△距离']
+    return(df_out)
+
+def calcul_mileage(sn,data_bms,data_gps):
+    """Estimate the daily cumulative mileage of one pack from BMS and GPS data.
+
+    The BMS and GPS tables are outer-joined on '时间戳', missing current and
+    voltage readings are repaired, and each calendar day is then processed
+    in steps of roughly five minutes: real_odo() is run on the step's rows
+    (plus up to 100 preceding rows) and the resulting distance and
+    consumption are appended to a per-day result table.
+
+    Args:
+        sn: serial number, written to the 'sn' column of the result.
+        data_bms: BMS frame with '时间戳', '总电流[A]', '总电压[V]', 'SOC[%]',
+            '充电状态'.
+        data_gps: GPS frame with '时间戳', '纬度', '经度'.
+
+    Returns:
+        Frame with one row per evaluation step and the columns '时间戳',
+        'sn', '总电流[A]', '总电压[V]', 'SOC[%]', '充电状态', '纬度', '经度',
+        '能耗[km/kwh]', '每日累积里程[km]'.
+    """
+
+    # Merge the two tables.
+    df_bms=data_bms.copy()
+    df_gps=data_gps.copy()
+    # Drop the points whose latitude is below 10.
+    # NOTE(review): the loop starts at 1, so row 0 is never checked, and
+    # .loc[k] assumes the index labels are 0..n-1.
+    for k in range(1,len(df_gps)):
+        if df_gps.loc[k,'纬度']<10:
+            df_gps=df_gps.drop(k)
+    df_bms.set_index(["时间戳"], inplace=True)
+    df_gps.set_index(["时间戳"], inplace=True)
+    merge_df1=pd.merge(df_bms, df_gps,how='outer', left_index=True, right_index=True)
+    merge_df1.loc[:,'时间戳']=merge_df1.index
+    merge_df1.index=[i for i in range(len(merge_df1))]
+
+    # Parameters.
+    cost_min=34 # lower bound of the consumption figure, km/kwh
+    cost_max=45 # upper bound of the consumption figure, km/kwh
+
+    df_input=merge_df1[['时间戳','总电流[A]', '总电压[V]','SOC[%]','充电状态','纬度', '经度']]
+    # Repair missing current/voltage readings.
+    df_input=timeconvert(df_input,"时间戳")# compute relative time
+    related_times=df_input['相对时间[s]'].values
+    lat_list=df_input['纬度'].values
+    lng_list=df_input['经度'].values
+    Vlt_list=df_input['总电压[V]'].values
+    for k in range(1,len(df_input)):
+        if math.isnan(df_input.loc[k,'总电流[A]']):
+            if related_times[k]-related_times[k-1]<5:
+                # Repair 1: less than 5 s after the previous row, copy it.
+                df_input.loc[k,'总电流[A]']=df_input.loc[k-1,'总电流[A]']
+                df_input.loc[k,'总电压[V]']=df_input.loc[k-1,'总电压[V]']
+                df_input.loc[k,'修复']=1
+            elif lat_list[k-1]>0:
+                # Repair 2: previous row has a GPS fix; derive a current
+                # from the GPS distance at cost_min and a fixed 70 V.
+                delta_odo=cal_dis_meters(6378.137,lat_list[k], lng_list[k],lat_list[k-1], lng_list[k-1])
+                df_input.loc[k,'总电流[A]']=1000*3600*(delta_odo/cost_min)/70/(related_times[k]-related_times[k-1])
+                df_input.loc[k,'总电压[V]']=70
+                df_input.loc[k,'修复']=2
+            elif related_times[k]-related_times[k-1]<30:
+                # Repair 3: less than 30 s after the previous row, copy it.
+                df_input.loc[k,'总电流[A]']=df_input.loc[k-1,'总电流[A]']
+                df_input.loc[k,'总电压[V]']=df_input.loc[k-1,'总电压[V]']
+                df_input.loc[k,'修复']=3
+            else:
+                # Not repairable: drop the row.
+                # NOTE(review): a later iteration may read .loc[k-1] of a
+                # row dropped here -- verify.
+                df_input=df_input.drop(k)
+
+    df_input.index=[i for i in range(len(df_input))]
+    # Calendar day = first 10 characters of the timestamp string.
+    df_input['日期']=[df_input.loc[i,'时间戳'][0:10] for i in range(len(df_input))]     
+    date_list=np.unique(df_input['日期'].values)
+
+    list_result=[]
+    input_res=pd.DataFrame()
+    res = pd.DataFrame()
+    df_input_copy=df_input.copy()
+    for date_str in date_list:
+        df_input_copy=df_input[df_input['日期']==date_str]
+        data_len=len(df_input_copy)
+        time_list=df_input_copy['时间戳'].values
+        # Initialise the per-day state.
+        start_time=datetime.datetime.strptime(time_list[0], "%Y-%m-%d %H:%M:%S")
+        last_time=time_list[0]
+        reset_time=start_time
+        start_index=0
+        end_index=1
+        cal_count=0
+        df_deltaData_last=df_input_copy[0:0]
+        # Initialise the per-day output; times are stored without seconds.
+        df_result=pd.DataFrame()
+        
+        df_result.loc[0,'时间']=start_time
+        df_result.loc[0,'时间']=str(df_result.loc[0,'时间'])
+        df_result.loc[0,'时间'] =df_result.loc[0,'时间'].rpartition(':')[0]
+        
+        df_result.loc[0,'累积里程[km]']=0
+        df_result.loc[0,'能耗[km/kwh]']=cost_min
+        process_store=[]
+        avg_cost=34
+        cross_odo=0
+        for k in range(1,data_len):
+            
+            # A step is evaluated once a row lies within 60 s before, or
+            # anywhere after, start_time + 5 minutes.
+            target_Time=start_time+datetime.timedelta(minutes=5)
+            timenow=datetime.datetime.strptime(time_list[k], "%Y-%m-%d %H:%M:%S")
+            if target_Time>=timenow:
+                delta_time=(target_Time-timenow).seconds
+                cal_flag=0
+                if delta_time<60:
+                    end_index=k
+                    cal_flag=1
+            else:        
+                end_index=k
+                cal_flag=1
+            
+            if cal_flag==1:
+                cal_count=cal_count+1
+                
+                # Window: this step's rows plus up to 100 preceding rows.
+                df_deltaData=df_input_copy[max(start_index-100,0):end_index+1]
+                # Consumption of the previous step, never below cost_min.
+                avg_cost=max(cost_min,df_result.loc[cal_count-1,'能耗[km/kwh]'])
+                #avg_cost=cost_min
+                df_span=real_odo(df_deltaData,avg_cost)
+                st_str=df_span['时间戳'].values[0]
+                st_time=datetime.datetime.strptime(st_str, "%Y-%m-%d %H:%M:%S")
+                last_time_s=datetime.datetime.strptime(last_time, "%Y-%m-%d %H:%M:%S")
+                if last_time_s>=st_time:
+                    # The reference row is still inside the window: keep it.
+                    last_time=last_time
+                    cross_odo=cross_odo
+                else:
+                    # Otherwise restart from the beginning of this step.
+                    last_time=start_time
+                    cross_odo=0
+                # Distance and energy accumulated since the reference row.
+                delta_odo=df_span['累积里程[km]'].values[-1]-df_span[df_span['时间戳']==str(last_time)]['累积里程[km]'].values[0]
+                delta_energy=df_span['累积能量[Kwh]'].values[-1]-df_span[df_span['时间戳']==str(last_time)]['累积能量[Kwh]'].values[0]
+                df_result.loc[cal_count,'时间']=timenow
+                df_result.loc[cal_count,'时间']=str(df_result.loc[cal_count,'时间'])
+                df_result.loc[cal_count,'时间'] = df_result.loc[cal_count,'时间'].rpartition(':')[0]
+                
+                # cross_odo is the part already counted by the previous step.
+                df_result.loc[cal_count,'累积里程[km]']=df_result.loc[cal_count-1,'累积里程[km]']+delta_odo-cross_odo
+                if delta_energy>0:
+                    # Clamp the measured consumption to [cost_min, cost_max].
+                    df_result.loc[cal_count,'能耗[km/kwh]']=min(max(delta_odo/delta_energy,cost_min),cost_max)
+                else:
+                    df_result.loc[cal_count,'能耗[km/kwh]']=cost_min
+                
+                # df_span[df_span['纬度']>0][-1:]
+                if len(df_span[df_span['纬度']>0])>0:
+                    # Next reference row: the last row with a GPS fix; the
+                    # distance travelled after it is carried in cross_odo.
+                    last_gps=df_span[df_span['纬度']>0][-1:]
+                    idx=last_gps.index.values[0]
+                    last_time=df_span.loc[idx,'时间戳']        
+                    cross_odo=df_span['累积里程[km]'].values[-1]-df_span.loc[idx,'累积里程[km]']
+                else:
+                    last_time=df_span['时间戳'].values[-1]        
+                    cross_odo=0
+                
+                start_index=end_index
+                start_time=timenow
+                cal_flag=0
+    
+        # NOTE(review): DataFrame.append is not available in pandas >= 2.0;
+        # confirm the pandas version this runs on.
+        res=res.append(df_result)
+        input_res=input_res.append(df_input_copy)
+
+    # Strip the seconds from the input timestamps so they can be matched
+    # against the minute-resolution result times.
+    l= len(input_res['时间戳'])
+    for k in range(l):
+        input_res.loc[k,'时间戳'] =input_res.loc[k,'时间戳'].rpartition(':')[0]
+
+    res.index=[i for i in range(len(res))]
+    res['日期']=[res.loc[i,'时间'][0:10] for i in range(len(res))]     
+
+    # Join, day by day, each result row with the first input row of the
+    # same minute.
+    merge_res=pd.DataFrame()
+    for date_str in date_list:
+        input_res_copy=input_res[input_res['日期']==date_str]
+        input_res_copy=input_res_copy.drop_duplicates('时间戳')
+        res_copy=res[res['日期']==date_str]
+        df_merge=pd.DataFrame()
+        df_merge=pd.merge(input_res_copy,res_copy,left_on='时间戳',right_on='时间',how='right')
+        merge_res=merge_res.append(df_merge)
+
+    merge_res['sn']=sn
+    merge_res=merge_res[['时间戳','sn','总电流[A]','总电压[V]','SOC[%]','充电状态','纬度','经度','能耗[km/kwh]','累积里程[km]']]
+    merge_res=merge_res.rename(columns={'累积里程[km]':'每日累积里程[km]'})
+
+    return merge_res
+
+
+
+
+

+ 42 - 0
LIB/MIDDLE/odo/DailyMileageEstimation/V1_0_4_SimpleVehicle/main_daily_mileage.py

@@ -0,0 +1,42 @@
+from LIB.BACKEND import DBManager
+import cal_mileage
+from LIB.MIDDLE.CellStateEstimation.Common import log
+import datetime
+import pandas as pd
+
+dbManager = DBManager.DBManager()
+
+# Default window: the last 24 hours, truncated to whole seconds.
+# NOTE(review): both values are replaced by fixed dates inside the try
+# block below.
+now_time=datetime.datetime.now().replace(microsecond=0)
+start_time=str(now_time-datetime.timedelta(days=1))
+end_time=str(now_time)
+
+# dataSOH = pd.read_excel('sn-20210903.xlsx',sheet_name='sn-20210903')
+# fileNames = dataSOH['sn']
+# fileNames = list(fileNames)
+# l = len(fileNames)
+
+# Log configuration.
+mylog=log.Mylog('log.txt','error')
+mylog.logcfg()
+
+
+
+try:
+    sn = 'MGMCLN750N215N180'
+    start_time='2021-12-26 00:00:00'
+    end_time='2021-12-26 23:59:59'
+    df_data = dbManager.get_data(sn=sn, start_time=start_time, end_time=end_time, data_groups=['bms','gps'])
+    data_bms = df_data['bms']
+    data_gps = df_data['gps']
+
+    # Daily cumulative mileage: needs both BMS and GPS rows.
+    has_bms=len(data_bms['时间戳'])>0
+    if has_bms and len(data_gps['时间戳'])>0:
+        df_res = cal_mileage.calcul_mileage(sn,data_bms,data_gps)
+        df_res.to_csv('Mileage_'+sn+'.csv')
+
+except Exception as e:
+    # Best effort: report the failure in the console and the log file.
+    print(repr(e))
+    mylog.logopt(sn,e)