{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import datetime\n", "import pandas as pd\n", "from LIB.BACKEND import DBManager, Log\n", "from pandasql import sqldf " ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "start_time = (datetime.datetime.now()+datetime.timedelta(days=-7)).strftime('%Y-%m-%d %H:%M:%S')\n", "end_time=datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "dbManager = DBManager.DBManager()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "idlist=['TJMCL120502305010','TJMCL120502305022','TJMCL120502305038','TJMCL120502305026']" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "df_bms=pd.DataFrame()\n", "df_gps=pd.DataFrame()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for sn in idlist:\n", " df_data = dbManager.get_data(sn=sn, start_time=start_time, end_time=end_time, data_groups=['bms','gps'])\n", " df_data_bms=df_data['bms'] \n", " df_data_bms[\"sn\"]=sn\n", " df_data_gps=df_data['gps']\n", " df_data_gps[\"sn\"]=sn\n", " df_bms=pd.concat([df_bms,df_data_bms],axis=0,ignore_index=True)\n", " df_gps=pd.concat([df_gps,df_data_gps],axis=0,ignore_index=True)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "df_bms.to_csv('onlinedata/BMS.csv',index=False)\n", "df_gps.to_csv('onlinedata/GPS.csv',index=False)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "##基于上次保存的数据进行加工,生产可以省略\n", "df_bms=pd.read_csv(\"onlinedata/BMS.csv\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "##确定是按 sn 和 时间戳排序\n", "\n", "df_mbs=df_bms.sort_values([\"sn\",\"时间戳\"],ascending = [True, True])\n", "\n", "##df_gps=df_gps.sort_values([\"sn\",\"时间戳\"],ascending = [True, True])" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "##寻找电压字段,计算每行的最大电压,供后续关联使用\n", "column=list(df_mbs.columns)\n", "cellvolt_list = [s for s in column if '单体电压' in s]\n", "df_v=df_mbs[cellvolt_list]\n", "df_mbs[\"v_max\"]=df_v.max(axis=1)\n", "df_join=df_mbs[[\"sn\",\"时间戳\",\"v_max\"]]\n", "p_col=['sn','data_time','v_max']\n", "df_join.columns=p_col" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "###核实数据\n", "df_join.to_csv('onlinedata/df_join.csv',index=False)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "##列变量重命名\n", "df_bms_1=df_bms[[\"时间戳\",\"总电流[A]\",\"总电压[V]\",\"充电状态\",\"SOC[%]\",\"sn\"]]\n", "df_bms_1.columns=[\"data_time\",\"aa\",\"vv\",\"charge\",\"soc\",\"sn\"]\n", "df_bms_2=df_bms_1" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "##核实数据\n", "df_bms_2.to_csv('onlinedata/BMS_2.csv',index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "##更换充电状态,部分为5的替换为3\n", "df_bms_2[\"charge\"][df_bms_2[\"charge\"]==5]=3" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "###按充电状态第一次分组\n", "df_bms_2['block'] = (df_bms_2[\"charge\"].shift(1) != df_bms_2[\"charge\"]).astype(int).cumsum()\n", "##核实数据\n", "df_bms_2.to_csv(\"onlinedata/df_bms_2_block.csv\",index=False)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "##统计分组的个数\n", "pysqldf = lambda q: sqldf(q, globals())\n", "sql=\"select count(*) as num1,df_bms_2.block from df_bms_2 group by df_bms_2.block order by num1 \"\n", "df_block_num= pysqldf(sql)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "##选取每段数量大于5的分组\n", "df_block_target=df_block_num[ df_block_num[\"num1\"] >5 ] " ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "###行驶的数据\n", "sql1=\"select * from df_bms_2 where charge=3 and block in (select block from df_block_target)\"\n", "df_drive=pysqldf(sql1)\n" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "##核实数据\n", "df_drive.to_csv(\"onlinedata/df_dirve_blcok.csv\",index=False)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "##再按时间划分\n", "df_drive[\"order_delta\"]= pd.to_datetime(df_drive[\"data_time\"] )-pd.to_datetime( df_drive[\"data_time\"].shift(1).fillna(method = 'backfill',axis = 0))\n", "df_drive[\"order_delta\"]=pd.to_timedelta(df_drive[\"order_delta\"])\n", "df_drive[\"order_delta\"] = df_drive[\"order_delta\"].dt.total_seconds()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "##限定时间间隔1200秒切割和合并\n", "df_drive[\"time_flag\"]=df_drive[\"order_delta\"]>1200\n", "df_drive['time_block']=(df_drive[\"time_flag\"].shift(1) != df_drive[\"time_flag\"]).astype(int).cumsum()\n", "###核实数据\n", "df_drive.to_csv(\"onlinedata/df_drive.csv\",index=False)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "##数量大于5个的分组\n", "sql2=\"select count(*) as num1,time_block from df_drive group by time_block order by num1\"\n", "df_drive_block_num= pysqldf(sql2)\n", "df_drive_time_block_target=df_drive_block_num[ df_drive_block_num[\"num1\"] >5 ] ####限定每一段的点数\n", "sql3=\"select * from df_drive where time_block in (select time_block from df_drive_time_block_target)\"\n", "df_drive_block=pysqldf(sql3)\n", "df_drive_block.to_csv(\"onlinedata/df_drive.csv\",index=False)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "##汇总驾驶数据(包含长短驾驶)\n", "sql4=\"select min(data_time),max(data_time),min(soc),max(soc),time_block,sn from df_drive_block group by sn, time_block\"\n", "df_drive_static=pysqldf(sql4)\n", "df_drive_static.to_csv(\"onlinedata/df_drive_static.csv\",index=False)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "##选取长驾驶\n", "df_drive_time_block_target_l=df_drive_block_num[ df_drive_block_num[\"num1\"] >120 ] ####限定每一段的点数\n", "sql31=\"select * from df_drive where time_block in (select time_block from df_drive_time_block_target_l)\"\n", "df_drive_block_l=pysqldf(sql31)\n", "##核实数据\n", "df_drive_block_l.to_csv(\"onlinedata/df_drive_l.csv\",index=False)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "###统计长驾驶\n", "sql41=\"select min(data_time),max(data_time),min(soc),max(soc),time_block,sn from df_drive_block_l group by sn, time_block\"\n", "df_drive_static_l=pysqldf(sql41)\n", "##核实数据\n", "df_drive_static_l.to_csv(\"onlinedata/df_drive_static_l.csv\",index=False)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "###区分充电和静置\n", "sql5=\"select * from df_bms_2 where charge!=3 and block in (select block from df_block_target)\"\n", "df_no_drive=pysqldf(sql5)\n", "df_no_drive.to_csv(\"onlinedata/df_no_drive.csv\",index=False)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "### 填充电流的空值\n", "df_no_drive[\"aa\"]=df_no_drive[\"aa\"].fillna(method = 'ffill',axis = 0)\n", "df_no_drive[\"aa\"]=pd.to_numeric(df_no_drive[\"aa\"], errors='ignore')\n", "df_no_drive.to_csv(\"onlinedata/df_no_drive.csv\",index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "##判断充电\n", "df_charge=df_no_drive[df_no_drive[\"aa\"].abs()>0.5]\n", "df_charge[\"order_delta\"]= pd.to_datetime(df_charge[\"data_time\"] )-pd.to_datetime( df_charge[\"data_time\"].shift(1).fillna(method = 'backfill',axis = 0))\n", "df_charge[\"order_delta\"]=pd.to_timedelta(df_charge[\"order_delta\"])\n", "df_charge[\"order_delta\"] = df_charge[\"order_delta\"].dt.total_seconds()\n", "df_charge[\"order_delta\"].describe()\n", "df_charge[\"time_flag\"]=df_charge[\"order_delta\"]>1200 ####限定时间间隔\n", "df_charge['time_block']=(df_charge[\"time_flag\"].shift(1) != df_charge[\"time_flag\"]).astype(int).cumsum()\n", "df_charge.to_csv(\"onlinedata/df_charge.csv\",index=False)\n", "\n" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "##排除单点充电\n", "sql6=\"select count(*) as num1,time_block from df_charge group by time_block order by num1\"\n", "df_charge_block_num= pysqldf(sql6)\n", "df_charge_block_target=df_charge_block_num[ df_charge_block_num[\"num1\"] >5 ] ####限定每一段的点数\n", "sql7=\"select * from df_charge where time_block in (select time_block from df_charge_block_target)\"\n", "df_charge=pysqldf(sql7)\n", "df_charge.to_csv(\"onlinedata/df_charge.csv\",index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "##判断静置\n", "df_stand=df_no_drive[df_no_drive[\"aa\"].abs()<=0.5]\n", "df_stand[\"order_delta\"]= pd.to_datetime(df_stand[\"data_time\"] )-pd.to_datetime( df_stand[\"data_time\"].shift(1).fillna(method = 'backfill',axis = 0))\n", "df_stand[\"order_delta\"]=pd.to_timedelta(df_stand[\"order_delta\"])\n", "df_stand[\"order_delta\"] = df_stand[\"order_delta\"].dt.total_seconds()\n", "df_stand[\"order_delta\"].describe()\n", "df_stand[\"time_flag\"]=df_stand[\"order_delta\"]>1200 ####限定时间间隔\n", "df_stand['time_block']=(df_stand[\"time_flag\"].shift(1) != df_stand[\"time_flag\"]).astype(int).cumsum()\n", "df_stand.to_csv(\"onlinedata/df_stand.csv\",index=False)\n" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "##排除单点静置\n", "sql8=\"select count(*) as num1,time_block from df_stand group by time_block order by num1\"\n", "df_stand_block_num= pysqldf(sql8)\n", "df_stand_block_target=df_stand_block_num[ df_stand_block_num[\"num1\"] >5 ] ####限定每一段的点数\n", "sql9=\"select * from df_stand where time_block in (select time_block from df_stand_block_target)\"\n", "df_stand=pysqldf(sql9)\n", "df_stand.to_csv(\"onlinedata/df_stand.csv\",index=False)" ] }, { "cell_type": "code", "execution_count": 200, "metadata": {}, "outputs": [], "source": [ "##充电数据统计\n", "sql10=\"select min(data_time),max(data_time),min(soc),max(soc),time_block,sn from df_charge group by sn,time_block\"\n", "df_charge_static=pysqldf(sql10)\n", "df_charge_static.to_csv(\"onlinedata/df_charge_static.csv\",index=False)" ] }, { "cell_type": "code", "execution_count": 201, "metadata": {}, "outputs": [], "source": [ "new_charge_clumns=[\"charge_time_b\",\"charge_time_e\",\"soc_min\",\"soc_max\",\"time_block\",\"sn\"]\n", "df_charge_static.columns=new_charge_clumns" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "##静置时间统计\n", "sql11=\"select min(data_time),max(data_time),min(soc),max(soc),block,sn from df_stand group by sn, block\"\n", "df_stand_static=pysqldf(sql11)\n", "df_stand_static.to_csv(\"onlinedata/df_stand_static.csv\",index=False)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "##进行指标加工,长驾驶等,没有计算短驾驶的指标" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "p1_col=[\"data_time_b\",\"data_time_e\",\"soc_min\",\"soc_max\",\"time_block\",\"sn\"]\n", "df_drive_static_l.columns=p1_col" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "##数据核实\n", "df_drive_static_l.to_csv(\"onlinedata/df_stand_static.csv\",index=False)" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "##关联最大电压\n", "sql11=\"select a.*, b.v_max as v_max_b ,c.v_max as v_max_e from df_drive_static_l a left join df_join b on a.data_time_b=b.data_time and a.sn=b.sn \\\n", "left join df_join c on a.data_time_e=c.data_time and a.sn=c.sn \"\n", "drive_statics_l_add=pysqldf(sql11)" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "drive_statics_l_add.to_csv(\"onlinedata/drive_statics_l_add.csv\",index=False)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "###计算时间差\n", "drive_statics_l_add[\"time_diff\"]= pd.to_datetime(drive_statics_l_add[\"data_time_e\"] )-pd.to_datetime(drive_statics_l_add[\"data_time_b\"])\n", "drive_statics_l_add[\"time_diff\"]=(drive_statics_l_add[\"time_diff\"].dt.total_seconds()/3600).round(2)\n", "sql12=\"select substring(data_time_b,1,10) from drive_statics_l_add \"\n", "drive_statics_l_add[\"day\"]=pysqldf(sql12)\n", "drive_statics_l_add[\"hour_b\"]=pd.to_datetime(drive_statics_l_add[\"data_time_b\"]).dt.hour\n", "drive_statics_l_add[\"hour_e\"]=pd.to_datetime(drive_statics_l_add[\"data_time_e\"]).dt.hour\n", "\n", "def Timephased(x):\n", " if x>=0 and x<=4 :\n", " return \"D-0-4\"\n", " elif x>4 and x<=8:\n", " return \"D-4-8\"\n", " elif x>8 and x<=12:\n", " return \"D-8-12\"\n", " elif x>12 and x<=16:\n", " return \"D-12-16\"\n", " elif x>16 and x<=20:\n", " return \"D-16-20\"\n", " elif x>20 and x<=24:\n", " return \"D-20-24\"\n", "\n", "drive_statics_l_add[\"time_phase_b\"]=drive_statics_l_add[\"hour_b\"].apply(lambda x: Timephased(x))\n", "drive_statics_l_add[\"time_phase_e\"]=drive_statics_l_add[\"hour_e\"].apply(lambda x: Timephased(x))" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "##计算 SOC 差值\n", "drive_statics_l_add[\"soc_diff\"]=drive_statics_l_add[\"soc_max\"]-drive_statics_l_add[\"soc_min\"]" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "####计算 错行 SOC 差值 \n", "####第二个行程最大SOC 减去本行程最小的SOC值,来判断本行程结束是否换电\n", "drive_statics_l_add[\"soc_diff_u\"] = (drive_statics_l_add[\"soc_max\"].shift(-1).fillna(method = 'ffill',axis = 0) -drive_statics_l_add[\"soc_min\"]).abs()/drive_statics_l_add[\"soc_min\"]\n", "def Changestatus(x):\n", " if x>0.2:\n", " return 1\n", " else:\n", " return 0\n", " \n", "drive_statics_l_add[\"end_change\"]=drive_statics_l_add[\"soc_diff_u\"].apply(lambda x: Changestatus(x)).astype(\"int\")\n", "drive_statics_l_add[\"begin_change\"] = drive_statics_l_add[\"end_change\"].shift(1).fillna(method = 'bfill',axis = 0).astype(\"int\") \n", "drive_statics_l_add[\"drive_block\"]=drive_statics_l_add[\"begin_change\"].cumsum()\n", "drive_statics_l_add.to_csv(\"onlinedata/drive_statics_l_add.csv\",index=False) " ] }, { "cell_type": "code", "execution_count": 194, "metadata": {}, "outputs": [], "source": [ "###关联充电表\n", "##提取行驶后充电的数据\n", "data_drive_charge=drive_statics_l_add[drive_statics_l_add[\"end_change\"]==1]" ] }, { "cell_type": "code", "execution_count": 195, "metadata": {}, "outputs": [], "source": [ "##核实数据\n", "data_drive_charge.to_csv(\"onlinedata/data_drive_charge.csv\",index=False)" ] }, { "cell_type": "code", "execution_count": 198, "metadata": {}, "outputs": [], "source": [ "##只统计等待小于三个小时的数据,不然关联不到\n", "sql13=\"select a.* ,b.charge_time_b from data_drive_charge a left join df_charge_static b on a.sn=b.sn and \\\n", "JULIANDAY(b.charge_time_b)-JULIANDAY(a.data_time_e)<0.125 and JULIANDAY(b.charge_time_b)-JULIANDAY(a.data_time_e)>0 \"" ] }, { "cell_type": "code", "execution_count": 202, "metadata": {}, "outputs": [], "source": [ "data_drive_charge_add=pysqldf(sql13)\n", "##核实数据\n" ] }, { "cell_type": "code", "execution_count": 203, "metadata": {}, "outputs": [], "source": [ "###计算等待时间\n", "sql14=\"select *,(JULIANDAY(charge_time_b)-JULIANDAY(data_time_e))*24*60 wait_time from data_drive_charge_add \"\n", "data_drive_charge_add=pysqldf(sql14)" ] }, { "cell_type": "code", "execution_count": 204, "metadata": {}, "outputs": [], "source": [ "##核实数据\n", "data_drive_charge_add.to_csv(\"onlinedata/data_drive_charge_add.csv\",index=False)" ] }, { "cell_type": "code", "execution_count": 205, "metadata": {}, "outputs": [], "source": [ "##换电总次数\n", "sqlc1=\"select count(*) from data_drive_charge_add \"\n", "count_change=pysqldf(sqlc1)" ] }, { "cell_type": "code", "execution_count": 206, "metadata": {}, "outputs": [], "source": [ "##等待超过三小时的次数\n", "sqlc2=\"select count(*) from data_drive_charge_add where wait_time is null\"\n", "count_wait_long=pysqldf(sqlc2)" ] }, { "cell_type": "code", "execution_count": 210, "metadata": {}, "outputs": [], "source": [ "##三小时以下的时间分布情况,单位为分钟\n", "df_charge_wait_time=data_drive_charge_add[\"wait_time\"].agg(['sum','max','min','median','mean'])" ] }, { "cell_type": "code", "execution_count": 211, "metadata": {}, "outputs": [], "source": [ "##75位数\n", "df_charge_wait_time75=data_drive_charge_add[\"wait_time\"].quantile(0.75)" ] }, { "cell_type": "code", "execution_count": 215, "metadata": {}, "outputs": [], "source": [ "##关联经纬度\n", "##加载数据生产上不需要\n", "df_gps=pd.read_csv(\"onlinedata/GPS.csv\")" ] }, { "cell_type": "code", "execution_count": 223, "metadata": {}, "outputs": [], "source": [ "##筛选列变量\n", "df_gps_2=df_gps[[\"时间戳\",\"纬度\",\"经度\",\"sn\"]]" ] }, { "cell_type": "code", "execution_count": 227, "metadata": {}, "outputs": [], "source": [ "##重命名\n", "gps_columns=[\"data_time\",\"y\",\"x\",\"sn\"]\n", "df_gps_2.columns=gps_columns" ] }, { "cell_type": "code", "execution_count": 228, "metadata": {}, "outputs": [], "source": [ "sqlc3=\"select distinct(substring(data_time_b,1,10)) data_day from data_drive_charge_add union \\\n", "select distinct(substring(data_time_e,1,10)) data_day from data_drive_charge_add \"" ] }, { "cell_type": "code", "execution_count": 229, "metadata": {}, "outputs": [], "source": [ "data_time=pysqldf(sqlc3)" ] }, { "cell_type": "code", "execution_count": 231, "metadata": {}, "outputs": [], "source": [ "##gps表太大关联慢,先处理一下\n", "sqlc4=\"select * from df_gps_2 where substring(data_time,1,10) in (select data_day from data_time ) \"" ] }, { "cell_type": "code", "execution_count": 232, "metadata": {}, "outputs": [], "source": [ "df_gps_core=pysqldf(sqlc4)" ] }, { "cell_type": "code", "execution_count": 248, "metadata": {}, "outputs": [], "source": [ "##筛选开始未换电,行程后换电的情况\n", "sqlc5=\"select * from data_drive_charge_add where begin_change=0\"\n", "data_drive_charge_add_e=pysqldf(sqlc5)" ] }, { "cell_type": "code", "execution_count": 259, "metadata": {}, "outputs": [], "source": [ "##关联经纬度\n", "sqlc6=\"select a.*,b.y y_b,b.x x_b,c.y y_e,c.x x_e from data_drive_charge_add_e a \\\n", "left join df_gps_core b on a.sn=b.sn and abs((JULIANDAY(a.data_time_b)-JULIANDAY(b.data_time))*24*60*60)<5 \\\n", "left join df_gps_core c on a.sn=c.sn and abs((JULIANDAY(a.data_time_e)-JULIANDAY(c.data_time))*24*60*60)<5\"" ] }, { "cell_type": "code", "execution_count": 260, "metadata": {}, "outputs": [], "source": [ "drive_charge_gps=pysqldf(sqlc6)" ] }, { "cell_type": "code", "execution_count": 261, "metadata": {}, "outputs": [], "source": [ "##核查数据\n", "drive_charge_gps.to_csv(\"onlinedata/drive_charge_gps.csv\",index=False)" ] }, { "cell_type": "code", "execution_count": 262, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "12.717310483106838\n" ] } ], "source": [ "##距离试算\n", "from haversine import haversine\n", "a=(34.500821,114.990448)\n", "b=(34.41177,115.077483)\n", "dis=haversine(a,b)\n", "print(dis)" ] }, { "cell_type": "code", "execution_count": 263, "metadata": {}, "outputs": [], "source": [ "sqlc7=\"select * from drive_charge_gps where y_b>0 and x_b>0 and x_b>0 and x_e>0\"\n", "drive_charge_gps_core=pysqldf(sqlc7)" ] }, { "cell_type": "code", "execution_count": 270, "metadata": {}, "outputs": [], "source": [ "##统计换电行驶的距离\n", "drive_charge_gps_core[\"dist\"]=drive_charge_gps_core.apply(lambda row :haversine((row[\"y_b\"],row[\"x_b\"]),(row[\"y_e\"],row[\"x_e\"])),axis=1)" ] }, { "cell_type": "code", "execution_count": 271, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 12.717310\n", "1 0.027449\n", "2 3.498388\n", "3 12.693435\n", "Name: dist, dtype: float64" ] }, "execution_count": 271, "metadata": {}, "output_type": "execute_result" } ], "source": [ "drive_charge_gps_core[\"dist\"]" ] }, { "cell_type": "code", "execution_count": 272, "metadata": {}, "outputs": [], "source": [ "##核实数据\n", "drive_charge_gps_core.to_csv(\"onlinedata/drive_charge_gps_core.csv\")" ] }, { "cell_type": "code", "execution_count": 100, "metadata": {}, "outputs": [], "source": [ "##计算统计指标,行驶相关指标,以drive_statics_l_add为主表\n", "##本周支撑行驶的次数\n", "drive_count_total=drive_statics_l_add[[\"sn\",\"drive_block\"]].groupby(\"sn\").agg('count')" ] }, { "cell_type": "code", "execution_count": 101, "metadata": {}, "outputs": [], "source": [ "drive_count_total.columns=[\"count_total\"]" ] }, { "cell_type": "code", "execution_count": 102, "metadata": {}, "outputs": [], "source": [ "##计算每天的驾驶次数\n", "drive_count_total[\"count_day\"]=round(drive_count_total[\"count_total\"]/7,2)" ] }, { "cell_type": "code", "execution_count": 103, "metadata": {}, "outputs": [], "source": [ "##单次行驶的时间\n", "drive_time_single=drive_statics_l_add[[\"sn\",\"time_diff\"]].groupby(\"sn\").agg(['max','min','median','mean'])" ] }, { "cell_type": "code", "execution_count": 104, "metadata": {}, "outputs": [], "source": [ "drive_time_single_75=drive_statics_l_add.groupby(\"sn\").time_diff.quantile(0.75)" ] }, { "cell_type": "code", "execution_count": 105, "metadata": {}, "outputs": [], "source": [ "##单次行驶SOC消耗情况\n", "drive_soc_single=drive_statics_l_add[[\"sn\",\"soc_diff\"]].groupby(\"sn\").agg(['max','min','median','mean'])" ] }, { "cell_type": "code", "execution_count": 106, "metadata": {}, "outputs": [], "source": [ "drive_soc_single_75=drive_statics_l_add.groupby(\"sn\").soc_diff.quantile(0.75)" ] }, { "cell_type": "code", "execution_count": 107, "metadata": {}, "outputs": [], "source": [ "##行驶后换电次数统计\n", "end_charge_count=data_drive_charge.groupby(\"sn\").sn.count()" ] }, { "cell_type": "code", "execution_count": 108, "metadata": {}, "outputs": [], "source": [ "##每天换电次数情况\n", "change_day_avg_num=round(end_charge_count/7,2)" ] }, { "cell_type": "code", "execution_count": 109, "metadata": {}, "outputs": [], "source": [ "frames=[drive_count_total,drive_time_single,drive_time_single_75,drive_soc_single,drive_soc_single_75,end_charge_count,change_day_avg_num]" ] }, { "cell_type": "code", "execution_count": 110, "metadata": {}, "outputs": [], "source": [ "##进行表的关联\n", "drive_describe_single = pd.concat(frames, axis=1, join='inner')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "drive_describe_single" ] }, { "cell_type": "code", "execution_count": 112, "metadata": {}, "outputs": [], "source": [ "drive_describe_single_clumns=['total_driving_num','day_driving_avg_num','time_diff_max','time_diff_min','time_diff_median'\n", " ,'time_diff_mean','time_diff_per75','soc_diff_max','soc_diff_min','soc_diff_median'\n", " ,'soc_diff_mean','soc_diff_per75','change_num','change_day_avg_num']" ] }, { "cell_type": "code", "execution_count": 113, "metadata": {}, "outputs": [], "source": [ "drive_describe_single.columns=drive_describe_single_clumns" ] }, { "cell_type": "code", "execution_count": 132, "metadata": {}, "outputs": [], "source": [ "##数据核实\n", "drive_describe_single.to_csv(\"onlinedata/drive_describe_single.csv\")" ] }, { "cell_type": "code", "execution_count": 114, "metadata": {}, "outputs": [], "source": [ "##单个周期情况统计\n", "drive_statics_period=drive_statics_l_add.groupby([\"sn\",\"drive_block\"]).agg({'data_time_b':'min',\n", " 'data_time_e':'max',\n", " 'soc_min':'min',\n", " 'soc_max':'max',\n", " 'v_max_b':'max',\n", " 'v_max_e':'min',\n", " 'time_diff':'sum',\n", " 'day':'min', \n", " 'hour_b':'min',\n", " 'hour_e':'max', \n", " 'time_phase_b':'min',\n", " 'time_phase_e':'max',\n", " 'soc_diff':'sum'})" ] }, { "cell_type": "code", "execution_count": 115, "metadata": {}, "outputs": [], "source": [ "drive_statics_period=drive_statics_period.reset_index()" ] }, { "cell_type": "code", "execution_count": 116, "metadata": {}, "outputs": [], "source": [ "##单个周期行驶的时间\n", "drive_time_period=drive_statics_period[[\"sn\",\"time_diff\"]].groupby(\"sn\").agg(['max','min','median','mean'])" ] }, { "cell_type": "code", "execution_count": 117, "metadata": {}, "outputs": [], "source": [ "##基于充电周期的行驶时间75位数\n", "drive_time_period_75=drive_statics_period.groupby(\"sn\").time_diff.quantile(0.75)" ] }, { "cell_type": "code", "execution_count": 118, "metadata": {}, "outputs": [], "source": [ "##单个周期SOC消耗情况\n", "drive_soc_period=drive_statics_period[[\"sn\",\"soc_diff\"]].groupby(\"sn\").agg(['max','min','median','mean'])" ] }, { "cell_type": "code", "execution_count": 119, "metadata": {}, "outputs": [], "source": [ "##基于充电周期的SOC消耗75位数\n", "drive_soc_period_75=drive_statics_period.groupby(\"sn\").soc_diff.quantile(0.75)" ] }, { "cell_type": "code", "execution_count": 120, "metadata": {}, "outputs": [], "source": [ "##充电周期个数\n", "drive_period_count=drive_statics_period.groupby(\"sn\").sn.count()" ] }, { "cell_type": "code", "execution_count": 122, "metadata": {}, "outputs": [], "source": [ "##每天的充电周期个数\n", "drive_period_count_day=round(drive_period_count/7,2)" ] }, { "cell_type": "code", "execution_count": 123, "metadata": {}, "outputs": [], "source": [ "frames=[drive_time_period,drive_time_period_75,drive_soc_period,drive_soc_period_75,drive_period_count,drive_period_count_day]" ] }, { "cell_type": "code", "execution_count": 124, "metadata": {}, "outputs": [], "source": [ "##进行表的关联\n", "drive_describe_period = pd.concat(frames, axis=1, join='inner')" ] }, { "cell_type": "code", "execution_count": 127, "metadata": {}, "outputs": [], "source": [ "drive_describe_single_columns=['period_time_diff_max','period_time_diff_min','period_diff_median'\n", " ,'period_time_diff_mean','period_time_diff_per75','period_soc_diff_max','prriod_soc_diff_min'\n", " ,'period_soc_diff_median','period_soc_diff_mean','period_soc_diff_per75'\n", " ,'period_count','period_count_day']" ] }, { "cell_type": "code", "execution_count": 128, "metadata": {}, "outputs": [], "source": [ "drive_describe_period.columns=drive_describe_single_columns" ] }, { "cell_type": "code", "execution_count": 130, "metadata": {}, "outputs": [], "source": [ "##核实数据\n", "drive_describe_period.to_csv(\"onlinedata/drive_describe_period.csv\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "##换电站总体指标以drive_statics_l_add为主表" ] }, { "cell_type": "code", "execution_count": 145, "metadata": {}, "outputs": [], "source": [ "##活跃电池数量\n", "sql15=\"select count(distinct sn) from drive_statics_l_add \"\n", "count_active=pysqldf(sql15)" ] }, { "cell_type": "code", "execution_count": 150, "metadata": {}, "outputs": [], "source": [ "##换电次数\n", "sql16=\"select count(end_change) from drive_statics_l_add where end_change=1 \"\n", "count_change=pysqldf(sql16)" ] }, { "cell_type": "code", "execution_count": 148, "metadata": {}, "outputs": [], "source": [ "##换电次数/每天\n", "count_charge_day=round(count_change/7,2)" ] }, { "cell_type": "code", "execution_count": 151, "metadata": {}, "outputs": [], "source": [ "##行驶后换电时间点\n", "sql17=\"select count(*) ,time_phase_e from drive_statics_l_add where end_change=1 group by time_phase_e \"\n", "change_time_slot=pysqldf(sql17)" ] }, { "cell_type": "code", "execution_count": 153, "metadata": {}, "outputs": [], "source": [ "##换电后出发的时间段分布\n", "sql18=\"select count(*) ,time_phase_b from drive_statics_l_add where begin_change=1 group by time_phase_b \"\n", "start_time_slot=pysqldf(sql18)" ] }, { "cell_type": "code", "execution_count": 155, "metadata": {}, "outputs": [], "source": [ "##充电情况分析主表df_charge_static\n", "charge_columns=['data_time_b','data_time_e','soc_min','soc_max','time_block','sn']\n", "df_charge_static.columns=charge_columns" ] }, { "cell_type": "code", "execution_count": 156, "metadata": {}, "outputs": [], "source": [ "##计算时间差\n", "df_charge_static[\"time_diff\"]= pd.to_datetime(df_charge_static[\"data_time_e\"] )-pd.to_datetime(df_charge_static[\"data_time_b\"])\n", "df_charge_static[\"time_diff\"]=(df_charge_static[\"time_diff\"].dt.total_seconds()/3600).round(2)\n", "sql12=\"select substring(data_time_b,1,10) from df_charge_static \"\n", "df_charge_static[\"day\"]=pysqldf(sql12)\n", "df_charge_static[\"hour_b\"]=pd.to_datetime(df_charge_static[\"data_time_b\"]).dt.hour\n", "df_charge_static[\"hour_e\"]=pd.to_datetime(df_charge_static[\"data_time_e\"]).dt.hour\n", "\n", "def Timephased(x):\n", " if x>=0 and x<=4 :\n", " return \"D-0-4\"\n", " elif x>4 and x<=8:\n", " return \"D-4-8\"\n", " elif x>8 and x<=12:\n", " return \"D-8-12\"\n", " elif x>12 and x<=16:\n", " return \"D-12-16\"\n", " elif x>16 and x<=20:\n", " return \"D-16-20\"\n", " elif x>20 and x<=24:\n", " return \"D-20-24\"\n", "\n", "df_charge_static[\"time_phase_b\"]=df_charge_static[\"hour_b\"].apply(lambda x: Timephased(x))\n", "df_charge_static[\"time_phase_e\"]=df_charge_static[\"hour_e\"].apply(lambda x: Timephased(x))" ] }, { "cell_type": "code", "execution_count": 157, "metadata": {}, "outputs": [], "source": [ "##计算 SOC 差值\n", "df_charge_static[\"soc_diff\"]=df_charge_static[\"soc_max\"]-df_charge_static[\"soc_min\"]" ] }, { "cell_type": "code", "execution_count": 160, "metadata": {}, "outputs": [], "source": [ "##选取充电时间大于12分钟的时间段\n", "df_charge_static=df_charge_static[df_charge_static[\"time_diff\"]>0.2]" ] }, { "cell_type": "code", "execution_count": 167, "metadata": {}, "outputs": [], "source": [ "##充电时间统计\n", "df_charge_static_time=df_charge_static[\"time_diff\"].agg(['sum','max','min','median','mean'])" ] }, { "cell_type": "code", "execution_count": 177, "metadata": {}, "outputs": [], "source": [ "df_charge_static_time_75=df_charge_static[\"time_diff\"].quantile(0.75)" ] }, { "cell_type": "code", "execution_count": 168, "metadata": {}, "outputs": [], "source": [ "##充电SOC变化统计\n", "df_charge_static_soc=df_charge_static[\"soc_diff\"].agg(['sum','max','min','median','mean'])" ] }, { "cell_type": "code", "execution_count": 178, "metadata": {}, "outputs": [], "source": [ "df_charge_static_soc_75=df_charge_static[\"soc_diff\"].quantile(0.75)" ] }, { "cell_type": "code", "execution_count": 179, "metadata": {}, "outputs": [], "source": [ "##充电时间段的情况\n", "df_charge_static_phase=df_charge_static[[\"time_phase_b\",\"time_block\"]].groupby(\"time_phase_b\").agg(['count'])" ] }, { "cell_type": "code", "execution_count": 181, "metadata": {}, "outputs": [], "source": [ "##充电结束SOC分布情况\n", "df_charge_static_soc_e=df_charge_static[\"soc_max\"].agg(['max','min','median','mean'])" ] }, { "cell_type": "code", "execution_count": 183, "metadata": {}, "outputs": [], "source": [ "df_charge_static_soc_e75=df_charge_static[\"soc_max\"].quantile(0.75)" ] }, { "cell_type": "code", "execution_count": 184, "metadata": {}, "outputs": [], "source": [ "##充电前SOC分布情况\n", "df_charge_static_soc_b=df_charge_static[\"soc_min\"].agg(['max','min','median','mean'])" ] }, { "cell_type": "code", "execution_count": 185, "metadata": {}, "outputs": [], "source": [ "df_charge_static_soc_b75=df_charge_static[\"soc_min\"].quantile(0.75)" ] } ], "metadata": { "kernelspec": { "display_name": "py38", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.15 (default, Nov 24 2022, 14:38:14) [MSC v.1916 64 bit (AMD64)]" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "4d6c15edccb966d2bb52e4527dc0611a26206d084fadc4df5e610d719652857f" } } }, "nbformat": 4, "nbformat_minor": 2 }