|
@@ -0,0 +1,1350 @@
|
|
|
+{
|
|
|
+ "cells": [
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 1,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "import datetime\n",
|
|
|
+ "import pandas as pd\n",
|
|
|
+ "from LIB.BACKEND import DBManager, Log\n",
|
|
|
+ "from pandasql import sqldf "
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 2,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "start_time = (datetime.datetime.now()+datetime.timedelta(days=-7)).strftime('%Y-%m-%d %H:%M:%S')\n",
|
|
|
+ "end_time=datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 13,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "dbManager = DBManager.DBManager()"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 3,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "idlist=['TJMCL120502305010','TJMCL120502305022','TJMCL120502305038','TJMCL120502305026']"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 4,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "df_bms=pd.DataFrame()\n",
|
|
|
+ "df_gps=pd.DataFrame()"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "for sn in idlist:\n",
|
|
|
+ " df_data = dbManager.get_data(sn=sn, start_time=start_time, end_time=end_time, data_groups=['bms','gps'])\n",
|
|
|
+ " df_data_bms=df_data['bms'] \n",
|
|
|
+ " df_data_bms[\"sn\"]=sn\n",
|
|
|
+ " df_data_gps=df_data['gps']\n",
|
|
|
+ " df_data_gps[\"sn\"]=sn\n",
|
|
|
+ " df_bms=pd.concat([df_bms,df_data_bms],axis=0,ignore_index=True)\n",
|
|
|
+ " df_gps=pd.concat([df_gps,df_data_gps],axis=0,ignore_index=True)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 17,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "df_bms.to_csv('onlinedata/BMS.csv',index=False)\n",
|
|
|
+ "df_gps.to_csv('onlinedata/GPS.csv',index=False)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 2,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##基于上次保存的数据进行加工,生产可以省略\n",
|
|
|
+ "df_bms=pd.read_csv(\"onlinedata/BMS.csv\")"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 4,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##确定是按 sn 和 时间戳排序\n",
|
|
|
+ "\n",
|
|
|
+ "df_mbs=df_bms.sort_values([\"sn\",\"时间戳\"],ascending = [True, True])\n",
|
|
|
+ "\n",
|
|
|
+ "##df_gps=df_gps.sort_values([\"sn\",\"时间戳\"],ascending = [True, True])"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 5,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##寻找电压字段,计算每行的最大电压,供后续关联使用\n",
|
|
|
+ "column=list(df_mbs.columns)\n",
|
|
|
+ "cellvolt_list = [s for s in column if '单体电压' in s]\n",
|
|
|
+ "df_v=df_mbs[cellvolt_list]\n",
|
|
|
+ "df_mbs[\"v_max\"]=df_v.max(axis=1)\n",
|
|
|
+ "df_join=df_mbs[[\"sn\",\"时间戳\",\"v_max\"]]\n",
|
|
|
+ "p_col=['sn','data_time','v_max']\n",
|
|
|
+ "df_join.columns=p_col"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 6,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "###核实数据\n",
|
|
|
+ "df_join.to_csv('onlinedata/df_join.csv',index=False)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 7,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##列变量重命名\n",
|
|
|
+ "df_bms_1=df_bms[[\"时间戳\",\"总电流[A]\",\"总电压[V]\",\"充电状态\",\"SOC[%]\",\"sn\"]]\n",
|
|
|
+ "df_bms_1.columns=[\"data_time\",\"aa\",\"vv\",\"charge\",\"soc\",\"sn\"]\n",
|
|
|
+ "df_bms_2=df_bms_1"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 8,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##核实数据\n",
|
|
|
+ "df_bms_2.to_csv('onlinedata/BMS_2.csv',index=False)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##更换充电状态,部分为5的替换为3\n",
|
|
|
+ "df_bms_2[\"charge\"][df_bms_2[\"charge\"]==5]=3"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "###按充电状态第一次分组\n",
|
|
|
+ "df_bms_2['block'] = (df_bms_2[\"charge\"].shift(1) != df_bms_2[\"charge\"]).astype(int).cumsum()\n",
|
|
|
+ "##核实数据\n",
|
|
|
+ "df_bms_2.to_csv(\"onlinedata/df_bms_2_block.csv\",index=False)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 11,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##统计分组的个数\n",
|
|
|
+ "pysqldf = lambda q: sqldf(q, globals())\n",
|
|
|
+ "sql=\"select count(*) as num1,df_bms_2.block from df_bms_2 group by df_bms_2.block order by num1 \"\n",
|
|
|
+ "df_block_num= pysqldf(sql)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 12,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##选取每段数量大于5的分组\n",
|
|
|
+ "df_block_target=df_block_num[ df_block_num[\"num1\"] >5 ] "
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 13,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "###行驶的数据\n",
|
|
|
+ "sql1=\"select * from df_bms_2 where charge=3 and block in (select block from df_block_target)\"\n",
|
|
|
+ "df_drive=pysqldf(sql1)\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 14,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##核实数据\n",
|
|
|
+ "df_drive.to_csv(\"onlinedata/df_dirve_blcok.csv\",index=False)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 15,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##再按时间划分\n",
|
|
|
+ "df_drive[\"order_delta\"]= pd.to_datetime(df_drive[\"data_time\"] )-pd.to_datetime( df_drive[\"data_time\"].shift(1).fillna(method = 'backfill',axis = 0))\n",
|
|
|
+ "df_drive[\"order_delta\"]=pd.to_timedelta(df_drive[\"order_delta\"])\n",
|
|
|
+ "df_drive[\"order_delta\"] = df_drive[\"order_delta\"].dt.total_seconds()"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 16,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##限定时间间隔1200秒切割和合并\n",
|
|
|
+ "df_drive[\"time_flag\"]=df_drive[\"order_delta\"]>1200\n",
|
|
|
+ "df_drive['time_block']=(df_drive[\"time_flag\"].shift(1) != df_drive[\"time_flag\"]).astype(int).cumsum()\n",
|
|
|
+ "###核实数据\n",
|
|
|
+ "df_drive.to_csv(\"onlinedata/df_drive.csv\",index=False)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 17,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##数量大于5个的分组\n",
|
|
|
+ "sql2=\"select count(*) as num1,time_block from df_drive group by time_block order by num1\"\n",
|
|
|
+ "df_drive_block_num= pysqldf(sql2)\n",
|
|
|
+ "df_drive_time_block_target=df_drive_block_num[ df_drive_block_num[\"num1\"] >5 ] ####限定每一段的点数\n",
|
|
|
+ "sql3=\"select * from df_drive where time_block in (select time_block from df_drive_time_block_target)\"\n",
|
|
|
+ "df_drive_block=pysqldf(sql3)\n",
|
|
|
+ "df_drive_block.to_csv(\"onlinedata/df_drive.csv\",index=False)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 18,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##汇总驾驶数据(包含长短驾驶)\n",
|
|
|
+ "sql4=\"select min(data_time),max(data_time),min(soc),max(soc),time_block,sn from df_drive_block group by sn, time_block\"\n",
|
|
|
+ "df_drive_static=pysqldf(sql4)\n",
|
|
|
+ "df_drive_static.to_csv(\"onlinedata/df_drive_static.csv\",index=False)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 19,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##选取长驾驶\n",
|
|
|
+ "df_drive_time_block_target_l=df_drive_block_num[ df_drive_block_num[\"num1\"] >120 ] ####限定每一段的点数\n",
|
|
|
+ "sql31=\"select * from df_drive where time_block in (select time_block from df_drive_time_block_target_l)\"\n",
|
|
|
+ "df_drive_block_l=pysqldf(sql31)\n",
|
|
|
+ "##核实数据\n",
|
|
|
+ "df_drive_block_l.to_csv(\"onlinedata/df_drive_l.csv\",index=False)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 20,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "###统计长驾驶\n",
|
|
|
+ "sql41=\"select min(data_time),max(data_time),min(soc),max(soc),time_block,sn from df_drive_block_l group by sn, time_block\"\n",
|
|
|
+ "df_drive_static_l=pysqldf(sql41)\n",
|
|
|
+ "##核实数据\n",
|
|
|
+ "df_drive_static_l.to_csv(\"onlinedata/df_drive_static_l.csv\",index=False)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 21,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "###区分充电和静置\n",
|
|
|
+ "sql5=\"select * from df_bms_2 where charge!=3 and block in (select block from df_block_target)\"\n",
|
|
|
+ "df_no_drive=pysqldf(sql5)\n",
|
|
|
+ "df_no_drive.to_csv(\"onlinedata/df_no_drive.csv\",index=False)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 22,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "### 填充电流的空值\n",
|
|
|
+ "df_no_drive[\"aa\"]=df_no_drive[\"aa\"].fillna(method = 'ffill',axis = 0)\n",
|
|
|
+ "df_no_drive[\"aa\"]=pd.to_numeric(df_no_drive[\"aa\"], errors='ignore')\n",
|
|
|
+ "df_no_drive.to_csv(\"onlinedata/df_no_drive.csv\",index=False)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##判断充电\n",
|
|
|
+ "df_charge=df_no_drive[df_no_drive[\"aa\"].abs()>0.5]\n",
|
|
|
+ "df_charge[\"order_delta\"]= pd.to_datetime(df_charge[\"data_time\"] )-pd.to_datetime( df_charge[\"data_time\"].shift(1).fillna(method = 'backfill',axis = 0))\n",
|
|
|
+ "df_charge[\"order_delta\"]=pd.to_timedelta(df_charge[\"order_delta\"])\n",
|
|
|
+ "df_charge[\"order_delta\"] = df_charge[\"order_delta\"].dt.total_seconds()\n",
|
|
|
+ "df_charge[\"order_delta\"].describe()\n",
|
|
|
+ "df_charge[\"time_flag\"]=df_charge[\"order_delta\"]>1200 ####限定时间间隔\n",
|
|
|
+ "df_charge['time_block']=(df_charge[\"time_flag\"].shift(1) != df_charge[\"time_flag\"]).astype(int).cumsum()\n",
|
|
|
+ "df_charge.to_csv(\"onlinedata/df_charge.csv\",index=False)\n",
|
|
|
+ "\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 25,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##排除单点充电\n",
|
|
|
+ "sql6=\"select count(*) as num1,time_block from df_charge group by time_block order by num1\"\n",
|
|
|
+ "df_charge_block_num= pysqldf(sql6)\n",
|
|
|
+ "df_charge_block_target=df_charge_block_num[ df_charge_block_num[\"num1\"] >5 ] ####限定每一段的点数\n",
|
|
|
+ "sql7=\"select * from df_charge where time_block in (select time_block from df_charge_block_target)\"\n",
|
|
|
+ "df_charge=pysqldf(sql7)\n",
|
|
|
+ "df_charge.to_csv(\"onlinedata/df_charge.csv\",index=False)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##判断静置\n",
|
|
|
+ "df_stand=df_no_drive[df_no_drive[\"aa\"].abs()<=0.5]\n",
|
|
|
+ "df_stand[\"order_delta\"]= pd.to_datetime(df_stand[\"data_time\"] )-pd.to_datetime( df_stand[\"data_time\"].shift(1).fillna(method = 'backfill',axis = 0))\n",
|
|
|
+ "df_stand[\"order_delta\"]=pd.to_timedelta(df_stand[\"order_delta\"])\n",
|
|
|
+ "df_stand[\"order_delta\"] = df_stand[\"order_delta\"].dt.total_seconds()\n",
|
|
|
+ "df_stand[\"order_delta\"].describe()\n",
|
|
|
+ "df_stand[\"time_flag\"]=df_stand[\"order_delta\"]>1200 ####限定时间间隔\n",
|
|
|
+ "df_stand['time_block']=(df_stand[\"time_flag\"].shift(1) != df_stand[\"time_flag\"]).astype(int).cumsum()\n",
|
|
|
+ "df_stand.to_csv(\"onlinedata/df_stand.csv\",index=False)\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 27,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##排除单点静置\n",
|
|
|
+ "sql8=\"select count(*) as num1,time_block from df_stand group by time_block order by num1\"\n",
|
|
|
+ "df_stand_block_num= pysqldf(sql8)\n",
|
|
|
+ "df_stand_block_target=df_stand_block_num[ df_stand_block_num[\"num1\"] >5 ] ####限定每一段的点数\n",
|
|
|
+ "sql9=\"select * from df_stand where time_block in (select time_block from df_stand_block_target)\"\n",
|
|
|
+ "df_stand=pysqldf(sql9)\n",
|
|
|
+ "df_stand.to_csv(\"onlinedata/df_stand.csv\",index=False)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 200,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##充电数据统计\n",
|
|
|
+ "sql10=\"select min(data_time),max(data_time),min(soc),max(soc),time_block,sn from df_charge group by sn,time_block\"\n",
|
|
|
+ "df_charge_static=pysqldf(sql10)\n",
|
|
|
+ "df_charge_static.to_csv(\"onlinedata/df_charge_static.csv\",index=False)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 201,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "new_charge_clumns=[\"charge_time_b\",\"charge_time_e\",\"soc_min\",\"soc_max\",\"time_block\",\"sn\"]\n",
|
|
|
+ "df_charge_static.columns=new_charge_clumns"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 30,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##静置时间统计\n",
|
|
|
+ "sql11=\"select min(data_time),max(data_time),min(soc),max(soc),block,sn from df_stand group by sn, block\"\n",
|
|
|
+ "df_stand_static=pysqldf(sql11)\n",
|
|
|
+ "df_stand_static.to_csv(\"onlinedata/df_stand_static.csv\",index=False)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 32,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##进行指标加工,长驾驶等,没有计算短驾驶的指标"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 33,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "p1_col=[\"data_time_b\",\"data_time_e\",\"soc_min\",\"soc_max\",\"time_block\",\"sn\"]\n",
|
|
|
+ "df_drive_static_l.columns=p1_col"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 34,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##数据核实\n",
|
|
|
+ "df_drive_static_l.to_csv(\"onlinedata/df_stand_static.csv\",index=False)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 35,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##关联最大电压\n",
|
|
|
+ "sql11=\"select a.*, b.v_max as v_max_b ,c.v_max as v_max_e from df_drive_static_l a left join df_join b on a.data_time_b=b.data_time and a.sn=b.sn \\\n",
|
|
|
+ "left join df_join c on a.data_time_e=c.data_time and a.sn=c.sn \"\n",
|
|
|
+ "drive_statics_l_add=pysqldf(sql11)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 36,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "drive_statics_l_add.to_csv(\"onlinedata/drive_statics_l_add.csv\",index=False)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 37,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "###计算时间差\n",
|
|
|
+ "drive_statics_l_add[\"time_diff\"]= pd.to_datetime(drive_statics_l_add[\"data_time_e\"] )-pd.to_datetime(drive_statics_l_add[\"data_time_b\"])\n",
|
|
|
+ "drive_statics_l_add[\"time_diff\"]=(drive_statics_l_add[\"time_diff\"].dt.total_seconds()/3600).round(2)\n",
|
|
|
+ "sql12=\"select substring(data_time_b,1,10) from drive_statics_l_add \"\n",
|
|
|
+ "drive_statics_l_add[\"day\"]=pysqldf(sql12)\n",
|
|
|
+ "drive_statics_l_add[\"hour_b\"]=pd.to_datetime(drive_statics_l_add[\"data_time_b\"]).dt.hour\n",
|
|
|
+ "drive_statics_l_add[\"hour_e\"]=pd.to_datetime(drive_statics_l_add[\"data_time_e\"]).dt.hour\n",
|
|
|
+ "\n",
|
|
|
+ "def Timephased(x):\n",
|
|
|
+ " if x>=0 and x<=4 :\n",
|
|
|
+ " return \"D-0-4\"\n",
|
|
|
+ " elif x>4 and x<=8:\n",
|
|
|
+ " return \"D-4-8\"\n",
|
|
|
+ " elif x>8 and x<=12:\n",
|
|
|
+ " return \"D-8-12\"\n",
|
|
|
+ " elif x>12 and x<=16:\n",
|
|
|
+ " return \"D-12-16\"\n",
|
|
|
+ " elif x>16 and x<=20:\n",
|
|
|
+ " return \"D-16-20\"\n",
|
|
|
+ " elif x>20 and x<=24:\n",
|
|
|
+ " return \"D-20-24\"\n",
|
|
|
+ "\n",
|
|
|
+ "drive_statics_l_add[\"time_phase_b\"]=drive_statics_l_add[\"hour_b\"].apply(lambda x: Timephased(x))\n",
|
|
|
+ "drive_statics_l_add[\"time_phase_e\"]=drive_statics_l_add[\"hour_e\"].apply(lambda x: Timephased(x))"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 38,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##计算 SOC 差值\n",
|
|
|
+ "drive_statics_l_add[\"soc_diff\"]=drive_statics_l_add[\"soc_max\"]-drive_statics_l_add[\"soc_min\"]"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 39,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "####计算 错行 SOC 差值 \n",
|
|
|
+ "####第二个行程最大SOC 减去本行程最小的SOC值,来判断本行程结束是否换电\n",
|
|
|
+ "drive_statics_l_add[\"soc_diff_u\"] = (drive_statics_l_add[\"soc_max\"].shift(-1).fillna(method = 'ffill',axis = 0) -drive_statics_l_add[\"soc_min\"]).abs()/drive_statics_l_add[\"soc_min\"]\n",
|
|
|
+ "def Changestatus(x):\n",
|
|
|
+ " if x>0.2:\n",
|
|
|
+ " return 1\n",
|
|
|
+ " else:\n",
|
|
|
+ " return 0\n",
|
|
|
+ " \n",
|
|
|
+ "drive_statics_l_add[\"end_change\"]=drive_statics_l_add[\"soc_diff_u\"].apply(lambda x: Changestatus(x)).astype(\"int\")\n",
|
|
|
+ "drive_statics_l_add[\"begin_change\"] = drive_statics_l_add[\"end_change\"].shift(1).fillna(method = 'bfill',axis = 0).astype(\"int\") \n",
|
|
|
+ "drive_statics_l_add[\"drive_block\"]=drive_statics_l_add[\"begin_change\"].cumsum()\n",
|
|
|
+ "drive_statics_l_add.to_csv(\"onlinedata/drive_statics_l_add.csv\",index=False) "
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 194,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "###关联充电表\n",
|
|
|
+ "##提取行驶后充电的数据\n",
|
|
|
+ "data_drive_charge=drive_statics_l_add[drive_statics_l_add[\"end_change\"]==1]"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 195,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##核实数据\n",
|
|
|
+ "data_drive_charge.to_csv(\"onlinedata/data_drive_charge.csv\",index=False)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 198,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##只统计等待小于三个小时的数据,不然关联不到\n",
|
|
|
+ "sql13=\"select a.* ,b.charge_time_b from data_drive_charge a left join df_charge_static b on a.sn=b.sn and \\\n",
|
|
|
+ "JULIANDAY(b.charge_time_b)-JULIANDAY(a.data_time_e)<0.125 and JULIANDAY(b.charge_time_b)-JULIANDAY(a.data_time_e)>0 \""
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 202,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "data_drive_charge_add=pysqldf(sql13)\n",
|
|
|
+ "##核实数据\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 203,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "###计算等待时间\n",
|
|
|
+ "sql14=\"select *,(JULIANDAY(charge_time_b)-JULIANDAY(data_time_e))*24*60 wait_time from data_drive_charge_add \"\n",
|
|
|
+ "data_drive_charge_add=pysqldf(sql14)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 204,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##核实数据\n",
|
|
|
+ "data_drive_charge_add.to_csv(\"onlinedata/data_drive_charge_add.csv\",index=False)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 205,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##换电总次数\n",
|
|
|
+ "sqlc1=\"select count(*) from data_drive_charge_add \"\n",
|
|
|
+ "count_change=pysqldf(sqlc1)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 206,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##等待超过三小时的次数\n",
|
|
|
+ "sqlc2=\"select count(*) from data_drive_charge_add where wait_time is null\"\n",
|
|
|
+ "count_wait_long=pysqldf(sqlc2)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 210,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##三小时以下的时间分布情况,单位为分钟\n",
|
|
|
+ "df_charge_wait_time=data_drive_charge_add[\"wait_time\"].agg(['sum','max','min','median','mean'])"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 211,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##75位数\n",
|
|
|
+ "df_charge_wait_time75=data_drive_charge_add[\"wait_time\"].quantile(0.75)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 215,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##关联经纬度\n",
|
|
|
+ "##加载数据生产上不需要\n",
|
|
|
+ "df_gps=pd.read_csv(\"onlinedata/GPS.csv\")"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 223,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##筛选列变量\n",
|
|
|
+ "df_gps_2=df_gps[[\"时间戳\",\"纬度\",\"经度\",\"sn\"]]"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 227,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##重命名\n",
|
|
|
+ "gps_columns=[\"data_time\",\"y\",\"x\",\"sn\"]\n",
|
|
|
+ "df_gps_2.columns=gps_columns"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 228,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "sqlc3=\"select distinct(substring(data_time_b,1,10)) data_day from data_drive_charge_add union \\\n",
|
|
|
+ "select distinct(substring(data_time_e,1,10)) data_day from data_drive_charge_add \""
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 229,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "data_time=pysqldf(sqlc3)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 231,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##gps表太大关联慢,先处理一下\n",
|
|
|
+ "sqlc4=\"select * from df_gps_2 where substring(data_time,1,10) in (select data_day from data_time ) \""
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 232,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "df_gps_core=pysqldf(sqlc4)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 248,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##筛选开始未换电,行程后换电的情况\n",
|
|
|
+ "sqlc5=\"select * from data_drive_charge_add where begin_change=0\"\n",
|
|
|
+ "data_drive_charge_add_e=pysqldf(sqlc5)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 259,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##关联经纬度\n",
|
|
|
+ "sqlc6=\"select a.*,b.y y_b,b.x x_b,c.y y_e,c.x x_e from data_drive_charge_add_e a \\\n",
|
|
|
+ "left join df_gps_core b on a.sn=b.sn and abs((JULIANDAY(a.data_time_b)-JULIANDAY(b.data_time))*24*60*60)<5 \\\n",
|
|
|
+ "left join df_gps_core c on a.sn=c.sn and abs((JULIANDAY(a.data_time_e)-JULIANDAY(c.data_time))*24*60*60)<5\""
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 260,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "drive_charge_gps=pysqldf(sqlc6)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 261,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##核查数据\n",
|
|
|
+ "drive_charge_gps.to_csv(\"onlinedata/drive_charge_gps.csv\",index=False)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 262,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "12.717310483106838\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "##距离试算\n",
|
|
|
+ "from haversine import haversine\n",
|
|
|
+ "a=(34.500821,114.990448)\n",
|
|
|
+ "b=(34.41177,115.077483)\n",
|
|
|
+ "dis=haversine(a,b)\n",
|
|
|
+ "print(dis)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 263,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "sqlc7=\"select * from drive_charge_gps where y_b>0 and x_b>0 and x_b>0 and x_e>0\"\n",
|
|
|
+ "drive_charge_gps_core=pysqldf(sqlc7)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 270,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##统计换电行驶的距离\n",
|
|
|
+ "drive_charge_gps_core[\"dist\"]=drive_charge_gps_core.apply(lambda row :haversine((row[\"y_b\"],row[\"x_b\"]),(row[\"y_e\"],row[\"x_e\"])),axis=1)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 271,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "0 12.717310\n",
|
|
|
+ "1 0.027449\n",
|
|
|
+ "2 3.498388\n",
|
|
|
+ "3 12.693435\n",
|
|
|
+ "Name: dist, dtype: float64"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 271,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "drive_charge_gps_core[\"dist\"]"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 272,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##核实数据\n",
|
|
|
+ "drive_charge_gps_core.to_csv(\"onlinedata/drive_charge_gps_core.csv\")"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 100,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##计算统计指标,行驶相关指标,以drive_statics_l_add为主表\n",
|
|
|
+ "##本周支撑行驶的次数\n",
|
|
|
+ "drive_count_total=drive_statics_l_add[[\"sn\",\"drive_block\"]].groupby(\"sn\").agg('count')"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 101,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "drive_count_total.columns=[\"count_total\"]"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 102,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##计算每天的驾驶次数\n",
|
|
|
+ "drive_count_total[\"count_day\"]=round(drive_count_total[\"count_total\"]/7,2)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 103,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##单次行驶的时间\n",
|
|
|
+ "drive_time_single=drive_statics_l_add[[\"sn\",\"time_diff\"]].groupby(\"sn\").agg(['max','min','median','mean'])"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 104,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "drive_time_single_75=drive_statics_l_add.groupby(\"sn\").time_diff.quantile(0.75)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 105,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##单次行驶SOC消耗情况\n",
|
|
|
+ "drive_soc_single=drive_statics_l_add[[\"sn\",\"soc_diff\"]].groupby(\"sn\").agg(['max','min','median','mean'])"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 106,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "drive_soc_single_75=drive_statics_l_add.groupby(\"sn\").soc_diff.quantile(0.75)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 107,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##行驶后换电次数统计\n",
|
|
|
+ "end_charge_count=data_drive_charge.groupby(\"sn\").sn.count()"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 108,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##每天换电次数情况\n",
|
|
|
+ "change_day_avg_num=round(end_charge_count/7,2)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 109,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "frames=[drive_count_total,drive_time_single,drive_time_single_75,drive_soc_single,drive_soc_single_75,end_charge_count,change_day_avg_num]"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 110,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##进行表的关联\n",
|
|
|
+ "drive_describe_single = pd.concat(frames, axis=1, join='inner')"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "drive_describe_single"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 112,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "drive_describe_single_clumns=['total_driving_num','day_driving_avg_num','time_diff_max','time_diff_min','time_diff_median'\n",
|
|
|
+ " ,'time_diff_mean','time_diff_per75','soc_diff_max','soc_diff_min','soc_diff_median'\n",
|
|
|
+ " ,'soc_diff_mean','soc_diff_per75','change_num','change_day_avg_num']"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 113,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "drive_describe_single.columns=drive_describe_single_clumns"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 132,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##数据核实\n",
|
|
|
+ "drive_describe_single.to_csv(\"onlinedata/drive_describe_single.csv\")"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 114,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##单个周期情况统计\n",
|
|
|
+ "drive_statics_period=drive_statics_l_add.groupby([\"sn\",\"drive_block\"]).agg({'data_time_b':'min',\n",
|
|
|
+ " 'data_time_e':'max',\n",
|
|
|
+ " 'soc_min':'min',\n",
|
|
|
+ " 'soc_max':'max',\n",
|
|
|
+ " 'v_max_b':'max',\n",
|
|
|
+ " 'v_max_e':'min',\n",
|
|
|
+ " 'time_diff':'sum',\n",
|
|
|
+ " 'day':'min', \n",
|
|
|
+ " 'hour_b':'min',\n",
|
|
|
+ " 'hour_e':'max', \n",
|
|
|
+ " 'time_phase_b':'min',\n",
|
|
|
+ " 'time_phase_e':'max',\n",
|
|
|
+ " 'soc_diff':'sum'})"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 115,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "drive_statics_period=drive_statics_period.reset_index()"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 116,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##单个周期行驶的时间\n",
|
|
|
+ "drive_time_period=drive_statics_period[[\"sn\",\"time_diff\"]].groupby(\"sn\").agg(['max','min','median','mean'])"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 117,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##基于充电周期的行驶时间75位数\n",
|
|
|
+ "drive_time_period_75=drive_statics_period.groupby(\"sn\").time_diff.quantile(0.75)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 118,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##单个周期SOC消耗情况\n",
|
|
|
+ "drive_soc_period=drive_statics_period[[\"sn\",\"soc_diff\"]].groupby(\"sn\").agg(['max','min','median','mean'])"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 119,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##基于充电周期的SOC消耗75位数\n",
|
|
|
+ "drive_soc_period_75=drive_statics_period.groupby(\"sn\").soc_diff.quantile(0.75)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 120,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##充电周期个数\n",
|
|
|
+ "drive_period_count=drive_statics_period.groupby(\"sn\").sn.count()"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 122,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##每天的充电周期个数\n",
|
|
|
+ "drive_period_count_day=round(drive_period_count/7,2)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 123,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "frames=[drive_time_period,drive_time_period_75,drive_soc_period,drive_soc_period_75,drive_period_count,drive_period_count_day]"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 124,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##进行表的关联\n",
|
|
|
+ "drive_describe_period = pd.concat(frames, axis=1, join='inner')"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 127,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "drive_describe_single_columns=['period_time_diff_max','period_time_diff_min','period_diff_median'\n",
|
|
|
+ " ,'period_time_diff_mean','period_time_diff_per75','period_soc_diff_max','prriod_soc_diff_min'\n",
|
|
|
+ " ,'period_soc_diff_median','period_soc_diff_mean','period_soc_diff_per75'\n",
|
|
|
+ " ,'period_count','period_count_day']"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 128,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "drive_describe_period.columns=drive_describe_single_columns"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 130,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##核实数据\n",
|
|
|
+ "drive_describe_period.to_csv(\"onlinedata/drive_describe_period.csv\")"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##换电站总体指标以drive_statics_l_add为主表"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 145,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##活跃电池数量\n",
|
|
|
+ "sql15=\"select count(distinct sn) from drive_statics_l_add \"\n",
|
|
|
+ "count_active=pysqldf(sql15)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 150,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##换电次数\n",
|
|
|
+ "sql16=\"select count(end_change) from drive_statics_l_add where end_change=1 \"\n",
|
|
|
+ "count_change=pysqldf(sql16)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 148,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##换电次数/每天\n",
|
|
|
+ "count_charge_day=round(count_change/7,2)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 151,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##行驶后换电时间点\n",
|
|
|
+ "sql17=\"select count(*) ,time_phase_e from drive_statics_l_add where end_change=1 group by time_phase_e \"\n",
|
|
|
+ "change_time_slot=pysqldf(sql17)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 153,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##换电后出发的时间段分布\n",
|
|
|
+ "sql18=\"select count(*) ,time_phase_b from drive_statics_l_add where begin_change=1 group by time_phase_b \"\n",
|
|
|
+ "start_time_slot=pysqldf(sql18)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 155,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##充电情况分析主表df_charge_static\n",
|
|
|
+ "charge_columns=['data_time_b','data_time_e','soc_min','soc_max','time_block','sn']\n",
|
|
|
+ "df_charge_static.columns=charge_columns"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 156,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##计算时间差\n",
|
|
|
+ "df_charge_static[\"time_diff\"]= pd.to_datetime(df_charge_static[\"data_time_e\"] )-pd.to_datetime(df_charge_static[\"data_time_b\"])\n",
|
|
|
+ "df_charge_static[\"time_diff\"]=(df_charge_static[\"time_diff\"].dt.total_seconds()/3600).round(2)\n",
|
|
|
+ "sql12=\"select substring(data_time_b,1,10) from df_charge_static \"\n",
|
|
|
+ "df_charge_static[\"day\"]=pysqldf(sql12)\n",
|
|
|
+ "df_charge_static[\"hour_b\"]=pd.to_datetime(df_charge_static[\"data_time_b\"]).dt.hour\n",
|
|
|
+ "df_charge_static[\"hour_e\"]=pd.to_datetime(df_charge_static[\"data_time_e\"]).dt.hour\n",
|
|
|
+ "\n",
|
|
|
+ "def Timephased(x):\n",
|
|
|
+ " if x>=0 and x<=4 :\n",
|
|
|
+ " return \"D-0-4\"\n",
|
|
|
+ " elif x>4 and x<=8:\n",
|
|
|
+ " return \"D-4-8\"\n",
|
|
|
+ " elif x>8 and x<=12:\n",
|
|
|
+ " return \"D-8-12\"\n",
|
|
|
+ " elif x>12 and x<=16:\n",
|
|
|
+ " return \"D-12-16\"\n",
|
|
|
+ " elif x>16 and x<=20:\n",
|
|
|
+ " return \"D-16-20\"\n",
|
|
|
+ " elif x>20 and x<=24:\n",
|
|
|
+ " return \"D-20-24\"\n",
|
|
|
+ "\n",
|
|
|
+ "df_charge_static[\"time_phase_b\"]=df_charge_static[\"hour_b\"].apply(lambda x: Timephased(x))\n",
|
|
|
+ "df_charge_static[\"time_phase_e\"]=df_charge_static[\"hour_e\"].apply(lambda x: Timephased(x))"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 157,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##计算 SOC 差值\n",
|
|
|
+ "df_charge_static[\"soc_diff\"]=df_charge_static[\"soc_max\"]-df_charge_static[\"soc_min\"]"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 160,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##选取充电时间大于12分钟的时间段\n",
|
|
|
+ "df_charge_static=df_charge_static[df_charge_static[\"time_diff\"]>0.2]"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 167,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##充电时间统计\n",
|
|
|
+ "df_charge_static_time=df_charge_static[\"time_diff\"].agg(['sum','max','min','median','mean'])"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 177,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "df_charge_static_time_75=df_charge_static[\"time_diff\"].quantile(0.75)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 168,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##充电SOC变化统计\n",
|
|
|
+ "df_charge_static_soc=df_charge_static[\"soc_diff\"].agg(['sum','max','min','median','mean'])"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 178,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "df_charge_static_soc_75=df_charge_static[\"soc_diff\"].quantile(0.75)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 179,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##充电时间段的情况\n",
|
|
|
+ "df_charge_static_phase=df_charge_static[[\"time_phase_b\",\"time_block\"]].groupby(\"time_phase_b\").agg(['count'])"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 181,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##充电结束SOC分布情况\n",
|
|
|
+ "df_charge_static_soc_e=df_charge_static[\"soc_max\"].agg(['max','min','median','mean'])"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 183,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "df_charge_static_soc_e75=df_charge_static[\"soc_max\"].quantile(0.75)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 184,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "##充电前SOC分布情况\n",
|
|
|
+ "df_charge_static_soc_b=df_charge_static[\"soc_min\"].agg(['max','min','median','mean'])"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 185,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "df_charge_static_soc_b75=df_charge_static[\"soc_min\"].quantile(0.75)"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "metadata": {
|
|
|
+ "kernelspec": {
|
|
|
+ "display_name": "py38",
|
|
|
+ "language": "python",
|
|
|
+ "name": "python3"
|
|
|
+ },
|
|
|
+ "language_info": {
|
|
|
+ "codemirror_mode": {
|
|
|
+ "name": "ipython",
|
|
|
+ "version": 3
|
|
|
+ },
|
|
|
+ "file_extension": ".py",
|
|
|
+ "mimetype": "text/x-python",
|
|
|
+ "name": "python",
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
+ "pygments_lexer": "ipython3",
|
|
|
+ "version": "3.8.15 (default, Nov 24 2022, 14:38:14) [MSC v.1916 64 bit (AMD64)]"
|
|
|
+ },
|
|
|
+ "orig_nbformat": 4,
|
|
|
+ "vscode": {
|
|
|
+ "interpreter": {
|
|
|
+ "hash": "4d6c15edccb966d2bb52e4527dc0611a26206d084fadc4df5e610d719652857f"
|
|
|
+ }
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "nbformat": 4,
|
|
|
+ "nbformat_minor": 2
|
|
|
+}
|