fisher 1 рік тому
батько
коміт
c2c48cd2f6

+ 1350 - 0
TEST/datapre.ipynb

@@ -0,0 +1,1350 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import datetime\n",
+    "import pandas as pd\n",
+    "from LIB.BACKEND import DBManager, Log\n",
+    "from pandasql import sqldf "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Query window: the 7 days ending now. The clock is read once so both bounds\n",
+    "# are derived from the same instant.\n",
+    "TIME_FORMAT = '%Y-%m-%d %H:%M:%S'\n",
+    "now = datetime.datetime.now()\n",
+    "start_time = (now - datetime.timedelta(days=7)).strftime(TIME_FORMAT)\n",
+    "end_time = now.strftime(TIME_FORMAT)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dbManager = DBManager.DBManager()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "idlist=['TJMCL120502305010','TJMCL120502305022','TJMCL120502305038','TJMCL120502305026']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_bms=pd.DataFrame()\n",
+    "df_gps=pd.DataFrame()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Fetch BMS and GPS data for every serial number, tag each frame with its sn,\n",
+    "# and append the result to whatever df_bms / df_gps already hold.\n",
+    "bms_frames = [df_bms]\n",
+    "gps_frames = [df_gps]\n",
+    "for sn in idlist:\n",
+    "    df_data = dbManager.get_data(sn=sn, start_time=start_time, end_time=end_time, data_groups=['bms','gps'])\n",
+    "    df_data_bms = df_data['bms']\n",
+    "    df_data_bms[\"sn\"] = sn\n",
+    "    df_data_gps = df_data['gps']\n",
+    "    df_data_gps[\"sn\"] = sn\n",
+    "    bms_frames.append(df_data_bms)\n",
+    "    gps_frames.append(df_data_gps)\n",
+    "# Concatenate once instead of re-copying the accumulated frame on every iteration.\n",
+    "df_bms = pd.concat(bms_frames, axis=0, ignore_index=True)\n",
+    "df_gps = pd.concat(gps_frames, axis=0, ignore_index=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_bms.to_csv('onlinedata/BMS.csv',index=False)\n",
+    "df_gps.to_csv('onlinedata/GPS.csv',index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##基于上次保存的数据进行加工,生产可以省略\n",
+    "df_bms=pd.read_csv(\"onlinedata/BMS.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##确定是按 sn 和 时间戳排序\n",
+    "\n",
+    "df_mbs=df_bms.sort_values([\"sn\",\"时间戳\"],ascending = [True, True])\n",
+    "\n",
+    "##df_gps=df_gps.sort_values([\"sn\",\"时间戳\"],ascending = [True, True])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##寻找电压字段,计算每行的最大电压,供后续关联使用\n",
+    "column=list(df_mbs.columns)\n",
+    "cellvolt_list = [s for s in column if '单体电压' in s]\n",
+    "df_v=df_mbs[cellvolt_list]\n",
+    "df_mbs[\"v_max\"]=df_v.max(axis=1)\n",
+    "df_join=df_mbs[[\"sn\",\"时间戳\",\"v_max\"]]\n",
+    "p_col=['sn','data_time','v_max']\n",
+    "df_join.columns=p_col"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "###核实数据\n",
+    "df_join.to_csv('onlinedata/df_join.csv',index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Select the working columns and rename them\n",
+    "# .copy() makes df_bms_1 an independent frame, so the in-place column\n",
+    "# assignments made on it afterwards never act on a slice of df_bms.\n",
+    "df_bms_1=df_bms[[\"时间戳\",\"总电流[A]\",\"总电压[V]\",\"充电状态\",\"SOC[%]\",\"sn\"]].copy()\n",
+    "df_bms_1.columns=[\"data_time\",\"aa\",\"vv\",\"charge\",\"soc\",\"sn\"]\n",
+    "df_bms_2=df_bms_1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##核实数据\n",
+    "df_bms_2.to_csv('onlinedata/BMS_2.csv',index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Normalize the charge state: rows with value 5 are recoded as 3\n",
+    "# One .loc assignment; chained indexing (df[col][mask] = v) may write to a temporary copy.\n",
+    "df_bms_2.loc[df_bms_2[\"charge\"]==5,\"charge\"]=3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "### First segmentation: consecutive rows with the same charge state form one block\n",
+    "# A change of sn also starts a new block, so a block never spans two batteries.\n",
+    "charge_changed = df_bms_2[\"charge\"].shift(1) != df_bms_2[\"charge\"]\n",
+    "sn_changed = df_bms_2[\"sn\"].shift(1) != df_bms_2[\"sn\"]\n",
+    "df_bms_2['block'] = (charge_changed | sn_changed).astype(int).cumsum()\n",
+    "## Dump for verification\n",
+    "df_bms_2.to_csv(\"onlinedata/df_bms_2_block.csv\",index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##统计分组的个数\n",
+    "def pysqldf(q):\n",
+    "    \"\"\"Run the SQL query q through pandasql against the names in this notebook's globals.\"\"\"\n",
+    "    return sqldf(q, globals())\n",
+    "sql=\"select count(*) as num1,df_bms_2.block from df_bms_2 group by df_bms_2.block  order by num1  \"\n",
+    "df_block_num= pysqldf(sql)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##选取每段数量大于5的分组\n",
+    "df_block_target=df_block_num[ df_block_num[\"num1\"] >5  ] "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "###行驶的数据\n",
+    "sql1=\"select * from df_bms_2  where  charge=3 and  block in (select block from df_block_target)\"\n",
+    "df_drive=pysqldf(sql1)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##核实数据\n",
+    "df_drive.to_csv(\"onlinedata/df_dirve_blcok.csv\",index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Second segmentation: gap in seconds between each row and the row before it\n",
+    "# The first row has no predecessor; back-filling the shifted column pairs it\n",
+    "# with itself, which gives a gap of 0. Series.bfill() replaces the deprecated\n",
+    "# fillna(method='backfill').\n",
+    "drive_time = pd.to_datetime(df_drive[\"data_time\"])\n",
+    "df_drive[\"order_delta\"] = (drive_time - drive_time.shift(1).bfill()).dt.total_seconds()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##限定时间间隔1200秒切割和合并\n",
+    "df_drive[\"time_flag\"]=df_drive[\"order_delta\"]>1200\n",
+    "df_drive['time_block']=(df_drive[\"time_flag\"].shift(1) != df_drive[\"time_flag\"]).astype(int).cumsum()\n",
+    "###核实数据\n",
+    "df_drive.to_csv(\"onlinedata/df_drive.csv\",index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##数量大于5个的分组\n",
+    "sql2=\"select count(*) as num1,time_block from df_drive  group by time_block  order by num1\"\n",
+    "df_drive_block_num= pysqldf(sql2)\n",
+    "df_drive_time_block_target=df_drive_block_num[ df_drive_block_num[\"num1\"] >5  ] ####限定每一段的点数\n",
+    "sql3=\"select * from df_drive  where   time_block in (select time_block from df_drive_time_block_target)\"\n",
+    "df_drive_block=pysqldf(sql3)\n",
+    "df_drive_block.to_csv(\"onlinedata/df_drive.csv\",index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##汇总驾驶数据(包含长短驾驶)\n",
+    "sql4=\"select min(data_time),max(data_time),min(soc),max(soc),time_block,sn from df_drive_block group by  sn,  time_block\"\n",
+    "df_drive_static=pysqldf(sql4)\n",
+    "df_drive_static.to_csv(\"onlinedata/df_drive_static.csv\",index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##选取长驾驶\n",
+    "df_drive_time_block_target_l=df_drive_block_num[ df_drive_block_num[\"num1\"] >120  ] ####限定每一段的点数\n",
+    "sql31=\"select * from df_drive  where   time_block in (select time_block from df_drive_time_block_target_l)\"\n",
+    "df_drive_block_l=pysqldf(sql31)\n",
+    "##核实数据\n",
+    "df_drive_block_l.to_csv(\"onlinedata/df_drive_l.csv\",index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "###统计长驾驶\n",
+    "sql41=\"select min(data_time),max(data_time),min(soc),max(soc),time_block,sn from df_drive_block_l group by  sn, time_block\"\n",
+    "df_drive_static_l=pysqldf(sql41)\n",
+    "##核实数据\n",
+    "df_drive_static_l.to_csv(\"onlinedata/df_drive_static_l.csv\",index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "###区分充电和静置\n",
+    "sql5=\"select * from df_bms_2  where  charge!=3  and  block in (select block from df_block_target)\"\n",
+    "df_no_drive=pysqldf(sql5)\n",
+    "df_no_drive.to_csv(\"onlinedata/df_no_drive.csv\",index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "### Fill gaps in the current column\n",
+    "# Convert to numbers first (unparseable entries become NaN), then forward-fill,\n",
+    "# so the column is numeric for the abs() thresholds applied to it afterwards.\n",
+    "# errors='ignore' and fillna(method=...) are deprecated in recent pandas.\n",
+    "df_no_drive[\"aa\"]=pd.to_numeric(df_no_drive[\"aa\"], errors='coerce').ffill()\n",
+    "df_no_drive.to_csv(\"onlinedata/df_no_drive.csv\",index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Charging rows: |current| above 0.5\n",
+    "# .copy() so the new columns are added to an independent frame, not to a slice of df_no_drive.\n",
+    "df_charge=df_no_drive[df_no_drive[\"aa\"].abs()>0.5].copy()\n",
+    "# Gap in seconds to the previous row; the first row is paired with itself (gap 0).\n",
+    "charge_time = pd.to_datetime(df_charge[\"data_time\"])\n",
+    "df_charge[\"order_delta\"] = (charge_time - charge_time.shift(1).bfill()).dt.total_seconds()\n",
+    "df_charge[\"time_flag\"]=df_charge[\"order_delta\"]>1200  #### gap threshold in seconds\n",
+    "df_charge['time_block']=(df_charge[\"time_flag\"].shift(1) != df_charge[\"time_flag\"]).astype(int).cumsum()\n",
+    "df_charge.to_csv(\"onlinedata/df_charge.csv\",index=False)\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##排除单点充电\n",
+    "sql6=\"select count(*) as num1,time_block from df_charge  group by time_block  order by num1\"\n",
+    "df_charge_block_num= pysqldf(sql6)\n",
+    "df_charge_block_target=df_charge_block_num[ df_charge_block_num[\"num1\"] >5  ] ####限定每一段的点数\n",
+    "sql7=\"select * from df_charge  where   time_block in (select time_block from  df_charge_block_target)\"\n",
+    "df_charge=pysqldf(sql7)\n",
+    "df_charge.to_csv(\"onlinedata/df_charge.csv\",index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Standing (idle) rows: |current| at or below 0.5\n",
+    "# .copy() so the new columns are added to an independent frame, not to a slice of df_no_drive.\n",
+    "df_stand=df_no_drive[df_no_drive[\"aa\"].abs()<=0.5].copy()\n",
+    "# Gap in seconds to the previous row; the first row is paired with itself (gap 0).\n",
+    "stand_time = pd.to_datetime(df_stand[\"data_time\"])\n",
+    "df_stand[\"order_delta\"] = (stand_time - stand_time.shift(1).bfill()).dt.total_seconds()\n",
+    "df_stand[\"time_flag\"]=df_stand[\"order_delta\"]>1200  #### gap threshold in seconds\n",
+    "df_stand['time_block']=(df_stand[\"time_flag\"].shift(1) != df_stand[\"time_flag\"]).astype(int).cumsum()\n",
+    "df_stand.to_csv(\"onlinedata/df_stand.csv\",index=False)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##排除单点静置\n",
+    "sql8=\"select count(*) as num1,time_block from df_stand  group by time_block  order by num1\"\n",
+    "df_stand_block_num= pysqldf(sql8)\n",
+    "df_stand_block_target=df_stand_block_num[ df_stand_block_num[\"num1\"] >5  ] ####限定每一段的点数\n",
+    "sql9=\"select * from df_stand  where   time_block in (select time_block from  df_stand_block_target)\"\n",
+    "df_stand=pysqldf(sql9)\n",
+    "df_stand.to_csv(\"onlinedata/df_stand.csv\",index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 200,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##充电数据统计\n",
+    "sql10=\"select min(data_time),max(data_time),min(soc),max(soc),time_block,sn from df_charge group by  sn,time_block\"\n",
+    "df_charge_static=pysqldf(sql10)\n",
+    "df_charge_static.to_csv(\"onlinedata/df_charge_static.csv\",index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 201,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "new_charge_clumns=[\"charge_time_b\",\"charge_time_e\",\"soc_min\",\"soc_max\",\"time_block\",\"sn\"]\n",
+    "df_charge_static.columns=new_charge_clumns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##静置时间统计\n",
+    "sql11=\"select min(data_time),max(data_time),min(soc),max(soc),block,sn from df_stand group by  sn, block\"\n",
+    "df_stand_static=pysqldf(sql11)\n",
+    "df_stand_static.to_csv(\"onlinedata/df_stand_static.csv\",index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##进行指标加工,长驾驶等,没有计算短驾驶的指标"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "p1_col=[\"data_time_b\",\"data_time_e\",\"soc_min\",\"soc_max\",\"time_block\",\"sn\"]\n",
+    "df_drive_static_l.columns=p1_col"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "## Dump for verification\n",
+    "# Written to the long-drive file: df_stand_static.csv holds the standing\n",
+    "# statistics and must not be overwritten with drive data.\n",
+    "df_drive_static_l.to_csv(\"onlinedata/df_drive_static_l.csv\",index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##关联最大电压\n",
+    "sql11=\"select a.*, b.v_max as v_max_b ,c.v_max as v_max_e  from df_drive_static_l a left join df_join b  on a.data_time_b=b.data_time and a.sn=b.sn  \\\n",
+    "left join  df_join c on     a.data_time_e=c.data_time and a.sn=c.sn \"\n",
+    "drive_statics_l_add=pysqldf(sql11)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "drive_statics_l_add.to_csv(\"onlinedata/drive_statics_l_add.csv\",index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "###计算时间差\n",
+    "drive_statics_l_add[\"time_diff\"]= pd.to_datetime(drive_statics_l_add[\"data_time_e\"] )-pd.to_datetime(drive_statics_l_add[\"data_time_b\"])\n",
+    "drive_statics_l_add[\"time_diff\"]=(drive_statics_l_add[\"time_diff\"].dt.total_seconds()/3600).round(2)\n",
+    "sql12=\"select substring(data_time_b,1,10)  from    drive_statics_l_add \"\n",
+    "drive_statics_l_add[\"day\"]=pysqldf(sql12)\n",
+    "drive_statics_l_add[\"hour_b\"]=pd.to_datetime(drive_statics_l_add[\"data_time_b\"]).dt.hour\n",
+    "drive_statics_l_add[\"hour_e\"]=pd.to_datetime(drive_statics_l_add[\"data_time_e\"]).dt.hour\n",
+    "\n",
+    "def Timephased(x):\n",
+    "    \"\"\"Map an hour of day to its 4-hour bucket label, e.g. 5 -> 'D-4-8'.\n",
+    "\n",
+    "    Buckets are half-open [start, start+4), so hours 4, 8, 12, 16 and 20 open\n",
+    "    their own bucket instead of closing the previous one. 24 is still accepted\n",
+    "    and mapped to 'D-20-24'. Returns None for NaN or values outside 0..24.\n",
+    "    \"\"\"\n",
+    "    if not 0 <= x <= 24:  # also rejects NaN, whose comparisons are always False\n",
+    "        return None\n",
+    "    start = min(int(x) // 4 * 4, 20)\n",
+    "    return \"D-%d-%d\" % (start, start + 4)\n",
+    "\n",
+    "drive_statics_l_add[\"time_phase_b\"]=drive_statics_l_add[\"hour_b\"].apply(lambda x: Timephased(x))\n",
+    "drive_statics_l_add[\"time_phase_e\"]=drive_statics_l_add[\"hour_e\"].apply(lambda x: Timephased(x))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##计算 SOC 差值\n",
+    "drive_statics_l_add[\"soc_diff\"]=drive_statics_l_add[\"soc_max\"]-drive_statics_l_add[\"soc_min\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#### Relative SOC jump between consecutive trips of the same battery:\n",
+    "#### |next trip's max SOC - this trip's min SOC| / this trip's min SOC.\n",
+    "#### It is used to decide whether the battery was swapped when this trip ended.\n",
+    "# Shift within each sn so a trip is never compared with another battery's trip.\n",
+    "# The last trip of an sn has no successor and falls back to its own soc_max.\n",
+    "next_soc_max = drive_statics_l_add.groupby(\"sn\")[\"soc_max\"].shift(-1).fillna(drive_statics_l_add[\"soc_max\"])\n",
+    "drive_statics_l_add[\"soc_diff_u\"]  = (next_soc_max -drive_statics_l_add[\"soc_min\"]).abs()/drive_statics_l_add[\"soc_min\"]\n",
+    "def Changestatus(x):\n",
+    "    \"\"\"Return 1 when the relative SOC jump x exceeds 0.2, otherwise 0.\"\"\"\n",
+    "    if x>0.2:\n",
+    "        return 1\n",
+    "    else:\n",
+    "        return 0\n",
+    "    \n",
+    "drive_statics_l_add[\"end_change\"]=drive_statics_l_add[\"soc_diff_u\"].apply(lambda x: Changestatus(x)).astype(\"int\")\n",
+    "# begin_change is the end_change of the previous trip of the same sn. The first\n",
+    "# trip of an sn has no predecessor and reuses its own end_change.\n",
+    "prev_end_change = drive_statics_l_add.groupby(\"sn\")[\"end_change\"].shift(1).fillna(drive_statics_l_add[\"end_change\"])\n",
+    "drive_statics_l_add[\"begin_change\"]  = prev_end_change.astype(\"int\") \n",
+    "drive_statics_l_add[\"drive_block\"]=drive_statics_l_add[\"begin_change\"].cumsum()\n",
+    "drive_statics_l_add.to_csv(\"onlinedata/drive_statics_l_add.csv\",index=False) "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 194,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "###关联充电表\n",
+    "##提取行驶后充电的数据\n",
+    "data_drive_charge=drive_statics_l_add[drive_statics_l_add[\"end_change\"]==1]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 195,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##核实数据\n",
+    "data_drive_charge.to_csv(\"onlinedata/data_drive_charge.csv\",index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 198,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##只统计等待小于三个小时的数据,不然关联不到\n",
+    "sql13=\"select a.* ,b.charge_time_b from data_drive_charge a left join df_charge_static b on a.sn=b.sn and  \\\n",
+    "JULIANDAY(b.charge_time_b)-JULIANDAY(a.data_time_e)<0.125 and JULIANDAY(b.charge_time_b)-JULIANDAY(a.data_time_e)>0 \""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 202,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_drive_charge_add=pysqldf(sql13)\n",
+    "##核实数据\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 203,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "###计算等待时间\n",
+    "sql14=\"select *,(JULIANDAY(charge_time_b)-JULIANDAY(data_time_e))*24*60  wait_time from data_drive_charge_add \"\n",
+    "data_drive_charge_add=pysqldf(sql14)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 204,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##核实数据\n",
+    "data_drive_charge_add.to_csv(\"onlinedata/data_drive_charge_add.csv\",index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 205,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##换电总次数\n",
+    "sqlc1=\"select count(*)  from data_drive_charge_add \"\n",
+    "count_change=pysqldf(sqlc1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 206,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##等待超过三小时的次数\n",
+    "sqlc2=\"select count(*)  from data_drive_charge_add where wait_time is null\"\n",
+    "count_wait_long=pysqldf(sqlc2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 210,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##三小时以下的时间分布情况,单位为分钟\n",
+    "df_charge_wait_time=data_drive_charge_add[\"wait_time\"].agg(['sum','max','min','median','mean'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 211,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##75位数\n",
+    "df_charge_wait_time75=data_drive_charge_add[\"wait_time\"].quantile(0.75)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 215,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##关联经纬度\n",
+    "##加载数据生产上不需要\n",
+    "df_gps=pd.read_csv(\"onlinedata/GPS.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 223,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##筛选列变量\n",
+    "df_gps_2=df_gps[[\"时间戳\",\"纬度\",\"经度\",\"sn\"]]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 227,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##重命名\n",
+    "gps_columns=[\"data_time\",\"y\",\"x\",\"sn\"]\n",
+    "df_gps_2.columns=gps_columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 228,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sqlc3=\"select  distinct(substring(data_time_b,1,10)) data_day from  data_drive_charge_add  union \\\n",
+    "select  distinct(substring(data_time_e,1,10)) data_day   from  data_drive_charge_add \""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 229,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_time=pysqldf(sqlc3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 231,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##gps表太大关联慢,先处理一下\n",
+    "sqlc4=\"select * from  df_gps_2  where  substring(data_time,1,10) in (select data_day from  data_time ) \""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 232,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_gps_core=pysqldf(sqlc4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 248,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##筛选开始未换电,行程后换电的情况\n",
+    "sqlc5=\"select * from data_drive_charge_add  where  begin_change=0\"\n",
+    "data_drive_charge_add_e=pysqldf(sqlc5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 259,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##关联经纬度\n",
+    "sqlc6=\"select a.*,b.y  y_b,b.x  x_b,c.y  y_e,c.x x_e from  data_drive_charge_add_e a \\\n",
+    "left join  df_gps_core b  on a.sn=b.sn and  abs((JULIANDAY(a.data_time_b)-JULIANDAY(b.data_time))*24*60*60)<5 \\\n",
+    "left join  df_gps_core c  on a.sn=c.sn and  abs((JULIANDAY(a.data_time_e)-JULIANDAY(c.data_time))*24*60*60)<5\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 260,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "drive_charge_gps=pysqldf(sqlc6)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 261,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##核查数据\n",
+    "drive_charge_gps.to_csv(\"onlinedata/drive_charge_gps.csv\",index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 262,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "12.717310483106838\n"
+     ]
+    }
+   ],
+   "source": [
+    "##距离试算\n",
+    "from haversine import haversine\n",
+    "a=(34.500821,114.990448)\n",
+    "b=(34.41177,115.077483)\n",
+    "dis=haversine(a,b)\n",
+    "print(dis)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 263,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Keep only rows whose start and end coordinates are all positive:\n",
+    "# y_b/x_b are the start latitude/longitude, y_e/x_e the end latitude/longitude.\n",
+    "sqlc7=\"select * from drive_charge_gps where y_b>0 and x_b>0 and y_e>0 and x_e>0\"\n",
+    "drive_charge_gps_core=pysqldf(sqlc7)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 270,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##统计换电行驶的距离\n",
+    "drive_charge_gps_core[\"dist\"]=drive_charge_gps_core.apply(lambda row :haversine((row[\"y_b\"],row[\"x_b\"]),(row[\"y_e\"],row[\"x_e\"])),axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 271,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0    12.717310\n",
+       "1     0.027449\n",
+       "2     3.498388\n",
+       "3    12.693435\n",
+       "Name: dist, dtype: float64"
+      ]
+     },
+     "execution_count": 271,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "drive_charge_gps_core[\"dist\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 272,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##核实数据\n",
+    "drive_charge_gps_core.to_csv(\"onlinedata/drive_charge_gps_core.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 100,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##计算统计指标,行驶相关指标,以drive_statics_l_add为主表\n",
+    "##本周支撑行驶的次数\n",
+    "drive_count_total=drive_statics_l_add[[\"sn\",\"drive_block\"]].groupby(\"sn\").agg('count')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 101,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "drive_count_total.columns=[\"count_total\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 102,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##计算每天的驾驶次数\n",
+    "drive_count_total[\"count_day\"]=round(drive_count_total[\"count_total\"]/7,2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 103,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##单次行驶的时间\n",
+    "drive_time_single=drive_statics_l_add[[\"sn\",\"time_diff\"]].groupby(\"sn\").agg(['max','min','median','mean'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 104,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "drive_time_single_75=drive_statics_l_add.groupby(\"sn\").time_diff.quantile(0.75)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 105,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##单次行驶SOC消耗情况\n",
+    "drive_soc_single=drive_statics_l_add[[\"sn\",\"soc_diff\"]].groupby(\"sn\").agg(['max','min','median','mean'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 106,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "drive_soc_single_75=drive_statics_l_add.groupby(\"sn\").soc_diff.quantile(0.75)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 107,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##行驶后换电次数统计\n",
+    "end_charge_count=data_drive_charge.groupby(\"sn\").sn.count()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 108,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##每天换电次数情况\n",
+    "change_day_avg_num=round(end_charge_count/7,2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 109,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "frames=[drive_count_total,drive_time_single,drive_time_single_75,drive_soc_single,drive_soc_single_75,end_charge_count,change_day_avg_num]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 110,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##进行表的关联\n",
+    "drive_describe_single = pd.concat(frames, axis=1, join='inner')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "drive_describe_single"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 112,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "drive_describe_single_clumns=['total_driving_num','day_driving_avg_num','time_diff_max','time_diff_min','time_diff_median'\n",
+    "                              ,'time_diff_mean','time_diff_per75','soc_diff_max','soc_diff_min','soc_diff_median'\n",
+    "                              ,'soc_diff_mean','soc_diff_per75','change_num','change_day_avg_num']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 113,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "drive_describe_single.columns=drive_describe_single_clumns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 132,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##数据核实\n",
+    "drive_describe_single.to_csv(\"onlinedata/drive_describe_single.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 114,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##单个周期情况统计\n",
+    "drive_statics_period=drive_statics_l_add.groupby([\"sn\",\"drive_block\"]).agg({'data_time_b':'min',\n",
+    "                                                               'data_time_e':'max',\n",
+    "                                                               'soc_min':'min',\n",
+    "                                                               'soc_max':'max',\n",
+    "                                                               'v_max_b':'max',\n",
+    "                                                               'v_max_e':'min',\n",
+    "                                                               'time_diff':'sum',\n",
+    "                                                               'day':'min', \n",
+    "                                                               'hour_b':'min',\n",
+    "                                                               'hour_e':'max', \n",
+    "                                                               'time_phase_b':'min',\n",
+    "                                                               'time_phase_e':'max',\n",
+    "                                                               'soc_diff':'sum'})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 115,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "drive_statics_period=drive_statics_period.reset_index()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 116,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##单个周期行驶的时间\n",
+    "drive_time_period=drive_statics_period[[\"sn\",\"time_diff\"]].groupby(\"sn\").agg(['max','min','median','mean'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 117,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##基于充电周期的行驶时间75位数\n",
+    "drive_time_period_75=drive_statics_period.groupby(\"sn\").time_diff.quantile(0.75)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 118,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##单个周期SOC消耗情况\n",
+    "drive_soc_period=drive_statics_period[[\"sn\",\"soc_diff\"]].groupby(\"sn\").agg(['max','min','median','mean'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 119,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##基于充电周期的SOC消耗75位数\n",
+    "drive_soc_period_75=drive_statics_period.groupby(\"sn\").soc_diff.quantile(0.75)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 120,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##充电周期个数\n",
+    "drive_period_count=drive_statics_period.groupby(\"sn\").sn.count()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 122,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##每天的充电周期个数\n",
+    "drive_period_count_day=round(drive_period_count/7,2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 123,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "frames=[drive_time_period,drive_time_period_75,drive_soc_period,drive_soc_period_75,drive_period_count,drive_period_count_day]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 124,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##进行表的关联\n",
+    "drive_describe_period = pd.concat(frames, axis=1, join='inner')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 127,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "drive_describe_single_columns=['period_time_diff_max','period_time_diff_min','period_diff_median'\n",
+    "                              ,'period_time_diff_mean','period_time_diff_per75','period_soc_diff_max','prriod_soc_diff_min'\n",
+    "                              ,'period_soc_diff_median','period_soc_diff_mean','period_soc_diff_per75'\n",
+    "                              ,'period_count','period_count_day']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 128,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "drive_describe_period.columns=drive_describe_single_columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 130,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##核实数据\n",
+    "drive_describe_period.to_csv(\"onlinedata/drive_describe_period.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##换电站总体指标以drive_statics_l_add为主表"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 145,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##活跃电池数量\n",
+    "sql15=\"select count(distinct sn)  from drive_statics_l_add \"\n",
+    "count_active=pysqldf(sql15)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 150,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##换电次数\n",
+    "sql16=\"select count(end_change)  from drive_statics_l_add where end_change=1 \"\n",
+    "count_change=pysqldf(sql16)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 148,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##换电次数/每天\n",
+    "count_charge_day=round(count_change/7,2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 151,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##行驶后换电时间点\n",
+    "sql17=\"select count(*) ,time_phase_e    from drive_statics_l_add where end_change=1  group by time_phase_e \"\n",
+    "change_time_slot=pysqldf(sql17)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 153,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##换电后出发的时间段分布\n",
+    "sql18=\"select count(*) ,time_phase_b    from drive_statics_l_add where begin_change=1  group by time_phase_b \"\n",
+    "start_time_slot=pysqldf(sql18)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 155,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##充电情况分析主表df_charge_static\n",
+    "charge_columns=['data_time_b','data_time_e','soc_min','soc_max','time_block','sn']\n",
+    "df_charge_static.columns=charge_columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 156,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##计算时间差\n",
+    "df_charge_static[\"time_diff\"]= pd.to_datetime(df_charge_static[\"data_time_e\"] )-pd.to_datetime(df_charge_static[\"data_time_b\"])\n",
+    "df_charge_static[\"time_diff\"]=(df_charge_static[\"time_diff\"].dt.total_seconds()/3600).round(2)\n",
+    "sql12=\"select substring(data_time_b,1,10)  from    df_charge_static \"\n",
+    "df_charge_static[\"day\"]=pysqldf(sql12)\n",
+    "df_charge_static[\"hour_b\"]=pd.to_datetime(df_charge_static[\"data_time_b\"]).dt.hour\n",
+    "df_charge_static[\"hour_e\"]=pd.to_datetime(df_charge_static[\"data_time_e\"]).dt.hour\n",
+    "\n",
+    "def Timephased(x):\n",
+    "    if  x>=0 and x<=4 :\n",
+    "        return \"D-0-4\"\n",
+    "    elif x>4 and x<=8:\n",
+    "        return \"D-4-8\"\n",
+    "    elif x>8 and x<=12:\n",
+    "        return \"D-8-12\"\n",
+    "    elif x>12 and x<=16:\n",
+    "        return \"D-12-16\"\n",
+    "    elif x>16 and x<=20:\n",
+    "        return \"D-16-20\"\n",
+    "    elif x>20 and x<=24:\n",
+    "        return \"D-20-24\"\n",
+    "\n",
+    "df_charge_static[\"time_phase_b\"]=df_charge_static[\"hour_b\"].apply(lambda x: Timephased(x))\n",
+    "df_charge_static[\"time_phase_e\"]=df_charge_static[\"hour_e\"].apply(lambda x: Timephased(x))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 157,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##计算 SOC 差值\n",
+    "df_charge_static[\"soc_diff\"]=df_charge_static[\"soc_max\"]-df_charge_static[\"soc_min\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 160,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##选取充电时间大于12分钟的时间段\n",
+    "df_charge_static=df_charge_static[df_charge_static[\"time_diff\"]>0.2]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 167,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##充电时间统计\n",
+    "df_charge_static_time=df_charge_static[\"time_diff\"].agg(['sum','max','min','median','mean'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 177,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_charge_static_time_75=df_charge_static[\"time_diff\"].quantile(0.75)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 168,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##充电SOC变化统计\n",
+    "df_charge_static_soc=df_charge_static[\"soc_diff\"].agg(['sum','max','min','median','mean'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 178,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_charge_static_soc_75=df_charge_static[\"soc_diff\"].quantile(0.75)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 179,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##充电时间段的情况\n",
+    "df_charge_static_phase=df_charge_static[[\"time_phase_b\",\"time_block\"]].groupby(\"time_phase_b\").agg(['count'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 181,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##充电结束SOC分布情况\n",
+    "df_charge_static_soc_e=df_charge_static[\"soc_max\"].agg(['max','min','median','mean'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 183,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_charge_static_soc_e75=df_charge_static[\"soc_max\"].quantile(0.75)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 184,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##充电前SOC分布情况\n",
+    "df_charge_static_soc_b=df_charge_static[\"soc_min\"].agg(['max','min','median','mean'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 185,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_charge_static_soc_b75=df_charge_static[\"soc_min\"].quantile(0.75)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "py38",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.15 (default, Nov 24 2022, 14:38:14) [MSC v.1916 64 bit (AMD64)]"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "4d6c15edccb966d2bb52e4527dc0611a26206d084fadc4df5e610d719652857f"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

Різницю між файлами не показано, бо вона завелика
+ 845 - 0
TEST/qiyuan.ipynb


Різницю між файлами не показано, бо вона завелика
+ 845 - 0
TEST/qiyuan_explore.ipynb


+ 25 - 0
TEST/qiyuandata/change_time_slot

@@ -0,0 +1,25 @@
+change_num,time_phase_e
+5,D-0-0.5
+3,D-0.5-1
+5,D-1.5-2
+6,D-10.5-11
+3,D-11.5-12
+3,D-12.5-13
+2,D-13.5-14
+2,D-14.5-15
+7,D-15.5-16
+5,D-16.5-17
+7,D-17.5-18
+3,D-18.5-19
+3,D-19.5-20
+1,D-2.5-3
+2,D-20.5-21
+2,D-21.5-22
+1,D-22.5-23
+1,D-3.5-4
+2,D-4.5-5
+2,D-5.5-6
+1,D-6.5-7
+2,D-7.5-8
+4,D-8.5-9
+1,D-9.5-10

Різницю між файлами не показано, бо вона завелика
+ 70 - 0
TEST/soc_gps/trace.html


Різницю між файлами не показано, бо вона завелика
+ 70 - 0
TEST/soc_gps/trace1226.html


Різницю між файлами не показано, бо вона завелика
+ 70 - 0
TEST/soc_gps/trace1227.html


Різницю між файлами не показано, бо вона завелика
+ 69 - 0
TEST/soc_gps/trace_mul.html


+ 593 - 0
TEST/soc_gps_ana.ipynb

@@ -0,0 +1,593 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 79,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import datetime\n",
+    "import pandas as pd\n",
+    "from LIB.BACKEND import DBManager, Log\n",
+    "from pandasql import sqldf "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 80,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "start_time = (datetime.datetime.now()+datetime.timedelta(days=-7)).strftime('%Y-%m-%d %H:%M:%S')\n",
+    "end_time=datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 81,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dbManager = DBManager.DBManager()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#idlist=['TJMCL120502305010','TJMCL120502305022','TJMCL120502305038','TJMCL120502305026']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 82,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "idlist=['PJXCLL128N22C5001','PJXCLL128N22C5002','PJXCLL128N22C5003','PJXCLL128N22C5004']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 83,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_bms=pd.DataFrame()\n",
+    "df_gps=pd.DataFrame()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for sn in idlist:\n",
+    "    df_data = dbManager.get_data(sn=sn, start_time=start_time, end_time=end_time, data_groups=['bms','gps'])\n",
+    "    df_data_bms=df_data['bms'] \n",
+    "    df_data_bms[\"sn\"]=sn\n",
+    "    df_data_gps=df_data['gps']\n",
+    "    df_data_gps[\"sn\"]=sn\n",
+    "    df_bms=pd.concat([df_bms,df_data_bms],axis=0,ignore_index=True)\n",
+    "    df_gps=pd.concat([df_gps,df_data_gps],axis=0,ignore_index=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 85,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##bms选取字段\n",
+    "df_bms_1=df_bms[[\"时间戳\",\"SOC[%]\",\"sn\"]]\n",
+    "df_bms_1.columns=[\"data_time\",\"soc\",\"sn\"]\n",
+    "df_bms_2=df_bms_1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 86,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(11389, 3)"
+      ]
+     },
+     "execution_count": 86,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_bms_2.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 87,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##gps选取字段\n",
+    "df_gps_1=df_gps[[\"时间戳\",\"纬度\",\"经度\",\"sn\"]]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 88,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_gps_1.columns=[\"data_time\",\"lat\",\"lon\",\"sn\"]\n",
+    "df_gps_2=df_gps_1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 89,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##soc 数据 关联gps 数据\n",
+    "sqlc6=\"select a.*,  b.lat  lat ,b.lon  lon from  df_bms_2 a \\\n",
+    "left join  df_gps_2 b  on a.sn=b.sn and  abs((JULIANDAY(a.data_time)-JULIANDAY(b.data_time))*24*60*60)<5 \""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 90,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from pandasql import sqldf "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 91,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pysqldf = lambda q: sqldf(q, globals())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 92,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "soc_gps= pysqldf(sqlc6)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 93,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##核实数据\n",
+    "soc_gps.to_csv(\"soc_gps/soc_gps.csv\",index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 94,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(11389, 5)"
+      ]
+     },
+     "execution_count": 94,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "soc_gps.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 95,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##筛选数据,排除空值和等于零的值\n",
+    "sqlc7=\"select *  from  soc_gps  where  lat>0 and lon>0 and  soc>0 \""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 96,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "soc_gps_add= pysqldf(sqlc7)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 97,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##核实数据\n",
+    "soc_gps_add.to_csv(\"soc_gps/soc_gps_add.csv\",index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 99,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##预处理字段\n",
+    "data=soc_gps_add[[\"lat\",\"lon\",\"soc\"]]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 101,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##坐标转化,GPS 转火星\n",
+    "import math\n",
+    "pi = 3.1415926535897932384626\n",
+    "a = 6378245.0\n",
+    "ee = 0.00669342162296594323\n",
+    "\n",
+    "def wgs_gcj(lat, lon,soc):\n",
+    "    dLat = transform_lat(lon - 105.0, lat - 35.0)\n",
+    "    dLon = transform_lon(lon - 105.0, lat - 35.0)\n",
+    "    radLat = lat / 180.0 * pi\n",
+    "    magic = math.sin(radLat)\n",
+    "    magic = 1 - ee * magic * magic\n",
+    "    sqrtMagic = math.sqrt(magic)\n",
+    "    dLat = (dLat * 180.0) / ((a * (1 - ee)) / (magic * sqrtMagic) * pi)\n",
+    "    dLon = (dLon * 180.0) / (a / sqrtMagic * math.cos(radLat) * pi)\n",
+    "    mgLat = lat + dLat\n",
+    "    mgLon = lon + dLon\n",
+    "    return [mgLat,mgLon,soc]\n",
+    "\n",
+    "\n",
+    "def transform_lat(x, y):\n",
+    "\n",
+    "    ret = -100.0 + 2.0 * x + 3.0 * y + 0.2 * y * y + 0.1 * x * y + 0.2 * math.sqrt(abs(x))\n",
+    "    ret += (20.0 * math.sin(6.0 * x * pi) + 20.0 * math.sin(2.0 * x * pi)) * 2.0 / 3.0\n",
+    "    ret += (20.0 * math.sin(y * pi) + 40.0 * math.sin(y / 3.0 * pi)) * 2.0 / 3.0\n",
+    "    ret += (160.0 * math.sin(y / 12.0 * pi) + 320 * math.sin(y * pi / 30.0)) * 2.0 / 3.0\n",
+    "    return ret\n",
+    "\n",
+    "def transform_lon(x, y):\n",
+    "\n",
+    "    ret = 300.0 + x + 2.0 * y + 0.1 * x * x + 0.1 * x * y + 0.1 * math.sqrt(abs(x))\n",
+    "    ret += (20.0 * math.sin(6.0 * x * pi) + 20.0 * math.sin(2.0 * x * pi)) * 2.0 / 3.0\n",
+    "    ret += (20.0 * math.sin(x * pi) + 40.0 * math.sin(x / 3.0 * pi)) * 2.0 / 3.0\n",
+    "    ret += (150.0 * math.sin(x / 12.0 * pi) + 300.0 * math.sin(x / 30.0 * pi)) * 2.0 / 3.0\n",
+    "    return ret\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 102,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "core=[29.934539,107.79961,0]\n",
+    "core=wgs_gcj(core[0], core[1],core[2]) "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 103,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[29.93186073070887, 107.80401736670477, 0]"
+      ]
+     },
+     "execution_count": 103,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "core"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 104,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "locations = data.values.tolist()\n",
+    "\n",
+    "zz=[ wgs_gcj(i[0], i[1],i[2])  for i  in  locations ]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 105,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import folium\n",
+    "import os\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "from folium.plugins import HeatMap\n",
+    "\n",
+    "m = folium.Map(core[:2], zoom_start=16,\n",
+    "               tiles='http://webrd02.is.autonavi.com/appmaptile?lang=zh_cn&size=1&scale=1&style=7&x={x}&y={y}&z={z}',\n",
+    "               attr='default') \n",
+    "\n",
+    "\n",
+    "HeatMap(zz).add_to(m) \n",
+    "\n",
+    "\n",
+    "m.save(\"soc_gps/trace1227.html\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 106,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[29.93186073070887, 107.80401736670477]"
+      ]
+     },
+     "execution_count": 106,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "core[:2]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 111,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "drive_statics_l_add=pd.read_csv(\"qiyuandata/drive_statics_l_add.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 112,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##充电路径绘制\n",
+    "##寻找充电的时间段\n",
+    "sql8=\"select data_time_b,data_time_e, sn,drive_block  from drive_statics_l_add where end_change=1\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 113,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "drive_charge= pysqldf(sql8)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "drive_charge"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 115,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "sql9=\"select a.data_time,a.sn,a.lat,a.lon,b.drive_block  from soc_gps_add a left join drive_charge b on a.sn=b.sn \\\n",
+    "    and a.data_time  < b.data_time_e  and   a.data_time>b.data_time_b\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 116,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "drive_charge_gps=pysqldf(sql9)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 119,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##核实数据\n",
+    "drive_charge_gps.to_csv(\"qiyuandata/drive_charge_gps.csv\",index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 120,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "##驾驶段\n",
+    "drive_group=drive_charge_gps[\"drive_block\"].values.tolist()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 121,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "drive_group=set(drive_group)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 128,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "drive_group=[int(i) for i in drive_group if i >0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 129,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[73, 70, 71]"
+      ]
+     },
+     "execution_count": 129,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "drive_group"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 130,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import math\n",
+    "\n",
+    "pi = 3.1415926535897932384626\n",
+    "a = 6378245.0\n",
+    "ee = 0.00669342162296594323\n",
+    "\n",
+    "\n",
+    "def wgs_gcj(lat, lon):\n",
+    "\n",
+    "    dLat = transform_lat(lon - 105.0, lat - 35.0)\n",
+    "    dLon = transform_lon(lon - 105.0, lat - 35.0)\n",
+    "    radLat = lat / 180.0 * pi\n",
+    "    magic = math.sin(radLat)\n",
+    "    magic = 1 - ee * magic * magic\n",
+    "    sqrtMagic = math.sqrt(magic)\n",
+    "    dLat = (dLat * 180.0) / ((a * (1 - ee)) / (magic * sqrtMagic) * pi)\n",
+    "    dLon = (dLon * 180.0) / (a / sqrtMagic * math.cos(radLat) * pi)\n",
+    "    mgLat = lat + dLat\n",
+    "    mgLon = lon + dLon\n",
+    "    return [mgLat,mgLon]\n",
+    "\n",
+    "\n",
+    "def transform_lat(x, y):\n",
+    "\n",
+    "    ret = -100.0 + 2.0 * x + 3.0 * y + 0.2 * y * y + 0.1 * x * y + 0.2 * math.sqrt(abs(x))\n",
+    "    ret += (20.0 * math.sin(6.0 * x * pi) + 20.0 * math.sin(2.0 * x * pi)) * 2.0 / 3.0\n",
+    "    ret += (20.0 * math.sin(y * pi) + 40.0 * math.sin(y / 3.0 * pi)) * 2.0 / 3.0\n",
+    "    ret += (160.0 * math.sin(y / 12.0 * pi) + 320 * math.sin(y * pi / 30.0)) * 2.0 / 3.0\n",
+    "    return ret\n",
+    "\n",
+    "\n",
+    "def transform_lon(x, y):\n",
+    "\n",
+    "    ret = 300.0 + x + 2.0 * y + 0.1 * x * x + 0.1 * x * y + 0.1 * math.sqrt(abs(x))\n",
+    "    ret += (20.0 * math.sin(6.0 * x * pi) + 20.0 * math.sin(2.0 * x * pi)) * 2.0 / 3.0\n",
+    "    ret += (20.0 * math.sin(x * pi) + 40.0 * math.sin(x / 3.0 * pi)) * 2.0 / 3.0\n",
+    "    ret += (150.0 * math.sin(x / 12.0 * pi) + 300.0 * math.sin(x / 30.0 * pi)) * 2.0 / 3.0\n",
+    "    return ret\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 131,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "m = folium.Map([29.934539,107.79961], zoom_start=16,\n",
+    "               tiles='http://webrd02.is.autonavi.com/appmaptile?lang=zh_cn&size=1&scale=1&style=7&x={x}&y={y}&z={z}',\n",
+    "               attr='default') "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 136,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for i in range(len(drive_group)):\n",
+    "    q=drive_group[i]\n",
+    "    qq=[\"red\",\"blue\",\"green\",\"black\",\"orange\",\"pink\",\"yellow\"][i]\n",
+    "    locz=drive_charge_gps[[\"lat\",\"lon\"]][drive_charge_gps[\"drive_block\"]==q].values.tolist()\n",
+    "    zz=[ wgs_gcj(i[0], i[1])  for i  in  locz ]\n",
+    "    folium.PolyLine(  # polyline方法为将坐标用实线形式连接起来\n",
+    "        zz,  # 将坐标点连接起来\n",
+    "        weight=4,  # 线的大小为4\n",
+    "        color=qq,  # 线的颜色取自qq(每个驾驶段一种颜色)\n",
+    "        opacity=0.8,  # 线的透明度\n",
+    "    ).add_to(m)  # 将这条线添加到刚才的区域m内"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 137,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "m.save(\"soc_gps/trace_mul.html\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "py38",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.15"
+  },
+  "orig_nbformat": 4,
+  "vscode": {
+   "interpreter": {
+    "hash": "4d6c15edccb966d2bb52e4527dc0611a26206d084fadc4df5e610d719652857f"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

+ 20 - 0
test.py

@@ -0,0 +1,20 @@
+# 获取数据
+import sys
+from LIB.BACKEND import DBManager
+from LIB.BACKEND import DataPreProcess
+
+dataPrePro = DataPreProcess.DataPreProcess()
+
+sn = "MGMCLN750N215N097"
+st = '2022-01-01 00:00:00'
+et = '2022-03-01 00:00:00'
+
+dbManager = DBManager.DBManager()
+df_data = dbManager.get_data(sn=sn, start_time=st, end_time=et, data_groups=['bms', 'gps', 'accum', 'system'])
+# 
+df_bms = df_data['bms']
+df_gps = df_data['gps']
+df_accum = df_data['accum']
+df_system = df_data['system']
+
+df_bms = dataPrePro.data_fault_tag(sn,df_bms)

Деякі файли не було показано, через те що забагато файлів було змінено