{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import os\n", "\n", "\n", "\n", "\n", "# 文件夹路径\n", "folder_path = r'D:\\ZLWORK\\code\\zlwl-algos\\USER\\shouxueqi\\Track\\benz_data_analyze\\\\'\n", "\n", "# 读取文件夹中所有Excel文件\n", "excel_files = [file for file in os.listdir(folder_path) if file.endswith('.xlsx')]\n", "print(excel_files)\n", "# 将所有Excel文件拼接成一个DataFrame\n", "df_list = []\n", "for file in excel_files:\n", " file_path = os.path.join(folder_path, file)\n", " df = pd.read_excel(file_path)\n", " df_list.append(df)\n", "\n", "combined_df = pd.concat(df_list, ignore_index=True)\n", "combined_df.to_excel(r'all_split_data.xlsx')\n", "combined_df=combined_df.reset_index(drop=True)\n", "combined_standing=combined_df[combined_df['status']==0]\n", "combined_ofc=combined_df[combined_df['status']==1]\n", "combined_onc=combined_df[combined_df['status']==2]\n", "combined_df_drive=combined_df[combined_df['status']==3]\n", "combined_chg=combined_ofc.append(combined_onc)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "combined_df[combined_df['status']==2]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import subprocess\n", "import platform\n", "\n", "def Data_activite_anaylze(daily_rate):\n", " daily_rate=daily_rate*100\n", " # delta_datetermp=pd.to_timedelta(delta_date)\n", " # delta_date=(delta_datetermp.total_seconds())/3600/24\n", " mean_active_rate=round((daily_rate).mean(),2)#平均车辆日活跃度\n", " print('平均车辆活跃度为:{}%'.format(mean_active_rate))\n", " \n", " print('日活最高车辆为{},其活跃度为{}%'.format(daily_rate.idxmax(),round(daily_rate[daily_rate.idxmax()],2)))\n", " print('日活最低车辆为{},其活跃度为{}%'.format(daily_rate.idxmin(),round(daily_rate[daily_rate.idxmin()],2)))\n", "\n", "def _sn_Data_activity(df,folder_name,fig, ax):\n", " \n", " \n", " \n", " delta_date=df['time_end'].dt.to_period('D').iloc[-1]-df['time_st'].dt.to_period('D').iloc[0]\n", " delta_datetermp=pd.to_timedelta(delta_date)\n", " delta_date=(delta_datetermp.total_seconds())/3600/24\n", " num_days = len(df['time_st'].dt.date.unique())\n", " num_days_rate=num_days/delta_date\n", " # 按月份统计时间分布\n", " sn=list(set(df['sn']))\n", " df['time_st']=pd.to_datetime(df['time_st']).dt.date\n", " df=df.drop_duplicates(subset='time_st', keep='last')\n", " df['time_st'] = pd.to_datetime(df['time_st'])\n", " monthly_counts = df.groupby(df['time_st'].dt.to_period('M')).size()\n", " # 绘制柱状图\n", "\n", " monthly_counts.plot(kind='bar')\n", " bars = ax.containers[0]\n", " # 在每个柱子的顶部添加数值\n", " for bar in bars:\n", " height = bar.get_height()\n", " ax.text(bar.get_x() + bar.get_width() / 2, height, str(int(height)), ha='center', va='bottom')\n", " ax.set_xlabel('Month')\n", " ax.set_ylabel('Count')\n", " #ax.xticks(rotation=0)\n", " ax.set_title('{}_Time Distribution by Month'.format(str(sn[0])))\n", " plt.tight_layout()\n", " plt.xticks(rotation=0)\n", "\n", " fig.savefig('{}/{}_time_distribution.jpg'.format(folder_name,str(sn[0])))\n", " ax.cla()\n", "\n", " return num_days_rate\n", "\n", "\n", "combined_df=combined_df.reset_index(drop=True)\n", "combined_df['time_st'] = pd.to_datetime(combined_df['time_st'])\n", "#monthly_counts = combined_df.groupby('sn').apply(_sn_plot)\n", "combined_df_first_day=str(combined_df['time_st'].dt.to_period('D').iloc[0])\n", "combined_df_last_day=str(combined_df['time_end'].dt.to_period('D').iloc[-1])\n", "delta_date=combined_df['time_end'].dt.to_period('D').iloc[-1]-combined_df['time_st'].dt.to_period('D').iloc[0]\n", "print('有效数据时间范围:{}至{}'.format(combined_df_first_day,combined_df_last_day))\n", "\n", "analyze_rslt_folder_name='Data_analyze_rlt'\n", "if not os.path.exists(analyze_rslt_folder_name):\n", " os.mkdir(analyze_rslt_folder_name)\n", "subprocess.Popen(f'explorer \"{analyze_rslt_folder_name}\"')\n", "time_folder_name=analyze_rslt_folder_name+'/Time_Distribution'\n", "if not os.path.exists(time_folder_name):\n", " os.mkdir(time_folder_name)\n", "\n", "analyze_rslt_folder_name='Data_analyze_rlt'\n", "if not os.path.exists(analyze_rslt_folder_name):\n", " os.mkdir(analyze_rslt_folder_name)\n", "Drive_folder_name=analyze_rslt_folder_name+'/Drive_Distribution'\n", "if not os.path.exists(Drive_folder_name):\n", " os.mkdir(Drive_folder_name)\n", "Charge_folder_name=analyze_rslt_folder_name+'/Charge_Distribution'\n", "\n", "fig, ax = plt.subplots(figsize=(16, 9),dpi=500)\n", "daily_rate= combined_df.groupby('sn').apply(_sn_Data_activity,time_folder_name,fig, ax)\n", "plt.cla()\n", "plt.clf()\n", "plt.close()\n", "\n", "Data_activite_anaylze(daily_rate)\n", "\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", " # 在每个柱子的顶部添加数值\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "def _sn_start_charge_soc(df,Drive_folder_name,fig, ax):\n", " #创建路径\n", " if not os.path.exists(Drive_folder_name):\n", " os.mkdir(Drive_folder_name)\n", " sn_Drive_folder_name=Drive_folder_name+'/End_off_sn'\n", " if not os.path.exists(sn_Drive_folder_name):\n", " os.mkdir(sn_Drive_folder_name) \n", " soc_series = df['soc_st']\n", " sn=list(set(df['sn']))\n", "# 使用 cut() 方法按照 10 为间隔进行分组\n", " soc_grouped = pd.cut(soc_series, bins=range(0, 101, 10), include_lowest=True)\n", "\n", " # 对每个分组进行计数,并将计数结果保存到一个新的 DataFrame 中\n", " soc_counts = soc_grouped.value_counts().sort_index().reset_index()\n", " soc_counts.columns = ['soc_range', 'count']\n", "\n", "\n", " # fig, ax = plt.subplots(figsize=(16, 9),dpi=500)\n", "\n", " \n", " \n", " \n", "\n", "\n", " total_soc_count=soc_counts['count'].sum()\n", " # 计算每个类别的销售额占比\n", " soc_couns_percentages=[]\n", " for x in range(len(soc_counts)):\n", " soc_couns_percentages = soc_couns_percentages+[100 * soc_counts.loc[x,'count'] / total_soc_count]\n", "\n", " soc_counts.plot(kind='bar', x='soc_range', y='count', ax=ax)\n", " bars = ax.containers[0]\n", " # 在每个柱形上添加标签\n", " for i, rect in enumerate(bars):\n", " height = rect.get_height()\n", " soc_counts_i=soc_counts['count']\n", " ax.text(rect.get_x() + rect.get_width()/2 , height, f'{soc_counts_i[i]} ({soc_couns_percentages[i]:.1f}%)', ha='center', va='bottom')\n", "\n", " \n", " \n", " # for bar in bars:\n", " # height = bar.get_height()\n", " # ax.text(bar.get_x() + bar.get_width() / 2, height, str(int(height)), ha='center', va='bottom')\n", " ax.set_xlabel('End_off_SOC')\n", " ax.set_ylabel('Count')\n", " \n", " plt.tight_layout()\n", " plt.xticks(rotation=0)\n", "\n", " if len(sn)>1:\n", " ax.set_title('Total_End_off_SOC_Distribution')\n", " fig.savefig('{}/总放电截止SOC分布.jpg'.format(Drive_folder_name))\n", " else:\n", " ax.set_title('{}_End_off_SOC_Distribution'.format(sn[0]))\n", " fig.savefig('{}/{}_放电截止SOC分布.jpg'.format(sn_Drive_folder_name,sn[0]))\n", " ax.cla()\n", " plt.close()\n", " \n", " #满充比例 \n", " \n", "def _sn_Fullcharge_ratio(df,Charge_folder_name,fig, ax):\n", " \n", " #充电数据分析\n", " sn=list(set(df['sn'])) \n", " #创建路径\n", " if not os.path.exists(Charge_folder_name):\n", " os.mkdir(Charge_folder_name)\n", " sn_Charge_folder_name=Charge_folder_name+'/FullChareg_Ratio_sn'\n", " if not os.path.exists(sn_Charge_folder_name):\n", " os.mkdir(sn_Charge_folder_name) \n", " # FS_charge_ratio=# 统计每个状态出现的次数\n", " # fig,ax= plt.subplots(dpi=500)\n", " Full_charge_count=len(df[ (df['max_vol_ed']>3.6)])/len(df)\n", "\n", " # 创建饼图\n", " ax.pie([Full_charge_count, 1-Full_charge_count], labels=['Full_Charge', 'Not_Full_Charge'], autopct='%1.1f%%')\n", "\n", " # 添加标题\n", " \n", " if len(sn)>1:\n", " ax.set_title('Total_Full_Charge_Ratio')\n", " fig.savefig('{}/满充比例.jpg'.format(Charge_folder_name))\n", " else:\n", " ax.set_title('{}_Full_Charge_Ratio'.format(sn[0]))\n", " fig.savefig('{}/{}_满充比例.jpg'.format(sn_Charge_folder_name,sn[0]))\n", " ax.cla()\n", " plt.close()\n", " \n", "def _sn_charge_ratio(df,Charge_folder_name,fig, ax):\n", " \n", " #充电数据分析\n", " sn=list(set(df['sn'])) \n", " #创建路径\n", " if not os.path.exists(Charge_folder_name):\n", " os.mkdir(Charge_folder_name)\n", " sn_Charge_folder_name=Charge_folder_name+'/Charg_Ratio_sn'\n", " if not os.path.exists(sn_Charge_folder_name):\n", " os.mkdir(sn_Charge_folder_name) \n", " # FS_charge_ratio=# 统计每个状态出现的次数\n", " # fig,ax= plt.subplots(dpi=500)\n", " status_count = df['status'].value_counts()\n", "\n", " # 计算1和2的比例\n", " if 1 in status_count:\n", " Fast_charge_count = status_count[1] / status_count.sum()\n", " else:\n", " Fast_charge_count=0\n", " if 2 in status_count:\n", " Slow_Charge_count = status_count[2] / status_count.sum()\n", " else:\n", " Slow_Charge_count=0\n", " # 创建饼图\n", " ax.pie([Fast_charge_count, Slow_Charge_count], labels=['Fast_charge', 'Slow_Charge'], autopct='%1.1f%%')\n", "\n", " # 添加标题\n", " \n", " if len(sn)>1:\n", " ax.set_title('Total_Fast_Slow_Charge_Ratio')\n", " fig.savefig('{}/快慢充比例.jpg'.format(Charge_folder_name))\n", " else:\n", " ax.set_title('{}_Fast_Slow_Charge_Ratio'.format(sn[0]))\n", " fig.savefig('{}/{}_快慢充比例.jpg'.format(sn_Charge_folder_name,sn[0]))\n", " ax.cla()\n", " plt.close()\n", "def _sn_charge_depth(df,Charge_folder_name,fig, ax):\n", " #创建路径\n", " if not os.path.exists(Charge_folder_name):\n", " os.mkdir(Charge_folder_name)\n", " sn_Charge_folder_name=Charge_folder_name+'/Charg_Depth_sn'\n", " if not os.path.exists(sn_Charge_folder_name):\n", " os.mkdir(sn_Charge_folder_name) \n", " soc_series = df['delta_soc']\n", " sn=list(set(df['sn']))\n", "# 使用 cut() 方法按照 10 为间隔进行分组\n", " soc_grouped = pd.cut(soc_series, bins=range(0, 101, 10), include_lowest=True)\n", "\n", " # 对每个分组进行计数,并将计数结果保存到一个新的 DataFrame 中\n", " soc_counts = soc_grouped.value_counts().sort_index().reset_index()\n", " soc_counts.columns = ['soc_range', 'count']\n", "\n", "\n", " # fig, ax = plt.subplots(figsize=(16, 9),dpi=500)\n", " soc_counts.plot(kind='bar', x='soc_range', y='count', ax=ax)\n", " bars = ax.containers[0]\n", "\n", " \n", " total_soc_count=soc_counts['count'].sum()\n", " # 计算每个类别的销售额占比\n", " soc_couns_percentages=[]\n", " for x in range(len(soc_counts)):\n", " soc_couns_percentages = soc_couns_percentages+[100 * soc_counts.loc[x,'count'] / total_soc_count]\n", "\n", " # 在每个柱形上添加标签\n", " for i, rect in enumerate(bars):\n", " height = rect.get_height()\n", " soc_counts_i=soc_counts['count']\n", " ax.text(rect.get_x() + rect.get_width()/2 , height, f'{soc_counts_i[i]} ({soc_couns_percentages[i]:.1f}%)', ha='center', va='bottom') \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " ax.set_ylabel('Count')\n", " ax.set_title('Charge_Depth_SOC_Distribution')\n", " plt.tight_layout()\n", " plt.xticks(rotation=0)\n", "\n", " if len(sn)>1:\n", " ax.set_xlabel('Charge_Depth_SOC')\n", " fig.savefig('{}/单次充电深度分布.jpg'.format(Charge_folder_name))\n", " else:\n", " ax.set_xlabel('{}_Charge_Depth_SOC'.format(sn[0]))\n", " fig.savefig('{}/{}单次充电深度分布.jpg'.format(sn_Charge_folder_name,sn[0]))\n", " ax.cla()\n", " plt.close()\n", "def _sn_charge_maxtemp(df,Charge_folder_name,fig, ax):\n", " # 使用 cut() 方法按照 10 为间隔进行分组\n", " df=df[df['temp_max']<100]\n", " temp_series = df['temp_max']\n", " sn=list(set(df['sn']))\n", " #创建路径\n", " if not os.path.exists(Charge_folder_name):\n", " os.mkdir(Charge_folder_name)\n", " sn_Charge_folder_name=Charge_folder_name+'/Charg_TempMax_sn'\n", " if not os.path.exists(sn_Charge_folder_name):\n", " os.mkdir(sn_Charge_folder_name) \n", " \n", " temp_grouped = pd.cut(temp_series, bins=range(-20, 71, 10), include_lowest=True)\n", "\n", " # 对每个分组进行计数,并将计数结果保存到一个新的 DataFrame 中\n", " temp_counts = temp_grouped.value_counts().sort_index().reset_index()\n", " temp_counts.columns = ['TempMax', 'count']\n", "\n", "\n", " \n", " temp_counts.plot(kind='bar', x='TempMax', y='count', ax=ax)\n", " bars = ax.containers[0]\n", " # 在每个柱子的顶部添加数值\n", " for bar in bars:\n", " height = bar.get_height()\n", " ax.text(bar.get_x() + bar.get_width() / 2, height, str(int(height)), ha='center', va='bottom')\n", " ax.set_xlabel('Charge_Temp_Max')\n", " ax.set_ylabel('Count')\n", " \n", " plt.tight_layout()\n", " plt.xticks(rotation=0)\n", " if len(sn)>1:\n", " ax.set_title('Charge_Temp_Max_Distribution')\n", " fig.savefig('{}/单次充电最高温度.jpg'.format(Charge_folder_name))\n", " else:\n", " ax.set_title('{}_Charge_Temp_Max_Distribution'.format(sn[0]))\n", " fig.savefig('{}/{}单次充电最高温度.jpg'.format(sn_Charge_folder_name,sn[0]))\n", " ax.cla()\n", " # plt.close()\n", "def _sn_charge_maxtemp_monthly(df,Charge_folder_name,fig, ax): \n", " df=df[df['temp_max']<100]\n", " temp_series = df['temp_max']\n", " sn=list(set(df['sn'])) \n", " sn_month_Charge_folder_name=Charge_folder_name+'/Charg_TempMax_sn_mounth'\n", " if not os.path.exists(sn_month_Charge_folder_name):\n", " os.mkdir(sn_month_Charge_folder_name) \n", "\n", " # fig, ax = plt.subplots(figsize=(16, 9),dpi=500)\n", "\n", "\n", " # 读取数据,并将时间列转换为datetime类型\n", " df['time_st'] = pd.to_datetime(df['time_st'])\n", "\n", " # 将数据按月分组,将每个月的温度数据存储在一个列表中\n", " temp_monthly = [group['temp_max'].tolist() for _, group in df.groupby(pd.Grouper(key='time_st', freq='M'))]\n", "\n", " # 将所有月份的温度数据合并到一个列表中\n", " all_temps = [temp for month_temps in temp_monthly for temp in month_temps]\n", "\n", " # 创建散点图\n", " ax.scatter(df['time_st'], all_temps)\n", " ax.set_xlabel('Month')\n", " ax.set_ylabel('Temperature(℃)')\n", " \n", " if len(sn)>1:\n", " ax.set_title('Max_Temperature by Month')\n", " fig.savefig('{}/充电高温_月份分布.jpg'.format(Charge_folder_name))\n", " else:\n", " ax.set_title('{}_Max_Temperature by Month'.format(sn[0]))\n", " fig.savefig('{}/{}充电高温_月份分布.jpg'.format(sn_month_Charge_folder_name,sn[0]))\n", " ax.cla()\n", " plt.tight_layout()\n", " plt.xticks(rotation=0)\n", " plt.close() \n", "\n", " \n", " \n", " \n", " \n", " \n", "\n", "\n", "\n", "\n", "#行车数据分析\n", "fig, ax = plt.subplots(figsize=(16, 9),dpi=500)\n", "_sn_start_charge_soc(combined_chg,Charge_folder_name,fig, ax)\n", "combined_chg.groupby('sn').apply(_sn_start_charge_soc,Charge_folder_name,fig, ax)\n", "plt.cla()\n", "plt.clf()\n", "plt.close()\n", "\n", "#充电数据分析\n", " #充电类型比例分析\n", "fig, ax = plt.subplots(dpi=500)\n", "_sn_charge_ratio(combined_chg,Charge_folder_name,fig, ax)\n", "combined_chg.groupby('sn').apply(_sn_charge_ratio,Charge_folder_name,fig, ax)\n", "plt.cla()\n", "plt.clf()\n", "plt.close()\n", " #满充比例分析\n", "fig, ax = plt.subplots(dpi=500)\n", "_sn_Fullcharge_ratio(combined_chg,Charge_folder_name,fig, ax)\n", "combined_chg.groupby('sn').apply(_sn_Fullcharge_ratio,Charge_folder_name,fig, ax)\n", "plt.cla()\n", "plt.clf()\n", "plt.close()\n", " #充电深度分析\n", "fig, ax = plt.subplots(figsize=(16, 9),dpi=500)\n", "_sn_charge_depth(combined_chg,Charge_folder_name,fig, ax) \n", "combined_chg.groupby('sn').apply(_sn_charge_depth,Charge_folder_name,fig, ax)\n", "plt.cla()\n", "plt.clf()\n", "plt.close()\n", " #充电温度度分析\n", "fig, ax = plt.subplots(figsize=(16, 9),dpi=500)\n", "_sn_charge_maxtemp(combined_chg,Charge_folder_name,fig, ax) \n", "combined_chg.groupby('sn').apply(_sn_charge_maxtemp,Charge_folder_name,fig, ax)\n", "plt.cla()\n", "plt.clf()\n", "plt.close()\n", "fig, ax = plt.subplots(figsize=(16, 9),dpi=500)\n", "_sn_charge_maxtemp_monthly(combined_chg,Charge_folder_name,fig, ax)\n", "combined_chg.groupby('sn').apply(_sn_charge_maxtemp_monthly,Charge_folder_name,fig, ax)\n", "plt.cla()\n", "plt.clf()\n", "plt.close()\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#电压分布\n", "import pandas as pd\n", "import re\n", "from collections import Counter\n", "import matplotlib.pyplot as plt\n", "\n", "def _sn_volt_charge_end_voltgap(df,Charge_folder_name,fig, ax):\n", " sn=list(set(df['sn'])) \n", " df=df[(df['max_vol_ed']>3.47)&(df['max_vol_ed']<3.52)]\n", " df['delta_end_volt']=df['max_vol_ed']-df['min_vol_ed']\n", " \n", " sn_chgend_volgap_Charge_folder_name=Charge_folder_name+'/Charg_EndVolTgap_sn_mounth_35'\n", " if not os.path.exists(sn_chgend_volgap_Charge_folder_name):\n", " os.mkdir(sn_chgend_volgap_Charge_folder_name) \n", "\n", " # fig, ax = plt.subplots(figsize=(16, 9),dpi=500)\n", "\n", "\n", " # 读取数据,并将时间列转换为datetime类型\n", " df['time_st'] = pd.to_datetime(df['time_st'])\n", "\n", " # 将数据按月分组,将每个月的温度数据存储在一个列表中\n", " temp_monthly = [group['delta_end_volt'].tolist() for _, group in df.groupby(pd.Grouper(key='time_st', freq='M'))]\n", "\n", " # 将所有月份的温度数据合并到一个列表中\n", " all_temps = [temp for month_temps in temp_monthly for temp in month_temps]\n", "\n", " # 创建散点图\n", " ax.scatter(df['time_st'], all_temps)\n", " ax.set_xlabel('Month')\n", " ax.set_ylabel('delta_end_volt(V)')\n", " plt.tight_layout()\n", " plt.xticks(rotation=0)\n", " if len(sn)>1:\n", " ax.set_title('Charg_Finish_VoltDiff by Month')\n", " fig.savefig('{}/充电压差_月份分布.jpg'.format(Charge_folder_name))\n", " else:\n", " ax.set_title('{}_Charg_Finish_VoltDiff by Month'.format(sn[0]))\n", " fig.savefig('{}/{}充电压差_月份分布.jpg'.format(sn_chgend_volgap_Charge_folder_name,sn[0]))\n", " ax.cla()\n", " plt.tight_layout()\n", " plt.xticks(rotation=0)\n", " plt.close() \n", " \n", "def _sn_volt_charge_st_voltgap(df,Charge_folder_name,fig, ax):\n", " sn=list(set(df['sn'])) \n", " df=df[(df['soc_st']<28)]\n", " df['delta_st_volt']=df['max_vol_st']-df['min_vol_st']\n", " \n", " sn_chgst_volgap_Charge_folder_name=Charge_folder_name+'/Charg_StVolTgap_sn_mounth'\n", " if not os.path.exists(sn_chgst_volgap_Charge_folder_name):\n", " os.mkdir(sn_chgst_volgap_Charge_folder_name) \n", "\n", " # fig, ax = plt.subplots(figsize=(16, 9),dpi=500)\n", "\n", "\n", " # 读取数据,并将时间列转换为datetime类型\n", " df['time_st'] = pd.to_datetime(df['time_st'])\n", "\n", " # 将数据按月分组,将每个月的温度数据存储在一个列表中\n", " temp_monthly = [group['delta_st_volt'].tolist() for _, group in df.groupby(pd.Grouper(key='time_st', freq='M'))]\n", "\n", " # 将所有月份的温度数据合并到一个列表中\n", " all_temps = [temp for month_temps in temp_monthly for temp in month_temps]\n", "\n", " # 创建散点图\n", " ax.scatter(df['time_st'], all_temps)\n", " ax.set_xlabel('Month')\n", " ax.set_ylabel('delta_st_volt(V)')\n", " plt.tight_layout()\n", " plt.xticks(rotation=0)\n", " if len(sn)>1:\n", " ax.set_title('Charg_Start_VoltDiff by Month')\n", " fig.savefig('{}/充电起始压差_月份分布.jpg'.format(Charge_folder_name))\n", " else:\n", " ax.set_title('{}_Charg_Start_VoltDiff by Month'.format(sn[0]))\n", " fig.savefig('{}/{}充电起始压差_月份分布.jpg'.format(sn_chgst_volgap_Charge_folder_name,sn[0]))\n", " ax.cla()\n", " plt.tight_layout()\n", " plt.xticks(rotation=0)\n", " plt.close() \n", " \n", "def _sn_volt_charge_end_volt_max(df,Charge_folder_name,fig, ax):\n", " sn=list(set(df['sn'])) \n", " df=df[(df['soc_end']>97.9)]\n", " #df=df[(df['max_vol_ed']>3.7)]\n", " \n", " sn_chgend_volgap_Charge_folder_name=Charge_folder_name+'/Charg_EndVoltMax_sn_mounth'\n", " if not os.path.exists(sn_chgend_volgap_Charge_folder_name):\n", " os.mkdir(sn_chgend_volgap_Charge_folder_name) \n", "\n", " # fig, ax = plt.subplots(figsize=(16, 9),dpi=500)\n", "\n", "\n", " # 读取数据,并将时间列转换为datetime类型\n", " df['time_st'] = pd.to_datetime(df['time_st'])\n", " df_l37=df[df['max_vol_ed']>3.7]\n", " df_s37=df[df['max_vol_ed']<=3.7]\n", " # 将数据按月分组,将每个月的温度数据存储在一个列表中\n", " temp_monthly_l37 = [group['max_vol_ed'].tolist() for _, group in df_l37.groupby(pd.Grouper(key='time_st', freq='M'))]\n", " all_temps_l37 = [temp for month_temps in temp_monthly_l37 for temp in month_temps]\n", " \n", " temp_monthly_s37 = [group['max_vol_ed'].tolist() for _, group in df_s37.groupby(pd.Grouper(key='time_st', freq='M'))]\n", " all_temps_s37 = [temp for month_temps in temp_monthly_s37 for temp in month_temps]\n", " # 创建散点图\n", " ax.scatter(df_l37['time_st'], all_temps_l37,color='red')\n", " ax.scatter(df_s37['time_st'], all_temps_s37,color='green')\n", " ax.set_xlabel('Month')\n", " ax.set_ylabel('max_vol_ed(V)')\n", " plt.tight_layout()\n", " plt.xticks(rotation=0)\n", " if len(sn)>1:\n", " ax.set_title('FullCharg_Finish_MaxVol by Month')\n", " fig.savefig('{}/满充最高电压_月份分布.jpg'.format(Charge_folder_name))\n", " else:\n", " ax.set_title('{}_FullCharg_Finish_MaxVol by Month'.format(sn[0]))\n", " fig.savefig('{}/{}满充最高电压_月份分布.jpg'.format(sn_chgend_volgap_Charge_folder_name,sn[0]))\n", " ax.cla()\n", " plt.tight_layout()\n", " plt.xticks(rotation=0)\n", " plt.close() \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "def _sn_volt_max_min_loc(df,Charge_folder_name,fig, ax):\n", " sn=list(set(df['sn']))\n", " df=df[df['soc_st']<30]\n", " #创建路径\n", " if not os.path.exists(Charge_folder_name):\n", " os.mkdir(Charge_folder_name)\n", " sn_peakvolindex_Charge_folder_name=Charge_folder_name+'/Charg_peakvoltindex_sn'\n", " if not os.path.exists(sn_peakvolindex_Charge_folder_name):\n", " os.mkdir(sn_peakvolindex_Charge_folder_name) \n", "\n", " # 假设你的 DataFrame 名称为 df,包含一个名为 minvolindex 的列\n", " # 使用正则表达式提取所有数字\n", " numbers_ed = df['max_vol_idx_ed'].apply(lambda x: [int(i) for i in re.findall('\\d+', x)])\n", "\n", " # 将提取的数字组成一个列表\n", " result_ed = [x for sublist in numbers_ed for x in sublist]\n", " counter_ed = Counter(result_ed)\n", " # 打印结果\n", " numbers_st = df['min_vol_idx_st'].apply(lambda x: [int(i) for i in re.findall('\\d+', x)])\n", "\n", " # 将提取的数字组成一个列表\n", " result_st = [x for sublist in numbers_st for x in sublist]\n", " counter_st = Counter(result_st)\n", "\n", " # 假设你已经有一个名为 counter 的 Counter 对象\n", " # 提取计数和标签数据\n", " counts_ed = list(counter_ed.values())\n", " labels_ed = list(counter_ed.keys())\n", " counts_st = list(counter_st.values())\n", " labels_st = list(counter_st.keys())\n", " greater_than_50_st = [i for i in range(len(counts_st)) if counts_st[i] > 50]\n", " greater_than_50_ed = [i for i in range(len(counts_ed)) if counts_ed[i] > 50]\n", " # 创建柱状图\n", " \n", " ax.scatter(labels_ed, counts_ed, s=2, c='red',label='finish_charge_max_volt_index')\n", " ax.scatter(labels_st, counts_st, s=2, c='green',label='start_charge_min_volt_index')\n", " \n", " \n", " # Add labels for counts greater than 50\n", " for i in greater_than_50_st:\n", " ax.annotate(labels_st[i], (labels_st[i], counts_st[i]))\n", " # Add labels for counts greater than 50\n", " for i in greater_than_50_ed:\n", " ax.annotate(labels_ed[i], (labels_ed[i], counts_ed[i])) \n", " # 添加标签和标题\n", " ax.set_xlabel('cell_number')\n", " ax.set_ylabel('appear_times')\n", " ax.set_title('{}_low_soc_cell_number_count'.format(sn[0]))\n", " # 添加图例\n", " ax.legend(loc='upper right')\n", " # 自动调整 X 轴标签角度和位置\n", " #plt.xticks(rotation=45, ha='right')\n", " if len(sn)>1:\n", " fig.savefig('{}/充电起止最值电压分布.jpg'.format(Charge_folder_name))\n", " else:\n", " fig.savefig('{}/{}充电起止最值电压分布.jpg'.format(sn_peakvolindex_Charge_folder_name,sn[0]))\n", " ax.cla()\n", " # plt.clf()\n", " # plt.close() \n", "#充电起止电压排名 \n", "fig, ax = plt.subplots(dpi=500)\n", "_sn_volt_max_min_loc(combined_chg,Charge_folder_name,fig, ax) \n", "combined_chg.groupby('sn').apply(_sn_volt_max_min_loc,Charge_folder_name,fig, ax)\n", "plt.cla()\n", "plt.clf()\n", "plt.close()\n", "#满充压差\n", "fig, ax = plt.subplots(figsize=(16,9),dpi=500)\n", "_sn_volt_charge_end_voltgap(combined_chg,Charge_folder_name,fig, ax) \n", "combined_chg.groupby('sn').apply(_sn_volt_charge_end_voltgap,Charge_folder_name,fig, ax)\n", "plt.cla()\n", "plt.clf()\n", "plt.close()\n", "\n", "fig, ax = plt.subplots(figsize=(16,9),dpi=500)\n", "_sn_volt_charge_end_volt_max(combined_chg,Charge_folder_name,fig, ax) \n", "combined_chg.groupby('sn').apply(_sn_volt_charge_end_volt_max,Charge_folder_name,fig, ax)\n", "plt.cla()\n", "plt.clf()\n", "plt.close()\n", "\n", "fig, ax = plt.subplots(figsize=(16,9),dpi=500)\n", "_sn_volt_charge_st_voltgap(combined_chg,Charge_folder_name,fig, ax) \n", "combined_chg.groupby('sn').apply(_sn_volt_charge_st_voltgap,Charge_folder_name,fig, ax)\n", "plt.cla()\n", "plt.clf()\n", "plt.close()\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#能耗分析\n", "def _sn_energy_consumption(df,Drive_folder_name,fig, ax):\n", " sn=list(set(df['sn']))\n", " if not os.path.exists(Drive_folder_name):\n", " os.mkdir(Drive_folder_name)\n", " sn_Drive_EnergyCoum_folder_name=Drive_folder_name+'/Dscharge_EnergyConsm_sn'\n", " if not os.path.exists(sn_Drive_EnergyCoum_folder_name):\n", " os.mkdir(sn_Drive_EnergyCoum_folder_name) \n", " # 计算差值并除以能量\n", " if len(sn) ==1:\n", " df=df.sort_values('time_st', ascending=True)\n", " diff = df['chrgkwh']/df['start_odo'].diff()\n", " # 在 DataFrame 中添加新的一列\n", " df['enrgy_cons'] = diff*100\n", " df=df[df['enrgy_cons']<500]\n", " df.dropna(subset=['enrgy_cons'], inplace=True)\n", " \n", " # 读取数据,并将时间列转换为datetime类型\n", " df['time_st'] = pd.to_datetime(df['time_st'])\n", "\n", " # 将数据按月分组,将每个月的温度数据存储在一个列表中\n", " else:\n", " pass\n", " df_hec=df[df['enrgy_cons']>300]\n", " df_nec=df[(df['enrgy_cons']<=300) & (df['enrgy_cons']>100)]\n", " df_lec=df[(df['enrgy_cons']<=100) & (df['enrgy_cons']>50)]\n", " ec_monthly_hec = [group['enrgy_cons'].tolist() for _, group in df_hec.groupby(pd.Grouper(key='time_st', freq='M'))]\n", " ec_monthly_nec = [group['enrgy_cons'].tolist() for _, group in df_nec.groupby(pd.Grouper(key='time_st', freq='M'))]\n", " ec_monthly_lec = [group['enrgy_cons'].tolist() for _, group in df_lec.groupby(pd.Grouper(key='time_st', freq='M'))]\n", " # 将所有月份的温度数据合并到一个列表中\n", " all_ec_monthly_hec = [temp for month_temps in ec_monthly_hec for temp in month_temps]\n", " all_ec_monthly_nec = [temp for month_temps in ec_monthly_nec for temp in month_temps]\n", " all_ec_monthly_lec = [temp for month_temps in ec_monthly_lec for temp in month_temps]\n", " # 创建散点图\n", " ax.scatter(df_hec['time_st'], all_ec_monthly_hec,color='red')\n", " ax.scatter(df_nec['time_st'], all_ec_monthly_nec,color='blue')\n", " ax.scatter(df_lec['time_st'], all_ec_monthly_lec,color='green')\n", " ax.set_xlabel('Month')\n", " ax.set_ylabel('EnerygCosump(kwh/100km)')\n", " \n", " if len(sn)>1:\n", " ax.set_title('EnerygCosump by Month')\n", " fig.savefig('{}/行驶能耗统计.jpg'.format(Drive_folder_name))\n", " else:\n", " ax.set_title('{}_EnerygCosump by Month'.format(sn[0]))\n", " fig.savefig('{}/{}行驶能耗统计.jpg'.format(sn_Drive_EnergyCoum_folder_name,sn[0]))\n", " ax.cla()\n", " plt.tight_layout()\n", " plt.xticks(rotation=0)\n", " plt.close() \n", " return df \n", " \n", "\n", " \n", "def _sn_drive_maxtemp_monthly(df,folder_name,fig, ax): \n", " df=df[df['temp_max']<100]\n", " temp_series = df['temp_max']\n", " sn=list(set(df['sn'])) \n", " sn_month_DsCharge_folder_name=folder_name+'/Dsharg_TempMax_sn_mounth'\n", " if not os.path.exists(sn_month_DsCharge_folder_name):\n", " os.mkdir(sn_month_DsCharge_folder_name) \n", "\n", " # fig, ax = plt.subplots(figsize=(16, 9),dpi=500)\n", "\n", "\n", " # 读取数据,并将时间列转换为datetime类型\n", " df['time_st'] = pd.to_datetime(df['time_st'])\n", "\n", " # 将数据按月分组,将每个月的温度数据存储在一个列表中\n", " df_htmp=df[df['temp_max']>50]\n", " df_ntemp=df[(df['temp_max']<=50) & (df['temp_min']>-10)]\n", " df_ltemp=df[(df['temp_min']<=-10)]\n", " if len(df_htmp):\n", " ec_monthly_htmp = [group['temp_max'].tolist() for _, group in df_htmp.groupby(pd.Grouper(key='time_st', freq='M'))]\n", " all_ec_monthly_hec = [temp for month_temps in ec_monthly_htmp for temp in month_temps]\n", " ax.scatter(df_htmp['time_st'], all_ec_monthly_hec,color='red')\n", " if len(df_ntemp):\n", " ec_monthly_ntmp = [group['temp_max'].tolist() for _, group in df_ntemp.groupby(pd.Grouper(key='time_st', freq='M'))]\n", " all_ec_monthly_nec = [temp for month_temps in ec_monthly_ntmp for temp in month_temps]\n", " ax.scatter(df_ntemp['time_st'], all_ec_monthly_nec,color='blue')\n", " if len(df_ntemp):\n", " ec_monthly_ltmp = [group['temp_min'].tolist() for _, group in df_ltemp.groupby(pd.Grouper(key='time_st', freq='M'))]\n", " all_ec_monthly_lec = [temp for month_temps in ec_monthly_ltmp for temp in month_temps]\n", " ax.scatter(df_ltemp['time_st'], all_ec_monthly_lec,color='green')\n", " # 将所有月份的温度数据合并到一个列表中\n", " \n", " \n", " \n", " # 创建散点图\n", " \n", " \n", " \n", " \n", " \n", " if len(sn)>1:\n", " ax.set_title('Max_Temperature by Month')\n", " fig.savefig('{}/放电温度_月份分布.jpg'.format(folder_name))\n", " else:\n", " ax.set_title('{}_Max_Temperature by Month'.format(sn[0]))\n", " fig.savefig('{}/{}放电温度温_月份分布.jpg'.format(sn_month_DsCharge_folder_name,sn[0]))\n", " ax.cla()\n", " plt.tight_layout()\n", " plt.xticks(rotation=0)\n", " plt.close() \n", " \n", " \n", " \n", "# fig, ax = plt.subplots(figsize=(16,9),dpi=500)\n", "\n", "# df_energy_coump=combined_chg.groupby('sn').apply(_sn_energy_consumption,Drive_folder_name,fig, ax)\n", "# _sn_energy_consumption(df_energy_coump,Drive_folder_name,fig, ax) \n", "# plt.cla()\n", "# plt.clf()\n", "# plt.close()\n", "\n", "\n", " \n", "fig, ax = plt.subplots(figsize=(16,9),dpi=500)\n", "_sn_drive_maxtemp_monthly(combined_df_drive,Drive_folder_name,fig, ax) \n", "df_energy_coump=combined_df_drive.groupby('sn').apply(_sn_drive_maxtemp_monthly,Drive_folder_name,fig, ax)\n", "\n", "plt.cla()\n", "plt.clf()\n", "plt.close()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "\n" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "import docx\n", "\n", "def generate_table_of_contents(headings):\n", " table_of_contents = \"Table of Contents\\n\\n\"\n", " for level, text in headings:\n", " indent = \" \" * (level - 1)\n", " table_of_contents += f\"{indent}- {text}\\n\"\n", " return table_of_contents\n", "\n", "# 打开要处理的 Word 文档\n", "doc_path = \"日信息统计结果.docx\" # 替换为你的文档路径\n", "doc = docx.Document(doc_path)\n", "\n", "# 读取原始内容\n", "original_content = []\n", "for paragraph in doc.paragraphs:\n", " original_content.append(paragraph.text)\n", "\n", "# 获取标题列表\n", "headings = []\n", "for paragraph in doc.paragraphs:\n", " if paragraph.style.name.startswith('Heading'):\n", " level = int(paragraph.style.name.split()[-1])\n", " text = paragraph.text\n", " headings.append((level, text))\n", "\n", "# 生成目录字符串\n", "table_of_contents = generate_table_of_contents(headings)\n", "\n", "# 创建一个新的 Word 文档对象\n", "new_doc = docx.Document()\n", "\n", "# 插入目录到新文档\n", "new_doc.add_paragraph(table_of_contents, style='Normal')\n", "\n", "# 将原始内容插入到新文档中\n", "for paragraph_text in original_content:\n", " new_doc.add_paragraph(paragraph_text, style='Normal')\n", "\n", "# 保存新文档\n", "new_doc.save(\"测试结果.docx\") # 替换为适当的文件名\n" ] } ], "metadata": { "kernelspec": { "display_name": "py3916", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.16" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }