12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182 |
- import pandas as pd
- import os
- from tqdm import tqdm
- from matplotlib import pyplot as plt
- import numpy as np
- # import defaultdict
- from collections import defaultdict
- from tqdm import tqdm
# Category reported for any rank column that has no explicit entry below.
default_value = 'other'

# Lookup table: rank-column name -> feature category.
# It is a defaultdict, so an unlisted column resolves to `default_value`
# (and, as with any defaultdict, the lookup stores that key in the table).
dict_col = defaultdict(lambda: default_value)
dict_col.update({
    'fst_acc_rank_2': 'driving_behavior',
    'dschrgah_rank_2': 'discharge_ah',
    'temp_min_75_rank_2': 'low_temp',
    'delta_odo_rank_2': 'driving_behavior',
    'temp_35_rank_2': 'high_temp',
    'temp_max_25_rank_2': 'high_temp',
    'temp_time_30_rank_2': 'high_temp',
    'spd_mean_rank_2': 'driving_speed',
    'maxspd_rank_2': 'driving_speed',
    'temp_max_75_rank_2': 'high_temp',
    'temp_min_25_rank_2': 'low_temp',
    'temp_10_rank_2': 'low_temp',
    'accon_mean_rank_2': 'driving_behavior',
    'meancrnt_rank_2': 'current',
    'temp_time_15_rank_2': 'low_temp',
    'cellvol_max_75_rank_2': 'volt',
    'sts_flg_rank_2': 'charging_behavior',
    'chrgah_rank_2': 'charge_ah',
    'packvol_max_25_rank_2': 'volt',
    'packvol_max_75_rank_2': 'volt',
    'cellvol_max_25_rank_2': 'volt',
    'full_chrg_flg_rank_2': 'charging_behavior',
})
# A rank counts as a "hit" when it is strictly between 0 and this limit.
RANK_LIMIT = 20


def load_mode_frame(mode_dir):
    """Stack every file found in *mode_dir* into a single DataFrame.

    Each directory entry is read with ``pd.read_csv``. The frames are
    concatenated once, with a fresh 0..n-1 index so that row labels are
    unique across files.

    Args:
        mode_dir: Directory holding the per-group CSV files of one mode.

    Returns:
        The combined DataFrame, or ``None`` when *mode_dir* is not a
        directory or contains no entries.
    """
    if not os.path.isdir(mode_dir):
        return None
    # Sorted only to make the row order reproducible between runs.
    frames = [
        pd.read_csv(os.path.join(mode_dir, file_name))
        for file_name in sorted(os.listdir(mode_dir))
    ]
    if not frames:
        return None
    return pd.concat(frames, ignore_index=True)


def summarise_top_ranks(df_group, mode, categories, limit=RANK_LIMIT):
    """Build the per-(vin, week) table of categories that hold a top rank.

    Within every ``week`` group, each column from the fourth one onward is
    ranked in descending order (``method='min'``); zeros are treated as
    missing and end up with rank 0. A cell is a hit when its rank is
    ``0 < rank < limit``. Hits are mapped to a category through
    *categories*, reduced to one hit per (vin, mode, week, category),
    counted per (vin, week, category) and finally divided by the row
    maximum and rounded to two decimals.

    Args:
        df_group: Frame with ``vin`` and ``week`` columns. NOTE(review):
            the first three columns are skipped, presumably because they
            are identifiers rather than features - confirm against the
            input CSV layout.
        mode: Value stored in the ``mode`` column of the intermediate hits.
        categories: Mapping from ``'<column>_rank_2'`` to a category name.
        limit: Exclusive upper bound on the ranks that count as hits.

    Returns:
        DataFrame indexed by (vin, week) with one column per category, or
        ``None`` when ``groupby('week')`` yields no group at all.
    """
    key_parts = []
    rank_parts = []
    for _, week_rows in df_group.groupby('week'):
        # Zeros become NaN so they are left unranked (rank 0 after fillna).
        features = week_rows[week_rows.columns[3:]].replace(0, np.nan)
        week_ranks = (
            features.rank(axis=0, method='min', ascending=False)
            .fillna(0)
            .astype(int)
        )
        week_ranks.columns = [f'{col}_rank_2' for col in week_ranks.columns]
        key_parts.append(week_rows[['vin', 'week']])
        rank_parts.append(week_ranks)

    if not rank_parts:
        return None

    # One concat per output instead of growing a frame inside the loop;
    # both results share the same row order, so positions line up.
    keys = pd.concat(key_parts)
    ranks = pd.concat(rank_parts)

    rank_arr = ranks.to_numpy()
    hit_rows, hit_cols = np.nonzero((rank_arr > 0) & (rank_arr < limit))
    hit_names = ranks.columns[hit_cols].tolist()

    hits = pd.DataFrame({
        'vin': keys['vin'].to_numpy()[hit_rows],
        'mode': mode,
        'week': keys['week'].to_numpy()[hit_rows],
        'col': hit_names,
        'cols_cat': [categories[col] for col in hit_names],
        'value': rank_arr[hit_rows, hit_cols],
    })

    # Keep only the best (lowest) rank of every category per vin/mode/week.
    best_idx = hits.groupby(['vin', 'mode', 'week', 'cols_cat'])['value'].idxmin()
    best = hits.loc[best_idx]

    counts = (
        best.groupby(['vin', 'week', 'cols_cat'])['value']
        .count()
        .unstack()
        .fillna(0)
        .astype(int)
    )
    # Row-wise normalisation by the largest count of that row.
    return counts.div(counts.max(axis=1), axis=0).round(2)


if __name__ == '__main__':
    # to_csv does not create missing directories.
    os.makedirs('./pack', exist_ok=True)

    for pack_code in tqdm(os.listdir('./dataframes/')):
        for name in ['charge', 'drive']:
            df_group = load_mode_frame(f'./dataframes/{pack_code}/{name}')
            if df_group is None:
                # Stray file in ./dataframes/, missing mode folder or no data.
                continue

            df_summary = summarise_top_ranks(df_group, name, dict_col)
            if df_summary is None:
                continue

            df_summary.to_csv(f'./pack/df_{name}_{pack_code}.csv')
-
-
-
-
-
|