12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485 |
- import pandas as pd
- import os
- from tqdm import tqdm
- from matplotlib import pyplot as plt
- import numpy as np
- # import defaultdict
- from collections import defaultdict
- from tqdm import tqdm
# Let pandas print up to 500 columns / rows before truncating its output.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

# Category reported for any column name that has no explicit entry below.
default_value = 'other'

# Maps a rank column name (with the '_2' suffix) to the category it is
# reported under.  Several columns may share one category.  Looking up an
# unlisted name yields ``default_value`` (and, as with any defaultdict,
# stores that name in the mapping).
dict_col = defaultdict(
    lambda: default_value,
    (
        ('fst_acc_rank_2', 'driving_behavior'),
        ('dschrgah_rank_2', 'discharge_ah'),
        ('temp_min_75_rank_2', 'temp_min'),
        ('delta_odo_rank_2', 'odo'),
        ('temp_35_rank_2', 'high_temp'),
        ('temp_max_25_rank_2', 'temp_max'),
        ('temp_time_30_rank_2', 'high_temp'),
        ('spd_mean_rank_2', 'driving_speed'),
        ('maxspd_rank_2', 'driving_speed'),
        ('temp_max_75_rank_2', 'temp_max'),
        ('temp_min_25_rank_2', 'temp_min'),
        ('temp_10_rank_2', 'low_temp'),
        ('accon_mean_rank_2', 'driving_behavior'),
        ('meancrnt_rank_2', 'current'),
        ('temp_time_15_rank_2', 'low_temp'),
        ('cellvol_max_75_rank_2', 'cellvol'),
        ('sts_flg_rank_2', 'charge_status'),
        ('chrgah_rank_2', 'charge_ah'),
        ('packvol_max_25_rank_2', 'packvol'),
        ('packvol_max_75_rank_2', 'packvol'),
        ('cellvol_max_25_rank_2', 'cellvol'),
        ('full_chrg_flg_rank_2', 'charge_status'),
    ),
)
def eva(x):
    """Evaluate the string *x* and return the result as a list.

    Args:
        x: Source text of a Python expression that yields an iterable,
            e.g. ``"[1, 2, 3]"`` or ``"(4, 5)"``.

    Returns:
        ``list(eval(x))`` on success.  If evaluation or the list conversion
        fails for any ordinary reason (malformed text, non-string input,
        non-iterable result), the placeholder ``[0, 0, 0]`` is returned.
    """
    try:
        # NOTE(security): eval() executes arbitrary expressions, so this is
        # only safe for trusted data.  If inputs are always plain literals,
        # ast.literal_eval would be the safer choice.
        return list(eval(x))
    except Exception:
        # Deliberately best-effort, but catch Exception rather than using a
        # bare except so KeyboardInterrupt / SystemExit still propagate.
        return [0, 0, 0]
-
# For every entry of ./data/ (used as the vin id), collect the rank values
# that lie strictly between 0 and 20 from the 'drive' and 'charge' group
# files, keep the smallest one per (vin, mode, week, category) and write two
# count tables per vin.
# NOTE(review): directory entries are inserted verbatim into
# './{name}/group_{vin}.csv' -- confirm they carry no file extension.
df_new = pd.DataFrame()
vin_list = os.listdir('./data/')
for vin in tqdm(vin_list):
    # Fresh accumulator so each vin's output only contains its own rows.
    df_new = pd.DataFrame()
    for name in ['drive', 'charge']:
        df = pd.read_csv(f'./{name}/group_{vin}.csv')
        # select all the columns with 'rank' in the name
        df_rank = df.filter(regex='rank')
        # everything that is neither a rank column nor one of the first
        # three columns (presumably identifiers such as vin/week -- confirm)
        df_basic = df.drop(columns=(df_rank.columns.tolist() + df.columns[:3].tolist()))
        # suffix the rank columns so they match the keys of dict_col
        df_rank.columns = [col+'_2' for col in df_rank.columns]
        # set df_basic to round 3
        df_basic = df_basic.round(3)
        df = pd.concat([df[df.columns[:3]], df_basic, df_rank], axis=1)
        # sort column names, keeping the first three columns in front
        df = df.reindex((df.columns[:3].tolist() + sorted(df.columns[3:])), axis=1)
        # select the (renamed) rank columns again, now in sorted order
        df_rank = df.filter(regex='rank_2')
        df_rank_arr = np.array(df_rank)
        # positions of every cell whose rank is > 0 and < 20; computed once
        # and reused for the row, column and value lookups below
        hit_rows, hit_cols = np.where((df_rank_arr > 0) & (df_rank_arr < 20))
        vins = df.iloc[hit_rows]['vin'].tolist()
        weeks = df.iloc[hit_rows]['week'].tolist()
        cols = df_rank.columns[hit_cols].tolist()
        cols_cat = [dict_col[col] for col in cols]
        value = df_rank_arr[hit_rows, hit_cols]
        # one output row per qualifying cell
        df_new = pd.concat([df_new, pd.DataFrame({'vin':vins, 'mode':name, 'week':weeks, 'col':cols, 'cols_cat':cols_cat, 'value':value})], axis=0)
    df_new.reset_index(drop=True, inplace=True)
    # keep only the row with the smallest value per (vin, mode, week, category)
    df_new = df_new.loc[df_new.groupby(['vin', 'mode', 'week', 'cols_cat'])['value'].idxmin()].reset_index(drop=True)
    # number of remaining rows per category, split by mode and overall
    df_new_1 = df_new.groupby(['mode', 'vin', 'cols_cat'])['value'].count().unstack().fillna(0).astype(int)
    df_new_2 = df_new.groupby(['vin', 'cols_cat'])['value'].count().unstack().fillna(0).astype(int)
    # sum all the columns grouped by mode; a mode without any qualifying
    # rank is absent from the index, so skip it instead of raising KeyError
    for mode_name in ['drive', 'charge']:
        if mode_name in df_new_1.index.get_level_values('mode'):
            df_new_1.loc[(mode_name, 'total'), :] = df_new_1.loc[mode_name].sum(axis=0)
    # sum all the rows
    df_new_2['total'] = df_new_2.sum(axis=1)
    df_new_2 = df_new_2.sort_values(by='total', ascending=False)
    df_new_1.to_csv(f'./df_new_{vin}_1.csv')
    df_new_2.to_csv(f'./df_new_{vin}_2.csv')
-
|