# user_analysis.py (4.8 KB)

import os
from collections import defaultdict
from typing import Mapping, Tuple

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from tqdm import tqdm
  9. # create a dict
  10. dict_col = {'fst_acc_rank_2':'driving_behavior',
  11. 'dschrgah_rank_2': 'discharge_ah',
  12. 'temp_min_75_rank_2' : 'low_temp',
  13. 'delta_odo_rank_2' : 'driving_behavior',
  14. 'temp_35_rank_2' : 'high_temp',
  15. 'temp_max_25_rank_2' : 'high_temp',
  16. 'temp_time_30_rank_2' : 'high_temp',
  17. 'spd_mean_rank_2' : 'driving_speed',
  18. 'maxspd_rank_2' : 'driving_speed',
  19. 'temp_max_75_rank_2' : 'high_temp',
  20. 'temp_min_25_rank_2' : 'low_temp',
  21. 'temp_10_rank_2' : 'low_temp',
  22. 'accon_mean_rank_2' : 'driving_behavior',
  23. 'meancrnt_rank_2' : 'current',
  24. 'temp_time_15_rank_2' : 'low_temp',
  25. 'cellvol_max_75_rank_2' : 'volt',
  26. 'sts_flg_rank_2': 'charging_behavior',
  27. 'chrgah_rank_2': 'charge_ah',
  28. 'packvol_max_25_rank_2': 'volt',
  29. 'packvol_max_75_rank_2': 'volt',
  30. 'cellvol_max_25_rank_2': 'volt',
  31. 'full_chrg_flg_rank_2': 'charging_behavior',
  32. }
  33. default_value = 'other'
  34. dict_col = defaultdict(lambda: default_value, dict_col)
  35. if __name__ == '__main__':
  36. pack_code_list = os.listdir('./dataframes/')
  37. for pack_code in tqdm(pack_code_list):
  38. for name in ['charge', 'drive']:
  39. df_new = pd.DataFrame()
  40. group_list = os.listdir(f'./dataframes/{pack_code}/{name}/')
  41. df_group = pd.DataFrame()
  42. for group in group_list:
  43. df_group = pd.concat([df_group, pd.read_csv(f'./dataframes/{pack_code}/{name}/{group}')])
  44. df_group_week_list = list(df_group.groupby('week'))
  45. df_group = pd.DataFrame()
  46. df_group_rank = pd.DataFrame()
  47. for week, df_group_week in df_group_week_list:
  48. # check if df_group_week has zero values, if so, replace them with np.nan
  49. df_group_week_0 = df_group_week[df_group_week.columns[3:]].replace(0, np.nan)
  50. df_group_week_rank = df_group_week_0.rank(axis=0, method='min', ascending=False).fillna(0).astype(int)
  51. # rename the columns of df_group_week_rank by adding '_rank' to the original column names
  52. df_group_week_rank.columns = [f'{col}_rank_2' for col in df_group_week_rank.columns]
  53. # merge df_group_week and df_group_week_rank
  54. df_group_week = pd.concat([df_group_week, df_group_week_rank], axis=1)
  55. df_group = pd.concat([df_group, df_group_week])
  56. df_group_rank = pd.concat([df_group_rank, df_group_week_rank])
  57. df_rank_arr = np.array(df_group_rank)
  58. # find index that has at least one element that is > 0 and < 20
  59. vins = df_group.iloc[np.where((df_rank_arr > 0) & (df_rank_arr < 20))[0]]['vin'].tolist()
  60. weeks = df_group.iloc[np.where((df_rank_arr > 0) & (df_rank_arr < 20))[0]]['week'].tolist()
  61. cols = df_group_rank.columns[np.where((df_rank_arr > 0) & (df_rank_arr < 20))[1]].tolist()
  62. cols_cat = [dict_col[col] for col in cols]
  63. value = df_rank_arr[np.where((df_rank_arr > 0) & (df_rank_arr < 20))]
  64. # make a new dataframe
  65. df_new = pd.DataFrame({'vin':vins, 'mode':name, 'week':weeks, 'col':cols, 'cols_cat':cols_cat, 'value':value})
  66. df_new.reset_index(drop=True, inplace=True)
  67. df_new = df_new.loc[df_new.groupby(['vin', 'mode', 'week', 'cols_cat'])['value'].idxmin()].reset_index(drop=True)
  68. df_new_2 = df_new.groupby(['vin', 'week', 'cols_cat'])['value'].count().unstack().fillna(0).astype(int)
  69. df_new_2 = df_new_2.apply(lambda x: round(x/x.max(), 2), axis=1)
  70. df_new_2.to_csv(f'./pack/df_{name}_{pack_code}.csv')