# get_dataframes.py (7.2 KB)
import os
from datetime import datetime

import numpy as np
import pandas as pd
from tqdm import tqdm
  6. pd.set_option('display.max_columns', 500)
  7. pd.set_option('display.max_rows', 500)
  8. path = './data/'
  9. out_path = './dataframes/'
  10. temp_thresh_max_c = 35
  11. temp_thresh_min_c = 10
  12. mean_crnt_thresh_c = 1
  13. temp_thresh_max_d = 35
  14. temp_thresh_min_d = 10
  15. mean_crnt_thresh_d = 0.1
  16. if __name__ == '__main__':
  17. test_vin_list = os.listdir(path)
  18. for test_vin in test_vin_list:
  19. if not os.path.exists(out_path+test_vin+'/charge/'):
  20. os.makedirs(out_path+test_vin+'/charge/')
  21. if not os.path.exists(out_path+test_vin+'/drive/'):
  22. os.makedirs(out_path+test_vin+'/drive/')
  23. crg_file_list = [file for file in os.listdir(path+test_vin+'/') if file.endswith('charge_proc_di.csv')]
  24. for crg_file in tqdm(crg_file_list):
  25. df_user = pd.DataFrame(columns=['vin', 'pack_model', 'week', 'temp_max_25', 'temp_max_75', 'temp_35', 'temp_min_25', 'temp_min_75', 'temp_10', \
  26. 'chrgah', 'meancrnt', 'sts_flg', 'full_chrg_flg', 'cellvol_max_25', 'cellvol_max_75', 'packvol_max_25', 'packvol_max_75', 'temp_time_15', 'temp_time_30'])
  27. vin = crg_file.split('_')[0]
  28. df_data = pd.read_csv(path+test_vin+'/' + crg_file)
  29. # convert dt from int to datetime
  30. df_data['dt'] = df_data['dt'].apply(lambda x: datetime.strptime(str(x), '%Y%m%d'))
  31. df_data = df_data.loc[df_data['dt'] >= df_data['dt'].max() - pd.Timedelta(days=90)]
  32. if len(df_data) == 0:
  33. continue
  34. df_data['wk'] = df_data['dt'].dt.isocalendar().week
  35. pack_model = df_data['pack_model'][0]
  36. df_data_dt_list = list(df_data.groupby('wk'))
  37. for date, df_data_dt in df_data_dt_list:
  38. temp_max_list = df_data_dt['temp_max'].tolist()
  39. temp_max_25 = np.quantile(temp_max_list, 0.25)
  40. temp_max_75 = np.quantile(temp_max_list, 0.75)
  41. temp_35 = len([temp for temp in temp_max_list if temp > temp_thresh_max_c])
  42. temp_min_list = df_data_dt['temp_min'].tolist()
  43. temp_min_25 = np.quantile(temp_min_list, 0.25)
  44. temp_min_75 = np.quantile(temp_min_list, 0.75)
  45. temp_10 = len([temp for temp in temp_min_list if temp < temp_thresh_min_c])
  46. chrgah = df_data_dt['chrgah'].sum()
  47. meancrnt = len([crnt for crnt in df_data_dt['meancrnt'].tolist() if crnt > mean_crnt_thresh_c])
  48. sts_flg = len([sts for sts in df_data_dt['sts_flg'].tolist() if sts == 1])
  49. full_chrg_flg = len([full_chrg for full_chrg in df_data_dt['full_chrg_flg'].tolist() if full_chrg == 1])
  50. cellvol_max_list = df_data_dt.loc[df_data_dt['full_chrg_flg'] == 1]['cellvol_max'].tolist()
  51. if len(cellvol_max_list) == 0:
  52. cellvol_max_25 = 0
  53. cellvol_max_75 = 0
  54. else:
  55. cellvol_max_25 = np.quantile(cellvol_max_list, 0.25)
  56. cellvol_max_75 = np.quantile(cellvol_max_list, 0.75)
  57. packvol_max_list = df_data_dt.loc[df_data_dt['full_chrg_flg'] == 1]['packvol_max'].tolist()
  58. if len(packvol_max_list) == 0:
  59. packvol_max_25 = 0
  60. packvol_max_75 = 0
  61. else:
  62. packvol_max_25 = np.quantile(packvol_max_list, 0.25)
  63. packvol_max_75 = np.quantile(packvol_max_list, 0.75)
  64. temp_time_15 = (df_data_dt['temp_time_1']+df_data_dt['temp_time_2']+df_data_dt['temp_time_3']+df_data_dt['temp_time_4']).sum()
  65. temp_time_30 = (df_data_dt['temp_time_6']+df_data_dt['temp_time_7']).sum()
  66. df_user.loc[len(df_user)] = [vin, pack_model, date, temp_max_25, temp_max_75, temp_35, temp_min_25, temp_min_75, temp_10, \
  67. chrgah, meancrnt, sts_flg, full_chrg_flg, cellvol_max_25, cellvol_max_75, packvol_max_25, packvol_max_75, temp_time_15, temp_time_30]
  68. df_user.to_csv(out_path+test_vin+'/charge/' +f'{vin}.csv', index=False)
  69. drv_file_list = [file for file in os.listdir(path+test_vin+'/') if file.endswith('drive_proc_di.csv')]
  70. for drv_file in tqdm(drv_file_list):
  71. df_user = pd.DataFrame(columns=['vin', 'pack_model', 'week', 'temp_max_25', 'temp_max_75', 'temp_35', 'temp_min_25', 'temp_min_75', 'temp_10', \
  72. 'delta_odo', 'dschrgah', 'meancrnt','temp_time_15', 'temp_time_30', 'spd_mean', 'accon_mean', 'fst_acc', 'maxspd'])
  73. vin = drv_file.split('_')[0]
  74. df_data = pd.read_csv(path+test_vin+'/' + drv_file)
  75. # convert dt from int to datetime
  76. df_data['dt'] = df_data['dt'].apply(lambda x: datetime.strptime(str(x), '%Y%m%d'))
  77. df_data = df_data.loc[df_data['dt'] >= df_data['dt'].max() - pd.Timedelta(days=90)]
  78. if len(df_data) == 0:
  79. continue
  80. df_data['wk'] = df_data['dt'].dt.isocalendar().week
  81. pack_model = df_data['pack_model'][0]
  82. df_data_dt_list = list(df_data.groupby('wk'))
  83. for date, df_data_dt in df_data_dt_list:
  84. temp_max_list = df_data_dt['temp_max'].tolist()
  85. temp_max_25 = np.quantile(temp_max_list, 0.25)
  86. temp_max_75 = np.quantile(temp_max_list, 0.75)
  87. temp_35 = len([temp for temp in temp_max_list if temp > temp_thresh_max_d])
  88. temp_min_list = df_data_dt['temp_min'].tolist()
  89. temp_min_25 = np.quantile(temp_min_list, 0.25)
  90. temp_min_75 = np.quantile(temp_min_list, 0.75)
  91. temp_10 = len([temp for temp in temp_min_list if temp < temp_thresh_min_d])
  92. delta_odo = df_data_dt['delta_odo'].sum()
  93. dschrg_ah = df_data_dt['dschrg_ah'].sum()
  94. spd_mean = df_data_dt['spd_mean'].max()
  95. accon_mean = df_data_dt['accon_mean'].max()
  96. fst_acc = (df_data_dt['fst_acc_pls']+df_data_dt['fst_acc_mus']+df_data_dt['fst_acc_trn']).sum()
  97. meancrnt = len([crnt for crnt in df_data_dt['meancrnt'].tolist() if crnt > mean_crnt_thresh_d])
  98. maxspd = df_data_dt['maxspd'].max()
  99. temp_time_15 = (df_data_dt['temp_time_1']+df_data_dt['temp_time_2']+df_data_dt['temp_time_3']+df_data_dt['temp_time_4']).sum()
  100. temp_time_30 = (df_data_dt['temp_time_6']+df_data_dt['temp_time_7']).sum()
  101. df_user.loc[len(df_user)] = [vin, pack_model, date, temp_max_25, temp_max_75, temp_35, temp_min_25, temp_min_75, temp_10, \
  102. delta_odo, dschrg_ah, meancrnt, temp_time_15, temp_time_30, spd_mean, accon_mean, fst_acc, maxspd]
  103. df_user.to_csv(out_path+test_vin+'/drive/' +f'{vin}.csv', index=False)