"""Aggregate per-vehicle charge/drive records into weekly summary CSVs.

For every sub-directory of ``path`` the script reads each
``*charge_proc_di.csv`` and ``*drive_proc_di.csv`` file, keeps the rows whose
``dt`` lies within 90 days of the newest ``dt`` in that file, groups them by
ISO week number and writes one summary row per week to
``<out_path>/<sub-directory>/charge/<vin>.csv`` or ``.../drive/<vin>.csv``.
"""
import os
from datetime import datetime

import numpy as np
import pandas as pd

try:
    from tqdm import tqdm
except ImportError:
    # tqdm only draws a progress bar; fall back to a pass-through wrapper so
    # the aggregation still runs where it is not installed.
    def tqdm(iterable, *args, **kwargs):
        return iterable

pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

path = './data/'
out_path = './dataframes/'

# Thresholds used when counting charge records ("_c" suffix).
temp_thresh_max_c = 35
temp_thresh_min_c = 10
mean_crnt_thresh_c = 1

# Thresholds used when counting drive records ("_d" suffix).
temp_thresh_max_d = 35
temp_thresh_min_d = 10
mean_crnt_thresh_d = 0.1

# Number of days, counted back from the newest record, that are summarised.
WINDOW_DAYS = 90

CHARGE_COLUMNS = [
    'vin', 'pack_model', 'week',
    'temp_max_25', 'temp_max_75', 'temp_35',
    'temp_min_25', 'temp_min_75', 'temp_10',
    'chrgah', 'meancrnt', 'sts_flg', 'full_chrg_flg',
    'cellvol_max_25', 'cellvol_max_75', 'packvol_max_25', 'packvol_max_75',
    'temp_time_15', 'temp_time_30',
]

DRIVE_COLUMNS = [
    'vin', 'pack_model', 'week',
    'temp_max_25', 'temp_max_75', 'temp_35',
    'temp_min_25', 'temp_min_75', 'temp_10',
    'delta_odo', 'dschrgah', 'meancrnt', 'temp_time_15', 'temp_time_30',
    'spd_mean', 'accon_mean', 'fst_acc', 'maxspd',
]


def _load_recent(csv_path, window_days=WINDOW_DAYS):
    """Read *csv_path* and keep rows from the last *window_days* days.

    The integer ``dt`` column (``YYYYMMDD``) is converted to datetime and a
    ``wk`` column holding the ISO week number is added. An empty frame is
    returned unchanged so callers can skip it.
    """
    df = pd.read_csv(csv_path)
    if df.empty:
        return df
    df['dt'] = pd.to_datetime(df['dt'].astype(str), format='%Y%m%d')
    cutoff = df['dt'].max() - pd.Timedelta(days=window_days)
    # Explicit copy: a column is added below and must not write into a view.
    df = df.loc[df['dt'] >= cutoff].copy()
    df['wk'] = df['dt'].dt.isocalendar().week
    return df


def _quartiles_or_zero(values):
    """Return the (25 %, 75 %) quantiles of *values*, or (0, 0) when empty."""
    if len(values) == 0:
        return 0, 0
    return np.quantile(values, 0.25), np.quantile(values, 0.75)


def _temperature_stats(week_df, hot_thresh, cold_thresh):
    """Quartiles of temp_max/temp_min plus counts beyond the thresholds."""
    temp_max = week_df['temp_max']
    temp_min = week_df['temp_min']
    return {
        'temp_max_25': np.quantile(temp_max.to_numpy(), 0.25),
        'temp_max_75': np.quantile(temp_max.to_numpy(), 0.75),
        'temp_35': int((temp_max > hot_thresh).sum()),
        'temp_min_25': np.quantile(temp_min.to_numpy(), 0.25),
        'temp_min_75': np.quantile(temp_min.to_numpy(), 0.75),
        'temp_10': int((temp_min < cold_thresh).sum()),
    }


def _temp_time_totals(week_df):
    """Sum the temp_time buckets 1-4 and 6-7 over the week."""
    low = (week_df['temp_time_1'] + week_df['temp_time_2']
           + week_df['temp_time_3'] + week_df['temp_time_4'])
    high = week_df['temp_time_6'] + week_df['temp_time_7']
    return {'temp_time_15': low.sum(), 'temp_time_30': high.sum()}


def _summarise_charge_week(week_df):
    """Build the charge summary values for one week of records."""
    # Voltage quartiles are taken over full charges only (full_chrg_flg == 1).
    full = week_df.loc[week_df['full_chrg_flg'] == 1]
    cellvol_25, cellvol_75 = _quartiles_or_zero(full['cellvol_max'].to_numpy())
    packvol_25, packvol_75 = _quartiles_or_zero(full['packvol_max'].to_numpy())

    row = _temperature_stats(week_df, temp_thresh_max_c, temp_thresh_min_c)
    row.update({
        'chrgah': week_df['chrgah'].sum(),
        # Count of records whose mean current exceeds the threshold.
        'meancrnt': int((week_df['meancrnt'] > mean_crnt_thresh_c).sum()),
        'sts_flg': int((week_df['sts_flg'] == 1).sum()),
        'full_chrg_flg': len(full),
        'cellvol_max_25': cellvol_25,
        'cellvol_max_75': cellvol_75,
        'packvol_max_25': packvol_25,
        'packvol_max_75': packvol_75,
    })
    row.update(_temp_time_totals(week_df))
    return row


def _summarise_drive_week(week_df):
    """Build the drive summary values for one week of records."""
    row = _temperature_stats(week_df, temp_thresh_max_d, temp_thresh_min_d)
    row.update({
        'delta_odo': week_df['delta_odo'].sum(),
        'dschrgah': week_df['dschrg_ah'].sum(),
        # Count of records whose mean current exceeds the threshold.
        'meancrnt': int((week_df['meancrnt'] > mean_crnt_thresh_d).sum()),
        'spd_mean': week_df['spd_mean'].max(),
        'accon_mean': week_df['accon_mean'].max(),
        'fst_acc': (week_df['fst_acc_pls'] + week_df['fst_acc_mus']
                    + week_df['fst_acc_trn']).sum(),
        'maxspd': week_df['maxspd'].max(),
    })
    row.update(_temp_time_totals(week_df))
    return row


def _summarise_file(csv_path, columns, summarise_week):
    """Return ``(vin, weekly_summary)`` for one input file.

    ``vin`` is the part of the file name before the first underscore.
    ``weekly_summary`` is ``None`` when the file holds no rows.
    """
    vin = os.path.basename(csv_path).split('_')[0]
    df = _load_recent(csv_path)
    if df.empty:
        return vin, None

    # Positional access: after the date filter the label 0 may be gone.
    pack_model = df['pack_model'].iloc[0]

    # NOTE(review): grouping uses the ISO week number alone, so a window that
    # spans New Year lists week 1 before week 52/53 -- confirm that is wanted.
    rows = []
    for week, week_df in df.groupby('wk'):
        row = {'vin': vin, 'pack_model': pack_model, 'week': week}
        row.update(summarise_week(week_df))
        rows.append(row)
    return vin, pd.DataFrame(rows, columns=columns)


def main(data_root=path, out_root=out_path):
    """Write weekly charge and drive summaries for every folder in *data_root*.

    Args:
        data_root: directory containing one sub-directory per data set.
        out_root: directory that receives ``<sub-directory>/charge`` and
            ``<sub-directory>/drive`` output folders.
    """
    jobs = (
        ('charge', 'charge_proc_di.csv', CHARGE_COLUMNS, _summarise_charge_week),
        ('drive', 'drive_proc_di.csv', DRIVE_COLUMNS, _summarise_drive_week),
    )

    for test_vin in sorted(os.listdir(data_root)):
        in_dir = os.path.join(data_root, test_vin)
        if not os.path.isdir(in_dir):
            # Stray files next to the data folders are not inputs.
            continue

        for kind, _, _, _ in jobs:
            os.makedirs(os.path.join(out_root, test_vin, kind), exist_ok=True)

        file_names = sorted(os.listdir(in_dir))
        for kind, suffix, columns, summarise_week in jobs:
            out_dir = os.path.join(out_root, test_vin, kind)
            matching = [name for name in file_names if name.endswith(suffix)]
            for name in tqdm(matching):
                vin, summary = _summarise_file(
                    os.path.join(in_dir, name), columns, summarise_week)
                if summary is None:
                    continue
                summary.to_csv(os.path.join(out_dir, f'{vin}.csv'), index=False)


if __name__ == '__main__':
    main()