"""Aggregate per-vehicle charge and drive records into weekly feature CSVs.

For every vehicle directory under ``path`` whose pack model (looked up in
``./vin_pack_cell_info.csv``) equals ``TARGET_PACK_MODEL``, the script reads
the ``*charge_proc_di.feather`` and ``*drive_proc_di.feather`` files, keeps
the most recent records, groups them by ISO week number and writes one row
per week to ``<out_path>/<pack_model>/<charge|drive>/<vin>.csv``.
"""
import os
import shutil
from datetime import datetime

import numpy as np
import pandas as pd

try:
    from tqdm import tqdm
except ImportError:
    # The progress bar is purely cosmetic; iterate silently without it.
    def tqdm(iterable, **kwargs):
        return iterable

pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

path = '/home/chenenze/hz_user/'
out_path = './dataframes/'

# Thresholds applied to charge records.
temp_thresh_max_c = 35
temp_thresh_min_c = 10
mean_crnt_thresh_c = 1

# Thresholds applied to drive records.
temp_thresh_max_d = 35
temp_thresh_min_d = 10
mean_crnt_thresh_d = 0.1

# Only vehicles with this pack model code are processed.
TARGET_PACK_MODEL = '2101TBC'

# Number of days (counted back from the newest record) that are kept.
CHARGE_WINDOW_DAYS = 60
DRIVE_WINDOW_DAYS = 90

CHARGE_COLUMNS = [
    'vin', 'pack_model', 'week',
    'temp_max_25', 'temp_max_75', 'temp_35',
    'temp_min_25', 'temp_min_75', 'temp_10',
    'chrgah', 'meancrnt', 'sts_flg', 'full_chrg_flg',
    'cellvol_max_25', 'cellvol_max_75',
    'packvol_max_25', 'packvol_max_75',
    'temp_time_15', 'temp_time_30',
]

DRIVE_COLUMNS = [
    'vin', 'pack_model', 'week',
    'temp_max_25', 'temp_max_75', 'temp_35',
    'temp_min_25', 'temp_min_75', 'temp_10',
    'delta_odo', 'dschrgah', 'meancrnt',
    'temp_time_15', 'temp_time_30',
    'spd_mean', 'accon_mean', 'fst_acc', 'maxspd',
]


def recent_window(df_data, days):
    """Return the rows of *df_data* from the last *days* days, with a week column.

    ``dt`` is parsed from ``YYYYMMDD`` integers into datetimes, rows older
    than ``max(dt) - days`` are dropped, and a ``wk`` column holding the ISO
    week number is added. The input frame is not modified. An empty input is
    returned unchanged (without a ``wk`` column).
    """
    if df_data.empty:
        return df_data
    parsed = df_data.copy()
    parsed['dt'] = pd.to_datetime(parsed['dt'].astype(str), format='%Y%m%d')
    cutoff = parsed['dt'].max() - pd.Timedelta(days=days)
    # Copy the slice so adding 'wk' does not write into a view of `parsed`.
    recent = parsed.loc[parsed['dt'] >= cutoff].copy()
    recent['wk'] = recent['dt'].apply(lambda ts: ts.isocalendar()[1])
    return recent


def quartiles(values):
    """Return the 25% and 75% quantiles of *values*, rounded to 2 decimals."""
    return (round(np.quantile(values, 0.25), 2),
            round(np.quantile(values, 0.75), 2))


def temperature_features(week_df, max_thresh, min_thresh):
    """Return the six temperature features shared by charge and drive rows.

    The result is ``[temp_max_25, temp_max_75, temp_35, temp_min_25,
    temp_min_75, temp_10]`` where ``temp_35`` counts records with
    ``temp_max > max_thresh`` and ``temp_10`` counts records with
    ``temp_min < min_thresh``.
    """
    temp_max = week_df['temp_max']
    temp_min = week_df['temp_min']
    temp_max_25, temp_max_75 = quartiles(temp_max.to_numpy())
    temp_min_25, temp_min_75 = quartiles(temp_min.to_numpy())
    over_max = int((temp_max > max_thresh).sum())
    under_min = int((temp_min < min_thresh).sum())
    return [temp_max_25, temp_max_75, over_max,
            temp_min_25, temp_min_75, under_min]


def temp_time_features(week_df):
    """Return ``[temp_time_15, temp_time_30]`` for one week of records.

    ``temp_time_15`` is the total of columns ``temp_time_1`` .. ``temp_time_4``
    and ``temp_time_30`` the total of ``temp_time_6`` and ``temp_time_7``.
    NOTE(review): the meaning of the individual temp_time_* bins is not
    visible in this file - confirm against the upstream feature definition.
    """
    temp_time_15 = (week_df['temp_time_1'] + week_df['temp_time_2']
                    + week_df['temp_time_3'] + week_df['temp_time_4']).sum()
    temp_time_30 = (week_df['temp_time_6'] + week_df['temp_time_7']).sum()
    return [round(temp_time_15, 2), round(temp_time_30, 2)]


def full_charge_quartiles(week_df, column):
    """Return the quartiles of *column* over rows with ``full_chrg_flg == 1``.

    Returns ``(0, 0)`` when the week contains no such rows.
    """
    values = week_df.loc[week_df['full_chrg_flg'] == 1, column]
    if values.empty:
        return 0, 0
    return quartiles(values.to_numpy())


def charge_week_features(week_df):
    """Return the charge feature values (everything after 'week') for one week."""
    cellvol_max_25, cellvol_max_75 = full_charge_quartiles(week_df, 'cellvol_max')
    packvol_max_25, packvol_max_75 = full_charge_quartiles(week_df, 'packvol_max')
    return (
        temperature_features(week_df, temp_thresh_max_c, temp_thresh_min_c)
        + [
            round(week_df['chrgah'].sum(), 2),
            # The next three are counts of records, not averages.
            int((week_df['meancrnt'] > mean_crnt_thresh_c).sum()),
            int((week_df['sts_flg'] == 1).sum()),
            int((week_df['full_chrg_flg'] == 1).sum()),
            cellvol_max_25, cellvol_max_75,
            packvol_max_25, packvol_max_75,
        ]
        + temp_time_features(week_df)
    )


def drive_week_features(week_df):
    """Return the drive feature values (everything after 'week') for one week."""
    fst_acc = (week_df['fst_acc_pls'] + week_df['fst_acc_mus']
               + week_df['fst_acc_trn']).sum()
    return (
        temperature_features(week_df, temp_thresh_max_d, temp_thresh_min_d)
        + [
            round(week_df['delta_odo'].sum(), 2),
            round(week_df['dschrg_ah'].sum(), 2),
            # Count of records whose mean current exceeds the drive threshold.
            int((week_df['meancrnt'] > mean_crnt_thresh_d).sum()),
        ]
        + temp_time_features(week_df)
        + [
            # spd_mean / accon_mean are reported as the weekly maximum.
            round(week_df['spd_mean'].max(), 2),
            round(week_df['accon_mean'].max(), 2),
            round(fst_acc, 2),
            round(week_df['maxspd'].max(), 2),
        ]
    )


def summarize_weeks(vin, df_data, window_days, feature_builder, columns):
    """Build the weekly summary frame for one vehicle file.

    Args:
        vin: Vehicle identifier written into every row.
        df_data: Raw records with at least ``dt`` and ``pack_model`` columns
            plus whatever *feature_builder* reads.
        window_days: Size of the trailing window passed to ``recent_window``.
        feature_builder: Callable mapping one week's frame to its feature list.
        columns: Column names of the resulting frame.

    Returns:
        A DataFrame with one row per ISO week, or ``None`` when *df_data*
        holds no records.
    """
    recent = recent_window(df_data, window_days)
    if recent.empty:
        return None
    # Positional access: after filtering, index label 0 may no longer exist.
    pack_model = recent['pack_model'].iloc[0]
    rows = [
        [vin, pack_model, week] + feature_builder(week_df)
        for week, week_df in recent.groupby('wk')
    ]
    return pd.DataFrame(rows, columns=columns)


def append_weekly_csv(df_user, csv_path):
    """Write *df_user* to *csv_path*, keeping rows already stored there.

    New rows are placed before the existing ones. The parent directory is
    created if needed, because the pack model recorded in the data file can
    differ from the one the directories were prepared for.
    """
    os.makedirs(os.path.dirname(csv_path), exist_ok=True)
    if os.path.exists(csv_path):
        df_user = pd.concat([df_user, pd.read_csv(csv_path)])
    df_user.to_csv(csv_path, index=False)


def process_vehicle_files(vin_dir, suffix, kind, window_days,
                          feature_builder, columns):
    """Summarize every file in *vin_dir* ending with *suffix*.

    *kind* is the output sub-directory name (``'charge'`` or ``'drive'``).
    """
    file_names = [name for name in os.listdir(vin_dir) if name.endswith(suffix)]
    for file_name in tqdm(file_names):
        vin = file_name.split('_')[0]
        df_data = pd.read_feather(os.path.join(vin_dir, file_name))
        df_user = summarize_weeks(vin, df_data, window_days,
                                  feature_builder, columns)
        if df_user is None:
            continue
        pack_model = df_user['pack_model'].iloc[0]
        csv_path = os.path.join(out_path, pack_model, kind, f'{vin}.csv')
        append_weekly_csv(df_user, csv_path)


def main():
    """Process every vehicle directory under ``path``."""
    test_vin_list = os.listdir(path)
    df_vin_pack_cell_info = pd.read_csv('./vin_pack_cell_info.csv')
    df_vin_pack_cell_info = df_vin_pack_cell_info.drop_duplicates(
        subset=['vin'], keep='first')
    df_vin_pack_cell_info = df_vin_pack_cell_info.set_index('vin')

    # NOTE: results are appended to existing CSVs, so re-running the script
    # duplicates rows. Remove out_path first (e.g. shutil.rmtree(out_path))
    # when a fresh run is wanted.

    for test_vin in test_vin_list:
        # Skip directory entries that have no row in the pack info table.
        if test_vin not in df_vin_pack_cell_info.index:
            continue
        pack_model = df_vin_pack_cell_info.loc[test_vin, 'pack_model_code']
        if pack_model != TARGET_PACK_MODEL:
            continue

        for kind in ('charge', 'drive'):
            os.makedirs(os.path.join(out_path, pack_model, kind), exist_ok=True)

        vin_dir = os.path.join(path, test_vin)
        process_vehicle_files(vin_dir, 'charge_proc_di.feather', 'charge',
                              CHARGE_WINDOW_DAYS, charge_week_features,
                              CHARGE_COLUMNS)
        process_vehicle_files(vin_dir, 'drive_proc_di.feather', 'drive',
                              DRIVE_WINDOW_DAYS, drive_week_features,
                              DRIVE_COLUMNS)


if __name__ == '__main__':
    main()