123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194 |
- import pandas as pd
- import os
- import numpy as np
- from tqdm import tqdm
- from datetime import datetime
- import shutil
# Let pandas print up to 500 columns / rows before truncating the display.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

# Input root: every entry returned by os.listdir(path) is treated as one
# VIN folder holding that vehicle's '*_proc_di.feather' files.
path = '/home/chenenze/hz_user/'
# Output root: summaries are written to <out_path>/<pack_model>/<charge|drive>/<vin>.csv
out_path = './dataframes/'

# Charge-side thresholds.
# A charge record is counted in 'temp_35' when temp_max is above this value ...
temp_thresh_max_c = 35
# ... in 'temp_10' when temp_min is below this value ...
temp_thresh_min_c = 10
# ... and in 'meancrnt' when its mean current is above this value.
mean_crnt_thresh_c = 1

# Drive-side thresholds (same roles as the charge-side ones above).
temp_thresh_max_d = 35
temp_thresh_min_d = 10
mean_crnt_thresh_d = 0.1
# Column order of the weekly charge summary CSV.
CHARGE_COLUMNS = [
    'vin', 'pack_model', 'week',
    'temp_max_25', 'temp_max_75', 'temp_35',
    'temp_min_25', 'temp_min_75', 'temp_10',
    'chrgah', 'meancrnt', 'sts_flg', 'full_chrg_flg',
    'cellvol_max_25', 'cellvol_max_75', 'packvol_max_25', 'packvol_max_75',
    'temp_time_15', 'temp_time_30',
]

# Column order of the weekly drive summary CSV.
DRIVE_COLUMNS = [
    'vin', 'pack_model', 'week',
    'temp_max_25', 'temp_max_75', 'temp_35',
    'temp_min_25', 'temp_min_75', 'temp_10',
    'delta_odo', 'dschrgah', 'meancrnt', 'temp_time_15', 'temp_time_30',
    'spd_mean', 'accon_mean', 'fst_acc', 'maxspd',
]

# Trailing window (in days, counted back from the newest record) kept per file.
CHARGE_WINDOW_DAYS = 60
DRIVE_WINDOW_DAYS = 90

# Only vehicles whose lookup-table pack model equals this code are processed.
TARGET_PACK_MODEL = '2101TBC'


def _quartiles(series):
    """Return the (25 %, 75 %) quantiles of *series*, each rounded to 2 decimals.

    An empty series yields ``(0, 0)``.
    """
    values = series.to_numpy()
    if values.size == 0:
        return 0, 0
    return round(np.quantile(values, 0.25), 2), round(np.quantile(values, 0.75), 2)


def _count(mask):
    """Return how many entries of the boolean *mask* are true, as a plain int."""
    return int(mask.sum())


def _row_sum_total(df_week, columns):
    """Add *columns* row by row, then total the rows, rounded to 2 decimals.

    ``skipna=False`` makes a row with any missing component count as missing
    (and therefore be skipped by the outer sum), which is exactly what adding
    the columns with ``+`` does.
    """
    return round(df_week[columns].sum(axis=1, skipna=False).sum(), 2)


def _temperature_stats(df_week, temp_max_thresh, temp_min_thresh):
    """Return the temperature quartiles and threshold counts shared by both summaries."""
    temp_max_25, temp_max_75 = _quartiles(df_week['temp_max'])
    temp_min_25, temp_min_75 = _quartiles(df_week['temp_min'])
    return {
        'temp_max_25': temp_max_25,
        'temp_max_75': temp_max_75,
        'temp_35': _count(df_week['temp_max'] > temp_max_thresh),
        'temp_min_25': temp_min_25,
        'temp_min_75': temp_min_75,
        'temp_10': _count(df_week['temp_min'] < temp_min_thresh),
    }


def prepare_recent_weeks(df_data, window_days):
    """Parse ``dt``, keep the trailing *window_days* days and add an ISO-week column.

    Args:
        df_data: frame with a ``dt`` column holding dates as ``YYYYMMDD``
            integers (or strings). The input frame is not modified.
        window_days: rows older than ``max(dt) - window_days`` days are dropped.

    Returns:
        A new frame with ``dt`` converted to datetime and a ``wk`` column
        holding the ISO week number. The original index labels are kept, so
        callers must use positional access (``.iloc``) for "first row".
    """
    df_recent = df_data.copy()
    df_recent['dt'] = pd.to_datetime(df_recent['dt'].astype(str), format='%Y%m%d')
    cutoff = df_recent['dt'].max() - pd.Timedelta(days=window_days)
    # .copy() so that adding 'wk' below never writes into a view of the slice.
    df_recent = df_recent.loc[df_recent['dt'] >= cutoff].copy()
    df_recent['wk'] = df_recent['dt'].dt.isocalendar().week.astype('int64')
    return df_recent


def summarize_charge_week(df_week, temp_max_thresh=35, temp_min_thresh=10, crnt_thresh=1):
    """Summarise one week of charge records.

    Args:
        df_week: the charge records of a single week.
        temp_max_thresh: records with ``temp_max`` above it are counted in ``temp_35``.
        temp_min_thresh: records with ``temp_min`` below it are counted in ``temp_10``.
        crnt_thresh: records with ``meancrnt`` above it are counted in ``meancrnt``.

    Returns:
        dict keyed by the statistic columns of ``CHARGE_COLUMNS`` (everything
        after ``vin``, ``pack_model`` and ``week``).
    """
    stats = _temperature_stats(df_week, temp_max_thresh, temp_min_thresh)

    # Voltage quartiles are taken over full-charge records only; (0, 0) if there are none.
    full_charges = df_week.loc[df_week['full_chrg_flg'] == 1]
    cellvol_max_25, cellvol_max_75 = _quartiles(full_charges['cellvol_max'])
    packvol_max_25, packvol_max_75 = _quartiles(full_charges['packvol_max'])

    stats.update({
        'chrgah': round(df_week['chrgah'].sum(), 2),
        'meancrnt': _count(df_week['meancrnt'] > crnt_thresh),
        'sts_flg': _count(df_week['sts_flg'] == 1),
        'full_chrg_flg': _count(df_week['full_chrg_flg'] == 1),
        'cellvol_max_25': cellvol_max_25,
        'cellvol_max_75': cellvol_max_75,
        'packvol_max_25': packvol_max_25,
        'packvol_max_75': packvol_max_75,
        'temp_time_15': _row_sum_total(
            df_week, ['temp_time_1', 'temp_time_2', 'temp_time_3', 'temp_time_4']),
        'temp_time_30': _row_sum_total(df_week, ['temp_time_6', 'temp_time_7']),
    })
    return stats


def summarize_drive_week(df_week, temp_max_thresh=35, temp_min_thresh=10, crnt_thresh=0.1):
    """Summarise one week of drive records.

    Args:
        df_week: the drive records of a single week.
        temp_max_thresh: records with ``temp_max`` above it are counted in ``temp_35``.
        temp_min_thresh: records with ``temp_min`` below it are counted in ``temp_10``.
        crnt_thresh: records with ``meancrnt`` above it are counted in ``meancrnt``.

    Returns:
        dict keyed by the statistic columns of ``DRIVE_COLUMNS`` (everything
        after ``vin``, ``pack_model`` and ``week``).
    """
    stats = _temperature_stats(df_week, temp_max_thresh, temp_min_thresh)
    stats.update({
        'delta_odo': round(df_week['delta_odo'].sum(), 2),
        # The input column is 'dschrg_ah'; the output column is spelled 'dschrgah'.
        'dschrgah': round(df_week['dschrg_ah'].sum(), 2),
        'meancrnt': _count(df_week['meancrnt'] > crnt_thresh),
        'temp_time_15': _row_sum_total(
            df_week, ['temp_time_1', 'temp_time_2', 'temp_time_3', 'temp_time_4']),
        'temp_time_30': _row_sum_total(df_week, ['temp_time_6', 'temp_time_7']),
        # NOTE(review): these two report the weekly *maximum* of the per-record
        # means, not a mean of means. Kept as-is; confirm that this is intended.
        'spd_mean': round(df_week['spd_mean'].max(), 2),
        'accon_mean': round(df_week['accon_mean'].max(), 2),
        'fst_acc': _row_sum_total(df_week, ['fst_acc_pls', 'fst_acc_mus', 'fst_acc_trn']),
        'maxspd': round(df_week['maxspd'].max(), 2),
    })
    return stats


def export_weekly_summary(vin_dir, file_suffix, kind, columns, window_days, summarize):
    """Write one weekly summary CSV per matching feather file found in *vin_dir*.

    Args:
        vin_dir: folder holding the feather files of one vehicle.
        file_suffix: only files whose name ends with this suffix are read.
        kind: output sub-folder name (``'charge'`` or ``'drive'``).
        columns: column order of the CSV.
        window_days: trailing window handed to ``prepare_recent_weeks``.
        summarize: callable mapping a one-week frame to a dict of statistics.
    """
    file_names = [name for name in os.listdir(vin_dir) if name.endswith(file_suffix)]
    for file_name in tqdm(file_names):
        vin = file_name.split('_')[0]
        df_data = prepare_recent_weeks(
            pd.read_feather(os.path.join(vin_dir, file_name)), window_days)
        if df_data.empty:
            continue

        # Positional access: after the date filter, index label 0 is usually gone.
        pack_model = df_data['pack_model'].iloc[0]
        rows = [
            {'vin': vin, 'pack_model': pack_model, 'week': week, **summarize(df_week)}
            for week, df_week in df_data.groupby('wk')
        ]
        df_user = pd.DataFrame(rows, columns=columns)

        # The file's own pack_model decides the folder, so make sure it exists
        # even when it differs from the lookup table's code.
        target_dir = os.path.join(out_path, pack_model, kind)
        os.makedirs(target_dir, exist_ok=True)
        target_csv = os.path.join(target_dir, f'{vin}.csv')
        # NOTE: an existing CSV is appended to, never replaced. Clear out_path
        # before a re-run if duplicated weeks are not wanted.
        if os.path.exists(target_csv):
            df_user = pd.concat([df_user, pd.read_csv(target_csv)])
        df_user.to_csv(target_csv, index=False)


def main():
    """Build weekly charge and drive summaries for every target-pack vehicle under ``path``."""
    vin_info = (
        pd.read_csv('./vin_pack_cell_info.csv')
        .drop_duplicates(subset=['vin'], keep='first')
        .set_index('vin')
    )

    for test_vin in os.listdir(path):
        if test_vin not in vin_info.index:
            # Previously a KeyError; a folder without lookup data is skipped instead.
            print(f'skip {test_vin}: not listed in vin_pack_cell_info.csv')
            continue
        if vin_info.loc[test_vin, 'pack_model_code'] != TARGET_PACK_MODEL:
            continue

        for kind in ('charge', 'drive'):
            os.makedirs(os.path.join(out_path, TARGET_PACK_MODEL, kind), exist_ok=True)

        vin_dir = os.path.join(path, test_vin)
        export_weekly_summary(
            vin_dir, 'charge_proc_di.feather', 'charge', CHARGE_COLUMNS, CHARGE_WINDOW_DAYS,
            lambda df_week: summarize_charge_week(
                df_week, temp_thresh_max_c, temp_thresh_min_c, mean_crnt_thresh_c),
        )
        export_weekly_summary(
            vin_dir, 'drive_proc_di.feather', 'drive', DRIVE_COLUMNS, DRIVE_WINDOW_DAYS,
            lambda df_week: summarize_drive_week(
                df_week, temp_thresh_max_d, temp_thresh_min_d, mean_crnt_thresh_d),
        )


if __name__ == '__main__':
    main()
-
|