"""Add per-week rank columns to the CSV tables stored under ./dataframes/.

Expected layout (taken from the paths this script reads and writes):

    ./dataframes/<vin>/<name>/<group>.csv   input tables, one or more per <vin>
    ./<name>/group_<vin>.csv                output: input rows plus *_rank columns
    ./<name>_rank.csv                       output: aggregate table (see NOTE in main)

where <name> is one of CATEGORIES.
"""
import os

import numpy as np
import pandas as pd

try:
    from tqdm import tqdm
except ImportError:
    # The progress bar is purely cosmetic; keep the job runnable without it.
    def tqdm(iterable, *args, **kwargs):
        """Fallback used when tqdm is not installed: return the iterable as is."""
        return iterable

INPUT_ROOT = './dataframes/'
CATEGORIES = ('charge', 'drive')
# Number of leading columns that are left unranked.
# NOTE(review): presumably these are identifier columns (vin/week/...) --
# confirm against the CSV schema, which is not visible from this script.
LEADING_UNRANKED_COLUMNS = 3


def load_group_tables(group_dir):
    """Read every file in *group_dir* as CSV and stack the rows.

    Args:
        group_dir: Directory whose entries are all readable by ``pd.read_csv``.

    Returns:
        One DataFrame holding the rows of all files, or ``None`` when the
        directory contains no files at all.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order of the output reproducible across machines and filesystems.
    file_names = sorted(os.listdir(group_dir))
    if not file_names:
        return None
    frames = [pd.read_csv(f'{group_dir}{file_name}') for file_name in file_names]
    # A single concat avoids re-copying the accumulated frame once per file.
    return pd.concat(frames, ignore_index=True)


def add_weekly_ranks(df_group, first_stat_column=LEADING_UNRANKED_COLUMNS):
    """Append a ``<col>_rank`` column for every statistic column, ranked per week.

    Rows are grouped by the ``week`` column. Within each week every column
    from position *first_stat_column* onwards is ranked in descending order
    (largest value gets rank 1, ties share the smallest rank). Zero and
    missing values are excluded from the ranking and receive rank 0.

    Args:
        df_group: Table containing a ``week`` column.
        first_stat_column: Positional index of the first column to rank.

    Returns:
        A new DataFrame ordered by week (rows with a missing week are dropped
        by ``groupby``), containing the original columns followed by the rank
        columns. An empty DataFrame is returned when there are no weeks.

    Raises:
        KeyError: If *df_group* has no ``week`` column.
    """
    ranked_weeks = []
    for _, df_week in df_group.groupby('week'):
        # Zeros are treated as "no data": turn them into NaN so that rank()
        # skips them instead of placing them last.
        stats = df_week[df_week.columns[first_stat_column:]].replace(0, np.nan)
        ranks = (
            stats.rank(axis=0, method='min', ascending=False)
            .fillna(0)  # unranked (zero/missing) entries are reported as rank 0
            .astype(int)
        )
        ranks.columns = [f'{col}_rank' for col in ranks.columns]
        ranked_weeks.append(pd.concat([df_week, ranks], axis=1))

    if not ranked_weeks:
        return pd.DataFrame()
    return pd.concat(ranked_weeks)


def main():
    """Rank every ``<vin>/<name>`` table and write the results below ``./<name>/``."""
    vin_list = os.listdir(INPUT_ROOT)
    for name in CATEGORIES:
        # to_csv does not create missing directories.
        os.makedirs(f'./{name}', exist_ok=True)

        df_stats = pd.DataFrame()
        for vin in tqdm(vin_list):
            df_group = load_group_tables(f'{INPUT_ROOT}{vin}/{name}/')
            if df_group is None:
                # No input files for this entry: nothing to rank or write.
                continue
            add_weekly_ranks(df_group).to_csv(f'./{name}/group_{vin}.csv', index=False)

        # NOTE(review): df_stats is never filled, so this writes an empty
        # table. The write is kept so the script still produces the same set
        # of files as before -- confirm what the aggregate should contain.
        df_stats.to_csv(f'./{name}_rank.csv', index=False)


if __name__ == '__main__':
    main()