# get_rank.py (3.5 KB)

import os

import numpy as np
import pandas as pd
from tqdm import tqdm
# Earlier hand-rolled rank lookup, left commented out (the script below uses
# DataFrame.rank instead).
#
# def get_rank(stat, week, name, df_group):
#     df_group_name_list = df_group.loc[df_group['week'] == week][name].tolist()
#     df_group_name_list = sorted(df_group_name_list, reverse=True)
#     # return [round(df_group_name_list.index(stat)/len(df_group_name_list), 3) , df_group_name_list.index(stat)+1, len(df_group_name_list)]
#     if len(df_group_name_list) == 0:
#         return [np.nan, np.nan, np.nan]
#     if stat == 0:
#         return [round(0/len(df_group_name_list), 3) , 0, len(df_group_name_list)]
#     for i, v in enumerate(df_group_name_list):
#         if stat >= v:
#             return [round(i/len(df_group_name_list), 3) , i+1, len(df_group_name_list)]
  16. if __name__ == '__main__':
  17. vin_list = os.listdir('./dataframes/')
  18. for name in ['charge', 'drive']:
  19. df_stats = pd.DataFrame()
  20. for vin in tqdm(vin_list):
  21. group_list = os.listdir(f'./dataframes/{vin}/{name}/')
  22. # df_charge_vin = pd.read_csv(f'./dataframes/{vin}/{name}/{vin}.csv')
  23. df_group = pd.DataFrame()
  24. for group in group_list:
  25. # df_group = pd.read_csv(f'./dataframes/{vin}/charge/{group}')
  26. df_group = pd.concat([df_group, pd.read_csv(f'./dataframes/{vin}/{name}/{group}')])
  27. df_group_week_list = list(df_group.groupby('week'))
  28. df_group = pd.DataFrame()
  29. for week, df_group_week in df_group_week_list:
  30. # check if df_group_week has zero values, if so, replace them with np.nan
  31. df_group_week_0 = df_group_week[df_group_week.columns[3:]].replace(0, np.nan)
  32. df_group_week_rank = df_group_week_0.rank(axis=0, method='min', ascending=False).fillna(0).astype(int)
  33. # rename the columns of df_group_week_rank by adding '_rank' to the original column names
  34. df_group_week_rank.columns = [f'{col}_rank' for col in df_group_week_rank.columns]
  35. # merge df_group_week and df_group_week_rank
  36. df_group_week = pd.concat([df_group_week, df_group_week_rank], axis=1)
  37. df_group = pd.concat([df_group, df_group_week])
  38. df_group.to_csv(f'./{name}/group_{vin}.csv', index=False)
  39. # df_stats = pd.concat([df_stats, df_charge_vin])
  40. # df_stats.to_csv(f'./{name}_rank.csv', index=False)