# data_split.py
# Split driving, standing (idle) and charging records into segments.
# The input data must already use the standardized status codes.
import numpy as np
import pandas as pd
  4. def split(df_merge,drive_interval_time_min=1200,charge_interval_time_min=1200,stand_interval_time_min=1200,single_num_min=3,drive_sts=3,charge_sts=[21,22],stand_sts=0):
  5. ##行驶段的划分,将行驶状态划分为不同的行驶段
  6. df_drive=df_merge[df_merge['bms_sta']==drive_sts]
  7. ###行驶时间段切割
  8. df_drive["order_delta"]= pd.to_datetime(df_drive["time"] )-pd.to_datetime( df_drive["time"].shift(1).fillna(method = 'backfill',axis = 0))
  9. df_drive["order_delta"]=pd.to_timedelta(df_drive["order_delta"])
  10. df_drive["order_delta"] = df_drive["order_delta"].dt.total_seconds()
  11. ##限定时间间隔1200秒切割和合并
  12. df_drive["time_flag"]=df_drive["order_delta"]>drive_interval_time_min
  13. df_drive['drive_block']=(df_drive["time_flag"].shift(1) != df_drive["time_flag"]).astype(int).cumsum()
  14. ##排除单点
  15. df_drive_count=df_drive.groupby(["sn","drive_block"]).agg({'time':'count'})
  16. df_drive_count=df_drive_count.reset_index()
  17. df_drive_count=df_drive_count[df_drive_count["time"]>single_num_min]
  18. df_drive_count_choice=df_drive_count[["sn","drive_block"]]
  19. df_drive=pd.merge(df_drive,df_drive_count_choice,on=['sn', 'drive_block'],how='inner')
  20. df_drive=df_drive.drop(['time_flag', 'order_delta'], axis=1, inplace=False)
  21. ##充电段的划分,将充电状态划分为不同的充电段
  22. df_charge=df_merge[df_merge['bms_sta'].isin(charge_sts)]
  23. ##充电时间段切割
  24. df_charge["order_delta"]= pd.to_datetime(df_charge["time"] )-pd.to_datetime( df_charge["time"].shift(1).fillna(method = 'backfill',axis = 0))
  25. df_charge["order_delta"]=pd.to_timedelta(df_charge["order_delta"])
  26. df_charge["order_delta"] = df_charge["order_delta"].dt.total_seconds()
  27. ##限定时间间隔1200秒切割和合并
  28. df_charge["time_flag"]=df_charge["order_delta"]>charge_interval_time_min
  29. df_charge['charge_block']=(df_charge["time_flag"].shift(1) != df_charge["time_flag"]).astype(int).cumsum()
  30. df_charge_count=df_charge.groupby(["sn","charge_block"]).agg({'time':'count'})
  31. df_charge_count=df_charge_count.reset_index()
  32. df_charge_count=df_charge_count[df_charge_count["time"]>single_num_min]
  33. df_charge_count_choice=df_charge_count[["sn","charge_block"]]
  34. df_charge=pd.merge(df_charge,df_charge_count_choice,on=['sn', 'charge_block'],how='inner')
  35. df_charge=df_charge.drop(['time_flag', 'order_delta'], axis=1, inplace=False)
  36. ##静置段的划分,将静置状态划分为不同的静置段
  37. df_stand=df_merge[df_merge['bms_sta']==stand_sts]
  38. ##静置时间段切割
  39. df_stand["order_delta"]= pd.to_datetime(df_stand["time"] )-pd.to_datetime( df_stand["time"].shift(1).fillna(method = 'backfill',axis = 0))
  40. df_stand["order_delta"]=pd.to_timedelta(df_stand["order_delta"])
  41. df_stand["order_delta"] = df_stand["order_delta"].dt.total_seconds()
  42. ##限定时间间隔1200秒切割和合并
  43. df_stand["time_flag"]=df_stand["order_delta"]>stand_interval_time_min
  44. df_stand['stand_block']=(df_stand["time_flag"].shift(1) != df_stand["time_flag"]).astype(int).cumsum()
  45. df_stand_count=df_stand.groupby(["sn","stand_block"]).agg({'time':'count'})
  46. df_stand_count=df_stand_count.reset_index()
  47. df_stand_count=df_stand_count[df_stand_count["time"]>single_num_min]
  48. df_stand_count_choice=df_stand_count[["sn","stand_block"]]
  49. df_stand=pd.merge(df_stand,df_stand_count_choice,on=['sn', 'stand_block'],how='inner')
  50. df_stand=df_stand.drop(['time_flag', 'order_delta'], axis=1, inplace=False)
  51. return df_drive,df_charge,df_stand