OfflineUtils.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. import pandas as pd
  2. import numpy as np
  3. import logging
  4. import logging.handlers
  5. import os
  6. import re
  7. class OfflineAlgoUtils:
  8. def __init__(self):
  9. pass
  10. def get_log_handler(self, log_file, level=logging.INFO):
  11. # 根据日期滚动
  12. if not os.path.exists(os.path.dirname(log_file)):
  13. os.makedirs(os.path.dirname(log_file))
  14. fh = logging.handlers.TimedRotatingFileHandler(filename=log_file, when="D", interval=1, backupCount=7,
  15. encoding="utf-8")
  16. formatter = logging.Formatter("%(asctime)s - %(name)s-%(levelname)s %(message)s")
  17. fh.suffix = "%Y-%m-%d_%H-%M-%S"
  18. fh.extMatch = re.compile(r"^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}")
  19. fh.setFormatter(formatter)
  20. fh.setLevel(level)
  21. return fh
  22. def get_logger(self, log_name, log_path):
  23. # 日志配置
  24. logger = logging.getLogger(log_name)
  25. logger.setLevel(logging.INFO)
  26. logger.addHandler(self.get_log_handler("{}/{}.info.log".format(log_path, log_name), logging.INFO))
  27. logger.addHandler(self.get_log_handler("{}/{}.error.log".format(log_path, log_name), logging.ERROR))
  28. return logger
  29. def datacleaning(self, df_algo_pack_param, df_in, droplmt=1):
  30. df_in=df_in[df_in['datatype']==12]
  31. df_in = df_in.drop(['latitude','longitude','mileage','accum_chg_wh','accum_dschg_wh','accum_dschg_ah','accum_chg_ah','vin','accum_energyfeed_wh','accum_energyfeed_ah'], axis=1, errors='ignore')
  32. if not df_in.empty:
  33. # df['Time'] = pd.to_datetime(list(df['Time']), utc=True, unit='ms').tz_convert('Asia/Shanghai')
  34. df_in=df_in.replace('[]', np.nan)
  35. df_in.dropna(axis=0,subset = ["time", "sn", "cell_voltage", "cell_temp", "pack_crnt"], inplace=True)
  36. df_in['time'] = pd.to_datetime(df_in['time'], format='%Y-%m-%d %H:%M:%S')
  37. if droplmt==1:
  38. df_in.drop(df_in.index[(df_in['pack_volt'] < 0.001) | (df_in['pack_volt'] > 1000) | (df_in['pack_soc'] > 100) | (df_in['pack_soc'] < 0) | (df_in['pack_crnt'] > 1000) | (df_in['pack_crnt'] < -1000)], inplace=True)
  39. else:
  40. pass
  41. if not df_in.empty:
  42. df_in = df_in.groupby('sn',group_keys=False).apply(lambda x:x.sort_values('time'))
  43. df_in.reset_index(drop=True, inplace=True)
  44. #电压、温度分列
  45. CellVoltNums=int(df_algo_pack_param['CellVoltTotalCount'])
  46. CellTempNums = int(df_algo_pack_param['CellTempTotalCount'])
  47. cellvolt_name=['cell_voltage'+str(x) for x in range(1, CellVoltNums+1)]
  48. celltemp_name=['cell_temp'+str(x) for x in range(1, CellTempNums+1)]
  49. df_volt = df_in['cell_voltage'].apply(lambda x : pd.Series(list(x)[:CellVoltNums]))
  50. df_volt.columns = cellvolt_name
  51. df_volt=df_volt.astype('float')
  52. cellvoltmax = df_volt.max(axis=1)
  53. cellvoltmin = df_volt.min(axis=1)
  54. df_volt[['cell_volt_max','cell_volt_min']] = pd.concat([cellvoltmax,cellvoltmin], axis=1)
  55. df_temp = df_in['cell_temp'].apply(lambda x : pd.Series(list(x)[:CellTempNums]))
  56. df_temp.columns = celltemp_name
  57. df_temp=df_temp.astype('float')
  58. celltempmax = df_temp.max(axis=1)
  59. celltempmin = df_temp.min(axis=1)
  60. df_temp[['cell_temp_max','cell_temp_min']] = pd.concat([celltempmax,celltempmin], axis=1)
  61. #其他温度分列
  62. if len(df_in['other_temp_value'].loc[0]):
  63. df_otherTemp_name=['mos_temp', 'env_temp', 'fastcharg_connector_temp',
  64. 'onc_connector_temp', 'heat_plate1_temp', 'heat_plate2_temp', 'connector_1_temp','connector_2_temp', 'pcb_temp', 'bat_inner_temp']
  65. df_otherTemp=pd.DataFrame([list(x[0]) for x in np.array(df_in[['other_temp_value']])]).iloc[:,list(range(len(df_otherTemp_name)))]
  66. df_otherTemp.columns=df_otherTemp_name
  67. df_otherTemp=df_otherTemp.astype('float')
  68. df_out = pd.concat([df_in, df_volt, df_temp, df_otherTemp],axis=1)
  69. else:
  70. df_out = pd.concat([df_in, df_volt, df_temp],axis=1)
  71. # df_out.dropna(axis=0, inplace=True)
  72. df_out.dropna(axis=0,subset = cellvolt_name+celltemp_name, inplace=True)
  73. df_out.reset_index(inplace=True, drop=True)
  74. df_table = df_out.drop_duplicates(subset=['sn'], keep='first', ignore_index=True)
  75. df_table = df_table.set_index('sn')
  76. else:
  77. df_out = pd.DataFrame()
  78. df_table = pd.DataFrame()
  79. cellvolt_name = []
  80. celltemp_name = []
  81. return df_out, df_table, cellvolt_name, celltemp_name
  82. else:
  83. return pd.DataFrame(), pd.DataFrame(), [], []
  84. def gps_datacleaning(self, df_in):
  85. df_in=df_in[df_in['datatype']==16]
  86. df_in=df_in[['sn','time','datatype','latitude','longitude','mileage']]
  87. if not df_in.empty:
  88. # df['Time'] = pd.to_datetime(list(df['Time']), utc=True, unit='ms').tz_convert('Asia/Shanghai')
  89. df_in=df_in.replace('[]', np.nan)
  90. df_in=df_in.replace('',np.nan)
  91. df_in=df_in.dropna(axis=0,how='any')
  92. if 'latitude' in df_in.columns:
  93. df_in["latitude"]=df_in["latitude"].astype(float)
  94. if 'longitude' in df_in.columns:
  95. df_in["longitude"]=df_in["longitude"].astype(float)
  96. if 'mileage' in df_in.columns:
  97. df_in["mileage"]=df_in["mileage"].astype(float)
  98. ##处理经纬度为0的情况
  99. df_in=df_in.replace(0,np.nan)
  100. df_in=df_in.sort_values(["sn","time"],ascending = [True, True])
  101. df_in_filled = df_in.groupby("sn").fillna(method='ffill')
  102. df_out = pd.concat([df_in[['sn']], df_in_filled], axis=1)
  103. df_out.reset_index(inplace=True, drop=True)
  104. return df_out
  105. else:
  106. return pd.DataFrame()
  107. def accum_datacleaning(self, df_in):
  108. df_in=df_in[df_in['datatype']==23]
  109. df_in=df_in[['sn','time','datatype','accum_chg_wh','accum_dschg_wh','accum_dschg_ah','accum_chg_ah','accum_energyfeed_wh','accum_energyfeed_ah']]
  110. if not df_in.empty:
  111. # df['Time'] = pd.to_datetime(list(df['Time']), utc=True, unit='ms').tz_convert('Asia/Shanghai')
  112. df_in=df_in.replace('[]', np.nan)
  113. df_in=df_in.replace('',np.nan)
  114. df_in=df_in.dropna(axis=0,how='any')
  115. if 'accum_energyfeed_wh' in df_in.columns:
  116. df_in["accum_energyfeed_wh"]=df_in["accum_energyfeed_wh"].astype(float)
  117. if 'accum_chg_wh' in df_in.columns:
  118. df_in["accum_chg_wh"]=df_in["accum_chg_wh"].astype(float)
  119. if 'accum_dschg_wh' in df_in.columns:
  120. df_in["accum_dschg_wh"]=df_in["accum_dschg_wh"].astype(float)
  121. ##处理经纬度为0的情况
  122. df_in=df_in.replace(0,np.nan)
  123. df_in=df_in.sort_values(["sn","time"],ascending = [True, True])
  124. df_in_filled = df_in.groupby("sn").fillna(method='ffill')
  125. df_in_filled = df_in.groupby("sn").fillna(method='bfill')
  126. df_out = pd.concat([df_in[['sn']], df_in_filled], axis=1)
  127. df_out.reset_index(inplace=True, drop=True)
  128. return df_out
  129. else:
  130. return pd.DataFrame()
  131. def vin_datacleaning(self, df_in):
  132. df_in=df_in[df_in['datatype']==50]
  133. df_in=df_in[['sn','time','datatype','vin']]
  134. if not df_in.empty:
  135. df_in["vin"].replace("","z",inplace=True)
  136. df_out= df_in
  137. return df_out
  138. else:
  139. return pd.DataFrame()