DataCleaning.py 3.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. import pandas as pd
  2. import numpy as np
  3. import logging
  4. class DataClean:
  5. def __init__(self, df_algo_pack_param) -> None:
  6. self.df_algo_pack_param = df_algo_pack_param
  7. def datacleaning(self, df_in):
  8. if not df_in.empty:
  9. # df['Time'] = pd.to_datetime(list(df['Time']), utc=True, unit='ms').tz_convert('Asia/Shanghai')
  10. df_in=df_in.replace('[]', np.nan)
  11. df_in.dropna(axis=0,subset = ["time", "sn", "cell_voltage", "cell_temp", "pack_crnt"], inplace=True)
  12. df_in['time'] = pd.to_datetime(df_in['time'], format='%Y-%m-%d %H:%M:%S')
  13. df_in.drop(df_in.index[(df_in['pack_volt'] < 0.001) | (df_in['pack_volt'] > 1000) | (df_in['pack_soc'] > 100) | (df_in['pack_soc'] < 0) | (df_in['pack_crnt'] > 1000) | (df_in['pack_crnt'] < -1000)], inplace=True)
  14. if not df_in.empty:
  15. df_in = df_in.groupby('sn',group_keys=False).apply(lambda x:x.sort_values('time'))
  16. df_in.reset_index(drop=True, inplace=True)
  17. #电压、温度分列
  18. CellVoltNums=int(self.df_algo_pack_param['CellVoltTotalCount'])
  19. CellTempNums = int(self.df_algo_pack_param['CellTempTotalCount'])
  20. cellvolt_name=['cell_voltage'+str(x) for x in range(1, CellVoltNums+1)]
  21. celltemp_name=['cell_temp'+str(x) for x in range(1, CellTempNums+1)]
  22. df_volt = df_in['cell_voltage'].apply(lambda x : pd.Series(list(x)[:CellVoltNums]))
  23. df_volt.columns = cellvolt_name
  24. df_volt=df_volt.astype('float')
  25. cellvoltmax = df_volt.max(axis=1)
  26. cellvoltmin = df_volt.min(axis=1)
  27. df_volt[['cell_volt_max','cell_volt_min']] = pd.concat([cellvoltmax,cellvoltmin], axis=1)
  28. df_temp = df_in['cell_temp'].apply(lambda x : pd.Series(list(x)[:CellTempNums]))
  29. df_temp.columns = celltemp_name
  30. df_temp=df_temp.astype('float')
  31. celltempmax = df_temp.max(axis=1)
  32. celltempmin = df_temp.min(axis=1)
  33. df_temp[['cell_temp_max','cell_temp_min']] = pd.concat([celltempmax,celltempmin], axis=1)
  34. #其他温度分列
  35. if len(df_in['other_temp_value'].loc[0]):
  36. df_otherTemp_name=['mos_temp', 'env_temp', 'fastcharg_connector_temp',
  37. 'onc_connector_temp', 'heat_plate1_temp', 'heat_plate2_temp', 'connector_1_temp','connector_2_temp', 'pcb_temp', 'bat_inner_temp']
  38. df_otherTemp=pd.DataFrame([list(x[0]) for x in np.array(df_in[['other_temp_value']])]).iloc[:,list(range(len(df_otherTemp_name)))]
  39. df_otherTemp.columns=df_otherTemp_name
  40. df_otherTemp=df_otherTemp.astype('float')
  41. df_out = pd.concat([df_in, df_volt, df_temp, df_otherTemp],axis=1)
  42. else:
  43. df_out = pd.concat([df_in, df_volt, df_temp],axis=1)
  44. df_out.dropna(axis=0, inplace=True)
  45. # df_out.dropna(axis=0,subset = cellvolt_name+celltemp_name, inplace=True)
  46. df_out.reset_index(inplace=True, drop=True)
  47. df_table = df_out.drop_duplicates(subset=['sn'], keep='first', ignore_index=True)
  48. df_table = df_table.set_index('sn')
  49. else:
  50. df_out = pd.DataFrame()
  51. df_table = pd.DataFrame()
  52. cellvolt_name = []
  53. celltemp_name = []
  54. return df_out, df_table, cellvolt_name, celltemp_name
  55. else:
  56. return pd.DataFrame(), pd.DataFrame(), [], []