get_table.py 1.8 KB

12345678910111213141516171819202122232425262728293031323334353637
  1. from DBManager import LindormConnection
  2. import pandas as pd
  3. import time
  4. import os
  5. from datetime import datetime, timedelta
  6. from tqdm import tqdm
  7. df_vin_pack_cell_info = pd.read_csv('./vin_pack_cell_info.csv')
  8. df_vin_pack_cell_info = df_vin_pack_cell_info.drop_duplicates(subset=['vin'], keep='first')
  9. df_vin_pack_cell_info = df_vin_pack_cell_info.set_index('vin')
  10. df_vin = pd.read_csv('./vin_list.csv')
  11. df = pd.read_excel("./算法需求数据下发.xlsx")
  12. table_name = ['dwd_batt_persona_charge_proc_di', 'dwd_batt_persona_drive_proc_di', 'dwd_batt_persona_standing_proc_di', 'dwd_batt_persona_soh_proc_di']
  13. with LindormConnection(database_url='http://ld-uf625g146ox78yz76-proxy-hbaseue.hbaseue.rds.aliyuncs.com:30060', username='root', password='root',database='vehicle_analysis') as conn:
  14. if not os.path.exists(f'./data/'):
  15. os.makedirs(f'./data/')
  16. vin_list_exist = set([x.split('_')[0] for x in os.listdir(f'./data/')])
  17. end_time = datetime.strptime('2023-09-07', '%Y-%m-%d')
  18. start_time = datetime.strptime('2023-07-01', '%Y-%m-%d')
  19. # df_vin_list = set(df_vin_pack_cell_info.loc[df_vin_pack_cell_info['pack_model_code'] == pack_model].index)
  20. df_vin_list = list(set(df_vin_pack_cell_info.index.tolist()).difference(vin_list_exist))
  21. for i in tqdm(range(len(df_vin_list))):
  22. vin1 = df_vin_list[i]
  23. if str(vin1) == 'nan':
  24. continue
  25. if i % 100 == 0:
  26. time.sleep(1)
  27. for t in table_name[:]:
  28. conn.table = t
  29. d = df[df['表(en)']==t]
  30. sel_columns = d['字段英文名'].tolist()
  31. df_data = conn.get_data(vin=vin1, start_time=start_time.strftime('%Y-%m-%d %H:%M:%S'), end_time=end_time.strftime('%Y-%m-%d %H:%M:%S'), sel_columns=sel_columns)
  32. df_data.reset_index(drop=True).to_feather(f'./data/{vin1}_{t}.csv', compression='zstd')