12345678910111213141516171819202122232425262728293031323334353637 |
# Bulk-export per-vehicle battery persona tables from Lindorm into ./data/.
#
# For every VIN in vin_pack_cell_info.csv, each table listed in `table_name`
# is queried over the [start_time, end_time] window and stored as one file per
# (VIN, table) pair. The run is resumable: a pair whose output file already
# exists is skipped, so an interrupted run only fetches what is still missing.
from DBManager import LindormConnection
import pandas as pd
import time
import os
from datetime import datetime, timedelta
from tqdm import tqdm

# One row per VIN (first occurrence wins), indexed by VIN.
df_vin_pack_cell_info = pd.read_csv('./vin_pack_cell_info.csv')
df_vin_pack_cell_info = df_vin_pack_cell_info.drop_duplicates(subset=['vin'], keep='first')
df_vin_pack_cell_info = df_vin_pack_cell_info.set_index('vin')

# NOTE(review): df_vin is not referenced anywhere in the visible script; the
# read is kept in case code outside this view relies on it.
df_vin = pd.read_csv('./vin_list.csv')

# Requirement sheet: column '表(en)' holds the table name and column
# '字段英文名' holds the field names to select from that table.
df = pd.read_excel("./算法需求数据下发.xlsx")

table_name = [
    'dwd_batt_persona_charge_proc_di',
    'dwd_batt_persona_drive_proc_di',
    'dwd_batt_persona_standing_proc_di',
    'dwd_batt_persona_soh_proc_di',
]

# Loop invariants: the column selection per table and the formatted query
# window do not depend on the VIN, so compute them once.
columns_by_table = {
    t: df.loc[df['表(en)'] == t, '字段英文名'].tolist() for t in table_name
}
end_time = datetime.strptime('2023-09-07', '%Y-%m-%d')
start_time = datetime.strptime('2023-07-01', '%Y-%m-%d')
start_time_str = start_time.strftime('%Y-%m-%d %H:%M:%S')
end_time_str = end_time.strftime('%Y-%m-%d %H:%M:%S')

os.makedirs('./data/', exist_ok=True)

# Snapshot of files already exported. Completion is tracked per (VIN, table)
# file rather than per VIN, so a VIN that was only partially exported by an
# interrupted run is picked up again for its missing tables.
existing_files = set(os.listdir('./data/'))

# df_vin_list = set(df_vin_pack_cell_info.loc[df_vin_pack_cell_info['pack_model_code'] == pack_model].index)
# VINs that still miss at least one table file; rows with a missing VIN are dropped.
df_vin_list = [
    vin
    for vin in df_vin_pack_cell_info.index
    if not pd.isna(vin)
    and any(f'{vin}_{t}.csv' not in existing_files for t in table_name)
]

# NOTE(review): the endpoint and credentials are hard-coded in source; consider
# moving them to configuration or environment variables.
with LindormConnection(database_url='http://ld-uf625g146ox78yz76-proxy-hbaseue.hbaseue.rds.aliyuncs.com:30060', username='root', password='root', database='vehicle_analysis') as conn:
    for i, vin1 in enumerate(tqdm(df_vin_list)):
        # Pause for one second every 100 VINs (presumably to throttle load on
        # the service -- confirm with the owner).
        if i % 100 == 0:
            time.sleep(1)
        for t in table_name:
            if f'{vin1}_{t}.csv' in existing_files:
                continue
            conn.table = t
            df_data = conn.get_data(
                vin=vin1,
                start_time=start_time_str,
                end_time=end_time_str,
                sel_columns=columns_by_table[t],
            )
            # NOTE(review): the payload is Feather (zstd-compressed) even though
            # the file name ends in .csv; the name is kept unchanged so existing
            # outputs and any downstream readers keep working.
            out_path = f'./data/{vin1}_{t}.csv'
            # Write to a temporary sibling and rename afterwards, so that an
            # interrupted write never leaves a truncated file that the resume
            # check would mistake for a finished export.
            tmp_path = out_path + '.tmp'
            df_data.reset_index(drop=True).to_feather(tmp_path, compression='zstd')
            os.replace(tmp_path, out_path)
|