main.ipynb 12 KB

# Fetch data
import sys
from LIB.BACKEND import DBManager
from LIB.MIDDLE.OutlierDetection.VoltOutlier.V_1_0_0 import sta
import pymysql
import pandas as pd
from matplotlib import pyplot as plt

# Matplotlib text settings: select the SimHei sans-serif font (presumably so the
# Chinese labels used in this notebook render) and turn off the Unicode minus glyph.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})

# Refresh the sn list from the app_device table.
# NOTE(review): the connection credentials are hard-coded in this cell; consider
# loading them from configuration or the environment instead of committing them.
host = 'rm-bp10j10qy42bzy0q7.mysql.rds.aliyuncs.com'
port = 3306
db = 'qixiang_oss'
user = 'qixiang_oss'
password = 'Qixiang2021'
conn = pymysql.connect(host=host, port=port, user=user, password=password, database=db)
try:
    # The cursor and the connection are released even when the query raises.
    with conn.cursor() as cursor:
        cursor.execute("select sn, imei, add_time from app_device")
        res = cursor.fetchall()
finally:
    conn.close()

# One row per device; the index is reset to a plain 0..n-1 range.
df_sn = pd.DataFrame(res, columns=['sn', 'imei', 'add_time'])
df_sn = df_sn.reset_index(drop=True)

# Window / step sizes handed to the sta.cal_* routines further down.
window, step = 50, 10
window2, step2 = 5, 3

# Per-cell voltage column names: '单体电压1' through '单体电压20'.
volt_column = ['单体电压' + str(cell_no) for cell_no in range(1, 21)]
# Columns selected from the BMS frame: the timestamp followed by every cell voltage.
columns = ['时间戳'] + volt_column
dbManager = DBManager.DBManager()

# Query window; identical for every device, so it is defined once outside the loop.
st = '2021-10-30 00:00:00'
et = '2021-10-31 00:00:00'

all_result = pd.DataFrame(columns=['sn', 'suptype', 'type', 'num', 'alarm_time'])
for sn in df_sn['sn'].tolist()[0:3]:
    df_data = dbManager.get_data(sn=sn, start_time=st, end_time=et, data_groups=['bms'])
    df_bms = df_data['bms']

    # Non-inplace rename: renaming the column slice in place is what triggered the
    # SettingWithCopyWarning recorded in this cell's output.
    df_ori = df_bms[columns].rename(columns={'时间戳': 'time'})
    df = df_ori.drop_duplicates(subset=['time'])  # drop rows that share a timestamp
    df = df.reset_index(drop=True)
    df_result_1, time_list_1 = sta.cal_voltdiff_uniform(df, volt_column, window=window, step=step, window2=window2, step2=step2)
    df_result_2, time_list_2 = sta.cal_volt_uniform(df, volt_column, window=window, step=step)

    df_result_1['time'] = time_list_1
    df_result_2['time'] = time_list_2

    # --- Disabled: scatter plot of the per-cell results ---
    # second_list = [((pd.to_datetime(x)-pd.to_datetime(time_list[0])).total_seconds()) for x in time_list]  # offset from the first point (the expression yields seconds)

    # fig = plt.figure(figsize=(20,10))

    # length = len(df_result)
    # for column in volt_column:
    #     # plt.plot([x for x in range(0, length)], df_result[column][0:length], label=column)
    #     plt.scatter(time_list[:], df_result[column][:length], label=column)

    # plt.legend()
    # plt.title('{}电压离群(电压滤波窗口大小:{}, 步进大小:{}'.format(sn, window, step, window2, step2))
    # sn = sn.replace('/','_')
    # plt.savefig('./结果/新数据/' + sn + '.png')
    # plt.close()

    # --- Disabled: alarms ---
    # df_result_1['time'] = time_list_1
    # alarm_result = sta.alarm(df_result_1, volt_column,  alarm_window=20, alarm_ratio=0.9, alarm_threshold=3.5)
    # alarm_result['sn'] = [sn] * len(alarm_result)
    # alarm_result['suptype'] = ['电压变化量离群'] * len(alarm_result)
    # all_result = pd.concat([all_result, alarm_result])

    # df_result_2['time'] = time_list_2
    # alarm_result = sta.alarm(df_result_2, volt_column,  alarm_window=20, alarm_ratio=0.9, alarm_threshold=3.5)
    # alarm_result['sn'] = [sn] * len(alarm_result)
    # alarm_result['suptype'] = ['电压离群'] * len(alarm_result)

    # Record the cell numbers whose deviation exceeds 3.
    # NOTE(review): the recorded run of this cell failed inside sta.instorage with
    # "TypeError: drop() got an unexpected keyword argument 'column'"; that fix
    # belongs in sta.py (presumably drop(columns='time')) - confirm.
    df_all_result = sta.instorage(sn, df_result_1, df_result_2)

    # all_result = pd.concat([all_result, alarm_result])
    # all_result.to_csv('./result_volt.csv')

    # Only the first device is processed: the loop exits unconditionally here.
    break
### start to get data PK504B10100004341 from 2021-10-30 00:00:00 to 2021-10-31 00:00:00
# get data from 2021-10-30 00:00:00 to 2021-10-31 00:00:00......... 
all data-getting done, bms_count is 3754, gps_count is 0, system_count is 0, accum_count is 0 

C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\frame.py:4441: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-1-8edc7c0a33b7> in <module>
     82 
     83     # 记录偏差超过3的电芯编号
---> 84     df_all_result = sta.instorage(sn, df_result_1, df_result_2)
     85 
     86     # all_result = pd.concat([all_result, alarm_result])

D:\WORK\QX\PROJECT\data_analyze_platform\LIB\MIDDLE\OutlierDetection\VoltOutlier\V_1_0_0\sta.py in instorage(sn, df_voltdiff_result, df_volt_result)
    112     time_list = []
    113     type_list = []
--> 114     df_result = df_voltdiff_result.copy().drop(column='time')
    115     time_list_1 = df_voltdiff_result['time']
    116     df_result = df_result[(df_result>3) | (df_result<-3)].dropna(axis=0, how='all').dropna(axis=1, how='all')

TypeError: drop() got an unexpected keyword argument 'column'
    # Builds df_all_result: one row per (cell column, sample) whose value in
    # df_result_1 or df_result_2 is above 3 or below -3.
    # NOTE(review): the enclosing def/loop header is not visible in this cell; the
    # names match the body of sta.instorage shown in the traceback - confirm.
    df_all_result = pd.DataFrame(columns=['sn', 'time', 'cellnum', 'value', 'type'])
    
    # Parallel lists, one entry per flagged value; they become the output columns.
    value_list = []
    cellnum_list = []
    time_list = []
    type_list = []
    # First source: df_result_1 restricted to the cell-voltage columns.
    df_result = df_result_1[volt_column].copy()
    # Mask everything inside [-3, 3] to NaN, then drop rows and columns that are all NaN.
    df_result = df_result[(df_result>3) | (df_result<-3)].dropna(axis=0, how='all').dropna(axis=1, how='all')
    for column in df_result.columns:
        # Keep only this cell's flagged rows.
        df = df_result[[column]].dropna(axis=0, how='all')
        value_list.extend(df[column].tolist())
        cellnum_list.extend([column]*len(df))
        # assumes the surviving index labels are valid keys/positions in time_list_1 - TODO confirm
        time_list.extend([time_list_1[x] for x in df.index])
    # Number of entries contributed by df_result_1; used below to label the type column.
    length_1 = len(value_list)
    


    # Second source: the same filtering applied to df_result_2.
    df_result = df_result_2[volt_column].copy()
    df_result = df_result[(df_result>3) | (df_result<-3)].dropna(axis=0, how='all').dropna(axis=1, how='all')
    for column in df_result.columns:
        df = df_result[[column]].dropna(axis=0, how='all')
        value_list.extend(df[column].tolist())
        cellnum_list.extend([column]*len(df))
        # assumes the surviving index labels are valid keys/positions in time_list_2 - TODO confirm
        time_list.extend([time_list_2[x] for x in df.index])

    # Entries from df_result_1 come first in every list, followed by those from df_result_2.
    length_2 = len(value_list) - length_1
    type_list.extend(['电压变化量离群'] * length_1)
    type_list.extend(['电压离群'] * length_2)
    # Assemble the output table from the parallel lists.
    df_all_result['sn'] = [sn] * len(value_list)
    df_all_result['cellnum'] = cellnum_list
    df_all_result['value'] = value_list
    df_all_result['time'] = time_list
    df_all_result['type'] = type_list

# Row count of df_bms, the 'bms' frame left by the last executed loop iteration.
len(df_bms)
3754