UpdtFct.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. import pandas as pd
  2. import pymysql
  3. from sqlalchemy import create_engine
  4. import datetime
  5. #建立引擎
  6. engine = create_engine(str(r"mysql+mysqldb://%s:" + '%s' + "@%s/%s") % ('root', 'pengmin', 'localhost', 'qixiangdb'))
  7. conn_qx = pymysql.connect(
  8. host='rm-bp10j10qy42bzy0q77o.mysql.rds.aliyuncs.com',
  9. user='qx_cas',
  10. password='Qx@123456',#Qx@123456
  11. database='qx_cas',
  12. charset='utf8'
  13. )
  14. conn_local = pymysql.connect(
  15. host='localhost',
  16. user='root',
  17. password='pengmin',
  18. database='qixiangdb',
  19. charset='utf8'
  20. )
  21. def getNextSoc(start_soc):
  22. '''输入当前的soc,寻找目标soc函数'''
  23. if start_soc>80:
  24. next_soc=80
  25. elif start_soc>60:
  26. next_soc=60
  27. elif start_soc>40:
  28. next_soc=40
  29. elif start_soc>20:
  30. next_soc=20
  31. else:
  32. next_soc=1
  33. return next_soc
  34. def updtSnFct(sn_factor_df,end_soc,delta_range,range_soc):
  35. '''输入当前的soc区间段,里程变量量,soc变化量,输出新的df
  36. sn_factor_df为dataframe,delta_range单位为km,range_soc单位为km/persoc'''
  37. if end_soc==80:
  38. updtFctByCol(sn_factor_df,'a0',delta_range,range_soc)
  39. elif end_soc==60:
  40. updtFctByCol(sn_factor_df,'a1',delta_range,range_soc)
  41. elif end_soc==40:
  42. updtFctByCol(sn_factor_df,'a2',delta_range,range_soc)
  43. elif end_soc==20:
  44. updtFctByCol(sn_factor_df,'a3',delta_range,range_soc)
  45. elif end_soc<20:
  46. updtFctByCol(sn_factor_df,'a4',delta_range,range_soc)
  47. return sn_factor_df
  48. def updtFctByCol(sn_factor_df,colmun_name,delta_range,range_soc):
  49. '''更新制定列的factor,sn_factor_df为dataframe,新的系数更新到第一行。delta_range单位为km,
  50. range_soc单位为km/persoc,默认按照100km更新续驶里程权重'''
  51. range_soc_old=sn_factor_df.loc[0,colmun_name]#读取第0行的老factor
  52. debounce_range=200#更新权重
  53. new_factor=range_soc*((delta_range)/debounce_range)+range_soc_old*(1-(delta_range)/debounce_range)
  54. #在第1行,存储新的factor
  55. sn_factor_df.loc[1,colmun_name]=new_factor
  56. return sn_factor_df
  57. def updtTodayFct(factor_input,sn_day_df):
  58. '''更新今日的Factor***'''
  59. sn_factor_df_last=factor_input
  60. start_soc=sn_day_df.loc[0,'soc']
  61. next_soc=getNextSoc(start_soc)
  62. start_range=sn_day_df.loc[0,'vehodo']
  63. sn=sn_day_df.loc[0,'name']
  64. for index in range(len(sn_day_df)-1):
  65. #寻找分割点,
  66. index_soc=sn_day_df.loc[index,'soc']#当前行soc
  67. next_index_soc=sn_day_df.loc[index+1,'soc']#下一行soc
  68. if (index_soc>=next_soc)&(next_index_soc<next_soc):#当前行高,下一行低
  69. delta_soc_tonext=start_soc-next_soc#两个距离点的soc差,单位为%
  70. delta_range_tonext=sn_day_df.loc[index,'vehodo']-start_range#两个时间点的距离差,单位为m
  71. delta_range_tonext_km=delta_range_tonext/1000#两个时间点的距离差,单位为km
  72. range_soc_tonext=(delta_range_tonext/1000)/delta_soc_tonext#单位soc可行驶的公里数
  73. print(sn+'start_soc: '+str(start_soc),'next_soc: '+str(next_soc),'delta_vehodo; '+str(round(delta_range_tonext_km,3))
  74. +'km'+' range_soc:'+str(round(range_soc_tonext,3)))
  75. if (delta_range_tonext_km)>1:
  76. sn_factor_df_last=updtSnFct(sn_factor_df_last,next_soc,delta_range_tonext_km,range_soc_tonext)
  77. start_soc=next_index_soc#变更开始soc
  78. next_soc=getNextSoc(start_soc)#变更结束soc
  79. start_range=sn_day_df.loc[index+1,'vehodo']#变更开始里程
  80. return sn_factor_df_last
  81. def snDayDfPreProcess(sn_day_df):
  82. '''预处理,判断是否在dirvemode,获取drivemode条件下的累计行驶距离。
  83. 增加delta_soc列,drive_flg列,vehodo列'''
  84. sn_day_df=sn_day_df.reset_index(drop=True)#重置index
  85. #增加列,计算delta_soc
  86. for index in range(len(sn_day_df)):
  87. if index==0:
  88. sn_day_df.loc[index,'delta_soc']=0
  89. else:
  90. sn_day_df.loc[index,'delta_soc']=sn_day_df.loc[index,'soc']-sn_day_df.loc[index-1,'soc']
  91. #增加列,判断是否在drive状态
  92. drive_flg=False
  93. accum_distance=0
  94. for index in range(len(sn_day_df)):
  95. if index==0:
  96. sn_day_df.loc[index,'drive_status']=drive_flg
  97. sn_day_df.loc[index,'vehodo']=0
  98. else:
  99. if (sn_day_df.loc[index,'delta_soc']<-0.1)|\
  100. ((sn_day_df.loc[index,'delta_soc']<=0)&(sn_day_df.loc[index,'distance']>500)):#soc处于下降状态,说明在drive
  101. drive_flg=True#置true
  102. elif sn_day_df.loc[index,'delta_soc']>0.1:#soc处于上升状态,说明不在drive
  103. drive_flg=False#置false
  104. accum_distance=0#清零
  105. sn_day_df.loc[index,'drive_flg']=drive_flg
  106. accum_distance+=sn_day_df.loc[index,'distance']#对行驶里程进行累加
  107. sn_day_df.loc[index,'vehodo']=accum_distance
  108. #筛选所有的drive信息行
  109. sn_day_drive_df=sn_day_df.loc[sn_day_df['drive_flg']==True,:]
  110. sn_day_drive_df=sn_day_drive_df.reset_index(drop=True)#重置index
  111. return sn_day_drive_df
  112. def updtAllSnFct(start_date,end_date):
  113. '''计算开始时间到结束时间的,所有sn的factor'''
  114. start_date_datetime=datetime.datetime.strptime(start_date,'%Y-%m-%d')#开始时间
  115. end_date_datetime=datetime.datetime.strptime(end_date,'%Y-%m-%d')#开始时间
  116. delta_day=(end_date_datetime-start_date_datetime).days#间隔天数
  117. i=1
  118. while i<=delta_day:
  119. end_date=(start_date_datetime+datetime.timedelta(days=i)).strftime("%Y-%m-%d")
  120. updtAllSnTodayFct(start_date,end_date)#调用函数
  121. print('update all sn factor from '+start_date+" to "+end_date)
  122. start_date=end_date
  123. i+=1#自加
  124. def updtAllSnTodayFct(start_date,end_date):
  125. ''''更新今天所有sn的factorx信息,start_date和end_date相隔一天。此处还可优化'''
  126. start_date_str="'"+start_date+"'"
  127. end_date_str="'"+end_date+"'"
  128. sql_cmd="select * from drive_info where time between "+start_date_str+" and "+end_date_str+" and distance!=0;"
  129. range_soc_df = pd.read_sql(sql_cmd, conn_qx)#使用read_sql方法查询qx数据库
  130. #筛选出所有当日数据之后,筛选当日有更新的sn
  131. today_sn_list=range_soc_df['name'].unique().tolist()#[:100]#先一次更新5个
  132. #建立空的dataframe,用于承接所有更新的factor信息
  133. today_sn_fct_df=pd.DataFrame([],columns=['sn','date','a0','a1','a2','a3','a4'])
  134. for sn in today_sn_list:
  135. #寻找factor_df,里面是否有sn号,如果没有sn对应信息,则新增信息。
  136. sn_str="'"+sn+"'"
  137. sql_cmd2="select sn,date,a0,a1,a2,a3,a4 from tb_sn_factor where date<"+start_date_str+" and sn="+sn_str
  138. #此处可以限定每次查询的数量,例如不高于5行
  139. factor_df=pd.read_sql(sql_cmd2, conn_local)#使用read_sql方法查询local数据库
  140. #按照sn号和日期进行去重,避免运行时重复产生factor数据,保留第一次出现的行。
  141. factor_df=factor_df.drop_duplicates(subset=['sn','date'],keep='first')
  142. if len(factor_df)==0:
  143. #如果没有搜索到factor历史数据,则声明一个新的进行初始化
  144. start_date_datetime=datetime.datetime.strptime(start_date,'%Y-%m-%d')
  145. yesterday=(start_date_datetime+datetime.timedelta(days=-1)).strftime("%Y-%m-%d")
  146. #为sn申请一个新的factor,初始值为1
  147. factor_df=pd.DataFrame({'sn':sn,'date':yesterday,'a0':[1],'a1':[1],'a2':[1],'a3':[1],'a4':[1]})
  148. sn_factor_df=factor_df.loc[factor_df['sn']==sn,:]#筛选sn对应的factor
  149. sn_factor_df=sn_factor_df.sort_values(by='date',ascending='True')#按照日期排序
  150. sn_factor_df_last=sn_factor_df.tail(1).copy()#寻找最后一行,代表最近日期
  151. sn_factor_df_last=sn_factor_df_last.append(sn_factor_df_last)#新增加一行,用于存储新的factor
  152. sn_factor_df_last=sn_factor_df_last.reset_index(drop=True)#重置index
  153. sn_factor_df_last.loc[1,'date']=start_date#更改后一行的date为当前日期
  154. #筛选对应车辆的信息
  155. condition_sn=(range_soc_df['name']==sn)
  156. sn_day_df=range_soc_df.loc[condition_sn,:].copy()
  157. sn_day_df=sn_day_df.reset_index(drop=True)
  158. #使用updtTodayFct函数更新今天的factor
  159. if len(sn_day_df)>=2:
  160. #使用process函数,进行预处理
  161. sn_day_df=snDayDfPreProcess(sn_day_df)#预处理函数
  162. if len(sn_day_df)>=2:
  163. sn_factor_df_new=updtTodayFct(sn_factor_df_last,sn_day_df)#
  164. today_sn_fct_df=today_sn_fct_df.append(sn_factor_df_new.loc[1,:])#筛选第一行,进行拼接,最后写入到数据库中
  165. #将today_sn_fct_df写入到数据库中,今天所有factor更新的系数,一次写入。
  166. if len(today_sn_fct_df)>=1:
  167. today_sn_fct_df.to_sql('tb_sn_factor',con=engine,chunksize=10000,if_exists='append',index=False)
  168. def updtOneSnFct(sn,start_date,end_date):
  169. '''计算开始时间到结束时间的,一个sn的所有factor'''
  170. start_date_datetime=datetime.datetime.strptime(start_date,'%Y-%m-%d')#开始时间
  171. end_date_datetime=datetime.datetime.strptime(end_date,'%Y-%m-%d')#开始时间
  172. delta_day=(end_date_datetime-start_date_datetime).days#间隔天数
  173. i=1
  174. while i<=delta_day:
  175. end_date=(start_date_datetime+datetime.timedelta(days=i)).strftime("%Y-%m-%d")
  176. updtOneSnTodayFct(sn,start_date,end_date)#调用函数
  177. print('update one sn factor from '+start_date+" to "+end_date)
  178. start_date=end_date
  179. i+=1#自加
  180. def updtOneSnTodayFct(sn,start_date,end_date):
  181. start_date_str="'"+start_date+"'"
  182. end_date_str="'"+end_date+"'"
  183. sn_str="'"+sn+"'"
  184. sql_cmd="select * from drive_info where time between "+start_date_str+" and "+end_date_str+\
  185. " and distance!=0 and name="+sn_str
  186. range_soc_df = pd.read_sql(sql_cmd, conn_qx)#使用read_sql方法查询qx数据库
  187. if len(range_soc_df)>0:
  188. #筛选出所有当日数据之后,筛选当日有更新的sn
  189. today_sn_list=range_soc_df['name'].unique().tolist()
  190. #建立空的dataframe,用于承接所有更新的factor信息
  191. today_sn_fct_df=pd.DataFrame([],columns=['sn','date','a0','a1','a2','a3','a4'])
  192. for sn in today_sn_list:
  193. #寻找factor_df,里面是否有sn号,如果没有sn对应信息,则新增信息。
  194. sn_str="'"+sn+"'"
  195. sql_cmd2="select sn,date,a0,a1,a2,a3,a4 from tb_sn_factor where date<"+start_date_str+" and sn="+sn_str
  196. factor_df=pd.read_sql(sql_cmd2, conn_local)#使用read_sql方法查询local数据库
  197. #按照sn号和日期进行去重,避免运行时重复产生factor数据,保留第一次出现的行。
  198. factor_df=factor_df.drop_duplicates(subset=['sn','date'],keep='first')
  199. if len(factor_df)==0:
  200. #如果没有搜索到factor历史数据,则声明一个新的进行初始化
  201. start_date_datetime=datetime.datetime.strptime(start_date,'%Y-%m-%d')
  202. yesterday=(start_date_datetime+datetime.timedelta(days=-1)).strftime("%Y-%m-%d")
  203. factor_df=pd.DataFrame({'sn':sn,'date':yesterday,'a0':[1],'a1':[1],'a2':[1],'a3':[1],'a4':[1]})
  204. today_sn_fct_df=today_sn_fct_df.append(factor_df.loc[0,:])#将初始化的行记录到数据库
  205. sn_factor_df=factor_df.loc[factor_df['sn']==sn,:]#筛选sn对应的factor
  206. sn_factor_df=sn_factor_df.sort_values(by='date',ascending='True')#按照日期排序
  207. sn_factor_df_last=sn_factor_df.tail(1).copy()#寻找最后一行,代表最近日期
  208. sn_factor_df_last=sn_factor_df_last.append(sn_factor_df_last)#新增加一行,用于存储新的factor
  209. sn_factor_df_last=sn_factor_df_last.reset_index(drop=True)#重置index
  210. sn_factor_df_last.loc[1,'date']=start_date#更改后一行的date为当前日期
  211. #筛选对应车辆的信息
  212. condition_sn=(range_soc_df['name']==sn)
  213. sn_day_df=range_soc_df.loc[condition_sn,:].copy()
  214. sn_day_df=sn_day_df.reset_index(drop=True)
  215. #使用updtTodayFct函数更新今天的factor
  216. if len(sn_day_df)>=2:
  217. #使用process函数,进行预处理
  218. sn_day_df=snDayDfPreProcess(sn_day_df)#!!!!!!!!!!!增加
  219. if len(sn_day_df)>=2:
  220. sn_factor_df_new=updtTodayFct(sn_factor_df_last,sn_day_df)#
  221. today_sn_fct_df=today_sn_fct_df.append(sn_factor_df_new.loc[1,:])#筛选第一行,进行拼接,最后写入到数据库中
  222. # #将today_sn_fct_df写入到数据库中
  223. if len(today_sn_fct_df)>=1:
  224. today_sn_fct_df.to_sql('tb_sn_factor',con=engine,chunksize=10000,if_exists='append',index=False)
  225. # print(sn+' factor will be update in table tb_sn_factor!')
  226. return today_sn_fct_df
  227. # def updtASnTodayFct(start_date,end_date,today_sn_list):
  228. # sql_cmd="select * from qixiang_test where time>='"+start_date+"' and time<='"+end_date+"'"
  229. # range_soc_df = pd.read_sql(sql_cmd, conn)#使用read_sql方法查询数据库
  230. # sql_cmd2="select sn,date,a0,a1,a2,a3,a4 from tb_sn_factor where date<'"+start_date+"'"
  231. # factor_df=pd.read_sql(sql_cmd2, conn)#使用read_sql方法查询数据库
  232. # #筛选出所有当日数据之后,筛选当日有更新的sn
  233. # # today_sn_list=range_soc_df['sn'].unique().tolist()
  234. # # today_sn_list=today_sn_list[:10]#更新若干个
  235. # #建立空的dataframe,用于承接所有更新的factor信息
  236. # today_sn_fct_df=pd.DataFrame([],columns=['sn','date','a0','a1','a2','a3','a4'])
  237. # for sn in today_sn_list:
  238. # sn_factor_df=factor_df.loc[factor_df['sn']==sn,:]#筛选sn对应的factor
  239. # sn_factor_df=sn_factor_df.sort_values(by='date',ascending='True')#按照日期排序
  240. # sn_factor_df_last=sn_factor_df.tail(1).copy()#寻找最后一行,代表最近日期
  241. # sn_factor_df_last=sn_factor_df_last.append(sn_factor_df_last)#新增加一行,用于存储新的factor
  242. # sn_factor_df_last=sn_factor_df_last.reset_index(drop=True)#重置index
  243. # sn_factor_df_last.loc[1,'date']=start_date#更改后一行的date为当前日期
  244. # #筛选对应车辆的信息
  245. # condition_sn=(range_soc_df['sn']==sn)
  246. # sn_day_df=range_soc_df.loc[condition_sn,:].copy()
  247. # sn_day_df=sn_day_df.reset_index(drop=True)
  248. # #使用updtTodayFct函数更新今天的factor
  249. # sn_factor_df_new=updtTodayFct(sn_factor_df_last,sn_day_df)
  250. # today_sn_fct_df=today_sn_fct_df.append(sn_factor_df_new.loc[1,:])#筛选第一行,进行拼接,最后写入到数据库中
  251. # #将today_sn_fct_df写入到数据库中
  252. # today_sn_fct_df.to_sql('tb_sn_factor',con=engine,chunksize=10000,if_exists='append',index=False)