{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2023-05-27 18:00:50.790237: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n", "2023-05-27 18:00:50.791341: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n", "2023-05-27 18:00:50.810737: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.\n", "2023-05-27 18:00:50.811081: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n", "To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2023-05-27 18:00:51.132805: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n" ] } ], "source": [ "##深度学习的库\n", "import tensorflow as tf\n", "import statistics\n", "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import os\n", "import pandas as pd\n", "from math import sqrt\n", "from sklearn.metrics import mean_squared_error\n", "from sklearn.metrics import mean_absolute_error\n", "from tensorflow.keras.layers import Dropout\n", "from tensorflow.keras.regularizers import l2" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from pandasql import sqldf \n", "from sqlalchemy import create_engine\n", "import statsmodels.api as sm \n", "import numpy as np\n", "\n", "mysql_user = 'root'\n", "mysql_password = 'qx123456'\n", "mysql_host = 'algo-dev-internal.li-ai.com.cn'\n", "mysql_port = 3306\n", "mysql_db = 'algo'\n", "\n", "db_engine = create_engine(\"mysql+pymysql://{}:{}@{}:{}/{}?charset=utf8\".format(mysql_user, mysql_password, mysql_host, mysql_port, mysql_db),\n", " pool_recycle=7200,pool_size=2)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "##成都撬装站" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "sql=\"select day,slot,c_b_count from algo_dwd_station_change_flow where organ_code='qiaozhuang' \"\n", "\n", "change_data=pd.read_sql(sql, db_engine)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
" ], "text/plain": [ " day slot c_b_count\n", "0 2022-10-19 13:30-13:59 1\n", "1 2022-10-19 14:00-14:29 1\n", "2 2022-10-19 14:30-14:59 1\n", "3 2022-10-19 15:00-15:29 0\n", "4 2022-10-19 15:30-15:59 0" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "change_data.head()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "##向前取两周数据" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "slots=[\"00:00-00:29\",\n", "\"00:30-00:59\",\n", "\"01:00-01:29\",\n", "\"01:30-01:59\",\n", "\"02:00-02:29\",\n", "\"02:30-02:59\",\n", "\"03:00-03:29\",\n", "\"03:30-03:59\",\n", "\"04:00-04:29\",\n", "\"04:30-04:59\",\n", "\"05:00-05:29\",\n", "\"05:30-05:59\",\n", "\"06:00-06:29\",\n", "\"06:30-06:59\",\n", "\"07:00-07:29\",\n", "\"07:30-07:59\",\n", "\"08:00-08:29\",\n", "\"08:30-08:59\",\n", "\"09:00-09:29\",\n", "\"09:30-09:59\",\n", "\"10:00-10:29\",\n", "\"10:30-10:59\",\n", "\"11:00-11:29\",\n", "\"11:30-11:59\",\n", "\"12:00-12:29\",\n", "\"12:30-12:59\",\n", "\"13:00-13:29\",\n", "\"13:30-13:59\",\n", "\"14:00-14:29\",\n", "\"14:30-14:59\",\n", "\"15:00-15:29\",\n", "\"15:30-15:59\",\n", "\"16:00-16:29\",\n", "\"16:30-16:59\",\n", "\"17:00-17:29\",\n", "\"17:30-17:59\",\n", "\"18:00-18:29\",\n", "\"18:30-18:59\",\n", "\"19:00-19:29\",\n", "\"19:30-19:59\",\n", "\"20:00-20:29\",\n", "\"20:30-20:59\",\n", "\"21:00-21:29\",\n", "\"21:30-21:59\",\n", "\"22:00-22:29\",\n", "\"22:30-22:59\",\n", "\"23:00-23:29\",\n", "\"23:30-23:59\"]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "df_slots_onehot = pd.get_dummies(change_data.slot)[slots]" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "slot_col_name=[]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "for i in range(48):\n", " v=\"s\"+str(i)\n", " slot_col_name.append(v)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "change_data[slot_col_name]=df_slots_onehot.values" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
" ], "text/plain": [ " day slot c_b_count s0 s1 s2 s3 s4 s5 s6 ... \\\n", "0 2022-10-19 13:30-13:59 1 0 0 0 0 0 0 0 ... \n", "1 2022-10-19 14:00-14:29 1 0 0 0 0 0 0 0 ... \n", "2 2022-10-19 14:30-14:59 1 0 0 0 0 0 0 0 ... \n", "3 2022-10-19 15:00-15:29 0 0 0 0 0 0 0 0 ... \n", "4 2022-10-19 15:30-15:59 0 0 0 0 0 0 0 0 ... \n", "... ... ... ... .. .. .. .. .. .. .. ... \n", "5757 2023-02-16 12:00-12:29 0 0 0 0 0 0 0 0 ... \n", "5758 2023-02-16 12:30-12:59 0 0 0 0 0 0 0 0 ... \n", "5759 2023-02-16 13:00-13:29 1 0 0 0 0 0 0 0 ... \n", "5760 2023-02-16 13:30-13:59 0 0 0 0 0 0 0 0 ... \n", "5761 2023-02-16 14:00-14:29 1 0 0 0 0 0 0 0 ... \n", "\n", " s38 s39 s40 s41 s42 s43 s44 s45 s46 s47 \n", "0 0 0 0 0 0 0 0 0 0 0 \n", "1 0 0 0 0 0 0 0 0 0 0 \n", "2 0 0 0 0 0 0 0 0 0 0 \n", "3 0 0 0 0 0 0 0 0 0 0 \n", "4 0 0 0 0 0 0 0 0 0 0 \n", "... ... ... ... ... ... ... ... ... ... ... \n", "5757 0 0 0 0 0 0 0 0 0 0 \n", "5758 0 0 0 0 0 0 0 0 0 0 \n", "5759 0 0 0 0 0 0 0 0 0 0 \n", "5760 0 0 0 0 0 0 0 0 0 0 \n", "5761 0 0 0 0 0 0 0 0 0 0 \n", "\n", "[5762 rows x 51 columns]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "change_data" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_183905/1568745592.py:1: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " change_data['c_b_count'][change_data['c_b_count']>4]=4\n" ] } ], "source": [ "change_data['c_b_count'][change_data['c_b_count']>4]=4" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(5762, 51)" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "change_data.shape" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "n0=48*7" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "336" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "n0" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "from data import *\n", "from plot import *" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "from tensorflow.keras.models import load_model" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for i in range(5500):\n", " try:\n", " data_s=change_data.iloc[i:n0+i,2:].values\n", " if i%48==0:\n", " past_history = 24\n", " future_target = 3\n", " STEP = 1\n", " TRAIN_SPLIT = 180\n", " tf.random.set_seed(13)\n", " BATCH_SIZE = 16\n", " BUFFER_SIZE = 180\n", " EVALUATION_INTERVAL = 50\n", " EPOCHS = 30\n", " x_train_multi, y_train_multi = multivariate_data(data_s, data_s[:, 0], 0,\n", " TRAIN_SPLIT, past_history,\n", " future_target, STEP)\n", " x_val_multi, y_val_multi = multivariate_data(data_s, data_s[:, 0],\n", " TRAIN_SPLIT, None, past_history,\n", " future_target, STEP)\n", " train_data_multi = tf.data.Dataset.from_tensor_slices((x_train_multi, y_train_multi))\n", " train_data_multi = train_data_multi.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE).repeat()\n", " val_data_multi = tf.data.Dataset.from_tensor_slices((x_val_multi, y_val_multi))\n", " 
val_data_multi = val_data_multi.batch(BATCH_SIZE).repeat()\n",
    "            # Build and train a two-layer LSTM; the model is retrained once per day (every 48 half-hour slots).\n",
    "            opt = tf.keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.99, epsilon=1e-08)\n",
    "            multi_step_model = tf.keras.models.Sequential()\n",
    "            multi_step_model.add(tf.keras.layers.LSTM(32, kernel_regularizer=l2(0.005),\n",
    "                                                      recurrent_regularizer=l2(0.005), bias_regularizer=l2(0.005),\n",
    "                                                      return_sequences=True, input_shape=x_train_multi.shape[-2:]))\n",
    "            multi_step_model.add(tf.keras.layers.LSTM(16, activation='relu'))\n",
    "            multi_step_model.add(tf.keras.layers.Dense(3))\n",
    "            #multi_step_model.compile(optimizer='adam', loss='mse')\n",
    "            multi_step_model.compile(optimizer=opt, loss='mse')\n",
    "            # Keep only the weights with the lowest validation loss on disk.\n",
    "            checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath='model.h5', monitor='val_loss', verbose=1, save_best_only=True, mode = 'min')\n",
    "            callback_list = [checkpoint]\n",
    "            multi_step_history = multi_step_model.fit(train_data_multi, epochs=EPOCHS,\n",
    "                                                      steps_per_epoch=EVALUATION_INTERVAL,\n",
    "                                                      validation_data=val_data_multi,\n",
    "                                                      validation_steps=50,\n",
    "                                                      callbacks = callback_list)\n",
    "            # Reload the best checkpoint and predict the next three half-hour slots from the last 24 observations.\n",
    "            model=load_model(\"model.h5\")\n",
    "            data_in=data_s[-24:].reshape(1,24,49)\n",
    "            data_out=model(data_in)\n",
    "            pre=np.round(data_out)\n",
    "            zz=change_data[[\"day\",\"slot\",\"c_b_count\"]][n0+i:n0+i+3].copy()\n",
    "            zz[\"pre\"]=pre[0]\n",
    "            # Clip the predictions to [0, 3] and append them to MySQL.\n",
    "            zz.loc[zz[\"pre\"]>=3,\"pre\"]=3\n",
    "            zz.loc[zz[\"pre\"]<0,\"pre\"]=0\n",
    "            zz[\"pre_time\"]=[1,2,3]\n",
    "            zz[\"organ_code\"]=\"fengdu2\"\n",
    "            zz[\"algo\"]=\"lstm\"\n",
    "            zz[\"data_period\"]=3\n",
    "            zz.to_sql(\"algo_dwd_station_change_static\",con=db_engine, if_exists=\"append\",index=False)\n",
    "        else:\n",
    "            # Between retrains, reuse the most recently trained model.\n",
    "            data_in=data_s[-24:].reshape(1,24,49)\n",
    "            data_out=model(data_in)\n",
    "            pre=np.round(data_out)\n",
    "            zz=change_data[[\"day\",\"slot\",\"c_b_count\"]][n0+i:n0+i+3].copy()\n",
    "            zz[\"pre\"]=pre[0]\n",
    "            zz.loc[zz[\"pre\"]>=3,\"pre\"]=3\n",
    "            zz.loc[zz[\"pre\"]<0,\"pre\"]=0\n",
    "            zz[\"pre_time\"]=[1,2,3]\n",
    "            zz[\"organ_code\"]=\"fengdu2\"\n",
    "            zz[\"algo\"]=\"lstm\"\n",
    "            zz[\"data_period\"]=3\n",
    "            zz.to_sql(\"algo_dwd_station_change_static\",con=db_engine, if_exists=\"append\",index=False)\n",
    "    except Exception as e:\n",
    "        print(str(e))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "py3916", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.16" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }