Update notebook 7

rickwierenga · rickwierenga · commit b29442f18fba · 2020-01-27T19:39:12.000+01:00
diff --git a/07_lstm.ipynb b/07_lstm.ipynb
@@ -1,155 +1,203 @@
 {
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": [
-    "#Inspired by https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3%20-%20Neural%20Networks/recurrent_network.py\n",
-    "import tensorflow as tf\n",
-    "import numpy as np\n",
-    "from tensorflow.contrib import rnn\n",
-    "from tensorflow.examples.tutorials.mnist import input_data"
-   ]
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "LSTM.ipynb",
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "accelerator": "GPU"
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "# configuration\n",
-    "#                        O * W + b -> 10 labels for each image, O[? 28], W[28 10], B[10]\n",
-    "#                       ^ (O: output 28 vec from 28 vec input)\n",
-    "#                       |\n",
-    "#      +-+  +-+       +--+\n",
-    "#      |1|->|2|-> ... |28| time_step_size = 28\n",
-    "#      +-+  +-+       +--+\n",
-    "#       ^    ^    ...  ^\n",
-    "#       |    |         |\n",
-    "# img1:[28] [28]  ... [28]\n",
-    "# img2:[28] [28]  ... [28]\n",
-    "# img3:[28] [28]  ... [28]\n",
-    "# ...\n",
-    "# img128 or img256 (batch_size or test_size 256)\n",
-    "#      each input size = input_vec_size=lstm_size=28\n",
-    "\n",
-    "# configuration variables\n",
-    "input_vec_size = lstm_size = 28\n",
-    "time_step_size = 28\n",
-    "\n",
-    "batch_size = 128\n",
-    "test_size = 256\n",
-    "\n",
-    "def init_weights(shape):\n",
-    "    return tf.Variable(tf.random_normal(shape, stddev=0.01))\n",
-    "\n",
-    "def model(X, W, B, lstm_size):\n",
-    "    # X, input shape: (batch_size, time_step_size, input_vec_size)\n",
-    "    XT = tf.transpose(X, [1, 0, 2])  # permute time_step_size and batch_size\n",
-    "    # XT shape: (time_step_size, batch_size, input_vec_size)\n",
-    "    XR = tf.reshape(XT, [-1, lstm_size]) # each row has input for each lstm cell (lstm_size=input_vec_size)\n",
-    "    # XR shape: (time_step_size * batch_size, input_vec_size)\n",
-    "    X_split = tf.split(XR, time_step_size, 0) # split them to time_step_size (28 arrays)\n",
-    "    # Each array shape: (batch_size, input_vec_size)\n",
-    "\n",
-    "    # Make lstm with lstm_size (each input vector size)\n",
-    "    lstm = rnn.BasicLSTMCell(lstm_size, forget_bias=1.0, state_is_tuple=True)\n",
-    "\n",
-    "    # Get lstm cell output, time_step_size (28) arrays with lstm_size output: (batch_size, lstm_size)\n",
-    "    outputs, _states = rnn.static_rnn(lstm, X_split, dtype=tf.float32)\n",
-    "\n",
-    "    # Linear activation\n",
-    "    # Get the last output\n",
-    "    return tf.matmul(outputs[-1], W) + B, lstm.state_size # State size to initialize the stat\n",
-    "\n",
-    "mnist = input_data.read_data_sets(\"MNIST_data/\", one_hot=True)\n",
-    "trX, trY, teX, teY = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels\n",
-    "trX = trX.reshape(-1, 28, 28)\n",
-    "teX = teX.reshape(-1, 28, 28)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "X = tf.placeholder(\"float\", [None, 28, 28])\n",
-    "Y = tf.placeholder(\"float\", [None, 10])\n",
-    "\n",
-    "# get lstm_size and output 10 labels\n",
-    "W = init_weights([lstm_size, 10])\n",
-    "B = init_weights([10])\n",
-    "\n",
-    "py_x, state_size = model(X, W, B, lstm_size)\n",
-    "\n",
-    "cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=py_x, labels=Y))\n",
-    "train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)\n",
-    "predict_op = tf.argmax(py_x, 1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": false
-   },
-   "outputs": [],
-   "source": [
-    "# Launch the graph in a session\n",
-    "with tf.Session() as sess:\n",
-    "    # you need to initialize all variables\n",
-    "    tf.global_variables_initializer().run()\n",
-    "\n",
-    "    for i in range(100):\n",
-    "        for start, end in zip(range(0, len(trX), batch_size), range(batch_size, len(trX)+1, batch_size)):\n",
-    "            sess.run(train_op, feed_dict={X: trX[start:end], Y: trY[start:end]})\n",
-    "\n",
-    "        test_indices = np.arange(len(teX))  # Get A Test Batch\n",
-    "        np.random.shuffle(test_indices)\n",
-    "        test_indices = test_indices[0:test_size]\n",
-    "\n",
-    "        print(i, np.mean(np.argmax(teY[test_indices], axis=1) ==\n",
-    "                         sess.run(predict_op, feed_dict={X: teX[test_indices]})))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "collapsed": true
-   },
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 2",
-   "language": "python",
-   "name": "python2"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 2
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.13"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 0
-}
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "DElOK_LVQBmw",
+        "colab_type": "text"
+      },
+      "source": [
+        "# Long short-term memory\n",
+        "\n",
+        "Classify MNIST digits with a Keras LSTM that reads each image as a sequence of 28-pixel rows. Inspired by https://www.tensorflow.org/guide/keras/rnn"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "zAlbiCVkPxTT",
+        "colab_type": "code",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 34
+        },
+        "outputId": "523dbae7-5197-43aa-ffa8-4ef97e201a61"
+      },
+      "source": [
+        "%tensorflow_version 2.x"
+      ],
+      "execution_count": 1,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "TensorFlow 2.x selected.\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "41IzE0EMQNrC",
+        "colab_type": "code",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 34
+        },
+        "outputId": "1713b179-2d8b-4356-8c19-7ee01913a23f"
+      },
+      "source": [
+        "%pylab inline"
+      ],
+      "execution_count": 2,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Populating the interactive namespace from numpy and matplotlib\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "aglsiJuLQOv7",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "import tensorflow as tf"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "qYeg56-0QvvM",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()\n",
+        "x_train = x_train / 255\n",
+        "x_test = x_test / 255"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "adkoUZ-jQPqF",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "model = tf.keras.Sequential([\n",
+        "  tf.keras.layers.LSTM(64, input_shape=(None, 28)),\n",
+        "  tf.keras.layers.BatchNormalization(),\n",
+        "  tf.keras.layers.Dense(10, activation='softmax')\n",
+        "])"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "2IyJnHkEQvKA",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        "model.compile(  # configure optimizer, loss and reported metric before training\n",
+        "    optimizer=tf.keras.optimizers.SGD(learning_rate=0.01),  # `learning_rate` replaces the deprecated `lr` alias\n",
+        "    loss=tf.keras.losses.sparse_categorical_crossentropy,  # sparse variant: takes integer class labels, not one-hot vectors\n",
+        "    metrics=['accuracy']\n",
+        ")"
+      ],
+      "execution_count": 0,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "roj1wljnQ2NM",
+        "colab_type": "code",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 374
+        },
+        "outputId": "e39349a5-e790-4331-caa6-e15d3eb4babe"
+      },
+      "source": [
+        "history = model.fit(\n",
+        "    x_train, y_train,\n",
+        "    epochs=10,\n",
+        "    validation_data=(x_test, y_test)\n",
+        ")"
+      ],
+      "execution_count": 7,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "text": [
+            "Train on 60000 samples, validate on 10000 samples\n",
+            "Epoch 1/10\n",
+            "60000/60000 [==============================] - 12s 208us/sample - loss: 0.7070 - accuracy: 0.7732 - val_loss: 0.3934 - val_accuracy: 0.8707\n",
+            "Epoch 2/10\n",
+            "60000/60000 [==============================] - 9s 156us/sample - loss: 0.2560 - accuracy: 0.9209 - val_loss: 0.2281 - val_accuracy: 0.9229\n",
+            "Epoch 3/10\n",
+            "60000/60000 [==============================] - 10s 159us/sample - loss: 0.1845 - accuracy: 0.9430 - val_loss: 0.1797 - val_accuracy: 0.9450\n",
+            "Epoch 4/10\n",
+            "60000/60000 [==============================] - 9s 156us/sample - loss: 0.1517 - accuracy: 0.9525 - val_loss: 0.1557 - val_accuracy: 0.9513\n",
+            "Epoch 5/10\n",
+            "60000/60000 [==============================] - 9s 158us/sample - loss: 0.1289 - accuracy: 0.9606 - val_loss: 0.1007 - val_accuracy: 0.9662\n",
+            "Epoch 6/10\n",
+            "60000/60000 [==============================] - 9s 158us/sample - loss: 0.1127 - accuracy: 0.9646 - val_loss: 0.0977 - val_accuracy: 0.9666\n",
+            "Epoch 7/10\n",
+            "60000/60000 [==============================] - 9s 158us/sample - loss: 0.1028 - accuracy: 0.9681 - val_loss: 0.0785 - val_accuracy: 0.9739\n",
+            "Epoch 8/10\n",
+            "60000/60000 [==============================] - 9s 152us/sample - loss: 0.0942 - accuracy: 0.9713 - val_loss: 0.0787 - val_accuracy: 0.9727\n",
+            "Epoch 9/10\n",
+            "60000/60000 [==============================] - 10s 160us/sample - loss: 0.0868 - accuracy: 0.9723 - val_loss: 0.0732 - val_accuracy: 0.9751\n",
+            "Epoch 10/10\n",
+            "60000/60000 [==============================] - 9s 155us/sample - loss: 0.0809 - accuracy: 0.9758 - val_loss: 0.0723 - val_accuracy: 0.9748\n"
+          ],
+          "name": "stdout"
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "bbPt2gCRQ_8h",
+        "colab_type": "code",
+        "colab": {}
+      },
+      "source": [
+        ""
+      ],
+      "execution_count": 0,
+      "outputs": []
+    }
+  ]
+}
diff --git a/README.md b/README.md
@@ -10,7 +10,7 @@ Introduction to deep learning based on Google's TensorFlow framework.  A fork of
 * [Deep Feedforward Neural Network (Multilayer Perceptron with 2 Hidden Layers O.o)](04_modern_net.ipynb)
 * [Convolutional Neural Network](05_convolutional_net.ipynb)
 * [Autoencoders](06_autoencoder.ipynb)
-* [Recurrent Neural Network (LSTM)](07_lstm.ipynb) (not yet updated)
+* [Recurrent Neural Network (LSTM)](07_lstm.ipynb)
 * [Word2vec](08_word2vec.ipynb) (not yet updated)
 * [TensorBoard](09_tensorboard.ipynb) (not yet updated)
 * [Save and restore net](10_save_restore_net.ipynb) (not yet updated)