Skip to content

Commit b29442f

Browse files
committed
Update notebook 7
1 parent 637d62f commit b29442f

File tree

2 files changed

+202
-154
lines changed

2 files changed

+202
-154
lines changed

07_lstm.ipynb

+201-153
Original file line numberDiff line numberDiff line change
@@ -1,155 +1,203 @@
11
{
2-
"cells": [
3-
{
4-
"cell_type": "code",
5-
"execution_count": null,
6-
"metadata": {
7-
"collapsed": true
8-
},
9-
"outputs": [],
10-
"source": [
11-
"#Inspired by https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3%20-%20Neural%20Networks/recurrent_network.py\n",
12-
"import tensorflow as tf\n",
13-
"import numpy as np\n",
14-
"from tensorflow.contrib import rnn\n",
15-
"from tensorflow.examples.tutorials.mnist import input_data"
16-
]
2+
"nbformat": 4,
3+
"nbformat_minor": 0,
4+
"metadata": {
5+
"colab": {
6+
"name": "LSTM.ipynb",
7+
"provenance": []
8+
},
9+
"kernelspec": {
10+
"name": "python3",
11+
"display_name": "Python 3"
12+
},
13+
"accelerator": "GPU"
1714
},
18-
{
19-
"cell_type": "code",
20-
"execution_count": null,
21-
"metadata": {
22-
"collapsed": false
23-
},
24-
"outputs": [],
25-
"source": [
26-
"# configuration\n",
27-
"# O * W + b -> 10 labels for each image, O[? 28], W[28 10], B[10]\n",
28-
"# ^ (O: output 28 vec from 28 vec input)\n",
29-
"# |\n",
30-
"# +-+ +-+ +--+\n",
31-
"# |1|->|2|-> ... |28| time_step_size = 28\n",
32-
"# +-+ +-+ +--+\n",
33-
"# ^ ^ ... ^\n",
34-
"# | | |\n",
35-
"# img1:[28] [28] ... [28]\n",
36-
"# img2:[28] [28] ... [28]\n",
37-
"# img3:[28] [28] ... [28]\n",
38-
"# ...\n",
39-
"# img128 or img256 (batch_size or test_size 256)\n",
40-
"# each input size = input_vec_size=lstm_size=28\n",
41-
"\n",
42-
"# configuration variables\n",
43-
"input_vec_size = lstm_size = 28\n",
44-
"time_step_size = 28\n",
45-
"\n",
46-
"batch_size = 128\n",
47-
"test_size = 256\n",
48-
"\n",
49-
"def init_weights(shape):\n",
50-
" return tf.Variable(tf.random_normal(shape, stddev=0.01))\n",
51-
"\n",
52-
"def model(X, W, B, lstm_size):\n",
53-
" # X, input shape: (batch_size, time_step_size, input_vec_size)\n",
54-
" XT = tf.transpose(X, [1, 0, 2]) # permute time_step_size and batch_size\n",
55-
" # XT shape: (time_step_size, batch_size, input_vec_size)\n",
56-
" XR = tf.reshape(XT, [-1, lstm_size]) # each row has input for each lstm cell (lstm_size=input_vec_size)\n",
57-
" # XR shape: (time_step_size * batch_size, input_vec_size)\n",
58-
" X_split = tf.split(XR, time_step_size, 0) # split them to time_step_size (28 arrays)\n",
59-
" # Each array shape: (batch_size, input_vec_size)\n",
60-
"\n",
61-
" # Make lstm with lstm_size (each input vector size)\n",
62-
" lstm = rnn.BasicLSTMCell(lstm_size, forget_bias=1.0, state_is_tuple=True)\n",
63-
"\n",
64-
" # Get lstm cell output, time_step_size (28) arrays with lstm_size output: (batch_size, lstm_size)\n",
65-
" outputs, _states = rnn.static_rnn(lstm, X_split, dtype=tf.float32)\n",
66-
"\n",
67-
" # Linear activation\n",
68-
" # Get the last output\n",
69-
" return tf.matmul(outputs[-1], W) + B, lstm.state_size # State size to initialize the stat\n",
70-
"\n",
71-
"mnist = input_data.read_data_sets(\"MNIST_data/\", one_hot=True)\n",
72-
"trX, trY, teX, teY = mnist.train.images, mnist.train.labels, mnist.test.images, mnist.test.labels\n",
73-
"trX = trX.reshape(-1, 28, 28)\n",
74-
"teX = teX.reshape(-1, 28, 28)"
75-
]
76-
},
77-
{
78-
"cell_type": "code",
79-
"execution_count": null,
80-
"metadata": {
81-
"collapsed": false
82-
},
83-
"outputs": [],
84-
"source": [
85-
"X = tf.placeholder(\"float\", [None, 28, 28])\n",
86-
"Y = tf.placeholder(\"float\", [None, 10])\n",
87-
"\n",
88-
"# get lstm_size and output 10 labels\n",
89-
"W = init_weights([lstm_size, 10])\n",
90-
"B = init_weights([10])\n",
91-
"\n",
92-
"py_x, state_size = model(X, W, B, lstm_size)\n",
93-
"\n",
94-
"cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=py_x, labels=Y))\n",
95-
"train_op = tf.train.RMSPropOptimizer(0.001, 0.9).minimize(cost)\n",
96-
"predict_op = tf.argmax(py_x, 1)"
97-
]
98-
},
99-
{
100-
"cell_type": "code",
101-
"execution_count": null,
102-
"metadata": {
103-
"collapsed": false
104-
},
105-
"outputs": [],
106-
"source": [
107-
"# Launch the graph in a session\n",
108-
"with tf.Session() as sess:\n",
109-
" # you need to initialize all variables\n",
110-
" tf.global_variables_initializer().run()\n",
111-
"\n",
112-
" for i in range(100):\n",
113-
" for start, end in zip(range(0, len(trX), batch_size), range(batch_size, len(trX)+1, batch_size)):\n",
114-
" sess.run(train_op, feed_dict={X: trX[start:end], Y: trY[start:end]})\n",
115-
"\n",
116-
" test_indices = np.arange(len(teX)) # Get A Test Batch\n",
117-
" np.random.shuffle(test_indices)\n",
118-
" test_indices = test_indices[0:test_size]\n",
119-
"\n",
120-
" print(i, np.mean(np.argmax(teY[test_indices], axis=1) ==\n",
121-
" sess.run(predict_op, feed_dict={X: teX[test_indices]})))"
122-
]
123-
},
124-
{
125-
"cell_type": "code",
126-
"execution_count": null,
127-
"metadata": {
128-
"collapsed": true
129-
},
130-
"outputs": [],
131-
"source": []
132-
}
133-
],
134-
"metadata": {
135-
"kernelspec": {
136-
"display_name": "Python 2",
137-
"language": "python",
138-
"name": "python2"
139-
},
140-
"language_info": {
141-
"codemirror_mode": {
142-
"name": "ipython",
143-
"version": 2
144-
},
145-
"file_extension": ".py",
146-
"mimetype": "text/x-python",
147-
"name": "python",
148-
"nbconvert_exporter": "python",
149-
"pygments_lexer": "ipython2",
150-
"version": "2.7.13"
151-
}
152-
},
153-
"nbformat": 4,
154-
"nbformat_minor": 0
155-
}
15+
"cells": [
16+
{
17+
"cell_type": "markdown",
18+
"metadata": {
19+
"id": "DElOK_LVQBmw",
20+
"colab_type": "text"
21+
},
22+
"source": [
23+
"# Long short-term memory\n",
24+
"\n",
25+
"Inspired by https://www.tensorflow.org/guide/keras/rnn"
26+
]
27+
},
28+
{
29+
"cell_type": "code",
30+
"metadata": {
31+
"id": "zAlbiCVkPxTT",
32+
"colab_type": "code",
33+
"colab": {
34+
"base_uri": "https://localhost:8080/",
35+
"height": 34
36+
},
37+
"outputId": "523dbae7-5197-43aa-ffa8-4ef97e201a61"
38+
},
39+
"source": [
40+
"%tensorflow_version 2.x"
41+
],
42+
"execution_count": 1,
43+
"outputs": [
44+
{
45+
"output_type": "stream",
46+
"text": [
47+
"TensorFlow 2.x selected.\n"
48+
],
49+
"name": "stdout"
50+
}
51+
]
52+
},
53+
{
54+
"cell_type": "code",
55+
"metadata": {
56+
"id": "41IzE0EMQNrC",
57+
"colab_type": "code",
58+
"colab": {
59+
"base_uri": "https://localhost:8080/",
60+
"height": 34
61+
},
62+
"outputId": "1713b179-2d8b-4356-8c19-7ee01913a23f"
63+
},
64+
"source": [
65+
"%pylab inline"
66+
],
67+
"execution_count": 2,
68+
"outputs": [
69+
{
70+
"output_type": "stream",
71+
"text": [
72+
"Populating the interactive namespace from numpy and matplotlib\n"
73+
],
74+
"name": "stdout"
75+
}
76+
]
77+
},
78+
{
79+
"cell_type": "code",
80+
"metadata": {
81+
"id": "aglsiJuLQOv7",
82+
"colab_type": "code",
83+
"colab": {}
84+
},
85+
"source": [
86+
"import tensorflow as tf"
87+
],
88+
"execution_count": 0,
89+
"outputs": []
90+
},
91+
{
92+
"cell_type": "code",
93+
"metadata": {
94+
"id": "qYeg56-0QvvM",
95+
"colab_type": "code",
96+
"colab": {}
97+
},
98+
"source": [
99+
"(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()\n",
100+
"x_train = x_train / 255\n",
101+
"x_test = x_test / 255"
102+
],
103+
"execution_count": 0,
104+
"outputs": []
105+
},
106+
{
107+
"cell_type": "code",
108+
"metadata": {
109+
"id": "adkoUZ-jQPqF",
110+
"colab_type": "code",
111+
"colab": {}
112+
},
113+
"source": [
114+
"model = tf.keras.Sequential([\n",
115+
" tf.keras.layers.LSTM(64, input_shape=(None, 28)),\n",
116+
" tf.keras.layers.BatchNormalization(),\n",
117+
" tf.keras.layers.Dense(10, activation='softmax')\n",
118+
"])"
119+
],
120+
"execution_count": 0,
121+
"outputs": []
122+
},
123+
{
124+
"cell_type": "code",
125+
"metadata": {
126+
"id": "2IyJnHkEQvKA",
127+
"colab_type": "code",
128+
"colab": {}
129+
},
130+
"source": [
131+
"model.compile(\n",
132+
" optimizer=tf.keras.optimizers.SGD(lr=0.01),\n",
133+
" loss=tf.keras.losses.sparse_categorical_crossentropy,\n",
134+
" metrics=['accuracy']\n",
135+
")"
136+
],
137+
"execution_count": 0,
138+
"outputs": []
139+
},
140+
{
141+
"cell_type": "code",
142+
"metadata": {
143+
"id": "roj1wljnQ2NM",
144+
"colab_type": "code",
145+
"colab": {
146+
"base_uri": "https://localhost:8080/",
147+
"height": 374
148+
},
149+
"outputId": "e39349a5-e790-4331-caa6-e15d3eb4babe"
150+
},
151+
"source": [
152+
"history = model.fit(\n",
153+
" x_train, y_train,\n",
154+
" epochs=10,\n",
155+
" validation_data=(x_test, y_test)\n",
156+
")"
157+
],
158+
"execution_count": 7,
159+
"outputs": [
160+
{
161+
"output_type": "stream",
162+
"text": [
163+
"Train on 60000 samples, validate on 10000 samples\n",
164+
"Epoch 1/10\n",
165+
"60000/60000 [==============================] - 12s 208us/sample - loss: 0.7070 - accuracy: 0.7732 - val_loss: 0.3934 - val_accuracy: 0.8707\n",
166+
"Epoch 2/10\n",
167+
"60000/60000 [==============================] - 9s 156us/sample - loss: 0.2560 - accuracy: 0.9209 - val_loss: 0.2281 - val_accuracy: 0.9229\n",
168+
"Epoch 3/10\n",
169+
"60000/60000 [==============================] - 10s 159us/sample - loss: 0.1845 - accuracy: 0.9430 - val_loss: 0.1797 - val_accuracy: 0.9450\n",
170+
"Epoch 4/10\n",
171+
"60000/60000 [==============================] - 9s 156us/sample - loss: 0.1517 - accuracy: 0.9525 - val_loss: 0.1557 - val_accuracy: 0.9513\n",
172+
"Epoch 5/10\n",
173+
"60000/60000 [==============================] - 9s 158us/sample - loss: 0.1289 - accuracy: 0.9606 - val_loss: 0.1007 - val_accuracy: 0.9662\n",
174+
"Epoch 6/10\n",
175+
"60000/60000 [==============================] - 9s 158us/sample - loss: 0.1127 - accuracy: 0.9646 - val_loss: 0.0977 - val_accuracy: 0.9666\n",
176+
"Epoch 7/10\n",
177+
"60000/60000 [==============================] - 9s 158us/sample - loss: 0.1028 - accuracy: 0.9681 - val_loss: 0.0785 - val_accuracy: 0.9739\n",
178+
"Epoch 8/10\n",
179+
"60000/60000 [==============================] - 9s 152us/sample - loss: 0.0942 - accuracy: 0.9713 - val_loss: 0.0787 - val_accuracy: 0.9727\n",
180+
"Epoch 9/10\n",
181+
"60000/60000 [==============================] - 10s 160us/sample - loss: 0.0868 - accuracy: 0.9723 - val_loss: 0.0732 - val_accuracy: 0.9751\n",
182+
"Epoch 10/10\n",
183+
"60000/60000 [==============================] - 9s 155us/sample - loss: 0.0809 - accuracy: 0.9758 - val_loss: 0.0723 - val_accuracy: 0.9748\n"
184+
],
185+
"name": "stdout"
186+
}
187+
]
188+
},
189+
{
190+
"cell_type": "code",
191+
"metadata": {
192+
"id": "bbPt2gCRQ_8h",
193+
"colab_type": "code",
194+
"colab": {}
195+
},
196+
"source": [
197+
""
198+
],
199+
"execution_count": 0,
200+
"outputs": []
201+
}
202+
]
203+
}

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ Introduction to deep learning based on Google's TensorFlow framework. A fork of
1010
* [Deep Feedforward Neural Network (Multilayer Perceptron with 2 Hidden Layers O.o)](04_modern_net.ipynb)
1111
* [Convolutional Neural Network](05_convolutional_net.ipynb)
1212
* [Autoencoders](06_autoencoder.ipynb)
13-
* [Recurrent Neural Network (LSTM)](07_lstm.ipynb) (not yet updated)
13+
* [Recurrent Neural Network (LSTM)](07_lstm.ipynb)
1414
* [Word2vec](08_word2vec.ipynb) (not yet updated)
1515
* [TensorBoard](09_tensorboard.ipynb) (not yet updated)
1616
* [Save and restore net](10_save_restore_net.ipynb) (not yet updated)

0 commit comments

Comments
 (0)