
Commit 01dfd2d

Add files via upload
1 parent 4bab0d1 commit 01dfd2d

1 file changed: +297 -0 lines changed

DNC.py

Lines changed: 297 additions & 0 deletions
@@ -0,0 +1,297 @@
import numpy as np
import tensorflow as tf


# association of different data (text, images and whatever else)


class DNC:
    def __init__(self, input_size, output_size, seq_len, num_words=256, word_size=64, num_heads=4):
        # define data
        # input data - [[1 0] [0 1] [0 0] [0 0]]
        self.input_size = input_size  # X
        # output data - [[0 0] [0 0] [1 0] [0 1]]
        self.output_size = output_size  # Y

        # define read + write (head) vector sizes ->> size of memory matrix = N*W
        # 10
        self.num_words = num_words  # N
        # 4 characters
        self.word_size = word_size  # W

        # define the number of read + write heads
        self.num_heads = num_heads  # R

        # size of the output vector from the controller that defines interactions with the memory matrix
        self.interface_size = num_heads * word_size + 3 * word_size + 5 * num_heads + 3
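        # For the defaults (num_heads=4, word_size=64) this is 4*64 + 3*64 + 5*4 + 3 = 471:
        # R*W read keys, R read strengths, W write key, 1 write strength, W erase vector,
        # W write vector, R free gates, 1 allocation gate, 1 write gate and 3*R read modes
        # (the same layout used to build `partition` in step_m below).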

        # the actual size of the neural network input after flattening and
        # concatenating the input vector with the previously read vectors from memory
        self.nn_input_size = num_heads * word_size + input_size

        # size of the output
        self.nn_output_size = output_size + self.interface_size

        # gaussian normal distribution for both outputs
        self.nn_out = tf.truncated_normal([1, self.output_size], stddev=0.1)
        self.interface_vec = tf.truncated_normal([1, self.interface_size], stddev=0.1)

        # create the MEMORY matrix
        self.mem_mat = tf.zeros([num_words, word_size])  # N*W

        # other variables
        # the usage vector records which locations have been used so far
        self.usage_vec = tf.fill([num_words, 1], 1e-6)  # N*1
        # a temporal link matrix records the order in which locations were written
        self.link_mat = tf.zeros([num_words, num_words])  # N*N
        # represents the degree to which the last location was written to
        self.precedence_weight = tf.zeros([num_words, 1])  # N*1

        # read and write head weight variables
        self.read_weights = tf.fill([num_words, num_heads], 1e-6)  # N*R
        self.write_weights = tf.fill([num_words, 1], 1e-6)  # N*1
        self.read_vecs = tf.fill([num_heads, word_size], 1e-6)  # R*W

        ### NETWORK VARIABLES
        # gateways into the computation graph for input/output pairs
        self.i_data = tf.placeholder(tf.float32, [seq_len * 2, self.input_size], name='input_node')
        self.o_data = tf.placeholder(tf.float32, [seq_len * 2, self.output_size], name='output_node')

        # 2-layer feedforward network (the controller)
        self.W1 = tf.Variable(tf.truncated_normal([self.nn_input_size, 32], stddev=0.1), name='layer1_weights',
                              dtype=tf.float32)
        self.b1 = tf.Variable(tf.zeros([32]), name='layer1_bias', dtype=tf.float32)
        self.W2 = tf.Variable(tf.truncated_normal([32, self.nn_output_size], stddev=0.1), name='layer2_weights',
                              dtype=tf.float32)
        self.b2 = tf.Variable(tf.zeros([self.nn_output_size]), name='layer2_bias', dtype=tf.float32)

        ### DNC OUTPUT WEIGHTS
        self.nn_out_weights = tf.Variable(tf.truncated_normal([self.nn_output_size, self.output_size], stddev=0.1),
                                          name='net_output_weights')
        self.interface_weights = tf.Variable(
            tf.truncated_normal([self.nn_output_size, self.interface_size], stddev=0.1), name='interface_weights')

        self.read_vecs_out_weight = tf.Variable(
            tf.truncated_normal([self.num_heads * self.word_size, self.output_size], stddev=0.1),
            name='read_vector_weights')

    # 3 attention mechanisms for reads from / writes to memory

    # 1) content-based addressing:
    # a key vector emitted by the controller is compared to the
    # content of each location in memory according to a similarity measure.
    # The similarity scores determine a weighting that can be used by the read heads
    # for associative recall or by the write head to modify an existing vector in memory.
    def content_lookup(self, key, strength):
        # the l2 norm of a vector is the square root of the sum of its
        # squared absolute values
        norm_mem = tf.nn.l2_normalize(self.mem_mat, 1)  # N*W
        norm_key = tf.nn.l2_normalize(key, 0)  # 1*W for write or R*W for read
        # get the similarity measure between both vectors, transpose before multiplication
        # (N*W, W*1) -> N*1 for write
        # (N*W, W*R) -> N*R for read
        sim = tf.matmul(norm_mem, norm_key, transpose_b=True)
        # strength is 1*1 for write or 1*R for read
        # returns the similarity measure
        return tf.nn.softmax(sim * strength, 0)  # N*1 or N*R
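
    # For reference, this is the content weighting from the DNC paper (Graves et al., 2016):
    #   C(M, k, beta)[i] = softmax_i(beta * cosine(M[i, :], k))
    # a softmax over memory locations of the cosine similarity between the key and each
    # memory row, sharpened by the strength beta (computed as 1 + softplus(...) in step_m).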

    # 2) allocation weighting:
    # retrieves the writing allocation weighting based on the usage free list.
    # The 'usage' of each location is represented as a number between 0 and 1,
    # and a weighting that picks out unused locations is delivered to the write head.

    # This is independent of the size and contents of the memory, meaning that
    # DNCs can be trained to solve a task using one size of memory and later
    # upgraded to a larger memory without retraining.
    def allocation_weighting(self):
        # sorted_usage_vec - the usage vector sorted in ascending order
        # free_list - the original indices of the sorted usage vector
        sorted_usage_vec, free_list = tf.nn.top_k(-1 * self.usage_vec, k=self.num_words)
        sorted_usage_vec *= -1
        cumprod = tf.cumprod(sorted_usage_vec, axis=0, exclusive=True)
        unorder = (1 - sorted_usage_vec) * cumprod

        alloc_weights = tf.zeros([self.num_words])
        I = tf.constant(np.identity(self.num_words, dtype=np.float32))

        # for each location, from least used to most used
        for pos, idx in enumerate(tf.unstack(free_list[0])):
            # one-hot row that selects the original index
            m = tf.squeeze(tf.slice(I, [idx, 0], [1, -1]))
            # add its share to the allocation weighting
            alloc_weights += m * unorder[0, pos]
        # the allocation weighting for each row in memory
        return tf.reshape(alloc_weights, [self.num_words, 1])

    # At every time step the controller receives an input vector from the dataset and emits an output vector.
    # It also receives a set of read vectors from the memory matrix at the previous time step via
    # the read heads. It then emits an interface vector that defines its interactions with the memory
    # at the current time step.
    def step_m(self, x):

        # concatenate the input with the previously read vectors
        nn_input = tf.concat([x, tf.reshape(self.read_vecs, [1, self.num_heads * self.word_size])], 1)

        # forward propagation
        l1_out = tf.matmul(nn_input, self.W1) + self.b1
        l1_act = tf.nn.tanh(l1_out)
        l2_out = tf.matmul(l1_act, self.W2) + self.b2
        l2_act = tf.nn.tanh(l2_out)

        # output vector
        self.nn_out = tf.matmul(l2_act, self.nn_out_weights)  # (1*(eta+Y), (eta+Y)*Y) -> (1*Y)
        # interface vector - how to interact with memory
        self.interface_vec = tf.matmul(l2_act, self.interface_weights)  # (1*(eta+Y), (eta+Y)*eta) -> (1*eta)

        partition = tf.constant(
            [[0] * (self.num_heads * self.word_size) + [1] * (self.num_heads) + [2] * (self.word_size) + [3] +
             [4] * (self.word_size) + [5] * (self.word_size) +
             [6] * (self.num_heads) + [7] + [8] + [9] * (self.num_heads * 3)], dtype=tf.int32)

        # convert the interface vector into a set of read/write vectors
        # using tf.dynamic_partition (partitions interface_vec into 10 tensors using indices from partition)
        (read_keys, read_str, write_key, write_str,
         erase_vec, write_vec, free_gates, alloc_gate, write_gate, read_modes) = \
            tf.dynamic_partition(self.interface_vec, partition, 10)

        # read vectors
        read_keys = tf.reshape(read_keys, [self.num_heads, self.word_size])  # R*W
        read_str = 1 + tf.nn.softplus(tf.expand_dims(read_str, 0))  # 1*R

        # write vectors
        write_key = tf.expand_dims(write_key, 0)  # 1*W
        # helps initialize our write weights
        write_str = 1 + tf.nn.softplus(tf.expand_dims(write_str, 0))  # 1*1
        erase_vec = tf.nn.sigmoid(tf.expand_dims(erase_vec, 0))  # 1*W
        write_vec = tf.expand_dims(write_vec, 0)  # 1*W

        # the degree to which locations at the read heads will be freed
        free_gates = tf.nn.sigmoid(tf.expand_dims(free_gates, 0))  # 1*R
        # the fraction of writing that is being allocated to a new location
        alloc_gate = tf.nn.sigmoid(alloc_gate)  # 1
        # the amount of information to be written to memory
        write_gate = tf.nn.sigmoid(write_gate)  # 1
        # the softmax distribution over the three read modes (backward, content lookup, forward),
        # normalized per head. The read heads can use these read modes to switch between content
        # lookup using a read key and reading out locations either forwards or backwards
        # in the order they were written.
        read_modes = tf.nn.softmax(tf.reshape(read_modes, [3, self.num_heads]), axis=0)  # 3*R

        # WRITE
        # the retention vector determines how much of each location is preserved (what's available to write to)
        retention_vec = tf.reduce_prod(1 - free_gates * self.read_weights, axis=1)
        # the usage vector is used to dynamically allocate memory
        self.usage_vec = (self.usage_vec + self.write_weights - self.usage_vec * self.write_weights) * retention_vec

        # retrieve the writing allocation weighting
        alloc_weights = self.allocation_weighting()  # N*1
        # where to write to?
        write_lookup_weights = self.content_lookup(write_key, write_str)  # N*1
        # define our write weights now that we know how much space to allocate for them and where to write to
        self.write_weights = write_gate * (alloc_gate * alloc_weights + (1 - alloc_gate) * write_lookup_weights)

        # erase, then write to memory
        self.mem_mat = self.mem_mat * (1 - tf.matmul(self.write_weights, erase_vec)) + \
                       tf.matmul(self.write_weights, write_vec)
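
        # This matches the memory update in the DNC paper:
        #   M_t = M_{t-1} ∘ (E - w^w e^T) + w^w v^T
        # with write weights w^w (N*1), erase vector e (1*W), write vector v (1*W),
        # E an all-ones matrix and ∘ elementwise multiplication.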

        # As well as writing, the controller can read from multiple locations in memory.
        # Memory can be searched based on the content of each location, or the associative
        # temporal links can be followed forward and backward to recall information written
        # in sequence or in reverse. (3rd attention mechanism)

        # update the temporal link matrix for the latest write,
        # given the precedence vector and the link matrix from the previous step
        nnweight_vec = tf.matmul(self.write_weights, tf.ones([1, self.num_words]))  # N*N
        self.link_mat = (1 - nnweight_vec - tf.transpose(nnweight_vec)) * self.link_mat + \
                        tf.matmul(self.write_weights, self.precedence_weight, transpose_b=True)
        # zero the diagonal (a location does not link to itself)
        self.link_mat *= tf.ones([self.num_words, self.num_words]) - tf.constant(
            np.identity(self.num_words, dtype=np.float32))

        # the precedence weight represents the degree to which each location was the last one written to
        self.precedence_weight = (1 - tf.reduce_sum(self.write_weights, axis=0)) * \
                                 self.precedence_weight + self.write_weights
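
        # For reference, these are the temporal-link updates from the DNC paper:
        #   L_t[i, j] = (1 - w^w_t[i] - w^w_t[j]) * L_{t-1}[i, j] + w^w_t[i] * p_{t-1}[j]
        #   p_t = (1 - sum_i w^w_t[i]) * p_{t-1} + w^w_t
        # Multiplying read weights by L steps forward through the write order,
        # and multiplying by L^T steps backward.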

        # READ
        # 3 read modes - backward, content lookup, forward
        forw_w = read_modes[2] * tf.matmul(self.link_mat, self.read_weights)  # (N*N, N*R) -> N*R
        look_w = read_modes[1] * self.content_lookup(read_keys, read_str)  # N*R
        back_w = read_modes[0] * tf.matmul(self.link_mat, self.read_weights, transpose_a=True)  # N*R

        # combine them into the new read weights
        self.read_weights = back_w + look_w + forw_w  # N*R
        # create read vectors by applying the read weights to the memory matrix
        self.read_vecs = tf.transpose(tf.matmul(self.mem_mat, self.read_weights, transpose_a=True))  # (W*N, N*R)^T -> R*W
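
        # In the paper's notation: w^r = pi_1 * b + pi_2 * c + pi_3 * f, and each read
        # vector is r_i = M^T w^r_i; the two lines above compute this for all heads at once.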

        # multiply the flattened read vectors by the output weights
        read_vec_mut = tf.matmul(tf.reshape(self.read_vecs, [1, self.num_heads * self.word_size]),
                                 self.read_vecs_out_weight)  # (1*RW, RW*Y) -> (1*Y)

        # return the controller output plus the read-vector contribution
        return self.nn_out + read_vec_mut

    # output a list of (one-hot encoded) vectors by running the step function over the input sequence
    def run(self):
        big_out = []
        for t, seq in enumerate(tf.unstack(self.i_data, axis=0)):
            seq = tf.expand_dims(seq, 0)
            y = self.step_m(seq)
            big_out.append(y)
        return tf.stack(big_out, axis=0)
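

# A minimal NumPy sketch (illustration only, never called by the model) of the
# allocation rule computed in DNC.allocation_weighting above: sort usage ascending,
# then give each location (1 - usage) times the product of the usages of all
# emptier locations, so the least-used location receives the largest weight.
def _allocation_sketch(usage):
    # usage: 1-D float array of per-location usages in [0, 1]
    order = np.argsort(usage)  # free list: emptiest locations first
    sorted_usage = usage[order]
    # exclusive cumulative product, as in tf.cumprod(..., exclusive=True)
    shifted = np.concatenate(([1.0], sorted_usage[:-1]))
    alloc = np.zeros_like(usage)
    alloc[order] = (1 - sorted_usage) * np.cumprod(shifted)
    return alloc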


def main(argv=None):
    # generate the input/output sequences, randomly initialized
    num_seq = 10
    seq_len = 6
    seq_width = 4
    iterations = 1000
    con = np.random.randint(0, seq_width, size=seq_len)
    seq = np.zeros((seq_len, seq_width))
    seq[np.arange(seq_len), con] = 1
    end = np.asarray([[-1] * seq_width])
    zer = np.zeros((seq_len, seq_width))

    graph = tf.Graph()

    with graph.as_default():

        with tf.Session() as sess:
            # build the DNC with a small memory for the copy task
            dnc = DNC(input_size=seq_width, output_size=seq_width, seq_len=seq_len, num_words=10, word_size=4,
                      num_heads=1)

            output = tf.squeeze(dnc.run())
            loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=output, labels=dnc.o_data))

            regularizers = (tf.nn.l2_loss(dnc.W1) + tf.nn.l2_loss(dnc.W2) +
                            tf.nn.l2_loss(dnc.b1) + tf.nn.l2_loss(dnc.b2))
            # L2 regularization to help the loss converge faster
            loss += 5e-4 * regularizers

            optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

            tf.global_variables_initializer().run()
            final_i_data = np.concatenate((seq, zer), axis=0)
            final_o_data = np.concatenate((zer, seq), axis=0)
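
            # Copy-task layout: the input is the random one-hot sequence followed by zeros,
            # and the target is zeros followed by that same sequence. With seq_len=6 and
            # seq_width=4 both final_i_data and final_o_data are 12x4, so the network must
            # store the sequence in memory and reproduce it during the blank half.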

            for i in range(0, iterations + 1):

                feed_dict = {dnc.i_data: final_i_data, dnc.o_data: final_o_data}

                l, _, predictions = sess.run([loss, optimizer, output], feed_dict=feed_dict)
                if i % 100 == 0:
                    print(i, l)

            print(final_i_data)
            print(final_o_data)
            print(predictions)


if __name__ == '__main__':
    tf.app.run()
