import numpy as np
import tensorflow as tf

# association of different kinds of data (text, images, and whatever else)
# through a trainable external memory


class DNC:
    def __init__(self, input_size, output_size, seq_len, num_words=256, word_size=64, num_heads=4):
        # define data
        # input data - [[1 0] [0 1] [0 0] [0 0]]
        self.input_size = input_size  # X
        # output data - [[0 0] [0 0] [1 0] [0 1]]
        self.output_size = output_size  # Y

        # define the shape of the memory matrix: N words of W elements each (N*W)
        self.num_words = num_words  # N (e.g. 10)
        self.word_size = word_size  # W (e.g. 4 characters)

        # define the number of read heads; the DNC uses a single write head
        self.num_heads = num_heads  # R

        # size of the output vector from the controller that defines its
        # interactions with the memory matrix
        self.interface_size = num_heads * word_size + 3 * word_size + 5 * num_heads + 3
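        # the interface vector concatenates (in the order partitioned in step_m):
        #   R read keys (R*W), R read strengths, a write key (W), a write strength (1),
        #   an erase vector (W), a write vector (W), R free gates, an allocation gate (1),
        #   a write gate (1), and 3*R read modes, giving R*W + 3*W + 5*R + 3 in total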

        # the actual size of the neural network input after flattening and
        # concatenating the input vector with the previously read vectors from memory
        self.nn_input_size = num_heads * word_size + input_size

        # size of the controller output: the network output plus the interface vector
        self.nn_output_size = output_size + self.interface_size

        # initialize both controller outputs from a truncated normal (gaussian) distribution
        self.nn_out = tf.truncated_normal([1, self.output_size], stddev=0.1)
        self.interface_vec = tf.truncated_normal([1, self.interface_size], stddev=0.1)

        # create the MEMORY matrix
        self.mem_mat = tf.zeros([num_words, word_size])  # N*W

        # other state variables
        # the usage vector records which locations have been used so far
        self.usage_vec = tf.fill([num_words, 1], 1e-6)  # N*1
        # a temporal link matrix records the order in which locations were written
        self.link_mat = tf.zeros([num_words, num_words])  # N*N
        # the precedence weighting represents the degree to which each location
        # was the last one written to
        self.precedence_weight = tf.zeros([num_words, 1])  # N*1

        # read and write head weightings
        self.read_weights = tf.fill([num_words, num_heads], 1e-6)  # N*R
        self.write_weights = tf.fill([num_words, 1], 1e-6)  # N*1
        self.read_vecs = tf.fill([num_heads, word_size], 1e-6)  # R*W
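        # (starting these at a small epsilon rather than exactly zero presumably
        # keeps the first usage and temporal-link updates away from degenerate
        # all-zero weightings)
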
        ### NETWORK VARIABLES
        # gateways into the computation graph for input/output pairs
        self.i_data = tf.placeholder(tf.float32, [seq_len * 2, self.input_size], name='input_node')
        self.o_data = tf.placeholder(tf.float32, [seq_len * 2, self.output_size], name='output_node')

        # 2-layer feedforward network (the controller)
        self.W1 = tf.Variable(tf.truncated_normal([self.nn_input_size, 32], stddev=0.1),
                              name='layer1_weights', dtype=tf.float32)
        self.b1 = tf.Variable(tf.zeros([32]), name='layer1_bias', dtype=tf.float32)
        self.W2 = tf.Variable(tf.truncated_normal([32, self.nn_output_size], stddev=0.1),
                              name='layer2_weights', dtype=tf.float32)
        self.b2 = tf.Variable(tf.zeros([self.nn_output_size]), name='layer2_bias', dtype=tf.float32)

        ### DNC OUTPUT WEIGHTS
        self.nn_out_weights = tf.Variable(
            tf.truncated_normal([self.nn_output_size, self.output_size], stddev=0.1),
            name='net_output_weights')
        self.interface_weights = tf.Variable(
            tf.truncated_normal([self.nn_output_size, self.interface_size], stddev=0.1),
            name='interface_weights')

        self.read_vecs_out_weight = tf.Variable(
            tf.truncated_normal([self.num_heads * self.word_size, self.output_size], stddev=0.1),
            name='read_vector_weights')

    # 3 attention mechanisms for reads/writes to memory

    # 1) content-based addressing
    # a key vector emitted by the controller is compared to the
    # content of each location in memory according to a similarity measure.
    # the similarity scores determine a weighting that can be used by the read heads
    # for associative recall or by the write head to modify an existing vector in memory.
    def content_lookup(self, key, strength):
        # l2-normalize the memory rows and the key along the word dimension
        # so that the matmul below computes cosine similarity
        norm_mem = tf.nn.l2_normalize(self.mem_mat, 1)  # N*W
        norm_key = tf.nn.l2_normalize(key, 1)  # 1*W for write or R*W for read
        # get the similarity measure between the vectors; transpose before multiplication
        # (N*W, W*1) -> N*1 for write
        # (N*W, W*R) -> N*R for read
        sim = tf.matmul(norm_mem, norm_key, transpose_b=True)
        # strength is 1*1 or 1*R; a larger strength sharpens the distribution
        # return the content weighting: a softmax over memory locations
        return tf.nn.softmax(sim * strength, 0)  # N*1 or N*R

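    # a minimal numpy sketch of the same lookup (illustrative only, not used by
    # this class), with a hypothetical 2-slot memory and word_size 2:
    #
    #   M = np.array([[1., 0.], [0., 1.]])                 # memory, N*W
    #   k = np.array([[0.9, 0.1]])                         # write key, 1*W
    #   Mn = M / np.linalg.norm(M, axis=1, keepdims=True)  # unit rows
    #   kn = k / np.linalg.norm(k, axis=1, keepdims=True)  # unit key
    #   sim = Mn @ kn.T                                    # cosine similarities, N*1
    #   w = np.exp(5 * sim) / np.exp(5 * sim).sum()        # strength 5 -> slot 0 ~0.99
    #
    # increasing the strength sharpens the softmax toward a one-hot lookup
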
    # 2) dynamic memory allocation
    # retrieves the write allocation weighting based on the usage free list.
    # the 'usage' of each location is represented as a number between 0 and 1,
    # and a weighting that picks out unused locations is delivered to the write head.

    # this mechanism is independent of the size and contents of the memory, meaning
    # that a DNC can be trained to solve a task using one size of memory and later
    # upgraded to a larger memory without retraining
    def allocation_weighting(self):
        # sorted_usage_vec - the usage values sorted ascending
        # free_list - the original indices of the sorted usage values
        # (tf.nn.top_k sorts descending over the last axis, so negate to sort ascending)
        sorted_usage_vec, free_list = tf.nn.top_k(
            tf.reshape(-1 * self.usage_vec, [1, self.num_words]), k=self.num_words)
        sorted_usage_vec *= -1
        # exclusive cumulative product of the usages of all freer locations
        cumprod = tf.cumprod(sorted_usage_vec, axis=1, exclusive=True)
        unorder = (1 - sorted_usage_vec) * cumprod  # 1*N, in sorted order

        alloc_weights = tf.zeros([self.num_words])
        I = tf.constant(np.identity(self.num_words, dtype=np.float32))

        # scatter the sorted allocation weights back to their original positions
        for pos, idx in enumerate(tf.unstack(free_list[0])):
            # one-hot row selecting the original memory location
            m = tf.squeeze(tf.slice(I, [idx, 0], [1, -1]))
            # add its allocation weight to the weight vector
            alloc_weights += m * unorder[0, pos]
        # the allocation weighting for each row in memory
        return tf.reshape(alloc_weights, [self.num_words, 1])
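
    # worked example with hypothetical numbers: for usage u = [0.9, 0.1, 0.5] the
    # ascending sort gives [0.1, 0.5, 0.9] with free_list [1, 2, 0], so
    #   a[1] = (1 - 0.1)             = 0.9
    #   a[2] = (1 - 0.5) * 0.1       = 0.05
    #   a[0] = (1 - 0.9) * 0.1 * 0.5 = 0.005
    # i.e. almost all new writing is allocated to the least-used location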

    # at every time step the controller receives an input vector from the dataset
    # and emits an output vector. it also receives a set of read vectors from the
    # memory matrix at the previous time step via the read heads. it then emits an
    # interface vector that defines its interactions with the memory at the
    # current time step
    def step_m(self, x):

        # concatenate the input with the (flattened) previously read vectors
        nn_input = tf.concat([x, tf.reshape(self.read_vecs, [1, self.num_heads * self.word_size])], 1)

        # forward propagation through the 2-layer controller
        l1_out = tf.matmul(nn_input, self.W1) + self.b1
        l1_act = tf.nn.tanh(l1_out)
        l2_out = tf.matmul(l1_act, self.W2) + self.b2
        l2_act = tf.nn.tanh(l2_out)

        # output vector
        self.nn_out = tf.matmul(l2_act, self.nn_out_weights)  # (1*(eta+Y), (eta+Y)*Y) -> 1*Y
        # interface vector - how to interact with memory
        self.interface_vec = tf.matmul(l2_act, self.interface_weights)  # (1*(eta+Y), (eta+Y)*eta) -> 1*eta

        # index map labelling each element of the interface vector with the
        # component it belongs to
        partition = tf.constant(
            [[0] * (self.num_heads * self.word_size) + [1] * self.num_heads + [2] * self.word_size + [3] +
             [4] * self.word_size + [5] * self.word_size +
             [6] * self.num_heads + [7] + [8] + [9] * (self.num_heads * 3)], dtype=tf.int32)

        # convert the interface vector into a set of read/write vectors using
        # tf.dynamic_partition (partitions interface_vec into 10 tensors using indices from partition)
        (read_keys, read_str, write_key, write_str,
         erase_vec, write_vec, free_gates, alloc_gate, write_gate, read_modes) = \
            tf.dynamic_partition(self.interface_vec, partition, 10)

        # read components
        read_keys = tf.reshape(read_keys, [self.num_heads, self.word_size])  # R*W
        read_str = 1 + tf.nn.softplus(tf.expand_dims(read_str, 0))  # 1*R

        # write components
        write_key = tf.expand_dims(write_key, 0)  # 1*W
        # softplus keeps the write strength at or above 1
        write_str = 1 + tf.nn.softplus(tf.expand_dims(write_str, 0))  # 1*1
        erase_vec = tf.nn.sigmoid(tf.expand_dims(erase_vec, 0))  # 1*W
        write_vec = tf.expand_dims(write_vec, 0)  # 1*W

        # the degree to which the locations at the read heads will be freed
        free_gates = tf.nn.sigmoid(tf.expand_dims(free_gates, 0))  # 1*R
        # the fraction of writing that goes to a newly allocated location
        alloc_gate = tf.nn.sigmoid(alloc_gate)  # 1
        # the amount of information to be written to memory
        write_gate = tf.nn.sigmoid(write_gate)  # 1
        # the softmax distribution over the three read modes (backward, lookup, forward).
        # the read heads use these modes to switch between content lookup
        # using a read key and reading out locations either forwards or backwards
        # in the order they were written.
        read_modes = tf.nn.softmax(tf.reshape(read_modes, [3, self.num_heads]))  # 3*R
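
        # as in the paper, strengths use oneplus(x) = 1 + log(1 + e^x), which maps
        # them into [1, inf); sigmoids keep the gates in (0, 1) and a softmax
        # normalizes the three read modes per head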

        # WRITE
        # the memory retention vector: how much of each location escapes freeing
        retention_vec = tf.reduce_prod(1 - free_gates * self.read_weights, reduction_indices=1,
                                       keep_dims=True)  # N*1
        # the usage vector, used to dynamically allocate memory
        self.usage_vec = (self.usage_vec + self.write_weights - self.usage_vec * self.write_weights) * retention_vec

        # retrieve the write allocation weighting
        alloc_weights = self.allocation_weighting()  # N*1
        # where to write to?
        write_lookup_weights = self.content_lookup(write_key, write_str)  # N*1
        # define the write weights now that we know how much space to allocate
        # for them and where to write to
        self.write_weights = write_gate * (alloc_gate * alloc_weights + (1 - alloc_gate) * write_lookup_weights)

        # erase, then write to memory
        self.mem_mat = self.mem_mat * (1 - tf.matmul(self.write_weights, erase_vec)) + \
                       tf.matmul(self.write_weights, write_vec)
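
        # this is the paper's write update: w_w = g_w * (g_a * a + (1 - g_a) * c_w),
        # then M_t = M_{t-1} o (E - w_w e^T) + w_w v^T, where o is elementwise
        # multiplication and E is a matrix of ones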

        # as well as writing, the controller can read from multiple locations in memory.
        # memory can be searched based on the content of each location, or the associative
        # temporal links can be followed forward and backward to recall information written
        # in sequence or in reverse (the 3rd attention mechanism)

        # update the temporal link matrix for the latest write, given the
        # precedence weighting and the link matrix from the previous step
        nnweight_vec = tf.matmul(self.write_weights, tf.ones([1, self.num_words]))  # N*N
        self.link_mat = (1 - nnweight_vec - tf.transpose(nnweight_vec)) * self.link_mat + \
                        tf.matmul(self.write_weights, self.precedence_weight, transpose_b=True)
        # zero the diagonal: a location cannot temporally link to itself
        self.link_mat *= tf.ones([self.num_words, self.num_words]) - tf.constant(
            np.identity(self.num_words, dtype=np.float32))

        # update the precedence weighting: decayed by the total amount written,
        # then increased by the current write weighting
        self.precedence_weight = (1 - tf.reduce_sum(self.write_weights, reduction_indices=0)) * \
                                 self.precedence_weight + self.write_weights
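
        # per the paper: L_t[i, j] = (1 - w_w[i] - w_w[j]) * L_{t-1}[i, j] + w_w[i] * p_{t-1}[j]
        # and p_t = (1 - sum(w_w)) * p_{t-1} + w_w, so L_t[i, j] tracks the degree to
        # which location i was written to just after location j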

        # READ
        # 3 read modes - backward, content lookup, forward
        forw_w = read_modes[2] * tf.matmul(self.link_mat, self.read_weights)  # (N*N, N*R) -> N*R
        look_w = read_modes[1] * self.content_lookup(read_keys, read_str)  # N*R
        back_w = read_modes[0] * tf.matmul(self.link_mat, self.read_weights, transpose_a=True)  # N*R

        # blend the three modes into the new read weights
        self.read_weights = back_w + look_w + forw_w  # N*R
        # create the read vectors by applying the read weights to the memory matrix
        self.read_vecs = tf.transpose(tf.matmul(self.mem_mat, self.read_weights, transpose_a=True))  # (W*N, N*R)^T -> R*W

        # project the flattened read vectors into the output space
        read_vec_mut = tf.matmul(tf.reshape(self.read_vecs, [1, self.num_heads * self.word_size]),
                                 self.read_vecs_out_weight)  # (1*RW, RW*Y) -> 1*Y

        # the DNC output is the controller output plus the read-vector projection
        return self.nn_out + read_vec_mut
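
    # summary of the read step, per the paper: each read weighting is
    # w_r = pi[0] * b + pi[1] * c + pi[2] * f, with backward b = L^T w_r_prev,
    # content weighting c, and forward f = L w_r_prev; the final output is
    # y = nn_out + W_r [r_1; ...; r_R] over the R read vectors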

    # produce the output sequence (one-hot scores) by running the step function
    # over every element of the input sequence
    def run(self):
        big_out = []
        for t, seq in enumerate(tf.unstack(self.i_data, axis=0)):
            seq = tf.expand_dims(seq, 0)
            y = self.step_m(seq)
            big_out.append(y)
        return tf.stack(big_out, axis=0)


def main(argv=None):
    # generate the input/output sequences, randomly initialized
    seq_len = 6
    seq_width = 4
    iterations = 1000
    # pick a random symbol (column) for each of the seq_len time steps
    con = np.random.randint(0, seq_width, size=seq_len)
    seq = np.zeros((seq_len, seq_width))
    seq[np.arange(seq_len), con] = 1
    # a blank block the same shape as the sequence
    zer = np.zeros((seq_len, seq_width))
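
    # this is a copy task: the network sees the one-hot sequence followed by the
    # blank block, and must output the blank block followed by the sequence,
    # e.g. with seq_len=2:
    #   input  [[0 1 0 0], [1 0 0 0], [0 0 0 0], [0 0 0 0]]
    #   target [[0 0 0 0], [0 0 0 0], [0 1 0 0], [1 0 0 0]]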

    graph = tf.Graph()

    with graph.as_default():

        with tf.Session() as sess:
            # a small DNC: 10 memory words of size 4, with a single read head
            dnc = DNC(input_size=seq_width, output_size=seq_width, seq_len=seq_len, num_words=10, word_size=4,
                      num_heads=1)

            output = tf.squeeze(dnc.run())
            loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=output, labels=dnc.o_data))

            # l2 regularization on the controller weights to help convergence
            regularizers = (tf.nn.l2_loss(dnc.W1) + tf.nn.l2_loss(dnc.W2) +
                            tf.nn.l2_loss(dnc.b1) + tf.nn.l2_loss(dnc.b2))
            loss += 5e-4 * regularizers

            optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

            tf.global_variables_initializer().run()
            final_i_data = np.concatenate((seq, zer), axis=0)
            final_o_data = np.concatenate((zer, seq), axis=0)

            for i in range(0, iterations + 1):
                feed_dict = {dnc.i_data: final_i_data, dnc.o_data: final_o_data}

                l, _, predictions = sess.run([loss, optimizer, output], feed_dict=feed_dict)
                if i % 100 == 0:
                    print(i, l)

            print(final_i_data)
            print(final_o_data)
            print(predictions)


if __name__ == '__main__':
    tf.app.run()