
Commit 9804687

Merge pull request #8 from zhangbo2008/master
Add files via upload
2 parents cbb1745 + be106ae commit 9804687

File tree

2 files changed: +76 −76


examples/cnn_setence_classification/text_cnn.py

Lines changed: 2 additions & 2 deletions
@@ -81,7 +81,7 @@ def __init__(self, seq_len, vocab_size, embedding_size, filter_sizes, num_filter
             pooled_outputs.append(pool_output)  # [None, 1, 1, num_filters]
         # Combine all pooled features
         num_filters_total = num_filters * len(filter_sizes)
-        self.h_pool = tf.concat(3, pooled_outputs)  # [None, 1, 1, num_filters_total]
+        self.h_pool = tf.concat(pooled_outputs, 3)  # [None, 1, 1, num_filters_total]
         self.h_pool_flat = tf.reshape(self.h_pool, shape=[-1, num_filters_total])  # [None, num_filters_total]

         # The dropout layer
@@ -100,7 +100,7 @@ def __init__(self, seq_len, vocab_size, embedding_size, filter_sizes, num_filter

         # The loss
         with tf.name_scope("loss"):
-            losses = tf.nn.softmax_cross_entropy_with_logits(self.scores, self.y)
+            losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.y)
             self.loss = tf.reduce_mean(losses) + L2_loss * l2_reg_lambda

         # Accuracy
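Both fixes track the TensorFlow 1.0 API break: tf.concat changed its signature from tf.concat(axis, values) to tf.concat(values, axis), and tf.nn.softmax_cross_entropy_with_logits stopped accepting positional arguments, requiring the logits= and labels= keywords. A minimal sketch of the updated calls (assumes TensorFlow 1.x; the tensor values are illustrative only):

import tensorflow as tf  # assumes TensorFlow 1.x

# Two pooled feature maps shaped like the ones in text_cnn.py: [batch, 1, 1, num_filters].
a = tf.zeros([2, 1, 1, 128])
b = tf.zeros([2, 1, 1, 128])

# TF >= 1.0 signature: values first, axis second (pre-1.0 it was tf.concat(3, [a, b])).
pooled = tf.concat([a, b], 3)  # shape [2, 1, 1, 256]

logits = tf.zeros([2, 2])
labels = tf.constant([[1.0, 0.0], [0.0, 1.0]])
# TF >= 1.0 requires keyword arguments here; a positional call raises an error.
losses = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)

with tf.Session() as sess:
    print(sess.run([tf.shape(pooled), losses]))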
Lines changed: 74 additions & 74 deletions (the file was re-uploaded, so every line is marked changed; the only textual difference is the new line 14)
@@ -1,74 +1,74 @@
 """
 Test the TextRNN class
 2016/12/22
 """
 import os
 import sys
 import numpy as np
 import tensorflow as tf
 from sklearn.model_selection import train_test_split
 from tensorflow.contrib import learn
 
 from data_helpers import load_data_and_labels, batch_iter
 from text_cnn import TextCNN
-
+import pudb;pu.db
 
 # Load original data
 path = sys.path[0]
 pos_filename = path + "/data/rt-polarity.pos"
 neg_filename = path + "/data/rt-polarity.neg"
 
 X_data, y_data = load_data_and_labels(pos_filename, neg_filename)
 max_document_length = max([len(sen.split(" ")) for sen in X_data])
 print("Max_document_length:,", max_document_length)
 # Create the vacabulary
 vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
 # The idx data
 x = np.array(list(vocab_processor.fit_transform(X_data)), dtype=np.float32)
 y = np.array(y_data, dtype=np.int32)
 vocabulary_size = len(vocab_processor.vocabulary_)
 print("The size of vocabulary:", vocabulary_size)
 # Split the data
 X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=1111)
 print("X_train shape {0}, y_train shape {1}".format(X_train.shape, y_train.shape))
 print("X_test shape {0}, y_test shape {1}".format(X_test.shape, y_test.shape))
 
 # The parameters of RNN
 seq_len = X_train.shape[1]
 vocab_size = vocabulary_size
 embedding_size = 128
 filter_sizes = [2, 3, 4]
 num_filters = 128
 num_classes = y_train.shape[1]
 l2_reg_lambda = 0.0
 
 # Construct RNN model
 text_rnn_model = TextCNN(seq_len=seq_len, vocab_size=vocab_size, embedding_size=embedding_size,
                          filter_sizes=filter_sizes, num_filters=num_filters, num_classes=num_classes)
 loss = text_rnn_model.loss
 train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
 accuracy = text_rnn_model.accuracy
 # The parameters for training
 batch_size = 64
 training_epochs = 10
 dispaly_every = 1
 dropout_keep_prob = 0.5
 
 batch_num = int(X_train.shape[0]/batch_size)
 
 sess = tf.Session()
 sess.run(tf.global_variables_initializer())
 print("Starting training...")
 for epoch in range(training_epochs):
     avg_cost = 0
     for batch in range(batch_num):
         _, cost = sess.run([train_op, loss], feed_dict={text_rnn_model.x: X_train[batch*batch_size:(batch+1)*batch_size],
                                                         text_rnn_model.y: y_train[batch*batch_size:(batch+1)*batch_size],
                                                         text_rnn_model.dropout_keep_prob: dropout_keep_prob})
         avg_cost += cost
     if epoch % dispaly_every == 0:
         cost, acc = sess.run([loss, accuracy], feed_dict={text_rnn_model.x: X_test,
                                                           text_rnn_model.y: y_test,
                                                           text_rnn_model.dropout_keep_prob: 1.0})
         print("\nEpoch {0} : loss {1}, accuracy {2}".format(epoch, cost, acc))
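The only substantive change in this file is new line 14: `import pudb;pu.db` is the pudb shortcut breakpoint (importing pudb installs a `pu` helper whose `db` attribute invokes the debugger), and because it sits at module level the script now drops into the PuDB TUI as soon as it starts, before any training runs. A minimal sketch of the same breakpoint in its more explicit form (assumes the pudb package is installed; `debug_target` is a hypothetical function):

import pudb

def debug_target(values):
    total = 0
    pudb.set_trace()  # opens the PuDB TUI here, same effect as the `import pudb;pu.db` one-liner
    for v in values:
        total += v
    return total

debug_target([1, 2, 3])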
