import itertools
from sklearn.metrics import f1_score, accuracy_score


def evaluate(net, x_data, y_data, seg_ind, batched_len_list, opt):
    net.eval()
    batch_size_eval = opt.batch_size_eval
@@ -53,14 +54,11 @@ def evaluate(net, x_data, y_data, seg_ind, batched_len_list, opt):
        hidden = net.init_hidden(bs)
        output_2d, hidden = net(x_batch_s, hidden, sorted_vals)
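        # Pack/unpack round-trip on the labels: this trims y_batch_s to the
        # batch's longest true length so it lines up with the packed network
        # output; pack_padded_sequence expects lengths sorted in descending
        # order (explanatory note, not part of the original diff).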
        pack_y = torch.nn.utils.rnn.pack_padded_sequence(y_batch_s, sorted_vals)
        unpacked_y, unpacked_len = torch.nn.utils.rnn.pad_packed_sequence(pack_y)
        sorted_vals = torch.LongTensor(sorted_vals)
        if opt.USE_CUDA:
            sorted_vals = sorted_vals.cuda()
@@ -83,12 +81,9 @@ def evaluate(net, x_data, y_data, seg_ind, batched_len_list, opt):
        all_labels_paths.extend(sent_batch_labels)
        all_seg_inds.extend(sent_batch_seg_inds)

        print('Evaluating batch', i)

    new_segments = []
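    # Each entry of all_segments holds word-boundary indices; the loop below
    # turns them into (start, end) character spans, e.g. boundaries [3, 7]
    # give spans (0, 2), (3, 6), ... (a hedged reading, since the hunk elides
    # the rest of the loop body).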
    for segment in all_segments:
        temp_seg = [(0, segment[0] - 1)]
        for j, val in enumerate(segment[1:], 1):
@@ -101,7 +96,7 @@ def evaluate(net, x_data, y_data, seg_ind, batched_len_list, opt):
        all_seg_inds,
        new_segments)

-    # Flatten to calculate f1 score on one run for word level
+    # Flatten to calculate acc score on one run for word level
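    # chain.from_iterable flattens the per-sentence lists into one flat list,
    # e.g. [['N', 'V'], ['A']] -> ['N', 'V', 'A'] (illustrative example, not
    # from the original diff).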
    all_words_labels_flat = list(itertools.chain.from_iterable(all_words_labels))
    all_words_preds_flat = list(itertools.chain.from_iterable(all_words_preds))
@@ -111,12 +106,10 @@ def evaluate(net, x_data, y_data, seg_ind, batched_len_list, opt):
    print('F1 Tokenization', F1_tok)
    print('Word level Accuracy: ', word_acc)

-    # print('Character level F1 score: ', f1_char)
    return F1_pos_seg


def convert_to_word(all_labels_paths, all_char_paths, seg_ind_s, segments_predicted):
    word_2d_labels = []
    word_2d_preds = []
    count_correct_pos_seg = 0
@@ -133,7 +126,6 @@ def convert_to_word(all_labels_paths, all_char_paths, seg_ind_s, segments_predicted):
            start_ind = j + 1
            idx_list.append(word_range)

        char_seg = all_char_paths[i]
        segments = [char_seg[s:(e + 1)] for s, e in idx_list]
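        # Majority vote: a word's predicted tag is the most frequent
        # character-level tag in its span, e.g.
        # Counter(['N', 'N', 'V']).most_common()[0][0] returns 'N'
        # (illustrative tags, not from the original diff).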
        word_2d_preds.append([Counter(seg).most_common()[0][0] for seg in segments])
@@ -160,9 +152,4 @@ def convert_to_word(all_labels_paths, all_char_paths, seg_ind_s, segments_predicted):
    pos_seg_recall = count_correct_pos_seg / total_clean_tokens
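    # F1 is the harmonic mean of precision and recall; note this raises
    # ZeroDivisionError when both are zero, so callers may want to guard that
    # case (observation, not part of the original diff).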
    F1_pos_seg = (2 * pos_seg_prec * pos_seg_recall) / (pos_seg_prec + pos_seg_recall)

-    # print('Tokenization recall', token_recall)
-    # print('Tokenization precision', token_prec)
-    # print('F1 Score Tokenization', F1_tok)
-    # print('F1 Score POS & Seg', F1_pos_seg)
    return F1_pos_seg, F1_tok, word_2d_labels, word_2d_preds
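
# Hypothetical usage sketch (variable names assumed, not from this diff):
#   f1_pos_seg = evaluate(net, x_dev, y_dev, seg_ind_dev, dev_len_batches, opt)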