From 528da5f712c575b4d123e67851381bbafbc0d225 Mon Sep 17 00:00:00 2001 From: sam2k13 Date: Sun, 9 Jan 2022 16:42:06 -0500 Subject: [PATCH 1/3] fix --- neuralcoref/neuralcoref.pyx | 13 +++++++++---- test.py | 18 ++++++++++++++++++ 2 files changed, 27 insertions(+), 4 deletions(-) create mode 100644 test.py diff --git a/neuralcoref/neuralcoref.pyx b/neuralcoref/neuralcoref.pyx index a137d50..4986cdc 100644 --- a/neuralcoref/neuralcoref.pyx +++ b/neuralcoref/neuralcoref.pyx @@ -271,10 +271,15 @@ def get_resolved(doc, clusters): resolved = list(tok.text_with_ws for tok in doc) for cluster in clusters: for coref in cluster: - if coref != cluster.main: - resolved[coref.start] = cluster.main.text + doc[coref.end-1].whitespace_ - for i in range(coref.start+1, coref.end): - resolved[i] = "" + # 1. Don't replace words with PRONOMINALS since they don't improve anything. + if coref != cluster.main and get_span_type(cluster.main) != MENTION_TYPE["PRONOMINAL"]: + # 2. Don't replace "Determiners" since it just creates grammatcal errors. + has_det = any(tok.pos_ == 'DET' for tok in coref) + print (coref, has_det) + if not has_det: + resolved[coref.start] = cluster.main.text + doc[coref.end-1].whitespace_ + for i in range(coref.start+1, coref.end): + resolved[i] = "" return ''.join(resolved) ################################################# diff --git a/test.py b/test.py new file mode 100644 index 0000000..693e3f4 --- /dev/null +++ b/test.py @@ -0,0 +1,18 @@ +import spacy +nlp = spacy.load('en') + +# doc = nlp(u'She likes her classes') + +# for token in doc: +# print(token.text, token.pos_ == 'DET') + +# Add neural coref to SpaCy's pipe +import neuralcoref +neuralcoref.add_to_pipe(nlp) + +# You're done. You can now use NeuralCoref as you usually manipulate a SpaCy document annotations. +doc = nlp(u'Joe is really cool. He received his initial military training during the French and Indian War.') + +doc._.has_coref +doc._.coref_clusters +print(doc._.coref_resolved) \ No newline at end of file From f91e5cfa42466e05059e59a2e3da48e7dbfee783 Mon Sep 17 00:00:00 2001 From: Sam Kleist Date: Sun, 9 Jan 2022 16:50:23 -0500 Subject: [PATCH 2/3] Update neuralcoref.pyx --- neuralcoref/neuralcoref.pyx | 3 --- 1 file changed, 3 deletions(-) diff --git a/neuralcoref/neuralcoref.pyx b/neuralcoref/neuralcoref.pyx index 4986cdc..36dcdbd 100644 --- a/neuralcoref/neuralcoref.pyx +++ b/neuralcoref/neuralcoref.pyx @@ -271,11 +271,8 @@ def get_resolved(doc, clusters): resolved = list(tok.text_with_ws for tok in doc) for cluster in clusters: for coref in cluster: - # 1. Don't replace words with PRONOMINALS since they don't improve anything. if coref != cluster.main and get_span_type(cluster.main) != MENTION_TYPE["PRONOMINAL"]: - # 2. Don't replace "Determiners" since it just creates grammatcal errors. has_det = any(tok.pos_ == 'DET' for tok in coref) - print (coref, has_det) if not has_det: resolved[coref.start] = cluster.main.text + doc[coref.end-1].whitespace_ for i in range(coref.start+1, coref.end): From 6669867461d34ff2acac059b6dfd9edad4e9d59c Mon Sep 17 00:00:00 2001 From: Sam Kleist Date: Sun, 9 Jan 2022 16:51:31 -0500 Subject: [PATCH 3/3] Delete test.py --- test.py | 18 ------------------ 1 file changed, 18 deletions(-) delete mode 100644 test.py diff --git a/test.py b/test.py deleted file mode 100644 index 693e3f4..0000000 --- a/test.py +++ /dev/null @@ -1,18 +0,0 @@ -import spacy -nlp = spacy.load('en') - -# doc = nlp(u'She likes her classes') - -# for token in doc: -# print(token.text, token.pos_ == 'DET') - -# Add neural coref to SpaCy's pipe -import neuralcoref -neuralcoref.add_to_pipe(nlp) - -# You're done. You can now use NeuralCoref as you usually manipulate a SpaCy document annotations. -doc = nlp(u'Joe is really cool. He received his initial military training during the French and Indian War.') - -doc._.has_coref -doc._.coref_clusters -print(doc._.coref_resolved) \ No newline at end of file