-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtagSentences.py
More file actions
55 lines (39 loc) · 1.63 KB
/
tagSentences.py
File metadata and controls
55 lines (39 loc) · 1.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from flair.models import *
from flair.data import Sentence
class Tagger:
    """Run a flair tagger over raw text and return per-token POS and NER tags.

    The wrapped ``tagger`` must offer ``predict(sentence)`` and attach both
    ``'pos'`` and ``'ner'`` labels to the sentence, e.g. the object returned by
    ``MultiTagger.load(['pos', 'ner'])``.
    """

    # Tag given to every token that no NER span covers.
    UNKNOWN_TAG = '<UNK>'

    def __init__(self, tagger):
        """Store the tagger whose ``predict`` method is used by ``run``."""
        self.tagger = tagger

    def get_POSTAGS(self, sent):
        """Collect the words and POS tags attached to ``sent``.

        Args:
            sent: A tagged sentence exposing ``get_labels('pos')``. Each label
                must provide ``value`` (the tag) and ``data_point.text`` (the
                word the tag belongs to).

        Returns:
            A ``(words, pos_tags, count)`` tuple: two parallel lists plus
            their common length.
        """
        # Materialise once so a lazily produced label sequence is only
        # consumed a single time.
        labels = list(sent.get_labels('pos'))
        words = [label.data_point.text for label in labels]
        pos_tags = [label.value for label in labels]
        return words, pos_tags, len(words)

    @staticmethod
    def _span_bounds(label):
        """Return the ``(start, stop)`` pair embedded in ``str(label)``.

        The string form is expected to begin with an identifier such as
        ``Span[3:5]``; ``start`` is taken as-is and ``stop`` is treated as
        exclusive by the caller.
        NOTE(review): this depends on the label's string format - confirm it
        against the installed flair version.

        Raises:
            ValueError: If no ``[start:stop]`` integer pair can be read.
        """
        # Parse the complete identifier rather than a fixed-width prefix so
        # indices of any width are read correctly.
        head = str(label).partition(']')[0]
        bounds = head.partition('[')[2]
        start_text, _, stop_text = bounds.partition(':')
        return int(start_text), int(stop_text)

    def get_NERTAGS(self, sent, N):
        """Build a per-token NER tag list of length ``N`` for ``sent``.

        Every position covered by an entity span (first, interior and last
        token alike) receives that span's tag; all other positions hold
        ``'<UNK>'``.

        Args:
            sent: A tagged sentence exposing ``get_labels('ner')``. Each label
                must provide ``value`` and a string form starting with a
                ``Name[start:stop]`` identifier.
            N: Number of tokens in the sentence.

        Returns:
            A list of ``N`` tag strings.

        Raises:
            ValueError: If a label's string form has no ``[start:stop]`` pair.
            IndexError: If a span reaches past position ``N - 1``.
        """
        ner_tags = [self.UNKNOWN_TAG] * N
        for label in sent.get_labels('ner'):
            start, stop = self._span_bounds(label)
            # Indexed assignment (not slice assignment) so an out-of-range
            # span raises instead of silently growing the list.
            for position in range(start, stop):
                ner_tags[position] = label.value
        return ner_tags

    def run(self, sentence):
        """Tag ``sentence`` and return ``(words, pos_tags, ner_tags)``.

        Args:
            sentence: Raw text, wrapped in a flair ``Sentence`` before
                prediction.

        Returns:
            Three lists: the words and POS tags from ``get_POSTAGS`` and the
            NER tags from ``get_NERTAGS`` sized to the same token count.
        """
        sent = Sentence(sentence)
        self.tagger.predict(sent)
        words, pos_tags, N = self.get_POSTAGS(sent)
        ner_tags = self.get_NERTAGS(sent, N)
        return words, pos_tags, ner_tags
if __name__ == '__main__':
    # Demo: load a combined POS + NER model, tag one sample passage and print
    # the three parallel lists followed by their lengths.
    multi_tagger = MultiTagger.load(['pos', 'ner'])
    demo = Tagger(multi_tagger)
    sample_text = "architecturally, the school has a catholic character. atop the main building\'s gold dome is a golden statue of the virgin mary."
    words, pos_tags, ner_tags = demo.run(sample_text)
    report = (
        ('Words: {}', words),
        ('POS Tags: {}', pos_tags),
        ('NER Tags: {}', ner_tags),
    )
    for template, values in report:
        print(template.format(values))
    # The three lengths are printed side by side for a visual alignment check.
    print(len(words), len(pos_tags), len(ner_tags))