Skip to content

Commit a99db24

Browse files
committed
convert data[6-8].json to example[1-3].ann (standoff)
- Overlapping/nested text-bound mentions (TBMs) are not handled yet
1 parent f18c5c0 commit a99db24

File tree

7 files changed

+162
-133
lines changed

7 files changed

+162
-133
lines changed
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
un lock able
1+
unlockable
22
T1 BOUND 0 2 un
3-
T2 FREE 4 7 lock
4-
T3 BOUND 8 12 able
3+
T2 FREE 2 6 lock
4+
T3 BOUND 6 10 able
55
R1 unlock controlled:T1 controller:T2
66
R2 unlockable controller:R1 controlled:T3

data/example2.ann

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,6 @@
1-
Induction of p21 by p53 following DNA damage inhibits both Cdk4 and Cdk2 activities
2-
T1 Gene_or_gene_product 13 16 p21
3-
T2 Gene_or_gene_product 20 23 p53
4-
T3 BioProcess 34 44 DNA damage
5-
T4 Gene_or_gene_product 59 63 Cdk4
6-
T5 Gene_or_gene_product 68 72 Cdk2
7-
T6 Positive_activation 0 9 Induction
8-
T7 Negative_regulation 45 53 inhibits
9-
E1 Positive_activation:T6 Controller:T2 Controlled:T1
10-
E2 Negative_regulation::T7 Controller:E1 Controlled:T4
11-
E3 Negative_regulation::T7 Controller:E1 Controlled:T5
1+
unlockable
2+
T1 BOUND 0 2 un
3+
T2 FREE 2 6 lock
4+
T3 BOUND 6 10 able
5+
R1 lockable controlled:T2 controller:T3
6+
R2 unlockable controller:T1 controlled:R1

data/example3.ann

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
Induction of p21 by p53 following DNA damage inhibits both Cdk4 and Cdk2 activities.
2+
T1 Gene_or_gene_product 13 16 p21
3+
T2 Gene_or_gene_product 20 23 p53
4+
T3 BioProcess 34 44 DNA damage
5+
T4 Gene_or_gene_product 59 63 Cdk4
6+
T5 Gene_or_gene_product 68 72 Cdk2
7+
T6 Positive_activation 0 9 Induction
8+
T7 Negative_regulation 45 53 inhibits
9+
E1 Positive_activation:T6 Controller:T2 Controlled:T1
10+
E2 Negative_regulation::T7 Controller:E1 Controlled:T4
11+
E3 Negative_regulation::T7 Controller:E1 Controlled:T5

index.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,11 @@
3030
<div id="brat-input" class="modal">
3131
<div>
3232
<header>
33-
<span class="tab active">Custom input in .ann format</span>
33+
<span class="tab active">Load file</span>
3434
</header>
3535
<div class="page active">
3636
<div>
37-
<button><label for="file-input">Load .ann file</label></button>
37+
<button><label for="file-input">Upload</label></button>
3838
<input type="file" id="file-input" style="display:none;">
3939
</div>
4040
<textarea></textarea>

js/ann-parser.js

Lines changed: 62 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,13 @@ const parseAnn = (function() {
3636

3737
const re = /:+(?=[TER]\d+$)/;
3838

39-
function parseTextBoundMention(tokens, text) {
39+
function parseTextBoundMention(tokens, textLength) {
4040
const id = +tokens[0].slice(1),
4141
label = tokens[1],
4242
charStart = +tokens[2],
4343
charEnd = +tokens[3];
4444

45-
if (id > 0 && charStart >= 0 && charStart < charEnd && charEnd < text.length) {
45+
if (id > 0 && charStart >= 0 && charStart < charEnd && charEnd <= textLength) {
4646
return new TextBoundMention('T' + id, label, charStart, charEnd);
4747
}
4848
}
@@ -113,6 +113,8 @@ const parseAnn = (function() {
113113
relations: [],
114114
attributes: [],
115115
unparsedLines: [],
116+
text: null,
117+
tokens: [],
116118
mentions: {}
117119
}
118120

@@ -124,6 +126,7 @@ const parseAnn = (function() {
124126
return output;
125127
}
126128

129+
let textLength = text.length;
127130
let unparsedLines = [];
128131
let mentions = {};
129132

@@ -147,31 +150,35 @@ const parseAnn = (function() {
147150

148151
switch (tokens[0].charAt(0)) {
149152
case 'T':
150-
let tbm = parseTextBoundMention(tokens, text);
153+
let tbm = parseTextBoundMention(tokens, textLength);
151154
if (tbm) {
152155
output.texts.push(tbm);
153156
mentions[tbm.id] = tbm;
157+
parseIsSuccessful = true;
154158
}
155159
break;
156160
case 'E':
157161
let em = parseEventMention(tokens, mentions);
158162
if (em) {
159163
output.events.push(em);
160164
mentions[em.id] = em;
165+
parseIsSuccessful = true;
161166
}
162167
break;
163168
case 'R':
164169
let rm = parseRelationMention(tokens, mentions);
165170
if (rm) {
166171
output.relations.push(rm);
167172
mentions[rm.id] = rm;
173+
parseIsSuccessful = true;
168174
}
169175
break;
170176
case 'A':
171177
let a = parseAttribute(tokens, mentions);
172178
if (a) {
173179
output.attributes.push(a);
174180
mentions[a.id] = a;
181+
parseIsSuccessful = true;
175182
}
176183
break;
177184
}
@@ -181,6 +188,58 @@ const parseAnn = (function() {
181188
}
182189
}
183190

191+
// split text into tokens
192+
output.texts.sort((a,b) => {
193+
if (a.charEnd - b.charEnd != 0) {
194+
return a.charEnd - b.charEnd;
195+
}
196+
else {
197+
return a.charStart - b.charStart;
198+
}
199+
});
200+
201+
let tokens = [];
202+
let tbm_i = 0;
203+
let token_start = 0;
204+
for (let ch = 0; ch < textLength; ++ch) {
205+
let tbm = output.texts[tbm_i];
206+
while (text[token_start] === ' ') {
207+
++token_start;
208+
}
209+
if (tbm && tbm.charStart <= ch) {
210+
tokens.push({
211+
word: text.slice(tbm.charStart, tbm.charEnd),
212+
start: tbm.charStart,
213+
end: tbm.charEnd
214+
});
215+
token_start = tbm.charEnd;
216+
217+
while(output.texts[tbm_i] && output.texts[tbm_i].charStart <= ch){
218+
output.texts[tbm_i].tokenId = tokens.length - 1;
219+
++tbm_i;
220+
}
221+
}
222+
else if (text[ch] === ' ') {
223+
if (token_start < ch) {
224+
tokens.push({
225+
word: text.slice(token_start, ch),
226+
start: token_start,
227+
end: ch
228+
});
229+
token_start = ch + 1;
230+
}
231+
}
232+
}
233+
if (token_start < textLength) {
234+
tokens.push({
235+
word: text.slice(token_start, textLength),
236+
start: token_start,
237+
end: textLength
238+
});
239+
}
240+
241+
output.tokens = tokens;
242+
output.text = text;
184243
output.mentions = mentions;
185244
output.unparsedLines = unparsedLines;
186245

0 commit comments

Comments
 (0)