Skip to content

Commit e316f71

Browse files
committed
2nd attempt at flattening dataset
1 parent decfffb commit e316f71

File tree

2 files changed

+143
-123
lines changed

2 files changed

+143
-123
lines changed

index.html

Lines changed: 138 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,13 @@
186186
annotationStyle: document.querySelector('button.active').id,
187187
setAnnotationStyle: function(type) {
188188
this.annotationStyle = type;
189+
if (type === "POS") {
190+
wordObjs.forEach(w => w.tag = w.data.syntaxData.tag);
191+
}
192+
else {
193+
wordObjs.forEach(w => w.tag = '');
194+
}
195+
redrawWords(wordObjs);
189196
}
190197
};
191198
const buttons = document.querySelectorAll('#header button');
@@ -224,117 +231,155 @@
224231
* - second hierarchy: events--paths | arguments--themes
225232
*/
226233
function parseData(data) {
227-
console.log(data);
228-
229-
let datamap = {}; // map Word instances to indices in data
234+
let wordDataArray = [];
235+
let wordDataMap = {};
236+
let syntaxDataArray = [];
237+
let mentionDataArray = [];
230238

231239
for (var i in data.documents) {
232-
let doc = data.documents[i];
233-
234-
doc.sentences.forEach( function(sentence, j) {
235-
// generate words
236-
sentence.words.forEach( function(word, k) {
237-
let idx = wordObjs.length;
238-
let w = new Word(word, idx);
239-
w.documentId = i;
240-
w.syntaxData = {
241-
tag: sentence.tags[k],
242-
lemma: sentence.lemmas[k],
243-
entity: sentence.entities[k]
244-
}
245-
w.tag = w.syntaxData.tag;
246-
wordObjs.push(w);
247-
datamap[i + '-' + j + '-' + k + '-' + word] = idx;
248-
})
249-
250-
// create links from syntax info
251-
let syntaxType = "stanford-collapsed"; /* "stanford-basic */
252-
sentence.graphs[syntaxType].edges.forEach(function(edge) {
253-
254-
let sourceWord = sentence.words[edge.source],
255-
destinationWord = sentence.words[edge.destination];
256-
257-
let sourceIndex = datamap[i + '-' + j + '-' + edge.source + '-' + sourceWord],
258-
destinationIndex = datamap[i + '-' + j + '-' + edge.destination + '-' + destinationWord];
259-
260-
let style;
261-
switch (edge.relation) {
262-
case 'nsubj':
263-
style = styles.gradientLine1; break;
264-
case 'dobj':
265-
style = styles.gradientLine2; break;
266-
default:
267-
style = styles.noneLine; break;
268-
}
269-
270-
linkObjs.push( new Link(
271-
wordObjs[sourceIndex],
272-
wordObjs[destinationIndex],
273-
sourceIndex < destinationIndex ? 1 : -1,
274-
style,
275-
edge.relation,
276-
texts.linkText
277-
) );
278-
})
279-
})
280-
}
281-
282-
// create links from event info
283-
data.mentions.forEach((mention, i) => {
284-
console.log('\tmention triggered by "' + mention.trigger.text + '"');
285-
console.log('\tevent', mention.trigger.labels[0]);
286-
console.log('\tid', mention.trigger.id);
287-
console.log('\tinterval', mention.tokenInterval.start, mention.tokenInterval.end, '[range of words influenced by event)');
288-
289-
console.log('\n\targuments:');
290-
for (var a in mention.arguments) {
291-
console.log('\t\t' + a);
292-
293-
function listArguments(arg, count) {
294-
let space = new Array(count).join('\t');
295-
console.log(space + ' text', arg.text);
296-
console.log(space + ' label', arg.labels[0]);
297-
console.log(space + ' id', arg.id);
298-
if (arg.arguments) {
299-
console.log(space + ' arguments:')
300-
}
301-
for (var i in arg.arguments) {
302-
console.log(space + '\t' + i);
303-
arg.arguments[i].forEach(l => listArguments(l, count+2));
304-
}
240+
let doc = data.documents[i];
241+
242+
doc.sentences.forEach(function(sentence, j) {
243+
// parse word data
244+
let sentenceData = sentence.words.map(function(word, k) {
245+
246+
let wordDataObject = {
247+
text: word,
248+
documentId: i,
249+
sentenceId: j,
250+
locationInSentence: k,
251+
charLocationInSentence: sentence.startOffsets[k],
252+
syntaxData: {
253+
tag: sentence.tags[k]
305254
}
255+
};
256+
257+
wordDataMap[[i,j,k].join('-')] = wordDataArray.length;
258+
wordDataArray.push(wordDataObject);
259+
return wordDataObject
260+
});
261+
262+
// create POS links
263+
sentence.graphs["stanford-collapsed"].edges.forEach(function(edge) {
264+
syntaxDataArray.push({
265+
destination: sentenceData[edge.destination],
266+
label: edge.relation,
267+
type: edge.relation,
268+
source: sentenceData[edge.source]
269+
})
270+
});
306271

307-
mention.arguments[a].forEach(l => listArguments(l, 4));
308-
}
272+
})
273+
}
309274

310-
console.log('\n\tpaths:');
311-
for (var p in mention.paths) {
312-
console.log('\t\t' + p);
313-
for (var pathId in mention.paths[p]) {
314-
console.log('\t\t\t id ' + pathId);
315-
mention.paths[p][pathId].forEach(path => {
316-
let words = data.documents[mention.document].sentences[mention.sentence].words;
317-
console.log('\t\t\t\t edge', words[path.source] + ' -> ' + path.relation + ' -> ' + words[path.destination]);
318-
})
319-
}
275+
// flatten data.mentions array
276+
/**
 * Flatten one mention (and, recursively, the mentions nested in its
 * arguments) into a plain "link" record appended to `mentionDataArray`.
 *
 * Side effects:
 *  - every list in `mention.arguments` is replaced in place by the list of
 *    flattened link records produced for its entries (entries of an
 *    unrecognised type are dropped);
 *  - the produced link is pushed onto `mentionDataArray` (nested mentions
 *    are pushed before their parent).
 *
 * @param {Object} mention - mention record; `mention.type` selects the shape.
 * @param {number} i - index supplied by forEach/map; unused.
 * @returns {Object|undefined} the link record, or undefined when
 *          `mention.type` is not one of the handled types.
 */
let printMention = function(mention, i) {
    // Flatten nested mentions first so that parents can refer to them by id.
    if (mention.arguments) {
        for (const role in mention.arguments) {
            mention.arguments[role] = mention.arguments[role]
                .map(printMention)
                // An unrecognised nested type yields undefined; drop it so the
                // `arg.id` lookups below do not throw.
                .filter(nested => nested !== undefined);
        }
    }

    let link;
    switch (mention.type) {
        case "CorefTextBoundMention": {
            // has text(s) only
            const firstToken = mention.tokenInterval.start;
            const endToken = mention.tokenInterval.end;
            const start = wordDataMap[[mention.document, mention.sentence, firstToken].join('-')];

            // tokenInterval.end is used as an exclusive bound, so it can equal
            // the sentence length and then has no entry in wordDataMap.
            // Looking it up would give undefined and make slice() run to the
            // end of the whole word array. Words of one sentence are stored
            // contiguously, so derive the end index from the start instead.
            const words = (start === undefined)
                ? []
                : wordDataArray.slice(start, start + (endToken - firstToken));

            link = {
                sourceId: null,
                destinationId: null,
                words: words,
                label: mention.displayLabel,
                id: mention.id,
                type: mention.type
            };
            break;
        }
        case "CorefRelationMention": {
            // has argument(s)
            // hard-coded the property --- need better data to parse this correctly
            const args = mention.arguments || {};
            const controller = args.controller;
            const controlled = args.controlled;

            if (Object.keys(args).length !== 2 || !controlled || !controller) {
                console.log("bad data parse: check CorefRelationMention", mention.arguments);
            }

            // Fall back to empty lists so malformed data is reported above
            // instead of aborting the whole parse with a TypeError.
            link = {
                sourceId: (controller || []).map(arg => arg.id),
                destinationId: {
                    name: "controlled",
                    id: (controlled || []).map(arg => arg.id)
                },
                label: mention.displayLabel,
                id: mention.id,
                type: mention.type
            };
            break;
        }
        case "CorefEventMention": {
            // has a trigger & argument(s)
            const args = mention.arguments || {};

            link = {
                sourceId: [mention.trigger.id],
                // one destination entry per argument role
                destinationId: Object.keys(args).map(key => {
                    return {
                        name: key,
                        id: args[key].map(arg => arg.id)
                    };
                }),
                label: mention.displayLabel,
                id: mention.id,
                type: mention.type
            };
            break;
        }
        default:
            console.log("invalid type", mention.type);
            return undefined;
    }

    mentionDataArray.push(link);
    return link;
}
343+
data.mentions.forEach(printMention);
344+
345+
// done parsing into semi-flat datasets...
346+
347+
console.log(wordDataArray, syntaxDataArray, mentionDataArray);
348+
349+
wordDataArray.forEach(function(word) {
350+
let idx = wordObjs.length;
351+
let w = new Word(word.text, idx);
352+
if (State.annotationStyle == 'POS') {
353+
w.tag = word.syntaxData.tag
354+
}
355+
else {
356+
w.tag = '';
357+
}
358+
w.data = word;
359+
wordObjs.push(w);
321360
})
322361

362+
linkObjs = [];
363+
if (State.annotationStyle == 'POS') {
364+
// syntaxDataArray.forEach
365+
}
366+
else {
367+
mentionDataArray.forEach(function(link) {
368+
369+
})
370+
}
371+
323372

373+
// draw
324374
linkObjs.sort(function(a, b) {
325375
var d1 = Math.abs(a.s.idx - a.e.idx);
326376
var d2 = Math.abs(b.s.idx - b.e.idx);
327377

328378
return d1 - d2;
329379
});
330-
331380
linkObjs.forEach(createLink);
332381

333-
334-
// 2. draw words and boxes around words
335382
drawWords(wordObjs);
336-
337-
// 3. draw each of the links
338383
drawLinks(linkObjs);
339384

340385
changeSizeOfSVGPanel(window.innerWidth - 16, (rows[rows.length - 1].lineBottom.y() ) + 1);

js/render.js

Lines changed: 5 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -24,40 +24,15 @@ function redrawWords(words) {
2424
words.forEach(function(word) {
2525

2626
if (word.text) {
27-
word.text.remove();
27+
word.text.text(word.val);
2828
}
2929

30-
var textw = 0, tagtextw = 0;
31-
word.text = draw.text(function(add) {
32-
var textwh = getTextWidthAndHeight(word.val, texts.wordText.style);
33-
textw = textwh.w;
34-
35-
add.text(word.val)
36-
.y(word.wy + textpaddingY*2) // - texts.wordText.descent)
37-
.x(word.wx + (word.ww/2) - (textwh.w / 2))
38-
.font(texts.wordText.style);
39-
});
40-
41-
42-
if (word.tagtext) {
43-
word.tagtext.remove();
30+
if (word.tagtext != null) {
31+
word.tagtext.text(word.tag || '');
4432
}
4533

46-
if (word.tag != null) {
47-
tagtextwh = getTextWidthAndHeight(word.tag, texts.tagText.style);
48-
tagtextw = tagtextwh.w;
49-
var tagXPos = word.twx + (word.ww/2) - (tagtextwh.w / 2);
50-
51-
//add in tag text, if the word has an associated tag
52-
word.tagtext = draw.text(function(add) {
53-
54-
add.text(word.tag)
55-
.y(word.wy + textpaddingY/2) // - texts.tagText.descent)
56-
.x(tagXPos)
57-
.font(texts.tagText.style);
58-
});
59-
}
60-
word.maxtextw = Math.max(textw, tagtextw);
34+
// TODO: redefine maxtextw
35+
// word.maxtextw = Math.max(textw, tagtextw);
6136

6237
// rearrange
6338
word.aboveRect.front();

0 commit comments

Comments
 (0)