|
186 | 186 | annotationStyle: document.querySelector('button.active').id, |
187 | 187 | setAnnotationStyle: function(type) { |
188 | 188 | this.annotationStyle = type; |
| 189 | + if (type === "POS") { |
| 190 | + wordObjs.forEach(w => w.tag = w.data.syntaxData.tag); |
| 191 | + } |
| 192 | + else { |
| 193 | + wordObjs.forEach(w => w.tag = ''); |
| 194 | + } |
| 195 | + redrawWords(wordObjs); |
189 | 196 | } |
190 | 197 | }; |
191 | 198 | const buttons = document.querySelectorAll('#header button'); |
|
224 | 231 | * - second hierarchy: events--paths | arguments--themes |
225 | 232 | */ |
226 | 233 | function parseData(data) { |
227 | | - console.log(data); |
228 | | - |
229 | | - let datamap = {}; // map Word instances to indices in data |
| 234 | + let wordDataArray = []; |
| 235 | + let wordDataMap = {}; |
| 236 | + let syntaxDataArray = []; |
| 237 | + let mentionDataArray = []; |
230 | 238 |
|
231 | 239 | for (var i in data.documents) { |
232 | | - let doc = data.documents[i]; |
233 | | - |
234 | | - doc.sentences.forEach( function(sentence, j) { |
235 | | - // generate words |
236 | | - sentence.words.forEach( function(word, k) { |
237 | | - let idx = wordObjs.length; |
238 | | - let w = new Word(word, idx); |
239 | | - w.documentId = i; |
240 | | - w.syntaxData = { |
241 | | - tag: sentence.tags[k], |
242 | | - lemma: sentence.lemmas[k], |
243 | | - entity: sentence.entities[k] |
244 | | - } |
245 | | - w.tag = w.syntaxData.tag; |
246 | | - wordObjs.push(w); |
247 | | - datamap[i + '-' + j + '-' + k + '-' + word] = idx; |
248 | | - }) |
249 | | - |
250 | | - // create links from syntax info |
251 | | - let syntaxType = "stanford-collapsed"; /* "stanford-basic */ |
252 | | - sentence.graphs[syntaxType].edges.forEach(function(edge) { |
253 | | - |
254 | | - let sourceWord = sentence.words[edge.source], |
255 | | - destinationWord = sentence.words[edge.destination]; |
256 | | - |
257 | | - let sourceIndex = datamap[i + '-' + j + '-' + edge.source + '-' + sourceWord], |
258 | | - destinationIndex = datamap[i + '-' + j + '-' + edge.destination + '-' + destinationWord]; |
259 | | - |
260 | | - let style; |
261 | | - switch (edge.relation) { |
262 | | - case 'nsubj': |
263 | | - style = styles.gradientLine1; break; |
264 | | - case 'dobj': |
265 | | - style = styles.gradientLine2; break; |
266 | | - default: |
267 | | - style = styles.noneLine; break; |
268 | | - } |
269 | | - |
270 | | - linkObjs.push( new Link( |
271 | | - wordObjs[sourceIndex], |
272 | | - wordObjs[destinationIndex], |
273 | | - sourceIndex < destinationIndex ? 1 : -1, |
274 | | - style, |
275 | | - edge.relation, |
276 | | - texts.linkText |
277 | | - ) ); |
278 | | - }) |
279 | | - }) |
280 | | - } |
281 | | - |
282 | | - // create links from event info |
283 | | - data.mentions.forEach((mention, i) => { |
284 | | - console.log('\tmention triggered by "' + mention.trigger.text + '"'); |
285 | | - console.log('\tevent', mention.trigger.labels[0]); |
286 | | - console.log('\tid', mention.trigger.id); |
287 | | - console.log('\tinterval', mention.tokenInterval.start, mention.tokenInterval.end, '[range of words influenced by event)'); |
288 | | - |
289 | | - console.log('\n\targuments:'); |
290 | | - for (var a in mention.arguments) { |
291 | | - console.log('\t\t' + a); |
292 | | - |
293 | | - function listArguments(arg, count) { |
294 | | - let space = new Array(count).join('\t'); |
295 | | - console.log(space + ' text', arg.text); |
296 | | - console.log(space + ' label', arg.labels[0]); |
297 | | - console.log(space + ' id', arg.id); |
298 | | - if (arg.arguments) { |
299 | | - console.log(space + ' arguments:') |
300 | | - } |
301 | | - for (var i in arg.arguments) { |
302 | | - console.log(space + '\t' + i); |
303 | | - arg.arguments[i].forEach(l => listArguments(l, count+2)); |
304 | | - } |
| 240 | + let doc = data.documents[i]; |
| 241 | + |
| 242 | + doc.sentences.forEach(function(sentence, j) { |
| 243 | + // parse word data |
| 244 | + let sentenceData = sentence.words.map(function(word, k) { |
| 245 | + |
| 246 | + let wordDataObject = { |
| 247 | + text: word, |
| 248 | + documentId: i, |
| 249 | + sentenceId: j, |
| 250 | + locationInSentence: k, |
| 251 | + charLocationInSentence: sentence.startOffsets[k], |
| 252 | + syntaxData: { |
| 253 | + tag: sentence.tags[k] |
305 | 254 | } |
| 255 | + }; |
| 256 | + |
| 257 | + wordDataMap[[i,j,k].join('-')] = wordDataArray.length; |
| 258 | + wordDataArray.push(wordDataObject); |
| 259 | + return wordDataObject |
| 260 | + }); |
| 261 | + |
| 262 | + // create POS links |
| 263 | + sentence.graphs["stanford-collapsed"].edges.forEach(function(edge) { |
| 264 | + syntaxDataArray.push({ |
| 265 | + destination: sentenceData[edge.destination], |
| 266 | + label: edge.relation, |
| 267 | + type: edge.relation, |
| 268 | + source: sentenceData[edge.source] |
| 269 | + }) |
| 270 | + }); |
306 | 271 |
|
307 | | - mention.arguments[a].forEach(l => listArguments(l, 4)); |
308 | | - } |
| 272 | + }) |
| 273 | + } |
309 | 274 |
|
310 | | - console.log('\n\tpaths:'); |
311 | | - for (var p in mention.paths) { |
312 | | - console.log('\t\t' + p); |
313 | | - for (var pathId in mention.paths[p]) { |
314 | | - console.log('\t\t\t id ' + pathId); |
315 | | - mention.paths[p][pathId].forEach(path => { |
316 | | - let words = data.documents[mention.document].sentences[mention.sentence].words; |
317 | | - console.log('\t\t\t\t edge', words[path.source] + ' -> ' + path.relation + ' -> ' + words[path.destination]); |
318 | | - }) |
319 | | - } |
| 275 | + // flatten data.mentions array |
/**
 * Flatten one mention (and, recursively, the mentions nested in its
 * `arguments`) into a link record, append that record to `mentionDataArray`,
 * and return it.
 *
 * Nested arguments are converted first, so their records are pushed before the
 * parent's record, and each `mention.arguments[name]` array is replaced in
 * place by the records built for its entries.
 *
 * Reads `wordDataMap` / `wordDataArray` and appends to `mentionDataArray`,
 * all taken from the enclosing scope.
 *
 * @param {Object} mention - raw mention; `mention.type` selects the record shape.
 * @param {number} [i] - index supplied by forEach/map; unused.
 * @returns {Object|undefined} the link record, or undefined when
 *   `mention.type` is not one of the three handled types.
 */
const printMention = function(mention, i) {
  if (mention.arguments) {
    for (const name in mention.arguments) {
      // Entries of an unhandled type come back as undefined; drop them so the
      // `arg.id` lookups further down cannot throw on them.
      mention.arguments[name] = mention.arguments[name]
        .map(printMention)
        .filter(arg => arg !== undefined);
    }
  }

  switch (mention.type) {
    case "CorefTextBoundMention": {
      // has text(s) only
      const interval = mention.tokenInterval;
      const start = wordDataMap[[mention.document, mention.sentence, interval.start].join('-')];

      let words = [];
      if (start === undefined) {
        // Without this guard slice(undefined, ...) would return every word.
        console.log("bad data parse: check CorefTextBoundMention", mention);
      }
      else {
        // tokenInterval.end is used as an exclusive bound. Looking it up in
        // wordDataMap fails when the mention ends on the last token of its
        // sentence (no such key), which made slice() run to the end of the
        // whole array. Derive the end from the interval length instead; this
        // relies on a sentence's words sitting in consecutive slots of
        // wordDataArray.
        words = wordDataArray.slice(start, start + (interval.end - interval.start));
      }

      const link = {
        sourceId: null,
        destinationId: null,
        words: words,
        label: mention.displayLabel,
        id: mention.id,
        type: mention.type
      };
      mentionDataArray.push(link);
      return link;
    }
    case "CorefRelationMention": {
      // has argument(s)
      // hard-coded the property --- need better data to parse this correctly
      const args = mention.arguments || {};
      if (Object.keys(args).length !== 2 || !args.controlled || !args.controller) {
        console.log("bad data parse: check CorefRelationMention", mention.arguments);
      }

      // A missing role yields an empty id list rather than a TypeError right
      // after the problem has been reported above.
      const link = {
        sourceId: (args.controller || []).map(arg => arg.id),
        destinationId: {
          name: "controlled",
          id: (args.controlled || []).map(arg => arg.id)
        },
        label: mention.displayLabel,
        id: mention.id,
        type: mention.type
      };
      mentionDataArray.push(link);
      return link;
    }
    case "CorefEventMention": {
      // has a trigger & argument(s)
      const args = mention.arguments || {};
      const link = {
        sourceId: [mention.trigger.id],
        destinationId: Object.keys(args).map(key => ({
          name: key,
          id: args[key].map(arg => arg.id)
        })),
        label: mention.displayLabel,
        id: mention.id,
        type: mention.type
      };
      mentionDataArray.push(link);
      return link;
    }
    default:
      console.log("invalid type", mention.type);
      return undefined;
  }
};
| 343 | + data.mentions.forEach(printMention); |
| 344 | + |
| 345 | + // done parsing into semi-flat datasets... |
| 346 | + |
| 347 | + console.log(wordDataArray, syntaxDataArray, mentionDataArray); |
| 348 | + |
| 349 | + wordDataArray.forEach(function(word) { |
| 350 | + let idx = wordObjs.length; |
| 351 | + let w = new Word(word.text, idx); |
| 352 | + if (State.annotationStyle == 'POS') { |
| 353 | + w.tag = word.syntaxData.tag |
| 354 | + } |
| 355 | + else { |
| 356 | + w.tag = ''; |
| 357 | + } |
| 358 | + w.data = word; |
| 359 | + wordObjs.push(w); |
321 | 360 | }) |
322 | 361 |
|
| 362 | + linkObjs = []; |
| 363 | + if (State.annotationStyle == 'POS') { |
| 364 | + // syntaxDataArray.forEach |
| 365 | + } |
| 366 | + else { |
| 367 | + mentionDataArray.forEach(function(link) { |
| 368 | + |
| 369 | + }) |
| 370 | + } |
| 371 | + |
323 | 372 |
|
| 373 | + // draw |
324 | 374 | linkObjs.sort(function(a, b) { |
325 | 375 | var d1 = Math.abs(a.s.idx - a.e.idx); |
326 | 376 | var d2 = Math.abs(b.s.idx - b.e.idx); |
327 | 377 |
|
328 | 378 | return d1 - d2; |
329 | 379 | }); |
330 | | - |
331 | 380 | linkObjs.forEach(createLink); |
332 | 381 |
|
333 | | - |
334 | | - // 2. draw words and boxes around words |
335 | 382 | drawWords(wordObjs); |
336 | | - |
337 | | - // 3. draw each of the links |
338 | 383 | drawLinks(linkObjs); |
339 | 384 |
|
340 | 385 | changeSizeOfSVGPanel(window.innerWidth - 16, (rows[rows.length - 1].lineBottom.y() ) + 1); |
|
0 commit comments