Skip to content

Commit e235d54

Browse files
committed
log parsed/flattened json data on file input
1 parent dd045e1 commit e235d54

File tree

2 files changed

+184
-0
lines changed

2 files changed

+184
-0
lines changed

index.html

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@
7878
<script src="libs/lodash.js"></script>
7979
<script src="libs/chroma.min.js"></script>
8080

81+
<script src="js/parser.js"></script>
8182
<script src="js/GraphLayout.js"></script>
8283
<script src="js/enums.js"></script>
8384
<script src="js/style.js"></script>

js/parser.js

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
/**
 * Fetch a JSON document over HTTP and hand the parsed result to `parseData`.
 *
 * @param {string} [path] - URL of the JSON file to load. When omitted (or
 *     otherwise falsy) './data/angus-ex.json' is requested instead.
 */
function readJson(path) {
    const json = path || './data/angus-ex.json';

    /**
     * Issue a GET for `url`, parse the response body as JSON and pass the
     * result to `callback`. On a non-200 status or malformed JSON the problem
     * is logged and `callback` is not invoked.
     *
     * @param {string} url - address to request
     * @param {function(Object)} [callback] - receives the parsed JSON
     */
    function loadJSON(url, callback) {
        const httpRequest = new XMLHttpRequest();
        httpRequest.onreadystatechange = function() {
            // 4 is XMLHttpRequest's DONE state; earlier states carry no complete body
            if (httpRequest.readyState !== 4) {
                return;
            }
            if (httpRequest.status !== 200) {
                console.log("error", `failed to load ${url} (status ${httpRequest.status})`);
                return;
            }

            let data;
            try {
                data = JSON.parse(httpRequest.responseText);
            } catch (err) {
                // a malformed body must not escape as an uncaught error from the event handler
                console.log("error", err);
                return;
            }
            if (callback) callback(data);
        };
        httpRequest.open('GET', url);
        httpRequest.send();
    }

    /* parse json file */
    loadJSON(json, parseData);
}
22+
23+
24+
/**
 * Init words and links from json data.
 *  - first hierarchy: documents--sentences--words (+associated syntax tags)
 *  - edges connect parts of speech
 *  - second hierarchy: events--paths | arguments--themes
 *
 * The three flattened datasets are logged to the console and also returned.
 * `data` itself is left untouched.
 *
 * @param {Object} data - parsed JSON. `data.documents` is iterated by key and
 *     each document's `sentences` array is read; `data.mentions` is read as an
 *     array of (possibly nested) mentions. Missing parts are treated as empty.
 * @returns {{words: Object[], pos: Object[], events: Object[]}} `words` holds
 *     one entry per token, `pos` one entry per "stanford-collapsed" edge, and
 *     `events` one link per flattened mention (nested arguments first).
 */
function parseData(data) {
    const wordDataArray = [];
    const wordDataMap = {};     // "document-sentence-token" -> index into wordDataArray
    const sentenceEndMap = {};  // "document-sentence" -> index one past the sentence's last word
    const syntaxDataArray = [];
    const mentionDataArray = [];

    const documents = (data && data.documents) || {};
    const mentions = (data && Array.isArray(data.mentions)) ? data.mentions : [];

    Object.keys(documents).forEach(function(i) {
        const sentences = (documents[i] && documents[i].sentences) || [];

        sentences.forEach(function(sentence, j) {
            const offsets = sentence.startOffsets || [];
            const tags = sentence.tags || [];
            const firstWordIndex = wordDataArray.length;

            // parse word data
            (sentence.words || []).forEach(function(word, k) {
                wordDataMap[[i, j, k].join('-')] = wordDataArray.length;
                wordDataArray.push({
                    text: word,
                    documentId: i,
                    sentenceId: j,
                    locationInSentence: k,
                    charLocationInSentence: offsets[k],
                    syntaxData: {
                        tag: tags[k]
                    },
                    bioData: {
                        tag: ''
                    }
                });
            });
            sentenceEndMap[[i, j].join('-')] = wordDataArray.length;

            // the words of this sentence only, so edge indices resolve locally
            const sentenceData = wordDataArray.slice(firstWordIndex);

            // create POS links
            const graph = sentence.graphs && sentence.graphs["stanford-collapsed"];
            const edges = (graph && graph.edges) || [];
            edges.forEach(function(edge) {
                syntaxDataArray.push({
                    destination: sentenceData[edge.destination],
                    label: edge.relation,
                    type: edge.relation,
                    source: sentenceData[edge.source]
                });
            });
        });
    });

    /**
     * Words covered by `span.tokenInterval` within `span.document` /
     * `span.sentence`. The interval end is used as an exclusive bound.
     * Returns [] when the span does not point at a known token.
     */
    const wordsForSpan = function(span) {
        const interval = span.tokenInterval;
        if (!interval) {
            return [];
        }
        const sentenceKey = [span.document, span.sentence].join('-');
        const start = wordDataMap[[sentenceKey, interval.start].join('-')];
        if (start === undefined) {
            // slice(undefined, ...) would start from the first word of the corpus
            return [];
        }
        let end = wordDataMap[[sentenceKey, interval.end].join('-')];
        if (end === undefined) {
            // an interval ending on the sentence's last token has no entry for
            // `end`; stop at the sentence boundary instead of slicing to the
            // end of every document
            end = sentenceEndMap[sentenceKey];
        }
        return wordDataArray.slice(start, end);
    };

    /**
     * Flatten one mention (and, recursively, its arguments) into
     * mentionDataArray. Returns the link created for `mention`, or undefined
     * when the mention cannot be turned into a link.
     */
    const flattenMention = function(mention) {
        // resolve nested arguments first, without overwriting the input
        const resolvedArguments = {};
        if (mention.arguments) {
            Object.keys(mention.arguments).forEach(function(role) {
                resolvedArguments[role] = mention.arguments[role]
                    .map(function(argument) { return flattenMention(argument); })
                    .filter(function(link) { return link !== undefined; });
            });
        }

        let link;
        switch (mention.type) {
            case "CorefTextBoundMention":
                // has text(s) only
                link = {
                    sourceId: null,
                    destinationId: null,
                    words: wordsForSpan(mention),
                    label: mention.displayLabel,
                    id: mention.id,
                    charOffset: mention.characterStartOffset,
                    type: mention.type
                };
                mentionDataArray.push(link);
                return link;

            case "CorefRelationMention": {
                // has argument(s)
                // hard-coded the property --- need better data to parse this correctly
                const controller = resolvedArguments.controller;
                const controlled = resolvedArguments.controlled;
                if (Object.keys(resolvedArguments).length !== 2 || !controlled || !controller) {
                    console.log("bad data parse: check CorefRelationMention", mention.arguments);
                }
                if (!controlled || !controller) {
                    // no link can be built without both ends
                    return undefined;
                }
                link = {
                    sourceId: controller.map(arg => arg.id),
                    destinationId: [{
                        name: mention.displayLabel,
                        id: controlled.map(arg => arg.id)
                    }],
                    label: mention.displayLabel,
                    id: mention.id,
                    charOffset: mention.characterStartOffset,
                    type: mention.type
                };
                mentionDataArray.push(link);
                return link;
            }

            case "CorefEventMention": {
                // has a trigger & argument(s)
                const trigger = mention.trigger;
                if (!trigger) {
                    console.log("bad data parse: check CorefEventMention", mention);
                    return undefined;
                }

                if (trigger.type === "TextBoundMention") {
                    // the trigger gets its own text-bound link
                    mentionDataArray.push({
                        sourceId: null,
                        destinationId: null,
                        words: wordsForSpan(trigger),
                        label: mention.displayLabel,
                        id: trigger.id,
                        charOffset: trigger.characterStartOffset,
                        type: trigger.type
                    });
                }

                link = {
                    sourceId: [trigger.id],
                    destinationId: Object.keys(resolvedArguments).map(key => {
                        return {
                            name: key,
                            charOffset: mention.characterStartOffset,
                            id: resolvedArguments[key].map(arg => arg.id)
                        };
                    }),
                    label: mention.displayLabel,
                    id: mention.id,
                    type: mention.type
                };
                mentionDataArray.push(link);
                return link;
            }

            default:
                console.log("invalid type", mention.type);
                return undefined;
        }
    };

    // flatten data.mentions array
    mentions.forEach(function(mention) {
        flattenMention(mention);
    });

    // done parsing into semi-flat datasets...
    console.log('words', wordDataArray);
    console.log('pos', syntaxDataArray);
    console.log('events', mentionDataArray);

    return {
        words: wordDataArray,
        pos: syntaxDataArray,
        events: mentionDataArray
    };
}
169+
170+
// When the file input changes, read the chosen file as text, parse it as JSON
// and pass the result to parseData. Parse failures are logged and nothing is
// handed to parseData.
document.querySelector('input[type=file]').onchange = function() {
    const file = this.files && this.files[0];
    if (!file) {
        // no file selected (e.g. the selection was cleared): nothing to read
        return;
    }

    const fr = new FileReader();
    fr.onload = function(e) {
        let object;
        try {
            object = JSON.parse(e.target.result);
        } catch (err) {
            // named `err` so the load event `e` is not shadowed
            console.log("error", err);
            return;
        }
        parseData(object);
    };
    fr.readAsText(file);
};

0 commit comments

Comments
 (0)