Use of edu.stanford.nlp.coref.statistical.CompressedFeatureVector in project CoreNLP by stanfordnlp.
The process method of the class FastNeuralCorefDataExporter.
/**
 * Exports a single document as JSON.
 *
 * <p>One record holding the gold coreference clusters (arrays of mention IDs keyed by
 * {@code id}) is printed to {@code goldClusterWriter}. A second record with the keys
 * {@code "sentences"}, {@code "mentions"}, {@code "pairs"} and {@code "mention_features"}
 * is printed to {@code dataWriter}.
 *
 * @param id identifier used as the key of the gold-cluster record and as each mention's
 *     {@code "doc_id"}
 * @param document the document whose gold clusters, sentences, predicted mentions and
 *     extracted features are written out
 */
@Override
public void process(int id, Document document) {
  // --- Gold clusters: one array of mention IDs per cluster. ---
  JsonArrayBuilder goldClusters = Json.createArrayBuilder();
  for (CorefCluster cluster : document.goldCorefClusters.values()) {
    JsonArrayBuilder memberIds = Json.createArrayBuilder();
    for (Mention member : cluster.corefMentions) {
      memberIds.add(member.mentionID);
    }
    goldClusters.add(memberIds.build());
  }
  JsonObject goldRecord = Json.createObjectBuilder()
      .add(String.valueOf(id), goldClusters.build())
      .build();
  goldClusterWriter.println(goldRecord);

  // --- Candidate pairs: keep only those that survive the heuristic filter. ---
  Map<Pair<Integer, Integer>, Boolean> labeledPairs = CorefUtils.getLabeledMentionPairs(document);
  Map<Integer, List<Integer>> filtered = CorefUtils.heuristicFilter(
      CorefUtils.getSortedMentions(document),
      maxMentionDistance,
      maxMentionDistanceWithStringMatch);
  Map<Pair<Integer, Integer>, Boolean> pairs = new HashMap<>();
  for (Map.Entry<Integer, List<Integer>> candidate : filtered.entrySet()) {
    for (int first : candidate.getValue()) {
      // The map key is the second element of the pair; each list entry is the first.
      Pair<Integer, Integer> key = new Pair<Integer, Integer>(first, candidate.getKey());
      // The label is copied as-is; it is null when labeledPairs has no entry for the key.
      pairs.put(key, labeledPairs.get(key));
    }
  }

  // --- Sentences: one array per sentence, built from its tokens. ---
  JsonArrayBuilder sentences = Json.createArrayBuilder();
  for (CoreMap sent : document.annotation.get(SentencesAnnotation.class)) {
    sentences.add(getSentenceArray(sent.get(CoreAnnotations.TokensAnnotation.class)));
  }

  // --- Mentions: keyed by mention number. ---
  JsonObjectBuilder mentions = Json.createObjectBuilder();
  for (Mention mention : document.predictedMentionsByID.values()) {
    // Only the first incoming edge of the head word (if any) is described.
    Iterator<SemanticGraphEdge> incoming =
        mention.enhancedDependency.incomingEdgeIterator(mention.headIndexedWord);
    SemanticGraphEdge parentEdge = null;
    if (incoming.hasNext()) {
      parentEdge = incoming.next();
    }

    String depRelation;
    String depParent;
    if (parentEdge == null) {
      depRelation = "no-parent";
      depParent = "<missing>";
    } else {
      depRelation = parentEdge.getRelation().toString();
      depParent = parentEdge.getSource().word();
    }

    mentions.add(
        String.valueOf(mention.mentionNum),
        Json.createObjectBuilder()
            .add("doc_id", id)
            .add("mention_id", mention.mentionID)
            .add("mention_num", mention.mentionNum)
            .add("sent_num", mention.sentNum)
            .add("start_index", mention.startIndex)
            .add("end_index", mention.endIndex)
            .add("head_index", mention.headIndex)
            .add("mention_type", mention.mentionType.toString())
            .add("dep_relation", depRelation)
            .add("dep_parent", depParent)
            .add("sentence", getSentenceArray(mention.sentenceWords))
            .build());
  }

  // --- Feature extraction (note: the first argument is the constant 0, not id). ---
  DocumentExamples examples = extractor.extract(0, document, pairs, compressor);

  // Per-mention features: each compressed vector becomes a {key: value} object.
  JsonObjectBuilder mentionFeatures = Json.createObjectBuilder();
  for (Map.Entry<Integer, CompressedFeatureVector> perMention : examples.mentionFeatures.entrySet()) {
    CompressedFeatureVector vector = perMention.getValue();
    JsonObjectBuilder featureJson = Json.createObjectBuilder();
    for (int k = 0; k < vector.keys.size(); k++) {
      featureJson.add(String.valueOf(vector.keys.get(k)), vector.values.get(k));
    }
    mentionFeatures.add(String.valueOf(perMention.getKey()), featureJson);
  }

  // Pairwise examples: keyed by "<mid1> <mid2>".
  JsonObjectBuilder mentionPairs = Json.createObjectBuilder();
  for (Example ex : examples.examples) {
    JsonObjectBuilder exampleJson = Json.createObjectBuilder()
        .add("mid1", ex.mentionId1)
        .add("mid2", ex.mentionId2);

    JsonObjectBuilder pairFeatureJson = Json.createObjectBuilder();
    for (int k = 0; k < ex.pairwiseFeatures.keys.size(); k++) {
      pairFeatureJson.add(
          String.valueOf(ex.pairwiseFeatures.keys.get(k)),
          ex.pairwiseFeatures.values.get(k));
    }

    // The label is truncated to an int before being written.
    exampleJson.add("label", (int) (ex.label));
    exampleJson.add("features", pairFeatureJson);

    String pairKey = String.valueOf(ex.mentionId1) + " " + String.valueOf(ex.mentionId2);
    mentionPairs.add(pairKey, exampleJson);
  }

  // --- Assemble and emit the per-document record. ---
  JsonObject docData = Json.createObjectBuilder()
      .add("sentences", sentences.build())
      .add("mentions", mentions.build())
      .add("pairs", mentionPairs.build())
      .add("mention_features", mentionFeatures.build())
      .build();
  dataWriter.println(docData);
  System.out.println("Writing " + dataWriter.toString());
}
Aggregations