Usage of edu.stanford.nlp.coref.data.CorefCluster in the CoreNLP project by stanfordnlp:
the process method of the NeuralCorefDataExporter class.
@Override
public void process(int id, Document document) {
  // Exports one document as JSON training data for the neural coref model:
  // writes the gold clusters to goldClusterWriter and the full feature set
  // (sentences, mentions, pairwise features, labels) to dataWriter.

  // Gold coreference clusters, serialized as lists of mention ids and keyed by document id.
  JsonArrayBuilder clusters = Json.createArrayBuilder();
  for (CorefCluster gold : document.goldCorefClusters.values()) {
    JsonArrayBuilder c = Json.createArrayBuilder();
    for (Mention m : gold.corefMentions) {
      c.add(m.mentionID);
    }
    clusters.add(c.build());
  }
  goldClusterWriter.println(
      Json.createObjectBuilder().add(String.valueOf(id), clusters.build()).build());

  Map<Pair<Integer, Integer>, Boolean> mentionPairs = CorefUtils.getLabeledMentionPairs(document);
  List<Mention> mentionsList = CorefUtils.getSortedMentions(document);

  // Index mentions by head-token position; used below to detect mentions nested
  // inside another mention that shares the same head.
  Map<Integer, List<Mention>> mentionsByHeadIndex = new HashMap<>();
  for (Mention m : mentionsList) {
    mentionsByHeadIndex.computeIfAbsent(m.headIndex, k -> new ArrayList<>()).add(m);
  }

  // Document-level features: id, article-vs-conversation type, and the corpus
  // source (first path component of the CoNLL document id).
  JsonObjectBuilder docFeatures = Json.createObjectBuilder();
  docFeatures.add("doc_id", id);
  docFeatures.add("type", document.docType == DocType.ARTICLE ? 1 : 0);
  docFeatures.add("source", document.docInfo.get("DOC_ID").split("/")[0]);

  // Tokenized sentences of the document.
  JsonArrayBuilder sentences = Json.createArrayBuilder();
  for (CoreMap sentence : document.annotation.get(SentencesAnnotation.class)) {
    sentences.add(getSentenceArray(sentence.get(CoreAnnotations.TokensAnnotation.class)));
  }

  // Per-mention features, keyed by mention number.
  JsonObjectBuilder mentions = Json.createObjectBuilder();
  for (Mention m : document.predictedMentionsByID.values()) {
    // Dependency features from the incoming edge of the mention's head word,
    // if any (a root word has no parent).
    Iterator<SemanticGraphEdge> iterator =
        m.enhancedDependency.incomingEdgeIterator(m.headIndexedWord);
    SemanticGraphEdge relation = iterator.hasNext() ? iterator.next() : null;
    String depRelation = relation == null ? "no-parent" : relation.getRelation().toString();
    String depParent = relation == null ? "<missing>" : relation.getSource().word();
    mentions.add(String.valueOf(m.mentionNum), Json.createObjectBuilder()
        .add("doc_id", id)
        .add("mention_id", m.mentionID)
        .add("mention_num", m.mentionNum)
        .add("sent_num", m.sentNum)
        .add("start_index", m.startIndex)
        .add("end_index", m.endIndex)
        .add("head_index", m.headIndex)
        .add("mention_type", m.mentionType.toString())
        .add("dep_relation", depRelation)
        .add("dep_parent", depParent)
        .add("sentence", getSentenceArray(m.sentenceWords))
        // 1 iff some other mention with the same head token contains this one.
        .add("contained-in-other-mention",
            mentionsByHeadIndex.get(m.headIndex).stream()
                .anyMatch(m2 -> m != m2 && m.insideIn(m2)) ? 1 : 0)
        .build());
  }

  // Pairwise categorical features and gold labels for every candidate mention pair.
  JsonArrayBuilder featureNames = Json.createArrayBuilder()
      .add("same-speaker")
      .add("antecedent-is-mention-speaker")
      .add("mention-is-antecedent-speaker")
      .add("relaxed-head-match")
      .add("exact-string-match")
      .add("relaxed-string-match");
  JsonObjectBuilder features = Json.createObjectBuilder();
  JsonObjectBuilder labels = Json.createObjectBuilder();
  for (Map.Entry<Pair<Integer, Integer>, Boolean> e : mentionPairs.entrySet()) {
    Mention m1 = document.predictedMentionsByID.get(e.getKey().first);
    Mention m2 = document.predictedMentionsByID.get(e.getKey().second);
    String key = m1.mentionNum + " " + m2.mentionNum;
    JsonArrayBuilder builder = Json.createArrayBuilder();
    for (int val : CategoricalFeatureExtractor.pairwiseFeatures(document, m1, m2, dictionaries, conll)) {
      builder.add(val);
    }
    features.add(key, builder.build());
    labels.add(key, e.getValue() ? 1 : 0);
  }

  // Assemble and emit the complete training example for this document.
  JsonObject docData = Json.createObjectBuilder()
      .add("sentences", sentences.build())
      .add("mentions", mentions.build())
      .add("labels", labels.build())
      .add("pair_feature_names", featureNames.build())
      .add("pair_features", features.build())
      .add("document_features", docFeatures.build())
      .build();
  dataWriter.println(docData);
}
Usage of edu.stanford.nlp.coref.data.CorefCluster in the CoreNLP project by stanfordnlp:
the process method of the MetadataWriter class.
@Override
public void process(int id, Document document) {
  // Collects per-document metadata: predicted mention types, gold clusters,
  // and (optionally) word/bigram/head-pair counts used for vocabulary building.

  // Predicted mention types, keyed by mention id.
  mentionTypes.put(id, document.predictedMentionsByID.entrySet().stream()
      .collect(Collectors.toMap(Map.Entry::getKey, e -> e.getValue().mentionType.toString())));

  // Gold clusters, serialized as lists of mention ids.
  List<List<Integer>> clusters = new ArrayList<>();
  for (CorefCluster c : document.goldCorefClusters.values()) {
    List<Integer> cluster = new ArrayList<>();
    for (Mention m : c.getCorefMentions()) {
      cluster.add(m.mentionID);
    }
    clusters.add(cluster);
  }
  goldClusters.put(id, clusters);

  // Word counting (only when enabled and labeled pairs exist for this document):
  // head-word-pair counts for each mention pair, plus unigram and bigram counts
  // over every sentence that contains a candidate mention.
  if (countWords && mentionPairs.containsKey(id)) {
    Set<Pair<Integer, Integer>> pairs = mentionPairs.get(id).keySet();
    Set<Integer> mentions = new HashSet<>();
    for (Pair<Integer, Integer> pair : pairs) {
      mentions.add(pair.first);
      mentions.add(pair.second);
      Mention m1 = document.predictedMentionsByID.get(pair.first);
      Mention m2 = document.predictedMentionsByID.get(pair.second);
      wordCounts.incrementCount(
          "h_" + m1.headWord.word().toLowerCase() + "_" + m2.headWord.word().toLowerCase());
    }

    // Collect each mentioned sentence exactly once; putIfAbsent replaces the
    // manual containsKey/put pair.
    Map<Integer, List<CoreLabel>> sentences = new HashMap<>();
    for (int mention : mentions) {
      Mention m = document.predictedMentionsByID.get(mention);
      sentences.putIfAbsent(m.sentNum, m.sentenceWords);
    }

    for (List<CoreLabel> sentence : sentences.values()) {
      for (int i = 0; i < sentence.size(); i++) {
        CoreLabel cl = sentence.get(i);
        if (cl == null) {
          continue;
        }
        String w = cl.word().toLowerCase();
        wordCounts.incrementCount(w);
        // Bigram with the previous token, counted only when both tokens are non-null.
        if (i > 0) {
          CoreLabel clp = sentence.get(i - 1);
          if (clp == null) {
            continue;
          }
          String wp = clp.word().toLowerCase();
          wordCounts.incrementCount(wp + "_" + w);
        }
      }
    }
  }
}
Usage of edu.stanford.nlp.coref.data.CorefCluster in the CoreNLP project by stanfordnlp:
the process method of the MentionDetectionEvaluator class.
@Override
public void process(int id, Document document) {
  // Accumulates mention-detection counts across all documents processed so far:
  // a gold mention counts as correct when a predicted mention with the same
  // mention id exists, then logs running precision/recall/F1.
  for (CorefCluster gold : document.goldCorefClusters.values()) {
    for (Mention m : gold.corefMentions) {
      if (document.predictedMentionsByID.containsKey(m.mentionID)) {
        correctSystemMentions += 1;
      }
      goldMentions += 1;
    }
  }
  systemMentions += document.predictedMentionsByID.size();

  // Guard the divisions so documents with no predicted or gold mentions
  // log 0.0000 instead of NaN.
  double precision = systemMentions == 0 ? 0 : correctSystemMentions / (double) systemMentions;
  double recall = goldMentions == 0 ? 0 : correctSystemMentions / (double) goldMentions;
  double f1 = precision + recall == 0 ? 0 : 2 * precision * recall / (precision + recall);
  log.info("Precision: " + correctSystemMentions + " / " + systemMentions + " = "
      + String.format("%.4f", precision));
  log.info("Recall: " + correctSystemMentions + " / " + goldMentions + " = "
      + String.format("%.4f", recall));
  log.info(String.format("F1: %.4f", f1));
}
Aggregations