Search in sources :

Example 11 with CorefCluster

Use of edu.stanford.nlp.coref.data.CorefCluster in the CoreNLP project by stanfordnlp.

The following shows the process method of the class NeuralCorefDataExporter.

@Override
public void process(int id, Document document) {
    // Exports one document as JSON for the neural coref trainer: gold clusters go to
    // goldClusterWriter, and a per-document record (sentences, mentions, pairwise
    // features, labels) goes to dataWriter.

    // Gold clusters: a JSON array of arrays of mention IDs, keyed by document id.
    JsonArrayBuilder clusters = Json.createArrayBuilder();
    for (CorefCluster gold : document.goldCorefClusters.values()) {
        JsonArrayBuilder c = Json.createArrayBuilder();
        for (Mention m : gold.corefMentions) {
            c.add(m.mentionID);
        }
        clusters.add(c.build());
    }
    goldClusterWriter.println(Json.createObjectBuilder().add(String.valueOf(id), clusters.build()).build());

    Map<Pair<Integer, Integer>, Boolean> mentionPairs = CorefUtils.getLabeledMentionPairs(document);
    List<Mention> mentionsList = CorefUtils.getSortedMentions(document);

    // Group mentions by head index, used below for the contained-in-other-mention feature.
    Map<Integer, List<Mention>> mentionsByHeadIndex = new HashMap<>();
    for (Mention m : mentionsList) {
        mentionsByHeadIndex.computeIfAbsent(m.headIndex, k -> new ArrayList<>()).add(m);
    }

    // Document-level features.
    JsonObjectBuilder docFeatures = Json.createObjectBuilder();
    docFeatures.add("doc_id", id);
    docFeatures.add("type", document.docType == DocType.ARTICLE ? 1 : 0);
    // DOC_ID is path-like; the leading segment identifies the corpus source.
    docFeatures.add("source", document.docInfo.get("DOC_ID").split("/")[0]);

    JsonArrayBuilder sentences = Json.createArrayBuilder();
    for (CoreMap sentence : document.annotation.get(SentencesAnnotation.class)) {
        sentences.add(getSentenceArray(sentence.get(CoreAnnotations.TokensAnnotation.class)));
    }

    // Per-mention features, keyed by mention number.
    JsonObjectBuilder mentions = Json.createObjectBuilder();
    for (Mention m : document.predictedMentionsByID.values()) {
        // Dependency parent of the mention's head word, if any.
        Iterator<SemanticGraphEdge> iterator = m.enhancedDependency.incomingEdgeIterator(m.headIndexedWord);
        SemanticGraphEdge relation = iterator.hasNext() ? iterator.next() : null;
        String depRelation = relation == null ? "no-parent" : relation.getRelation().toString();
        String depParent = relation == null ? "<missing>" : relation.getSource().word();
        boolean containedInOther = mentionsByHeadIndex.get(m.headIndex).stream()
                .anyMatch(m2 -> m != m2 && m.insideIn(m2));
        mentions.add(String.valueOf(m.mentionNum), Json.createObjectBuilder()
                .add("doc_id", id)
                .add("mention_id", m.mentionID)
                .add("mention_num", m.mentionNum)
                .add("sent_num", m.sentNum)
                .add("start_index", m.startIndex)
                .add("end_index", m.endIndex)
                .add("head_index", m.headIndex)
                .add("mention_type", m.mentionType.toString())
                .add("dep_relation", depRelation)
                .add("dep_parent", depParent)
                .add("sentence", getSentenceArray(m.sentenceWords))
                .add("contained-in-other-mention", containedInOther ? 1 : 0)
                .build());
    }

    // Pairwise categorical features and gold labels, keyed by "mentionNum1 mentionNum2".
    JsonArrayBuilder featureNames = Json.createArrayBuilder().add("same-speaker").add("antecedent-is-mention-speaker").add("mention-is-antecedent-speaker").add("relaxed-head-match").add("exact-string-match").add("relaxed-string-match");
    JsonObjectBuilder features = Json.createObjectBuilder();
    JsonObjectBuilder labels = Json.createObjectBuilder();
    for (Map.Entry<Pair<Integer, Integer>, Boolean> e : mentionPairs.entrySet()) {
        Mention m1 = document.predictedMentionsByID.get(e.getKey().first);
        Mention m2 = document.predictedMentionsByID.get(e.getKey().second);
        String key = m1.mentionNum + " " + m2.mentionNum;
        JsonArrayBuilder builder = Json.createArrayBuilder();
        for (int val : CategoricalFeatureExtractor.pairwiseFeatures(document, m1, m2, dictionaries, conll)) {
            builder.add(val);
        }
        features.add(key, builder.build());
        labels.add(key, e.getValue() ? 1 : 0);
    }

    JsonObject docData = Json.createObjectBuilder()
            .add("sentences", sentences.build())
            .add("mentions", mentions.build())
            .add("labels", labels.build())
            .add("pair_feature_names", featureNames.build())
            .add("pair_features", features.build())
            .add("document_features", docFeatures.build())
            .build();
    dataWriter.println(docData);
}
Also used : HashMap(java.util.HashMap) JsonObject(javax.json.JsonObject) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge) CorefCluster(edu.stanford.nlp.coref.data.CorefCluster) Mention(edu.stanford.nlp.coref.data.Mention) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) ArrayList(java.util.ArrayList) List(java.util.List) JsonArrayBuilder(javax.json.JsonArrayBuilder) JsonObjectBuilder(javax.json.JsonObjectBuilder) CoreMap(edu.stanford.nlp.util.CoreMap) HashMap(java.util.HashMap) Map(java.util.Map) CoreMap(edu.stanford.nlp.util.CoreMap) Pair(edu.stanford.nlp.util.Pair)

Example 12 with CorefCluster

Use of edu.stanford.nlp.coref.data.CorefCluster in the CoreNLP project by stanfordnlp.

The following shows the process method of the class MetadataWriter.

@Override
public void process(int id, Document document) {
    // Records per-document metadata: predicted mention types, gold clusters, and
    // (optionally) unigram/bigram word counts plus head-word pair counts.

    // Mention types, keyed by mention ID.
    mentionTypes.put(id, document.predictedMentionsByID.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, e -> e.getValue().mentionType.toString())));

    // Gold clusters as lists of mention IDs.
    List<List<Integer>> clusters = new ArrayList<>();
    for (CorefCluster c : document.goldCorefClusters.values()) {
        List<Integer> cluster = new ArrayList<>();
        for (Mention m : c.getCorefMentions()) {
            cluster.add(m.mentionID);
        }
        clusters.add(cluster);
    }
    goldClusters.put(id, clusters);

    // Word counting over the sentences containing mentions that occur in labeled pairs.
    if (countWords && mentionPairs.containsKey(id)) {
        Set<Pair<Integer, Integer>> pairs = mentionPairs.get(id).keySet();
        Set<Integer> mentions = new HashSet<>();
        for (Pair<Integer, Integer> pair : pairs) {
            mentions.add(pair.first);
            mentions.add(pair.second);
            Mention m1 = document.predictedMentionsByID.get(pair.first);
            Mention m2 = document.predictedMentionsByID.get(pair.second);
            // Head-word pair feature count, lowercased, with an "h_" prefix to
            // keep it distinct from plain word/bigram counts.
            wordCounts.incrementCount("h_" + m1.headWord.word().toLowerCase() + "_" + m2.headWord.word().toLowerCase());
        }

        // Collect each mention's sentence once, keyed by sentence number.
        Map<Integer, List<CoreLabel>> sentences = new HashMap<>();
        for (int mention : mentions) {
            Mention m = document.predictedMentionsByID.get(mention);
            sentences.putIfAbsent(m.sentNum, m.sentenceWords);
        }

        // Unigram and bigram counts over the collected sentences.
        for (List<CoreLabel> sentence : sentences.values()) {
            for (int i = 0; i < sentence.size(); i++) {
                CoreLabel cl = sentence.get(i);
                if (cl == null) {
                    continue;
                }
                String w = cl.word().toLowerCase();
                wordCounts.incrementCount(w);
                // Count the bigram only when the previous token is present.
                if (i > 0) {
                    CoreLabel clp = sentence.get(i - 1);
                    if (clp == null) {
                        continue;
                    }
                    String wp = clp.word().toLowerCase();
                    wordCounts.incrementCount(wp + "_" + w);
                }
            }
        }
    }
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CorefCluster(edu.stanford.nlp.coref.data.CorefCluster) Mention(edu.stanford.nlp.coref.data.Mention) ArrayList(java.util.ArrayList) List(java.util.List) Pair(edu.stanford.nlp.util.Pair) HashSet(java.util.HashSet)

Example 13 with CorefCluster

Use of edu.stanford.nlp.coref.data.CorefCluster in the CoreNLP project by stanfordnlp.

The following shows the process method of the class MentionDetectionEvaluator.

@Override
public void process(int id, Document document) {
    // Updates running mention-detection counts with this document and logs the
    // cumulative precision/recall/F1 of predicted mentions against gold mentions.
    for (CorefCluster gold : document.goldCorefClusters.values()) {
        for (Mention m : gold.corefMentions) {
            // A gold mention is "detected" if a predicted mention shares its ID.
            if (document.predictedMentionsByID.containsKey(m.mentionID)) {
                correctSystemMentions += 1;
            }
            goldMentions += 1;
        }
    }
    systemMentions += document.predictedMentionsByID.size();
    // Guard the denominators: with no system/gold mentions yet (or both scores 0),
    // the original computation logged NaN; report 0 instead.
    double precision = systemMentions == 0 ? 0 : correctSystemMentions / (double) systemMentions;
    double recall = goldMentions == 0 ? 0 : correctSystemMentions / (double) goldMentions;
    double f1 = precision + recall == 0 ? 0 : 2 * precision * recall / (precision + recall);
    log.info("Precision: " + correctSystemMentions + " / " + systemMentions + " = " + String.format("%.4f", precision));
    log.info("Recall: " + correctSystemMentions + " / " + goldMentions + " = " + String.format("%.4f", recall));
    log.info(String.format("F1: %.4f", f1));
}
Also used : CorefCluster(edu.stanford.nlp.coref.data.CorefCluster) Mention(edu.stanford.nlp.coref.data.Mention)

Aggregations

CorefCluster (edu.stanford.nlp.coref.data.CorefCluster)13 Mention (edu.stanford.nlp.coref.data.Mention)11 ArrayList (java.util.ArrayList)6 CoreLabel (edu.stanford.nlp.ling.CoreLabel)4 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)3 HashMap (java.util.HashMap)3 CorefChain (edu.stanford.nlp.coref.data.CorefChain)2 Document (edu.stanford.nlp.coref.data.Document)2 SpeakerAnnotation (edu.stanford.nlp.ling.CoreAnnotations.SpeakerAnnotation)2 SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge)2 ClassicCounter (edu.stanford.nlp.stats.ClassicCounter)2 Tree (edu.stanford.nlp.trees.Tree)2 Pair (edu.stanford.nlp.util.Pair)2 HashSet (java.util.HashSet)2 List (java.util.List)2 Map (java.util.Map)2 Dataset (edu.stanford.nlp.classify.Dataset)1 GeneralDataset (edu.stanford.nlp.classify.GeneralDataset)1 Dictionaries (edu.stanford.nlp.coref.data.Dictionaries)1 Animacy (edu.stanford.nlp.coref.data.Dictionaries.Animacy)1