Search in sources :

Example 31 with Mention

use of edu.stanford.nlp.coref.data.Mention in project CoreNLP by stanfordnlp.

the class RuleBasedCorefMentionFinder method filterPredictedMentions.

/**
 * Builds the predicted mentions for a document whose mention boundaries are already given.
 *
 * <p>For each index {@code i} of {@code allGoldMentions}, the gold mentions are placed in a fresh
 * list (holding the same {@code Mention} objects), heads are assigned via {@code findHead} against
 * sentence {@code i} of {@code doc}, and {@code setBarePlural} is applied. After all sentences have
 * been processed, {@code removeSpuriousMentions} is run once over the complete result.
 *
 * @param allGoldMentions gold mentions, one list per sentence; entry {@code i} is paired with
 *                        sentence {@code i} of {@code doc}
 * @param doc annotation supplying the sentences through {@code CoreAnnotations.SentencesAnnotation}
 * @param dict dictionaries forwarded to {@code removeSpuriousMentions}
 * @param props properties consulted through {@code CorefProperties.removeNestedMentions}
 * @return one mention list per sentence, in the same order as {@code allGoldMentions}
 */
public List<List<Mention>> filterPredictedMentions(List<List<Mention>> allGoldMentions, Annotation doc, Dictionaries dict, Properties props) {
    List<List<Mention>> predictedMentions = new ArrayList<>();
    // Nothing inside the loop receives doc, so the sentence list can be fetched once up front.
    List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
    for (int i = 0; i < allGoldMentions.size(); i++) {
        CoreMap s = sentences.get(i);
        List<Mention> mentions = new ArrayList<>(allGoldMentions.get(i));
        predictedMentions.add(mentions);
        findHead(s, mentions);
        setBarePlural(mentions);
    }
    removeSpuriousMentions(doc, predictedMentions, dict, CorefProperties.removeNestedMentions(props), lang);
    return predictedMentions;
}
Also used : Mention(edu.stanford.nlp.coref.data.Mention) ArrayList(java.util.ArrayList) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) ArrayList(java.util.ArrayList) List(java.util.List) CoreMap(edu.stanford.nlp.util.CoreMap) IntPair(edu.stanford.nlp.util.IntPair)

Example 32 with Mention

use of edu.stanford.nlp.coref.data.Mention in project CoreNLP by stanfordnlp.

the class SingletonPredictor method generateFeatureVectors.

/**
 * Generate the training features from the CoNLL input file.
 *
 * <p>Documents are pulled one at a time from a {@code DocumentMaker} until it returns
 * {@code null}. For every document two groups of examples are added to the dataset:
 * <ul>
 *   <li>label {@code "1"}: mentions belonging to the document's gold coreference clusters;</li>
 *   <li>label {@code "0"}: predicted mentions whose head word does not equal the head word of
 *       any gold mention.</li>
 * </ul>
 * In both groups, mentions whose head tag starts with {@code "V"} and mentions whose head cannot
 * be found in their enhanced dependency graph are skipped.
 *
 * @param props properties used to construct the {@code Dictionaries} and {@code DocumentMaker}
 * @return Dataset of feature vectors
 * @throws Exception propagated unchanged from the calls made by this method
 */
private static GeneralDataset<String, String> generateFeatureVectors(Properties props) throws Exception {
    GeneralDataset<String, String> trainingData = new Dataset<>();
    Dictionaries dict = new Dictionaries(props);
    DocumentMaker docMaker = new DocumentMaker(props, dict);

    for (Document document = docMaker.nextDoc(); document != null; document = docMaker.nextDoc()) {
        setTokenIndices(document);

        // Positive examples: every usable mention of every gold cluster gets class label 1.
        for (CorefCluster cluster : document.goldCorefClusters.values()) {
            for (Mention mention : cluster.getCorefMentions()) {
                // Skip verbal mentions, then mentions whose head is absent from the dependency graph.
                if (mention.headWord.tag().startsWith("V")
                        || mention.enhancedDependency.getNodeByIndexSafe(mention.headWord.index()) == null) {
                    continue;
                }
                trainingData.add(new BasicDatum<>(mention.getSingletonFeatures(dict), "1"));
            }
        }

        // Negative examples: predicted mentions that do not share a head word with a gold mention
        // get class label 0.
        List<CoreLabel> goldHeads = new ArrayList<>();
        for (Mention goldMention : document.goldMentionsByID.values()) {
            goldHeads.add(goldMention.headWord);
        }
        for (Mention candidate : document.predictedMentionsByID.values()) {
            SemanticGraph graph = candidate.enhancedDependency;
            IndexedWord headNode = graph.getNodeByIndexSafe(candidate.headWord.index());
            boolean headInGraph = headNode != null && graph.vertexSet().contains(headNode);
            if (!headInGraph) {
                continue;
            }
            // Verbal mentions are ignored; a head word found in the gold set means the mention
            // is not a singleton, so it is ignored as well.
            if (candidate.headWord.tag().startsWith("V") || goldHeads.contains(candidate.headWord)) {
                continue;
            }
            trainingData.add(new BasicDatum<>(candidate.getSingletonFeatures(dict), "0"));
        }
    }

    trainingData.summaryStatistics();
    return trainingData;
}
Also used : Dictionaries(edu.stanford.nlp.coref.data.Dictionaries) GeneralDataset(edu.stanford.nlp.classify.GeneralDataset) Dataset(edu.stanford.nlp.classify.Dataset) ArrayList(java.util.ArrayList) Document(edu.stanford.nlp.coref.data.Document) CoreLabel(edu.stanford.nlp.ling.CoreLabel) DocumentMaker(edu.stanford.nlp.coref.data.DocumentMaker) CorefCluster(edu.stanford.nlp.coref.data.CorefCluster) Mention(edu.stanford.nlp.coref.data.Mention) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 33 with Mention

use of edu.stanford.nlp.coref.data.Mention in project CoreNLP by stanfordnlp.

the class CategoricalFeatureExtractor method getPairFeatures.

/**
 * Assembles the feature vector for a pair of predicted mentions.
 *
 * <p>The result is the concatenation, in this order, of: the values returned by
 * {@code pairwiseFeatures} as a column, the encoded sentence distance, the encoded mention
 * distance (minus one), a one-element flag that is 1 when both mentions are in the same sentence
 * and the first mention ends after the second one starts, the per-mention features of each
 * mention, and the encoded genre of the document.
 *
 * @param pair IDs of the two mentions, looked up in {@code document.predictedMentionsByID}
 * @param document document that owns both mentions
 * @param mentionsByHeadIndex mentions grouped by head index, forwarded to {@code getMentionFeatures}
 * @return the concatenated feature matrix
 */
public SimpleMatrix getPairFeatures(Pair<Integer, Integer> pair, Document document, Map<Integer, List<Mention>> mentionsByHeadIndex) {
    Mention first = document.predictedMentionsByID.get(pair.first);
    Mention second = document.predictedMentionsByID.get(pair.second);

    // Copy the integer pairwise features into a single-column matrix.
    List<Integer> pairwiseValues = pairwiseFeatures(document, first, second, dictionaries, conll);
    int valueCount = pairwiseValues.size();
    SimpleMatrix pairwiseColumn = new SimpleMatrix(valueCount, 1);
    for (int row = 0; row < valueCount; row++) {
        pairwiseColumn.set(row, pairwiseValues.get(row));
    }

    boolean overlapping = first.sentNum == second.sentNum && first.endIndex > second.startIndex;

    return NeuralUtils.concatenate(
            pairwiseColumn,
            encodeDistance(second.sentNum - first.sentNum),
            encodeDistance(second.mentionNum - first.mentionNum - 1),
            new SimpleMatrix(new double[][] { { overlapping ? 1 : 0 } }),
            getMentionFeatures(first, document, mentionsByHeadIndex),
            getMentionFeatures(second, document, mentionsByHeadIndex),
            encodeGenre(document));
}
Also used : SimpleMatrix(org.ejml.simple.SimpleMatrix) Mention(edu.stanford.nlp.coref.data.Mention)

Example 34 with Mention

use of edu.stanford.nlp.coref.data.Mention in project CoreNLP by stanfordnlp.

the class EmbeddingExtractor method getDocumentEmbedding.

/**
 * Computes one embedding for the whole document.
 *
 * <p>When the {@code conll} flag is set, the words of every sentence that contains at least one
 * predicted mention are gathered (each sentence once) and passed to
 * {@code getAverageEmbedding}. Otherwise a newly constructed column matrix with
 * {@code staticWordEmbeddings.getEmbeddingSize()} rows is returned.
 *
 * @param document document whose predicted mentions supply the sentences
 * @return the document embedding
 */
public SimpleMatrix getDocumentEmbedding(Document document) {
    if (conll) {
        List<CoreLabel> documentWords = new ArrayList<>();
        Set<Integer> visitedSentences = new HashSet<>();
        for (Mention mention : document.predictedMentionsByID.values()) {
            // Set.add returns true only the first time a sentence number is seen.
            if (visitedSentences.add(mention.sentNum)) {
                documentWords.addAll(mention.sentenceWords);
            }
        }
        return getAverageEmbedding(documentWords);
    }
    return new SimpleMatrix(staticWordEmbeddings.getEmbeddingSize(), 1);
}
Also used : SimpleMatrix(org.ejml.simple.SimpleMatrix) CoreLabel(edu.stanford.nlp.ling.CoreLabel) Mention(edu.stanford.nlp.coref.data.Mention) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet)

Example 35 with Mention

use of edu.stanford.nlp.coref.data.Mention in project CoreNLP by stanfordnlp.

the class NeuralCorefAlgorithm method runCoref.

/**
 * Runs coreference resolution on a document and merges the chosen mention pairs into clusters.
 *
 * <p>Steps:
 * <ol>
 *   <li>Group the sorted mentions by head index.</li>
 *   <li>For every mention, compute its antecedent embedding, anaphor embedding and anaphoricity
 *       score from the model.</li>
 *   <li>For every mention with candidate antecedents (from {@code CorefUtils.heuristicFilter}),
 *       pick the candidate with the highest pairwise score, provided that score is strictly
 *       greater than the mention's anaphoricity score minus {@code 50 * (greedyness - 0.5)};
 *       if one is found, merge the two clusters.</li>
 * </ol>
 *
 * @param document the document to resolve; it is passed to
 *                 {@code CorefUtils.mergeCoreferenceClusters} for every accepted pair
 */
@Override
public void runCoref(Document document) {
    List<Mention> orderedMentions = CorefUtils.getSortedMentions(document);

    // Bucket mentions that share a head index.
    Map<Integer, List<Mention>> mentionsByHeadIndex = new HashMap<>();
    for (Mention mention : orderedMentions) {
        if (!mentionsByHeadIndex.containsKey(mention.headIndex)) {
            mentionsByHeadIndex.put(mention.headIndex, new ArrayList<>());
        }
        mentionsByHeadIndex.get(mention.headIndex).add(mention);
    }

    // Per-mention embeddings and anaphoricity scores, keyed by mention ID.
    SimpleMatrix documentEmbedding = embeddingExtractor.getDocumentEmbedding(document);
    Map<Integer, SimpleMatrix> antecedentById = new HashMap<>();
    Map<Integer, SimpleMatrix> anaphorById = new HashMap<>();
    Counter<Integer> anaphoricity = new ClassicCounter<>();
    for (Mention mention : orderedMentions) {
        int id = mention.mentionID;
        SimpleMatrix embedding = embeddingExtractor.getMentionEmbeddings(mention, documentEmbedding);
        antecedentById.put(id, model.getAntecedentEmbedding(embedding));
        anaphorById.put(id, model.getAnaphorEmbedding(embedding));
        anaphoricity.incrementCount(id, model.getAnaphoricityScore(embedding, featureExtractor.getAnaphoricityFeatures(mention, document, mentionsByHeadIndex)));
    }

    Map<Integer, List<Integer>> candidatesByMention = CorefUtils.heuristicFilter(sortedOrSame(orderedMentions), maxMentionDistance, maxMentionDistanceWithStringMatch);
    // The greedyness adjustment is the same for every mention, so compute it once.
    double greedynessOffset = 50 * (greedyness - 0.5);
    for (Map.Entry<Integer, List<Integer>> entry : candidatesByMention.entrySet()) {
        Integer anaphorKey = entry.getKey();
        // Threshold a candidate must strictly beat to be linked.
        double bestScore = anaphoricity.getCount(anaphorKey) - greedynessOffset;
        int anaphor = anaphorKey;
        SimpleMatrix anaphorEmbedding = anaphorById.get(anaphor);

        Integer bestAntecedent = null;
        for (int candidate : entry.getValue()) {
            double score = model.getPairwiseScore(antecedentById.get(candidate), anaphorEmbedding, featureExtractor.getPairFeatures(new Pair<>(candidate, anaphor), document, mentionsByHeadIndex));
            if (score > bestScore) {
                bestScore = score;
                bestAntecedent = candidate;
            }
        }

        if (bestAntecedent == null) {
            continue;
        }
        CorefUtils.mergeCoreferenceClusters(new Pair<>(bestAntecedent, anaphor), document);
    }
}

/** Returns the given list unchanged; exists only to name the argument passed to the filter. */
private static List<Mention> sortedOrSame(List<Mention> mentions) {
    return mentions;
}
Also used : HashMap(java.util.HashMap) SimpleMatrix(org.ejml.simple.SimpleMatrix) Mention(edu.stanford.nlp.coref.data.Mention) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) ArrayList(java.util.ArrayList) List(java.util.List) HashMap(java.util.HashMap) Map(java.util.Map) Pair(edu.stanford.nlp.util.Pair)

Aggregations

Mention (edu.stanford.nlp.coref.data.Mention)62 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)27 CoreLabel (edu.stanford.nlp.ling.CoreLabel)27 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)21 ArrayList (java.util.ArrayList)20 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)17 CoreMap (edu.stanford.nlp.util.CoreMap)17 List (java.util.List)15 Tree (edu.stanford.nlp.trees.Tree)14 IntPair (edu.stanford.nlp.util.IntPair)14 CorefCluster (edu.stanford.nlp.coref.data.CorefCluster)12 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)10 ClassicCounter (edu.stanford.nlp.stats.ClassicCounter)9 EntityMention (edu.stanford.nlp.ie.machinereading.structure.EntityMention)7 RelationMention (edu.stanford.nlp.ie.machinereading.structure.RelationMention)7 ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint)7 HashMap (java.util.HashMap)7 HashSet (java.util.HashSet)7 SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge)6 Map (java.util.Map)6