Search in sources :

Example 36 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class CoNLLDocumentReader method extractGoldMentions.

/**
 * Extracts the gold mentions (mention span, mention ID, cluster ID) of a CoNLL document,
 * grouped by sentence.
 *
 * <p>The outer list has one entry per sentence of the document annotation. Every mention
 * found in the document's coref chain map is converted to a {@link Mention} whose
 * start/end indices are relative to the first token of its sentence.
 *
 * @param conllDoc document providing the sentence annotation and the coref chain map
 * @return one list of gold mentions per sentence, in sentence order
 * @throws NumberFormatException if a coref chain key is not a parsable integer
 */
public List<List<Mention>> extractGoldMentions(CoNLLDocument conllDoc) {
    List<CoreMap> sentenceMaps = conllDoc.getAnnotation().get(CoreAnnotations.SentencesAnnotation.class);
    List<List<Mention>> goldBySentence = new ArrayList<>();
    CollectionValuedMap<String, CoreMap> chains = conllDoc.getCorefChainMap();

    // Pre-create one bucket per sentence so mentions can be filed by sentence index.
    while (goldBySentence.size() < sentenceMaps.size()) {
        goldBySentence.add(new ArrayList<>());
    }

    for (String chainKey : chains.keySet()) {
        // The chain key doubles as the gold cluster id.
        int clusterId = Integer.parseInt(chainKey);
        for (CoreMap mentionMap : chains.get(chainKey)) {
            Mention gold = new Mention();
            gold.goldCorefClusterID = clusterId;

            int sentenceIdx = mentionMap.get(CoreAnnotations.SentenceIndexAnnotation.class);
            CoreMap owningSentence = sentenceMaps.get(sentenceIdx);

            // Mention token offsets are converted to sentence-relative offsets.
            int mentionBegin = mentionMap.get(CoreAnnotations.TokenBeginAnnotation.class);
            int sentenceBegin = owningSentence.get(CoreAnnotations.TokenBeginAnnotation.class);
            int mentionEnd = mentionMap.get(CoreAnnotations.TokenEndAnnotation.class);
            gold.startIndex = mentionBegin - sentenceBegin;
            gold.endIndex = mentionEnd - sentenceBegin;
            gold.originalSpan = mentionMap.get(CoreAnnotations.TokensAnnotation.class);

            goldBySentence.get(sentenceIdx).add(gold);
        }
    }
    return goldBySentence;
}
Also used : Mention(edu.stanford.nlp.coref.data.Mention) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) ArrayList(java.util.ArrayList) List(java.util.List) ArrayList(java.util.ArrayList) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 37 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class CoNLLDocumentReader method main.

/**
 * Reads and dumps output, mainly for debugging.
 *
 * <p>Recognized properties (parsed from {@code args}):
 * <ul>
 *   <li>{@code i} - input path handed to the {@code CoNLLDocumentReader} (required)</li>
 *   <li>{@code o} - output file written in tab-separated form (required)</li>
 *   <li>{@code ext} - optional suffix; when set, the reader options are built from the
 *       pattern {@code ".*" + ext + "$"}</li>
 * </ul>
 * If either required property is missing, usage is printed and the JVM exits with -1.
 * Document, sentence and token totals plus the corpus statistics are printed to stdout.
 *
 * @param args command-line arguments
 * @throws IOException if the output file cannot be opened or reading fails
 */
public static void main(String[] args) throws IOException {
    Properties props = StringUtils.argsToProperties(args);
    boolean debug = false;
    String filepath = props.getProperty("i");
    String outfile = props.getProperty("o");
    if (filepath == null || outfile == null) {
        usage();
        System.exit(-1);
    }
    CorpusStats corpusStats = new CorpusStats();
    int docCnt = 0;
    int sentCnt = 0;
    int tokenCnt = 0;
    // try-with-resources guarantees the output file is flushed and closed even when
    // reading a document or writing a sentence throws.
    try (PrintWriter fout = new PrintWriter(outfile)) {
        logger.info("Writing to " + outfile);
        String ext = props.getProperty("ext");
        Options options;
        if (ext != null) {
            options = new Options(".*" + ext + "$");
        } else {
            options = new Options();
        }
        options.annotateTreeCoref = true;
        options.annotateTreeNer = true;
        CoNLLDocumentReader reader = new CoNLLDocumentReader(filepath, options);
        for (CoNLLDocument doc; (doc = reader.getNextDocument()) != null; ) {
            corpusStats.process(doc);
            docCnt++;
            Annotation anno = doc.getAnnotation();
            if (debug) {
                System.out.println("Document " + docCnt + ": " + anno.get(CoreAnnotations.DocIDAnnotation.class));
            }
            for (CoreMap sentence : anno.get(CoreAnnotations.SentencesAnnotation.class)) {
                if (debug) {
                    System.out.println("Parse: " + sentence.get(TreeCoreAnnotations.TreeAnnotation.class));
                    System.out.println("Sentence Tokens: " + StringUtils.join(sentence.get(CoreAnnotations.TokensAnnotation.class), ","));
                }
                writeTabSep(fout, sentence, doc.corefChainMap);
                sentCnt++;
                tokenCnt += sentence.get(CoreAnnotations.TokensAnnotation.class).size();
            }
            if (debug) {
                for (CoreMap ner : doc.nerChunks) {
                    System.out.println("NER Chunk: " + ner);
                }
                for (String id : doc.corefChainMap.keySet()) {
                    System.out.println("Coref: " + id + " = " + StringUtils.join(doc.corefChainMap.get(id), ";"));
                }
            }
        }
    }
    System.out.println("Total document count: " + docCnt);
    System.out.println("Total sentence count: " + sentCnt);
    System.out.println("Total token count: " + tokenCnt);
    System.out.println(corpusStats);
}
Also used : Properties(java.util.Properties) SentencesAnnotation(edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation) CoreAnnotation(edu.stanford.nlp.ling.CoreAnnotation) Annotation(edu.stanford.nlp.pipeline.Annotation) TreeAnnotation(edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations) TreeAnnotation(edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation) CoreMap(edu.stanford.nlp.util.CoreMap) PrintWriter(java.io.PrintWriter)

Example 38 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class HybridCorefPrinter method printMentionDetectionLog.

/**
 * Builds a textual mention-detection report for a document.
 *
 * <p>For every sentence the report contains: the sentence rendered with gold mentions and
 * with predicted mentions, one detail line per predicted mention (flagged {@code SPURIOUS}
 * when it has no twin), one detail entry per gold mention that has no twin, the parse tree
 * when one is annotated, and the basic-dependencies annotation.
 *
 * @param document document whose annotation, predicted mentions and gold mentions are read
 * @return the complete report text
 */
public static String printMentionDetectionLog(Document document) {
    List<CoreMap> sentenceList = document.annotation.get(SentencesAnnotation.class);
    StringBuilder report = new StringBuilder();
    report.append("\nERROR START-----------------------------------------------------------------------\n");

    for (int sentIdx = 0; sentIdx < sentenceList.size(); sentIdx++) {
        // Header: the same sentence rendered once with gold and once with predicted mentions.
        String goldLine = HybridCorefPrinter.sentenceStringWithMention(sentIdx, document, true, false);
        report.append("\nSENT ").append(sentIdx).append(" GOLD   : ").append(goldLine).append("\n");
        String predictedLine = HybridCorefPrinter.sentenceStringWithMention(sentIdx, document, false, false);
        report.append("SENT ").append(sentIdx).append(" PREDICT: ").append(predictedLine).append("\n");

        // Every predicted mention is listed; those without a twin are flagged.
        for (Mention predicted : document.predictedMentions.get(sentIdx)) {
            report.append("\n");
            if (!predicted.hasTwin) {
                report.append("\tSPURIOUS");
            }
            report.append("\tmention: ").append(predicted.spanToString());
            report.append("\t\t\theadword: ").append(predicted.headString);
            report.append("\tPOS: ").append(predicted.headWord.tag());
            report.append("\tmentiontype: ").append(predicted.mentionType);
            report.append("\tnumber: ").append(predicted.number);
            report.append("\tgender: ").append(predicted.gender);
            report.append("\tanimacy: ").append(predicted.animacy);
            report.append("\tperson: ").append(predicted.person);
            report.append("\tNE: ").append(predicted.nerString);
        }
        report.append("\n");

        // Only gold mentions without a twin are reported.
        for (Mention missed : document.goldMentions.get(sentIdx)) {
            if (missed.hasTwin) {
                continue;
            }
            report.append("\tmissed gold: ").append(missed.spanToString());
            report.append("\tPOS: ").append(missed.headWord.tag());
            report.append("\tmentiontype: ").append(missed.mentionType);
            report.append("\theadword: ").append(missed.headString);
            report.append("\tnumber: ").append(missed.number);
            report.append("\tgender: ").append(missed.gender);
            report.append("\tanimacy: ").append(missed.animacy);
            report.append("\tperson: ").append(missed.person);
            report.append("\tNE: ").append(missed.nerString);
            report.append("\n");

            // Show the word at the mention's end index, when the sentence has one.
            if (missed.sentenceWords != null && missed.sentenceWords.size() > missed.endIndex) {
                report.append("\tnextword: ").append(missed.sentenceWords.get(missed.endIndex));
                report.append("\t").append(missed.sentenceWords.get(missed.endIndex).tag());
                report.append("\n");
            }

            if (missed.contextParseTree != null) {
                report.append(missed.contextParseTree.pennString());
            }
            report.append("\n\n");
        }

        if (sentenceList.get(sentIdx).get(TreeAnnotation.class) != null) {
            report.append("\n\tparse: \n");
            report.append(sentenceList.get(sentIdx).get(TreeAnnotation.class).pennString());
        }
        report.append("\n\tcollapsedDependency: \n");
        report.append(sentenceList.get(sentIdx).get(BasicDependenciesAnnotation.class));
    }

    report.append("ERROR END -----------------------------------------------------------------------\n");
    return report.toString();
}
Also used : Mention(edu.stanford.nlp.coref.data.Mention) BasicDependenciesAnnotation(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations.BasicDependenciesAnnotation) TreeAnnotation(edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 39 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class HybridCorefPrinter method sentenceStringWithMention.

/**
 * Renders sentence {@code i} of a document as a single line with mention spans bracketed.
 *
 * <p>The line starts with the speaker and utterance annotations of the sentence's first
 * token. If the speaker is a decimal integer, the span of the predicted mention with that
 * id is appended to it. Each mention start is rendered as {@code [} before the token at
 * its start index, and each mention end as {@code ]_<id>} before the token at its end
 * index (or at the very end of the line when the mention ends at the sentence length).
 *
 * @param i index of the sentence to render
 * @param document document providing the sentence annotation and the mention lists
 * @param gold {@code true} to render gold mentions, {@code false} for predicted mentions
 * @param printClusterID {@code true} to label closing brackets with the cluster id (gold or
 *        predicted, depending on {@code gold}); {@code false} to label them with the mention id
 * @return the rendered sentence line
 */
public static String sentenceStringWithMention(int i, Document document, boolean gold, boolean printClusterID) {
    StringBuilder sentStr = new StringBuilder();
    List<CoreMap> sentences = document.annotation.get(CoreAnnotations.SentencesAnnotation.class);
    List<List<Mention>> allMentions = gold ? document.goldMentions : document.predictedMentions;
    CoreMap sentence = sentences.get(i);
    List<Mention> mentions = allMentions.get(i);
    List<CoreLabel> t = sentence.get(CoreAnnotations.TokensAnnotation.class);

    String speaker = t.get(0).get(SpeakerAnnotation.class);
    if (NumberMatchingRegex.isDecimalInteger(speaker)) {
        speaker = speaker + ": " + document.predictedMentionsByID.get(Integer.parseInt(speaker)).spanToString();
    }
    sentStr.append("\tspeaker: ").append(speaker)
            .append(" (").append(t.get(0).get(UtteranceAnnotation.class)).append(") ");

    // Token indices are 1-based; place each word at its 0-based slot.
    String[] tokens = new String[t.size()];
    for (CoreLabel c : t) {
        tokens[c.index() - 1] = c.word();
    }

    // Number of mentions opening at each token index, and the mentions closing at each index.
    Counter<Integer> startCounts = new ClassicCounter<>();
    Map<Integer, Deque<Mention>> endMentions = Generics.newHashMap();
    for (Mention m : mentions) {
        startCounts.incrementCount(m.startIndex);
        // push() puts later mentions first, so they are closed in reverse insertion order.
        endMentions.computeIfAbsent(m.endIndex, k -> new ArrayDeque<>()).push(m);
    }

    for (int j = 0; j < tokens.length; j++) {
        // End indices are exclusive: mentions ending at j close before token j is printed.
        appendMentionClosers(sentStr, endMentions.get(j), gold, printClusterID);
        double opening = startCounts.getCount(j);
        for (int k = 0; k < opening; k++) {
            if (sentStr.length() > 0 && sentStr.charAt(sentStr.length() - 1) != '[') {
                sentStr.append(" ");
            }
            sentStr.append("[");
        }
        if (sentStr.length() > 0 && sentStr.charAt(sentStr.length() - 1) != '[') {
            sentStr.append(" ");
        }
        sentStr.append(tokens[j]);
    }
    // Mentions that extend to the end of the sentence close after the last token.
    appendMentionClosers(sentStr, endMentions.get(tokens.length), gold, printClusterID);
    return sentStr.toString();
}

/** Appends {@code ]_<id>} for each mention in {@code closing}; does nothing when it is null. */
private static void appendMentionClosers(StringBuilder sentStr, Deque<Mention> closing, boolean gold, boolean printClusterID) {
    if (closing == null) {
        return;
    }
    for (Mention m : closing) {
        int clusterId = gold ? m.goldCorefClusterID : m.corefClusterID;
        int id = printClusterID ? clusterId : m.mentionID;
        sentStr.append("]_").append(id);
    }
}
Also used : UtteranceAnnotation(edu.stanford.nlp.ling.CoreAnnotations.UtteranceAnnotation) Deque(java.util.Deque) ArrayDeque(java.util.ArrayDeque) ArrayDeque(java.util.ArrayDeque) CoreLabel(edu.stanford.nlp.ling.CoreLabel) Mention(edu.stanford.nlp.coref.data.Mention) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) ArrayList(java.util.ArrayList) List(java.util.List) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 40 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class CorefMentionFinder method parse.

/**
 * Parses one tokenized sentence under the given parser constraints.
 *
 * <p>The tokens and constraints are wrapped in a sentence map, which is placed as the only
 * sentence of a fresh document annotation. That document is handed to the annotator
 * returned by {@code getParser()}, and the tree annotation of the document's first
 * sentence is returned.
 *
 * @param tokens tokens of the sentence to parse
 * @param constraints constraints stored on the sentence under
 *        {@code ParserAnnotations.ConstraintAnnotation}
 * @return the value of the first sentence's {@code TreeAnnotation} after annotation
 */
private Tree parse(List<CoreLabel> tokens, List<ParserConstraint> constraints) {
    // Sentence-level map carrying the parser input.
    CoreMap sentenceMap = new Annotation("");
    sentenceMap.set(CoreAnnotations.TokensAnnotation.class, tokens);
    sentenceMap.set(ParserAnnotations.ConstraintAnnotation.class, constraints);

    // Document wrapper holding exactly that one sentence.
    List<CoreMap> singleSentence = new ArrayList<>(1);
    singleSentence.add(sentenceMap);
    Annotation wrapperDoc = new Annotation("");
    wrapperDoc.set(CoreAnnotations.SentencesAnnotation.class, singleSentence);

    getParser().annotate(wrapperDoc);

    // Re-read the sentence list from the document rather than reusing the local list.
    CoreMap parsedSentence = wrapperDoc.get(CoreAnnotations.SentencesAnnotation.class).get(0);
    return parsedSentence.get(TreeCoreAnnotations.TreeAnnotation.class);
}
Also used : ParserAnnotations(edu.stanford.nlp.parser.common.ParserAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) ArrayList(java.util.ArrayList) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap) TreeAnnotation(edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation) TokensAnnotation(edu.stanford.nlp.ling.CoreAnnotations.TokensAnnotation) Annotation(edu.stanford.nlp.pipeline.Annotation)

Aggregations

CoreMap (edu.stanford.nlp.util.CoreMap): 253
CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations): 172
CoreLabel (edu.stanford.nlp.ling.CoreLabel): 102
SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations): 61
TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations): 53
ArrayList (java.util.ArrayList): 53
Annotation (edu.stanford.nlp.pipeline.Annotation): 49
Tree (edu.stanford.nlp.trees.Tree): 28
Properties (java.util.Properties): 23
StanfordCoreNLP (edu.stanford.nlp.pipeline.StanfordCoreNLP): 20
SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph): 20
List (java.util.List): 20
Mention (edu.stanford.nlp.coref.data.Mention): 17
ArrayCoreMap (edu.stanford.nlp.util.ArrayCoreMap): 17
CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations): 13
ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint): 12
SentencesAnnotation (edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation): 11
MachineReadingAnnotations (edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations): 9
IndexedWord (edu.stanford.nlp.ling.IndexedWord): 9
IntPair (edu.stanford.nlp.util.IntPair): 9