Search in sources :

Example 61 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class SurfacePatternFactory method getPatternsAroundTokens.

/**
 * Collects, for every token position of a sentence, the surface patterns built around it.
 *
 * <p>Each index from 0 up to (but excluding) the token count receives exactly one entry.
 * A position whose word is rejected by {@code PatternFactory.doNotUse} is mapped to a
 * fresh empty set; any other position is mapped to whatever {@code getContext} returns
 * for that index.
 *
 * @param sent the sentence whose tokens are scanned
 * @param stopWords phrases handed to the stop-word check and forwarded to {@code getContext}
 * @return a newly created map from token index to the pattern set for that index
 */
public static Map<Integer, Set> getPatternsAroundTokens(DataInstance sent, Set<CandidatePhrase> stopWords) {
    Map<Integer, Set> patternsByIndex = new HashMap<>();
    List<CoreLabel> sentenceTokens = sent.getTokens();
    for (int idx = 0; idx < sentenceTokens.size(); idx++) {
        String word = sentenceTokens.get(idx).word();
        Set<SurfacePattern> patterns;
        if (PatternFactory.doNotUse(word, stopWords)) {
            // No patterns are created around stop words; the index still gets an (empty) entry.
            patterns = new HashSet<SurfacePattern>();
        } else {
            patterns = getContext(sent.getTokens(), idx, stopWords);
        }
        patternsByIndex.put(idx, patterns);
    }
    return patternsByIndex;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel)

Example 62 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class Tree method percolateHeadAnnotations.

/**
 * Annotates this node, and recursively every node below it, with its head word
 * and head tag labels (HeadWordLabelAnnotation / HeadTagLabelAnnotation).
 *
 * <p>Leaves are left untouched. A preterminal takes its single child's label as
 * head word and its own label as head tag. Any other node first processes all
 * of its children, then asks the head finder for its head child and copies or
 * derives the two annotations from that child.
 *
 * <p>The labels involved are cast to CoreLabel, so the tree is expected to be
 * labelled with CoreLabels throughout.
 *
 * @param hf the head finder used to choose the head child of each internal node
 * @throws IllegalArgumentException if this node's label is not a CoreLabel
 * @throws NullPointerException if the head finder returns null for this node
 * @throws AssertionError if the chosen head is an internal (non-leaf,
 *         non-preterminal) node whose label is not a CoreLabel
 */
public void percolateHeadAnnotations(HeadFinder hf) {
    if (!(label() instanceof CoreLabel)) {
        throw new IllegalArgumentException("Expected CoreLabels in the trees");
    }
    final CoreLabel self = (CoreLabel) label();

    // A leaf receives no head annotations.
    if (isLeaf()) {
        return;
    }

    // A preterminal: its child is the word, and its own label serves as the tag.
    if (isPreTerminal()) {
        final CoreLabel wordLabel = (CoreLabel) children()[0].label();
        self.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, wordLabel);
        self.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, self);
        return;
    }

    // Annotate the subtrees first so an internal head already carries its annotations.
    final Tree[] subtrees = children();
    for (int k = 0; k < subtrees.length; k++) {
        subtrees[k].percolateHeadAnnotations(hf);
    }

    final Tree head = hf.determineHead(this);
    if (head == null) {
        throw new NullPointerException("HeadFinder " + hf + " returned null for " + this);
    }

    if (head.isLeaf()) {
        // The head is the word itself; the tag label comes from the head's parent within this tree.
        self.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, (CoreLabel) head.label());
        self.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, (CoreLabel) head.parent(this).label());
        return;
    }

    if (head.isPreTerminal()) {
        // The head is a tag node; the word is its only child.
        self.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, (CoreLabel) head.children()[0].label());
        self.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, (CoreLabel) head.label());
        return;
    }

    // The head is an internal node: copy the annotations it received from the recursive pass.
    if (!(head.label() instanceof CoreLabel)) {
        throw new AssertionError("Horrible bug");
    }
    final CoreLabel headLabel = (CoreLabel) head.label();
    self.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, headLabel.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class));
    self.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, headLabel.get(TreeCoreAnnotations.HeadTagLabelAnnotation.class));
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel)

Example 63 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class Tree method yieldHasWord.

/**
 * Appends the yield of this tree to the given list and returns that same list.
 *
 * <p>When this node is a leaf, exactly one element is added:
 * <ul>
 *   <li>a label that does not implement HasWord is wrapped in a new {@code Word};</li>
 *   <li>a CoreLabel whose {@code word()} is null first has its word set from
 *       {@code value()}, then is added;</li>
 *   <li>any other HasWord label is added as is.</li>
 * </ul>
 * When this node is not a leaf, each child is asked to add its own yield to the list.
 *
 * @param y the list to append to
 * @param <X> the element type of the list; the casts to it are unchecked
 * @return the list {@code y} that was passed in
 */
@SuppressWarnings("unchecked")
public <X extends HasWord> ArrayList<X> yieldHasWord(ArrayList<X> y) {
    if (!isLeaf()) {
        // NOTE(review): children are visited via yield(y), not yieldHasWord(y), so the
        // leaf handling below applies only when this method is invoked directly on a
        // leaf. Confirm against yield(...) whether that is intended.
        for (Tree child : children()) {
            child.yield(y);
        }
        return y;
    }

    final Label leafLabel = label();
    if (!(leafLabel instanceof HasWord)) {
        y.add((X) new Word(leafLabel));
        return y;
    }

    if (leafLabel instanceof CoreLabel) {
        // A CoreLabel may have only its value populated; mirror it into the word field.
        // (The original note suggests this arises with legacy code that passes a
        // StringLabel to e.g. newLeaf() of a LabeledScoredTreeFactory - unverified here.)
        final CoreLabel core = (CoreLabel) leafLabel;
        if (core.word() == null) {
            core.setWord(core.value());
        }
    }
    y.add((X) leafLabel);
    return y;
}
Also used : HasWord(edu.stanford.nlp.ling.HasWord) CoreLabel(edu.stanford.nlp.ling.CoreLabel) TaggedWord(edu.stanford.nlp.ling.TaggedWord) Word(edu.stanford.nlp.ling.Word) LabeledWord(edu.stanford.nlp.ling.LabeledWord) Label(edu.stanford.nlp.ling.Label)

Example 64 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class StanfordCoreNLPITest method test.

/**
 * End-to-end check of a pipeline configured with the tokenize, ssplit, pos, lemma,
 * ner and parse annotators on a short two-sentence text.
 *
 * <p>Verifies the document-level token and sentence counts, the presence of POS,
 * lemma and NER annotations on every token, the presence of a parse tree per
 * sentence, that every dependency-graph vertex has a word equal to its value,
 * and finally spot-checks the pretty-printed and XML renderings.
 *
 * @throws Exception if the pipeline or one of the output routines fails
 */
public void test() throws Exception {
    // configure the annotators exercised by this test
    Properties config = new Properties();
    config.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse");

    // annotate the sample text
    String text = "Dan Ramage is working for\nMicrosoft. He's in Seattle! \n";
    Annotation document = new Annotation(text);
    StanfordCoreNLP pipeline = new StanfordCoreNLP(config);
    pipeline.annotate(document);

    // document-level tokens
    List<CoreLabel> allTokens = document.get(CoreAnnotations.TokensAnnotation.class);
    Assert.assertNotNull(allTokens);
    Assert.assertEquals(12, allTokens.size());

    // document-level sentences
    List<CoreMap> allSentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    Assert.assertNotNull(allSentences);
    Assert.assertEquals(2, allSentences.size());

    // per-sentence annotations: POS, lemma, NER, parse tree and dependencies
    for (CoreMap sent : allSentences) {
        List<CoreLabel> sentTokens = sent.get(CoreAnnotations.TokensAnnotation.class);
        Assert.assertNotNull(sentTokens);
        for (CoreLabel tok : sentTokens) {
            Assert.assertNotNull(tok.get(CoreAnnotations.PartOfSpeechAnnotation.class));
            Assert.assertNotNull(tok.get(CoreAnnotations.LemmaAnnotation.class));
            Assert.assertNotNull(tok.get(CoreAnnotations.NamedEntityTagAnnotation.class));
        }

        // a parse tree must be attached to the sentence
        Assert.assertNotNull(sent.get(TreeCoreAnnotations.TreeAnnotation.class));

        // every vertex of the collapsed dependency graph must expose word(), equal to value()
        SemanticGraph graph = sent.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
        for (IndexedWord node : graph.vertexSet()) {
            Assert.assertNotNull(node.word());
            Assert.assertEquals(node.word(), node.value());
        }
    }

    // pretty-printed output
    StringWriter prettyOut = new StringWriter();
    pipeline.prettyPrint(document, new PrintWriter(prettyOut));
    String pretty = prettyOut.getBuffer().toString();
    String firstTokenRegex = "\\[Text=Dan .*PartOfSpeech=NNP Lemma=Dan NamedEntityTag=PERSON\\]";
    Assert.assertTrue("Tokens are wrong in " + pretty, StringUtils.find(pretty, firstTokenRegex));
    Assert.assertTrue("Parses are wrong in " + pretty, pretty.contains("(NP (PRP He))"));
    Assert.assertTrue("Parses are wrong in " + pretty, pretty.contains("(VP (VBZ 's)"));
    Assert.assertTrue("Sentence header is wrong in " + pretty, pretty.contains("Sentence #1 (7 tokens)"));
    Assert.assertTrue("Dependencies are wrong in " + pretty, pretty.contains("nsubj(working-4, Ramage-2)"));

    // XML output
    ByteArrayOutputStream xmlBytes = new ByteArrayOutputStream();
    pipeline.xmlPrint(document, xmlBytes);
    String xml = new String(xmlBytes.toByteArray(), "UTF-8");
    String secondTokenRegex = "<token id=\"2\">\\s*"
        + "<word>Ramage</word>\\s*"
        + "<lemma>Ramage</lemma>\\s*"
        + "<CharacterOffsetBegin>4</CharacterOffsetBegin>\\s*"
        + "<CharacterOffsetEnd>10</CharacterOffsetEnd>\\s*"
        + "<POS>NNP</POS>\\s*"
        + "<NER>PERSON</NER>";
    String compoundDepRegex = "<dep type=\"compound\">\\s*<governor idx=\"2\">"
        + "Ramage</governor>\\s*<dependent idx=\"1\">Dan</dependent>\\s*</dep>";
    Assert.assertTrue("XML header is wrong in " + xml, xml.startsWith("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"));
    Assert.assertTrue("XML root is wrong in " + xml, xml.contains("<?xml-stylesheet href=\"CoreNLP-to-HTML.xsl\" type=\"text/xsl\"?>"));
    Assert.assertTrue("XML word info is wrong in " + xml, StringUtils.find(xml, secondTokenRegex));
    Assert.assertTrue("XML dependencies are wrong in " + xml, StringUtils.find(xml, compoundDepRegex));
}
Also used : SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) Properties(java.util.Properties) CoreLabel(edu.stanford.nlp.ling.CoreLabel) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) IndexedWord(edu.stanford.nlp.ling.IndexedWord) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 65 with CoreLabel

use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.

the class StanfordCoreNLPITest method testSentenceNewlinesThree.

/**
 * Checks that a text containing a newline in the middle of its only sentence is
 * annotated as a single sentence of 11 tokens when the pipeline runs the
 * tokenize, ssplit and pos annotators.
 */
public void testSentenceNewlinesThree() {
    // only tokenization, sentence splitting and POS tagging are enabled here
    Properties config = new Properties();
    config.setProperty("annotators", "tokenize,ssplit,pos");

    // annotate the sample text
    String text = "At least a few female committee members\nare from Scandinavia.\n";
    Annotation document = new Annotation(text);
    StanfordCoreNLP pipeline = new StanfordCoreNLP(config);
    pipeline.annotate(document);

    // document-level tokens
    List<CoreLabel> allTokens = document.get(CoreAnnotations.TokensAnnotation.class);
    Assert.assertNotNull(allTokens);
    Assert.assertEquals("Wrong number of tokens: " + allTokens, 11, allTokens.size());

    // document-level sentences: the embedded newline must not split the sentence
    List<CoreMap> allSentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    Assert.assertNotNull(allSentences);
    Assert.assertEquals("Wrong number of sentences", 1, allSentences.size());

    // the single sentence must hold all 11 tokens
    CoreMap onlySentence = allSentences.get(0);
    List<CoreLabel> sentenceTokens = onlySentence.get(CoreAnnotations.TokensAnnotation.class);
    Assert.assertNotNull(sentenceTokens);
    Assert.assertEquals("Wrong number of sentTokens: " + sentenceTokens, 11, sentenceTokens.size());
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) Properties(java.util.Properties) CoreMap(edu.stanford.nlp.util.CoreMap)

Aggregations

CoreLabel (edu.stanford.nlp.ling.CoreLabel)533 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)310 CoreMap (edu.stanford.nlp.util.CoreMap)102 ArrayList (java.util.ArrayList)101 Tree (edu.stanford.nlp.trees.Tree)98 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)96 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)63 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)53 ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint)41 IndexedWord (edu.stanford.nlp.ling.IndexedWord)38 List (java.util.List)33 Annotation (edu.stanford.nlp.pipeline.Annotation)31 Mention (edu.stanford.nlp.coref.data.Mention)29 Label (edu.stanford.nlp.ling.Label)28 ClassicCounter (edu.stanford.nlp.stats.ClassicCounter)26 Properties (java.util.Properties)24 CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)21 CoreAnnotation (edu.stanford.nlp.ling.CoreAnnotation)19 SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge)18 StringReader (java.io.StringReader)18