Use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
The class SurfacePatternFactory, method getPatternsAroundTokens.
public static Map<Integer, Set> getPatternsAroundTokens(DataInstance sent, Set<CandidatePhrase> stopWords) {
  Map<Integer, Set> p = new HashMap<>();
  List<CoreLabel> tokens = sent.getTokens();
  for (int i = 0; i < tokens.size(); i++) {
    // p.put(i, new Triple<Set<Integer>, Set<Integer>, Set<Integer>>(
    //     new HashSet<Integer>(), new HashSet<Integer>(), new HashSet<Integer>()));
    p.put(i, new HashSet<SurfacePattern>());
    CoreLabel token = tokens.get(i);
    // do not create patterns around stop words!
    if (PatternFactory.doNotUse(token.word(), stopWords)) {
      continue;
    }
    Set<SurfacePattern> pat = getContext(sent.getTokens(), i, stopWords);
    p.put(i, pat);
  }
  return p;
}
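For context, a minimal sketch of how the returned map might be consumed. Building a DataInstance by hand is beyond this snippet, so the helper below (dumpPatterns is a hypothetical name) assumes a sent and a stopWords set that already come from an existing pattern-learning setup.

import java.util.Map;
import java.util.Set;

import edu.stanford.nlp.patterns.CandidatePhrase;
import edu.stanford.nlp.patterns.DataInstance;
import edu.stanford.nlp.patterns.surface.SurfacePattern;
import edu.stanford.nlp.patterns.surface.SurfacePatternFactory;

public class PatternDumpSketch {
  // Hypothetical helper: print every surface pattern generated around each token index.
  static void dumpPatterns(DataInstance sent, Set<CandidatePhrase> stopWords) {
    Map<Integer, Set> patternsByToken =
        SurfacePatternFactory.getPatternsAroundTokens(sent, stopWords);
    for (Map.Entry<Integer, Set> entry : patternsByToken.entrySet()) {
      System.out.println("token " + entry.getKey() + ":");
      // Stop-word positions keep the empty set that was put in before the check.
      for (Object pattern : entry.getValue()) {
        System.out.println("  " + (SurfacePattern) pattern);
      }
    }
  }
}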
Use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
The class Tree, method percolateHeadAnnotations.
/**
* Finds the head words of each tree and assigns
* HeadWordLabelAnnotation on each node pointing to the correct
* CoreLabel. This relies on the nodes being CoreLabels, so it
* throws an IllegalArgumentException if this is ever not true.
*/
public void percolateHeadAnnotations(HeadFinder hf) {
  if (!(label() instanceof CoreLabel)) {
    throw new IllegalArgumentException("Expected CoreLabels in the trees");
  }
  CoreLabel nodeLabel = (CoreLabel) label();
  if (isLeaf()) {
    return;
  }
  if (isPreTerminal()) {
    nodeLabel.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, (CoreLabel) children()[0].label());
    nodeLabel.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, nodeLabel);
    return;
  }
  for (Tree kid : children()) {
    kid.percolateHeadAnnotations(hf);
  }
  final Tree head = hf.determineHead(this);
  if (head == null) {
    throw new NullPointerException("HeadFinder " + hf + " returned null for " + this);
  } else if (head.isLeaf()) {
    nodeLabel.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, (CoreLabel) head.label());
    nodeLabel.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, (CoreLabel) head.parent(this).label());
  } else if (head.isPreTerminal()) {
    nodeLabel.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, (CoreLabel) head.children()[0].label());
    nodeLabel.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, (CoreLabel) head.label());
  } else {
    if (!(head.label() instanceof CoreLabel)) {
      throw new AssertionError("Horrible bug");
    }
    CoreLabel headLabel = (CoreLabel) head.label();
    nodeLabel.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, headLabel.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class));
    nodeLabel.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, headLabel.get(TreeCoreAnnotations.HeadTagLabelAnnotation.class));
  }
}
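A minimal usage sketch, assuming trees whose node labels are CoreLabels (here guaranteed by reading with a LabeledScoredTreeFactory built on CoreLabel.factory(), so the instanceof check above passes); the class name HeadAnnotationDemo and the sample tree are illustrative only.

import java.io.StringReader;

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.trees.CollinsHeadFinder;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.LabeledScoredTreeFactory;
import edu.stanford.nlp.trees.PennTreeReader;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;

public class HeadAnnotationDemo {
  public static void main(String[] args) throws Exception {
    // Read a bracketed tree whose node labels are guaranteed to be CoreLabels.
    Tree tree = new PennTreeReader(
        new StringReader("(ROOT (S (NP (NNP Dan) (NNP Ramage)) (VP (VBZ works))))"),
        new LabeledScoredTreeFactory(CoreLabel.factory())).readTree();

    HeadFinder hf = new CollinsHeadFinder();
    tree.percolateHeadAnnotations(hf);

    // Every non-leaf node now points at the CoreLabel of its head word.
    for (Tree node : tree) {
      if (node.isLeaf()) {
        continue;
      }
      CoreLabel head = ((CoreLabel) node.label())
          .get(TreeCoreAnnotations.HeadWordLabelAnnotation.class);
      System.out.println(node.value() + " -> " + (head == null ? "?" : head.value()));
    }
  }
}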
Use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
The class Tree, method yieldHasWord.
@SuppressWarnings("unchecked")
public <X extends HasWord> ArrayList<X> yieldHasWord(ArrayList<X> y) {
  if (isLeaf()) {
    Label lab = label();
    // Trees built via LabeledScoredTreeFactory from e.g. a StringLabel can end up with
    // CoreLabel leaves whose word() is null; fall back to value() in that case.
    if (lab instanceof HasWord) {
      if (lab instanceof CoreLabel) {
        CoreLabel cl = (CoreLabel) lab;
        if (cl.word() == null) {
          cl.setWord(cl.value());
        }
        y.add((X) cl);
      } else {
        y.add((X) lab);
      }
    } else {
      y.add((X) new Word(lab));
    }
  } else {
    Tree[] kids = children();
    for (Tree kid : kids) {
      // recurse with the HasWord-aware yield so children get the same treatment
      kid.yieldHasWord(y);
    }
  }
  return y;
}
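A short usage sketch, assuming the leaf labels produced by Tree.valueOf are CoreLabels (the default in recent CoreNLP versions); a leaf label that did not implement HasWord would instead be wrapped as a Word, per the last branch above.

import java.util.ArrayList;

import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.trees.Tree;

public class YieldDemo {
  public static void main(String[] args) {
    Tree tree = Tree.valueOf("(S (NP (NNP Dan)) (VP (VBZ works)))");
    // Collect the leaves as HasWord objects; word() is backfilled from value() when null.
    ArrayList<HasWord> words = tree.yieldHasWord(new ArrayList<HasWord>());
    for (HasWord w : words) {
      System.out.println(w.word());
    }
  }
}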
Use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
The class StanfordCoreNLPITest, method test.
public void test() throws Exception {
  // create a properties that enables all the annotators
  Properties props = new Properties();
  props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse");
  // run an annotation through the pipeline
  String text = "Dan Ramage is working for\nMicrosoft. He's in Seattle! \n";
  Annotation document = new Annotation(text);
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  pipeline.annotate(document);
  // check that tokens are present
  List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class);
  Assert.assertNotNull(tokens);
  Assert.assertEquals(12, tokens.size());
  // check that sentences are present
  List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
  Assert.assertNotNull(sentences);
  Assert.assertEquals(2, sentences.size());
  // check that pos, lemma, ner and parses are present
  for (CoreMap sentence : sentences) {
    List<CoreLabel> sentenceTokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
    Assert.assertNotNull(sentenceTokens);
    for (CoreLabel token : sentenceTokens) {
      Assert.assertNotNull(token.get(CoreAnnotations.PartOfSpeechAnnotation.class));
      Assert.assertNotNull(token.get(CoreAnnotations.LemmaAnnotation.class));
      Assert.assertNotNull(token.get(CoreAnnotations.NamedEntityTagAnnotation.class));
    }
    // check for parse tree
    Assert.assertNotNull(sentence.get(TreeCoreAnnotations.TreeAnnotation.class));
    // check that dependency graph labels have word()
    SemanticGraph deps = sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
    for (IndexedWord vertex : deps.vertexSet()) {
      Assert.assertNotNull(vertex.word());
      Assert.assertEquals(vertex.word(), vertex.value());
    }
  }
  // test pretty print
  StringWriter stringWriter = new StringWriter();
  pipeline.prettyPrint(document, new PrintWriter(stringWriter));
  String result = stringWriter.getBuffer().toString();
  Assert.assertTrue("Tokens are wrong in " + result, StringUtils.find(result, "\\[Text=Dan .*PartOfSpeech=NNP Lemma=Dan NamedEntityTag=PERSON\\]"));
  Assert.assertTrue("Parses are wrong in " + result, result.contains("(NP (PRP He))"));
  Assert.assertTrue("Parses are wrong in " + result, result.contains("(VP (VBZ 's)"));
  Assert.assertTrue("Sentence header is wrong in " + result, result.contains("Sentence #1 (7 tokens)"));
  Assert.assertTrue("Dependencies are wrong in " + result, result.contains("nsubj(working-4, Ramage-2)"));
  // test XML
  ByteArrayOutputStream os = new ByteArrayOutputStream();
  pipeline.xmlPrint(document, os);
  result = new String(os.toByteArray(), "UTF-8");
  Assert.assertTrue("XML header is wrong in " + result, result.startsWith("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"));
  Assert.assertTrue("XML stylesheet is wrong in " + result, result.contains("<?xml-stylesheet href=\"CoreNLP-to-HTML.xsl\" type=\"text/xsl\"?>"));
  Assert.assertTrue("XML word info is wrong in " + result, StringUtils.find(result, "<token id=\"2\">\\s*<word>Ramage</word>\\s*<lemma>Ramage</lemma>\\s*<CharacterOffsetBegin>4</CharacterOffsetBegin>\\s*<CharacterOffsetEnd>10</CharacterOffsetEnd>\\s*<POS>NNP</POS>\\s*<NER>PERSON</NER>"));
  Assert.assertTrue("XML dependencies are wrong in " + result, StringUtils.find(result, "<dep type=\"compound\">\\s*<governor idx=\"2\">Ramage</governor>\\s*<dependent idx=\"1\">Dan</dependent>\\s*</dep>"));
}
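The test above checks the collapsed dependencies only through string matching of the pretty-printed output. As a complementary sketch (the class name DependencyDemo is illustrative), the same SemanticGraph can be walked edge by edge:

import java.util.Properties;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;
import edu.stanford.nlp.util.CoreMap;

public class DependencyDemo {
  public static void main(String[] args) {
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos,lemma,ner,parse");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    Annotation document = new Annotation("Dan Ramage is working for Microsoft.");
    pipeline.annotate(document);

    for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
      SemanticGraph deps =
          sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
      // Print each dependency as relation(governor-index, dependent-index).
      for (SemanticGraphEdge edge : deps.edgeIterable()) {
        System.out.printf("%s(%s-%d, %s-%d)%n",
            edge.getRelation(),
            edge.getGovernor().word(), edge.getGovernor().index(),
            edge.getDependent().word(), edge.getDependent().index());
      }
    }
  }
}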
Use of edu.stanford.nlp.ling.CoreLabel in project CoreNLP by stanfordnlp.
The class StanfordCoreNLPITest, method testSentenceNewlinesThree.
public void testSentenceNewlinesThree() {
  // create a properties that enables the tokenize, ssplit and pos annotators
  Properties props = new Properties();
  props.setProperty("annotators", "tokenize,ssplit,pos");
  // run an annotation through the pipeline
  String text = "At least a few female committee members\nare from Scandinavia.\n";
  Annotation document = new Annotation(text);
  StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
  pipeline.annotate(document);
  // check that tokens are present
  List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class);
  Assert.assertNotNull(tokens);
  Assert.assertEquals("Wrong number of tokens: " + tokens, 11, tokens.size());
  // check that sentences are present
  List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
  Assert.assertNotNull(sentences);
  Assert.assertEquals("Wrong number of sentences", 1, sentences.size());
  CoreMap firstSentence = sentences.get(0);
  List<CoreLabel> sentTokens = firstSentence.get(CoreAnnotations.TokensAnnotation.class);
  Assert.assertNotNull(sentTokens);
  Assert.assertEquals("Wrong number of sentTokens: " + sentTokens, 11, sentTokens.size());
}
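The default splitter does not treat the single newline inside this text as a sentence boundary, which is why one sentence is expected. As a hedged sketch, setting the standard ssplit.newlineIsSentenceBreak property to always should make the same text split at the newline into two sentences:

import java.util.Properties;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;

public class NewlineSplitSketch {
  public static void main(String[] args) {
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos");
    // Standard ssplit option; accepted values are "always", "never" and "two".
    props.setProperty("ssplit.newlineIsSentenceBreak", "always");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    Annotation document =
        new Annotation("At least a few female committee members\nare from Scandinavia.\n");
    pipeline.annotate(document);
    // With "always", the newline should now act as a sentence boundary (expected: 2).
    System.out.println(document.get(CoreAnnotations.SentencesAnnotation.class).size());
  }
}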