Search in sources :

Example 16 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class TokenSequenceMatcherITest method testTokenSequenceMatcherNonOverlapping.

public void testTokenSequenceMatcherNonOverlapping() throws IOException {
    String text = "DATE1 PROD1 PRICE1 PROD2 PRICE2 PROD3 PRICE3 DATE2 PROD4 PRICE4 PROD5 PRICE5 PROD6 PRICE6";
    CoreMap doc = createDocument(text);
    TokenSequencePattern p = TokenSequencePattern.compile("(/DATE.*/) ((/PROD.*/ /PRICE.*/)+)");
    TokenSequenceMatcher m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    boolean match = m.find();
    assertTrue(match);
    assertEquals(3, m.groupCount());
    assertEquals("DATE1", m.group(1));
    assertEquals("PROD1 PRICE1 PROD2 PRICE2 PROD3 PRICE3", m.group(2));
    assertEquals("PROD3 PRICE3", m.group(3));
    match = m.find();
    assertTrue(match);
    assertEquals(3, m.groupCount());
    assertEquals("DATE2", m.group(1));
    assertEquals("PROD4 PRICE4 PROD5 PRICE5 PROD6 PRICE6", m.group(2));
    assertEquals("PROD6 PRICE6", m.group(3));
    match = m.find();
    assertFalse(match);
}
Also used : CoreMap(edu.stanford.nlp.util.CoreMap)

Example 17 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class TokenSequenceMatcherITest method testTokenSequenceMatcherNumber.

public void testTokenSequenceMatcherNumber() throws IOException {
    CoreMap doc = createDocument("It happened on January 3, 2002");
    // Test sequence with groups
    TokenSequencePattern p = TokenSequencePattern.compile("[ { word::IS_NUM } ]+");
    TokenSequenceMatcher m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    boolean match = m.find();
    assertTrue(match);
    assertEquals(0, m.groupCount());
    assertEquals("3", m.group());
    match = m.find();
    assertTrue(match);
    assertEquals(0, m.groupCount());
    assertEquals("2002", m.group());
    match = m.find();
    assertFalse(match);
    p = TokenSequencePattern.compile("[ { word>=2002 } ]+");
    m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    match = m.find();
    assertTrue(match);
    assertEquals(0, m.groupCount());
    assertEquals("2002", m.group());
    match = m.find();
    assertFalse(match);
    p = TokenSequencePattern.compile("[ { word>2002 } ]+");
    m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    match = m.find();
    assertFalse(match);
    // Check no {} with or
    p = TokenSequencePattern.compile("[ word > 2002 | word==2002 ]+");
    m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    match = m.find();
    assertTrue(match);
    assertEquals(0, m.groupCount());
    assertEquals("2002", m.group());
    match = m.find();
    assertFalse(match);
    // Check no {} with and
    p = TokenSequencePattern.compile("[ word>2002 & word==2002 ]+");
    m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    match = m.find();
    assertFalse(match);
    p = TokenSequencePattern.compile("[ { word>2000 } ]+");
    m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    match = m.find();
    assertTrue(match);
    assertEquals(0, m.groupCount());
    assertEquals("2002", m.group());
    match = m.find();
    assertFalse(match);
    p = TokenSequencePattern.compile("[ { word<=2002 } ]+");
    m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    match = m.find();
    assertTrue(match);
    assertEquals(0, m.groupCount());
    assertEquals("3", m.group());
    match = m.find();
    assertTrue(match);
    assertEquals(0, m.groupCount());
    assertEquals("2002", m.group());
    match = m.find();
    assertFalse(match);
    p = TokenSequencePattern.compile("[ { word<2002 } ]+");
    m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    match = m.find();
    assertTrue(match);
    assertEquals(0, m.groupCount());
    assertEquals("3", m.group());
    match = m.find();
    assertFalse(match);
    p = TokenSequencePattern.compile("[ { word==2002 } ]+");
    m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    match = m.find();
    assertTrue(match);
    assertEquals(0, m.groupCount());
    assertEquals("2002", m.group());
    match = m.find();
    assertFalse(match);
    p = TokenSequencePattern.compile("[ { ner:DATE } ]+");
    m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    match = m.find();
    assertTrue(match);
    assertEquals(0, m.groupCount());
    assertEquals("January 3, 2002", m.group());
    match = m.find();
    assertFalse(match);
    p = TokenSequencePattern.compile("[ { ner::NOT_NIL } ]+");
    m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    match = m.find();
    assertTrue(match);
    assertEquals(0, m.groupCount());
    assertEquals("January 3, 2002", m.group());
    match = m.find();
    assertFalse(match);
    p = TokenSequencePattern.compile("[ { ner::IS_NIL } ]+");
    m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    match = m.find();
    assertTrue(match);
    assertEquals(0, m.groupCount());
    assertEquals("It happened on", m.group());
    match = m.find();
    assertFalse(match);
    p = TokenSequencePattern.compile("[ {{ word=~/2002/ }} ]+");
    m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    match = m.find();
    assertTrue(match);
    assertEquals(0, m.groupCount());
    assertEquals("2002", m.group());
    match = m.find();
    assertFalse(match);
}
Also used : CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 18 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class TokenSequenceMatcherITest method testCaseInsensitive1.

// // This does not work!!!
//  public void testNoBindingCompile(){
//    Env env = TokenSequencePattern.getNewEnv();
//    String s = "[" + CoreAnnotations.TextAnnotation.class.getName()+":\"name\"]{1,2}";
//    TokenSequencePattern p = TokenSequencePattern.compile(env, s);
//  }
public void testCaseInsensitive1() {
    Env env = TokenSequencePattern.getNewEnv();
    env.setDefaultStringPatternFlags(Pattern.CASE_INSENSITIVE);
    env.setDefaultStringMatchFlags(NodePattern.CASE_INSENSITIVE);
    String s = "for /President/";
    CoreMap doc = createDocument("for president");
    TokenSequencePattern p = TokenSequencePattern.compile(env, s);
    TokenSequenceMatcher m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    boolean match = m.find();
    assertTrue(match);
}
Also used : CoreMap(edu.stanford.nlp.util.CoreMap)

Example 19 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class TokenSequenceMatcherITest method testTokenSequenceMatcher9.

public void testTokenSequenceMatcher9() throws IOException {
    CoreMap doc = createDocument(testText1);
    // Test sequence with groups
    //    TokenSequencePattern p = TokenSequencePattern.compile( "(?$contextprev /.*/) (?$treat [{{treat}} & /.*/]) (?$contextnext [/.*/])");
    TokenSequencePattern p = TokenSequencePattern.compile("(?$contextprev /.*/) (?$test [{tag:NNP} & /.*/]) (?$contextnext [/.*/])");
    TokenSequenceMatcher m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    boolean match = m.find();
    assertTrue(match);
    assertEquals(3, m.groupCount());
    assertEquals("first Bishop of", m.group());
    assertEquals("first", m.group(1));
    assertEquals("Bishop", m.group(2));
    assertEquals("of", m.group(3));
    assertEquals("first", m.group("$contextprev"));
    assertEquals("Bishop", m.group("$test"));
    assertEquals("of", m.group("$contextnext"));
    assertEquals("first", m.group(" $contextprev"));
    assertEquals("Bishop", m.group("$test "));
    assertEquals(null, m.group("$contex tnext"));
    assertEquals(3, m.start("$contextprev"));
    assertEquals(4, m.end("$contextprev"));
    assertEquals(4, m.start("$test"));
    assertEquals(5, m.end("$test"));
    assertEquals(5, m.start("$contextnext"));
    assertEquals(6, m.end("$contextnext"));
}
Also used : CoreMap(edu.stanford.nlp.util.CoreMap)

Example 20 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class TokenSequenceMatcherITest method testTokenSequenceMatcher4.

public void testTokenSequenceMatcher4() throws IOException {
    CoreMap doc = createDocument(testText1);
    // Test sequence with groups
    TokenSequencePattern p = TokenSequencePattern.compile(new SequencePattern.RepeatPatternExpr(getSequencePatternExpr("[A-Za-z]+"), 1, -1));
    TokenSequenceMatcher m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    boolean match = m.find();
    assertTrue(match);
    assertEquals(0, m.groupCount());
    assertEquals("Mellitus was the first Bishop of London", m.group());
    match = m.find();
    assertTrue(match);
    assertEquals(0, m.groupCount());
    assertEquals("the third Archbishop of Canterbury", m.group());
    p = TokenSequencePattern.compile(new SequencePattern.SequencePatternExpr(new SequencePattern.RepeatPatternExpr(getSequencePatternExpr("[A-Za-z]+"), 0, -1), getSequencePatternExpr("Mellitus", "was")));
    m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    match = m.find();
    assertTrue(match);
    assertEquals(0, m.groupCount());
    assertEquals("Mellitus was", m.group());
    match = m.find();
    assertFalse(match);
    p = TokenSequencePattern.compile(new SequencePattern.SequencePatternExpr(new SequencePattern.RepeatPatternExpr(getSequencePatternExpr("[A-Za-z]+"), 1, -1), getSequencePatternExpr("Mellitus", "was")));
    m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    match = m.find();
    assertFalse(match);
}
Also used : CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap)

Aggregations

CoreMap (edu.stanford.nlp.util.CoreMap)251 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)170 CoreLabel (edu.stanford.nlp.ling.CoreLabel)101 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)61 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)53 ArrayList (java.util.ArrayList)52 Annotation (edu.stanford.nlp.pipeline.Annotation)47 Tree (edu.stanford.nlp.trees.Tree)27 Properties (java.util.Properties)22 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)20 List (java.util.List)20 StanfordCoreNLP (edu.stanford.nlp.pipeline.StanfordCoreNLP)19 Mention (edu.stanford.nlp.coref.data.Mention)17 ArrayCoreMap (edu.stanford.nlp.util.ArrayCoreMap)17 CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)13 ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint)12 SentencesAnnotation (edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation)11 MachineReadingAnnotations (edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations)9 IndexedWord (edu.stanford.nlp.ling.IndexedWord)9 IntPair (edu.stanford.nlp.util.IntPair)9