Use of edu.stanford.nlp.util.CoreMap in the CoreNLP project by stanfordnlp.
From the class TokenSequenceMatcherITest, method testTokenSequenceMatcherNonOverlapping.
/**
 * Checks that repeated {@code find()} calls walk through the token list without overlap.
 *
 * <p>The pattern is one DATE token followed by one or more PROD/PRICE pairs. The input holds
 * two such stretches, so exactly two matches are expected. In each match the test expects
 * group 1 to be the date, group 2 the whole run of pairs, and group 3 the last pair of the run.
 *
 * @throws IOException declared by the test signature
 */
public void testTokenSequenceMatcherNonOverlapping() throws IOException {
  CoreMap document = createDocument(
      "DATE1 PROD1 PRICE1 PROD2 PRICE2 PROD3 PRICE3 DATE2 PROD4 PRICE4 PROD5 PRICE5 PROD6 PRICE6");
  TokenSequencePattern pattern =
      TokenSequencePattern.compile("(/DATE.*/) ((/PROD.*/ /PRICE.*/)+)");
  TokenSequenceMatcher matcher =
      pattern.getMatcher(document.get(CoreAnnotations.TokensAnnotation.class));

  // First stretch: DATE1 with products 1-3.
  assertTrue(matcher.find());
  assertEquals(3, matcher.groupCount());
  assertEquals("DATE1", matcher.group(1));
  assertEquals("PROD1 PRICE1 PROD2 PRICE2 PROD3 PRICE3", matcher.group(2));
  assertEquals("PROD3 PRICE3", matcher.group(3));

  // Second stretch: DATE2 with products 4-6.
  assertTrue(matcher.find());
  assertEquals(3, matcher.groupCount());
  assertEquals("DATE2", matcher.group(1));
  assertEquals("PROD4 PRICE4 PROD5 PRICE5 PROD6 PRICE6", matcher.group(2));
  assertEquals("PROD6 PRICE6", matcher.group(3));

  // Nothing is left to match after the second stretch.
  assertFalse(matcher.find());
}
Use of edu.stanford.nlp.util.CoreMap in the CoreNLP project by stanfordnlp.
From the class TokenSequenceMatcherITest, method testTokenSequenceMatcherNumber.
/**
 * Exercises numeric comparisons, NER attribute tests and a regex attribute test in token
 * sequence patterns, all against the sentence "It happened on January 3, 2002".
 *
 * <p>None of the patterns contains a capturing group. Each case lists the exact sequence of
 * matches that successive {@code find()} calls are expected to return; an empty list means the
 * pattern must not match at all.
 *
 * @throws IOException declared by the test signature
 */
public void testTokenSequenceMatcherNumber() throws IOException {
  CoreMap doc = createDocument("It happened on January 3, 2002");

  // Tokens that are numbers.
  assertNumberPatternMatches(doc, "[ { word::IS_NUM } ]+", "3", "2002");

  // Numeric comparisons written inside { }.
  assertNumberPatternMatches(doc, "[ { word>=2002 } ]+", "2002");
  assertNumberPatternMatches(doc, "[ { word>2002 } ]+");

  // Check no {} with or
  assertNumberPatternMatches(doc, "[ word > 2002 | word==2002 ]+", "2002");

  // Check no {} with and
  assertNumberPatternMatches(doc, "[ word>2002 & word==2002 ]+");

  // Remaining numeric comparisons.
  assertNumberPatternMatches(doc, "[ { word>2000 } ]+", "2002");
  assertNumberPatternMatches(doc, "[ { word<=2002 } ]+", "3", "2002");
  assertNumberPatternMatches(doc, "[ { word<2002 } ]+", "3");
  assertNumberPatternMatches(doc, "[ { word==2002 } ]+", "2002");

  // NER attribute tests: explicit value, then the NOT_NIL / IS_NIL checks.
  assertNumberPatternMatches(doc, "[ { ner:DATE } ]+", "January 3, 2002");
  assertNumberPatternMatches(doc, "[ { ner::NOT_NIL } ]+", "January 3, 2002");
  assertNumberPatternMatches(doc, "[ { ner::IS_NIL } ]+", "It happened on");

  // Regex test written with the {{ }} form.
  assertNumberPatternMatches(doc, "[ {{ word=~/2002/ }} ]+", "2002");
}

/**
 * Compiles {@code patternString}, runs it over the tokens of {@code doc}, and asserts that
 * successive {@code find()} calls yield exactly {@code expectedMatches}, in order, and then
 * stop matching.
 *
 * @param doc document whose {@code TokensAnnotation} is matched against
 * @param patternString token sequence pattern source; expected to have no capturing groups
 * @param expectedMatches expected text of each match in order; empty when no match is expected
 */
private static void assertNumberPatternMatches(CoreMap doc, String patternString, String... expectedMatches) {
  TokenSequencePattern p = TokenSequencePattern.compile(patternString);
  TokenSequenceMatcher m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
  for (String expected : expectedMatches) {
    assertTrue("expected a match of \"" + expected + "\" for pattern " + patternString, m.find());
    assertEquals("group count for pattern " + patternString, 0, m.groupCount());
    assertEquals("matched text for pattern " + patternString, expected, m.group());
  }
  // After the listed matches, the matcher must be exhausted.
  assertFalse("unexpected extra match for pattern " + patternString, m.find());
}
Use of edu.stanford.nlp.util.CoreMap in the CoreNLP project by stanfordnlp.
From the class TokenSequenceMatcherITest, method testCaseInsensitive1.
// Disabled: the commented-out test below does not work!!!
// public void testNoBindingCompile(){
// Env env = TokenSequencePattern.getNewEnv();
// String s = "[" + CoreAnnotations.TextAnnotation.class.getName()+":\"name\"]{1,2}";
// TokenSequencePattern p = TokenSequencePattern.compile(env, s);
// }
/**
 * Checks that a pattern compiled in an environment whose default string-pattern and
 * string-match flags are both case-insensitive finds "president" when the pattern
 * says "President".
 */
public void testCaseInsensitive1() {
  // Environment with case-insensitive defaults for both regex and plain string matching.
  Env caseInsensitiveEnv = TokenSequencePattern.getNewEnv();
  caseInsensitiveEnv.setDefaultStringPatternFlags(Pattern.CASE_INSENSITIVE);
  caseInsensitiveEnv.setDefaultStringMatchFlags(NodePattern.CASE_INSENSITIVE);

  CoreMap document = createDocument("for president");
  TokenSequencePattern pattern = TokenSequencePattern.compile(caseInsensitiveEnv, "for /President/");
  TokenSequenceMatcher matcher =
      pattern.getMatcher(document.get(CoreAnnotations.TokensAnnotation.class));

  assertTrue(matcher.find());
}
Use of edu.stanford.nlp.util.CoreMap in the CoreNLP project by stanfordnlp.
From the class TokenSequenceMatcherITest, method testTokenSequenceMatcher9.
/**
 * Checks named capture groups ({@code $contextprev}, {@code $test}, {@code $contextnext}) on a
 * three-token pattern whose middle token must be tagged NNP.
 *
 * <p>The first match in {@code testText1} is expected to be "first Bishop of". Each group is
 * checked by index, by name, and by token offsets. The test also expects a group name with
 * leading or trailing whitespace to resolve to the same group, while a name with a space in
 * the middle yields {@code null}.
 *
 * @throws IOException declared by the test signature
 */
public void testTokenSequenceMatcher9() throws IOException {
  CoreMap document = createDocument(testText1);
  // Earlier variant of the pattern, kept for reference:
  // TokenSequencePattern p = TokenSequencePattern.compile( "(?$contextprev /.*/) (?$treat [{{treat}} & /.*/]) (?$contextnext [/.*/])");
  TokenSequencePattern pattern = TokenSequencePattern.compile(
      "(?$contextprev /.*/) (?$test [{tag:NNP} & /.*/]) (?$contextnext [/.*/])");
  TokenSequenceMatcher matcher =
      pattern.getMatcher(document.get(CoreAnnotations.TokensAnnotation.class));

  assertTrue(matcher.find());
  assertEquals(3, matcher.groupCount());
  assertEquals("first Bishop of", matcher.group());

  // Groups addressed by index.
  assertEquals("first", matcher.group(1));
  assertEquals("Bishop", matcher.group(2));
  assertEquals("of", matcher.group(3));

  // Groups addressed by name.
  assertEquals("first", matcher.group("$contextprev"));
  assertEquals("Bishop", matcher.group("$test"));
  assertEquals("of", matcher.group("$contextnext"));

  // Surrounding whitespace in the name is tolerated; whitespace inside the name is not.
  assertEquals("first", matcher.group(" $contextprev"));
  assertEquals("Bishop", matcher.group("$test "));
  assertEquals(null, matcher.group("$contex tnext"));

  // Token offsets of each named group: start inclusive, end exclusive per the expected values.
  assertEquals(3, matcher.start("$contextprev"));
  assertEquals(4, matcher.end("$contextprev"));
  assertEquals(4, matcher.start("$test"));
  assertEquals(5, matcher.end("$test"));
  assertEquals(5, matcher.start("$contextnext"));
  assertEquals(6, matcher.end("$contextnext"));
}
Use of edu.stanford.nlp.util.CoreMap in the CoreNLP project by stanfordnlp.
From the class TokenSequenceMatcherITest, method testTokenSequenceMatcher4.
/**
 * Checks patterns assembled programmatically from {@code SequencePattern} expression objects
 * rather than parsed from a pattern string.
 *
 * <p>Three patterns are tried against {@code testText1}:
 * <ol>
 *   <li>a repeat (min 1, max -1) of alphabetic tokens, expected to match whole word runs;</li>
 *   <li>a repeat (min 0, max -1) of alphabetic tokens followed by "Mellitus was", expected to
 *       match only "Mellitus was";</li>
 *   <li>the same with min 1, expected not to match at all.</li>
 * </ol>
 * NOTE(review): a max of -1 presumably means "unbounded" - confirm against RepeatPatternExpr.
 *
 * @throws IOException declared by the test signature
 */
public void testTokenSequenceMatcher4() throws IOException {
  CoreMap document = createDocument(testText1);

  // 1) One or more alphabetic tokens; no capturing groups are involved.
  TokenSequencePattern pattern = TokenSequencePattern.compile(
      new SequencePattern.RepeatPatternExpr(getSequencePatternExpr("[A-Za-z]+"), 1, -1));
  TokenSequenceMatcher matcher =
      pattern.getMatcher(document.get(CoreAnnotations.TokensAnnotation.class));
  assertTrue(matcher.find());
  assertEquals(0, matcher.groupCount());
  assertEquals("Mellitus was the first Bishop of London", matcher.group());
  assertTrue(matcher.find());
  assertEquals(0, matcher.groupCount());
  assertEquals("the third Archbishop of Canterbury", matcher.group());

  // 2) Zero or more alphabetic tokens, then the literal sequence "Mellitus was".
  pattern = TokenSequencePattern.compile(
      new SequencePattern.SequencePatternExpr(
          new SequencePattern.RepeatPatternExpr(getSequencePatternExpr("[A-Za-z]+"), 0, -1),
          getSequencePatternExpr("Mellitus", "was")));
  matcher = pattern.getMatcher(document.get(CoreAnnotations.TokensAnnotation.class));
  assertTrue(matcher.find());
  assertEquals(0, matcher.groupCount());
  assertEquals("Mellitus was", matcher.group());
  assertFalse(matcher.find());

  // 3) At least one alphabetic token is now required before "Mellitus was": no match expected.
  pattern = TokenSequencePattern.compile(
      new SequencePattern.SequencePatternExpr(
          new SequencePattern.RepeatPatternExpr(getSequencePatternExpr("[A-Za-z]+"), 1, -1),
          getSequencePatternExpr("Mellitus", "was")));
  matcher = pattern.getMatcher(document.get(CoreAnnotations.TokensAnnotation.class));
  assertFalse(matcher.find());
}
Aggregations