Use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
From the class TokenSequenceMatcherITest, method testCompile.
/**
 * Smoke test: checks only that the pattern string compiles and that
 * {@code find()} reports a match on the sample sentence.
 */
public void testCompile() {
  String patternText = "(?$se \"matching\" \"this\"|\"don't\")";
  CoreMap document = createDocument("does this do matching this");
  TokenSequencePattern pattern = TokenSequencePattern.compile(patternText);
  TokenSequenceMatcher matcher =
      pattern.getMatcher(document.get(CoreAnnotations.TokensAnnotation.class));
  assertTrue(matcher.find());
  // The check on the matched text is deliberately left disabled, as in the original:
  //assertEquals(m.group(), "matching this");
}
Use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
From the class TokenSequenceMatcherITest, method testTokenSequenceMatcher1.
/**
 * Runs programmatically built patterns over the tokens of {@code testText1}:
 * a plain three-token sequence (with {@code reset()} and {@code matches()}),
 * an alternation of two sequences, and sequences combined with
 * {@code SEQ_BEGIN_PATTERN_EXPR} / {@code SEQ_END_PATTERN_EXPR}.
 */
public void testTokenSequenceMatcher1() throws IOException {
  CoreMap document = createDocument(testText1);

  // Plain sequence: found exactly once.
  TokenSequencePattern pattern =
      TokenSequencePattern.compile(getSequencePatternExpr("Archbishop", "of", "Canterbury"));
  TokenSequenceMatcher matcher =
      pattern.getMatcher(document.get(CoreAnnotations.TokensAnnotation.class));
  assertTrue(matcher.find());
  assertEquals("Archbishop of Canterbury", matcher.group());
  assertFalse(matcher.find());

  // After reset() the same match is found again.
  matcher.reset();
  assertTrue(matcher.find());
  assertEquals("Archbishop of Canterbury", matcher.group());

  // matches() is expected to fail here (presumably because it must cover the whole token list).
  matcher.reset();
  assertFalse(matcher.matches());

  // Alternation of two sequences: matches are reported in this order.
  pattern = TokenSequencePattern.compile(
      new SequencePattern.OrPatternExpr(
          getSequencePatternExpr("Archbishop", "of", "Canterbury"),
          getSequencePatternExpr("Bishop", "of", "London")));
  matcher = pattern.getMatcher(document.get(CoreAnnotations.TokensAnnotation.class));
  String[] alternationHits = {
      "Bishop of London",
      "Archbishop of Canterbury",
      "Bishop of London"
  };
  for (String hit : alternationHits) {
    assertTrue(matcher.find());
    assertEquals(0, matcher.groupCount());
    assertEquals(hit, matcher.group());
  }
  assertFalse(matcher.find());

  // SEQ_BEGIN followed by a phrase that the assertions expect not to be found.
  pattern = TokenSequencePattern.compile(
      new SequencePattern.SequencePatternExpr(
          SequencePattern.SEQ_BEGIN_PATTERN_EXPR,
          getSequencePatternExpr("Archbishop", "of", "Canterbury")));
  matcher = pattern.getMatcher(document.get(CoreAnnotations.TokensAnnotation.class));
  assertFalse(matcher.find());

  // SEQ_BEGIN followed by a phrase that is expected to be found exactly once.
  pattern = TokenSequencePattern.compile(
      new SequencePattern.SequencePatternExpr(
          SequencePattern.SEQ_BEGIN_PATTERN_EXPR,
          getSequencePatternExpr("Mellitus", "was", "the")));
  matcher = pattern.getMatcher(document.get(CoreAnnotations.TokensAnnotation.class));
  assertTrue(matcher.find());
  assertEquals(0, matcher.groupCount());
  assertEquals("Mellitus was the", matcher.group());
  assertFalse(matcher.find());

  // A phrase followed by SEQ_END that the assertions expect not to be found.
  pattern = TokenSequencePattern.compile(
      new SequencePattern.SequencePatternExpr(
          getSequencePatternExpr("Archbishop", "of", "Canterbury"),
          SequencePattern.SEQ_END_PATTERN_EXPR));
  matcher = pattern.getMatcher(document.get(CoreAnnotations.TokensAnnotation.class));
  assertFalse(matcher.find());

  // A phrase followed by SEQ_END that is expected to be found exactly once.
  pattern = TokenSequencePattern.compile(
      new SequencePattern.SequencePatternExpr(
          getSequencePatternExpr("London", "in", "604", "."),
          SequencePattern.SEQ_END_PATTERN_EXPR));
  matcher = pattern.getMatcher(document.get(CoreAnnotations.TokensAnnotation.class));
  assertTrue(matcher.find());
  assertEquals(0, matcher.groupCount());
  assertEquals("London in 604.", matcher.group());
  assertFalse(matcher.find());
}
Use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
From the class TokenSequenceMatcherITest, method testTokenSequenceMatcher2.
/**
 * Matches the four-token pattern built from {@code ".*", ".*", "of", ".*"} against
 * {@code testText1}, first without capture groups and then with the two tokens
 * before "of" and the token after it wrapped in {@code GroupPatternExpr}s.
 */
public void testTokenSequenceMatcher2() throws IOException {
  CoreMap document = createDocument(testText1);

  // Ungrouped pattern: only the whole match is checked.
  TokenSequencePattern pattern =
      TokenSequencePattern.compile(getSequencePatternExpr(".*", ".*", "of", ".*"));
  TokenSequenceMatcher matcher =
      pattern.getMatcher(document.get(CoreAnnotations.TokensAnnotation.class));
  String[] wholeMatches = {
      "first Bishop of London",
      "third Archbishop of Canterbury",
      "a member of the",
      "as Bishop of London"
  };
  for (String whole : wholeMatches) {
    assertTrue(matcher.find());
    assertEquals(0, matcher.groupCount());
    assertEquals(whole, matcher.group());
  }
  assertFalse(matcher.find());

  // Test sequence with groups
  pattern = TokenSequencePattern.compile(
      new SequencePattern.SequencePatternExpr(
          new SequencePattern.GroupPatternExpr(getSequencePatternExpr(".*", ".*")),
          getNodePatternExpr("of"),
          new SequencePattern.GroupPatternExpr(getSequencePatternExpr(".*"))));
  matcher = pattern.getMatcher(document.get(CoreAnnotations.TokensAnnotation.class));
  // Each row: { whole match, group 1, group 2 }
  String[][] groupedMatches = {
      {"first Bishop of London", "first Bishop", "London"},
      {"third Archbishop of Canterbury", "third Archbishop", "Canterbury"},
      {"a member of the", "a member", "the"},
      {"as Bishop of London", "as Bishop", "London"}
  };
  for (String[] row : groupedMatches) {
    assertTrue(matcher.find());
    assertEquals(2, matcher.groupCount());
    assertEquals(row[0], matcher.group());
    assertEquals(row[1], matcher.group(1));
    assertEquals(row[2], matcher.group(2));
  }
  assertFalse(matcher.find());
}
Use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
From the class TokenSequenceMatcherITest, method testTokenSequenceMatcher6.
/**
 * Compiles patterns from their string form and runs them over {@code testText1}:
 * first an ungrouped four-token pattern around "of", then the same pattern with
 * two parenthesised capture groups.
 */
public void testTokenSequenceMatcher6() throws IOException {
  CoreMap document = createDocument(testText1);

  // Ungrouped string pattern.
  TokenSequencePattern pattern =
      TokenSequencePattern.compile("[ /.*/ ] [ /.*/ ] [/of/] [/.*/]");
  TokenSequenceMatcher matcher =
      pattern.getMatcher(document.get(CoreAnnotations.TokensAnnotation.class));
  String[] expectedSpans = {
      "first Bishop of London",
      "third Archbishop of Canterbury",
      "a member of the",
      "as Bishop of London"
  };
  for (int i = 0; i < expectedSpans.length; i++) {
    assertTrue(matcher.find());
    assertEquals(0, matcher.groupCount());
    assertEquals(expectedSpans[i], matcher.group());
  }
  assertFalse(matcher.find());

  // Same pattern with two capture groups.
  pattern = TokenSequencePattern.compile("([ /.*/ ] [ /.*/ ]) [/of/] ([/.*/])");
  matcher = pattern.getMatcher(document.get(CoreAnnotations.TokensAnnotation.class));
  // Each row: { whole match, group 1, group 2 }
  String[][] expectedGroups = {
      {"first Bishop of London", "first Bishop", "London"},
      {"third Archbishop of Canterbury", "third Archbishop", "Canterbury"},
      {"a member of the", "a member", "the"},
      {"as Bishop of London", "as Bishop", "London"}
  };
  for (int i = 0; i < expectedGroups.length; i++) {
    assertTrue(matcher.find());
    assertEquals(2, matcher.groupCount());
    assertEquals(expectedGroups[i][0], matcher.group());
    assertEquals(expectedGroups[i][1], matcher.group(1));
    assertEquals(expectedGroups[i][2], matcher.group(2));
  }
  assertFalse(matcher.find());
}
Use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
From the class TokenSequenceMatcherITest, method testTokenSequenceMatcherAll.
/**
 * Runs a two-group pattern over {@code testText1} with the matcher's find type set to
 * {@code FIND_ALL} and checks every reported match, in order, together with
 * both of its capture groups.
 */
public void testTokenSequenceMatcherAll() throws IOException {
  CoreMap document = createDocument(testText1);
  TokenSequencePattern pattern =
      TokenSequencePattern.compile("(/[A-Za-z]+/{1,2}) /of/ (/[A-Za-z]+/{1,3}?) ");
  TokenSequenceMatcher matcher =
      pattern.getMatcher(document.get(CoreAnnotations.TokensAnnotation.class));
  matcher.setFindType(SequenceMatcher.FindType.FIND_ALL);

  // Test finding of ALL matching sequences
  // NOTE: when using FIND_ALL greedy/reluctant modifiers are not enforced
  // perhaps should add syntax where some of them are enforced...
  // Each row: { whole match, group 1, group 2 }, listed in the order find() must report them.
  String[][] expectedMatches = {
      {"first Bishop of London", "first Bishop", "London"},
      {"Bishop of London", "Bishop", "London"},
      {"third Archbishop of Canterbury", "third Archbishop", "Canterbury"},
      {"Archbishop of Canterbury", "Archbishop", "Canterbury"},
      {"a member of the", "a member", "the"},
      {"a member of the Gregorian", "a member", "the Gregorian"},
      {"a member of the Gregorian mission", "a member", "the Gregorian mission"},
      {"member of the", "member", "the"},
      {"member of the Gregorian", "member", "the Gregorian"},
      {"member of the Gregorian mission", "member", "the Gregorian mission"},
      {"as Bishop of London", "as Bishop", "London"},
      {"as Bishop of London in", "as Bishop", "London in"},
      {"Bishop of London", "Bishop", "London"},
      {"Bishop of London in", "Bishop", "London in"}
  };
  for (String[] row : expectedMatches) {
    assertTrue(matcher.find());
    assertEquals(2, matcher.groupCount());
    assertEquals(row[0], matcher.group());
    assertEquals(row[1], matcher.group(1));
    assertEquals(row[2], matcher.group(2));
  }

  // No further matches once the table is exhausted.
  assertFalse(matcher.find());
}
Aggregations