use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
the class TokenSequenceMatcherITest method testMultiPatternMatcher.
/**
 * Feeds two token patterns into a single {@code MultiPatternMatcher} and checks
 * the non-overlapping matches it reports over {@code testText1}, in order.
 *
 * @throws IOException declared by the original test signature
 */
public void testMultiPatternMatcher() throws IOException {
  CoreMap doc = createDocument(testText1);

  // A fixed three-token phrase, explicitly given priority 1 ...
  TokenSequencePattern exactTitle = TokenSequencePattern.compile("/Archbishop/ /of/ /Canterbury/");
  exactTitle.setPriority(1);
  // ... and a looser "<one or two words> of <words>" shape with no priority set here.
  TokenSequencePattern looseOfPhrase = TokenSequencePattern.compile("/[a-zA-Z]+/{1,2} /of/ /[a-zA-Z]+/+");

  // The looser pattern is handed to the matcher first, the prioritized one second.
  MultiPatternMatcher<CoreMap> multiMatcher = new MultiPatternMatcher<CoreMap>(looseOfPhrase, exactTitle);
  List<SequenceMatchResult<CoreMap>> matched =
      multiMatcher.findNonOverlapping(doc.get(CoreAnnotations.TokensAnnotation.class));

  // NOTE(review): the second span is the bare "Archbishop of Canterbury" —
  // presumably the priority on exactTitle lets it win over the looser pattern; confirm.
  String[] expectedSpans = {
      "first Bishop of London",
      "Archbishop of Canterbury",
      "a member of the Gregorian mission sent to England to convert the",
      "as Bishop of London in",
  };
  assertEquals(expectedSpans.length, matched.size());
  for (int i = 0; i < expectedSpans.length; i++) {
    assertEquals(expectedSpans[i], matched.get(i).group());
  }
}
use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
the class TokenSequenceMatcherITest method testTokenSequenceMatcherABs.
/**
 * Runs several repetition/grouping patterns over a fixed sequence of
 * single-letter tokens and checks, for each pattern, every successive
 * {@code find()} result: the reported group count and the matched text.
 * After the listed matches, one more {@code find()} must fail.
 *
 * @throws IOException declared by the original test signature
 */
public void testTokenSequenceMatcherABs() throws IOException {
  CoreMap doc = createDocument("A A A A A A A B A A B A C A E A A A A A A A A A A A B A A A");

  // Row i of each table below describes one scenario: the pattern source,
  // the group count expected on every match, and the matched spans in order.
  final String[] patternSources = {
      "/A/+ B",
      "(/A/+ B)+",
      "( A+ ( /B/+ )? )*",
      "(/A/+ /B/+ )+",
      "(/A/+ /C/? /A/* )+",
  };
  final int[] expectedGroupCounts = {0, 1, 2, 1, 1};
  final String[][] expectedSpans = {
      {"A A A A A A A B", "A A B", "A A A A A A A A A A A B"},
      {"A A A A A A A B A A B", "A A A A A A A A A A A B"},
      {"A A A A A A A B A A B A", "A", "A A A A A A A A A A A B A A A"},
      {"A A A A A A A B A A B", "A A A A A A A A A A A B"},
      {"A A A A A A A", "A A", "A C A", "A A A A A A A A A A A", "A A A"},
  };

  for (int caseIdx = 0; caseIdx < patternSources.length; caseIdx++) {
    TokenSequencePattern pattern = TokenSequencePattern.compile(patternSources[caseIdx]);
    TokenSequenceMatcher matcher = pattern.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));

    for (String span : expectedSpans[caseIdx]) {
      assertTrue(matcher.find());
      assertEquals(expectedGroupCounts[caseIdx], matcher.groupCount());
      assertEquals(span, matcher.group());
    }
    // No further match may be reported once the expected spans are exhausted.
    assertFalse(matcher.find());
  }
}
use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
the class TokenSequenceMatcherITest method testTokenSequenceMatcher3.
/**
 * Builds "(words) of (words)" patterns directly from {@code SequencePattern}
 * expression objects instead of pattern strings, then checks every match over
 * {@code testText1}: two capture groups each time, with the whole match and
 * both group texts compared against expected values.
 *
 * @throws IOException declared by the original test signature
 */
public void testTokenSequenceMatcher3() throws IOException {
  CoreMap doc = createDocument(testText1);

  // First variant: group of 1-2 words, the literal "of", group of 1-3 words.
  TokenSequencePattern pattern = TokenSequencePattern.compile(
      new SequencePattern.SequencePatternExpr(
          new SequencePattern.GroupPatternExpr(
              new SequencePattern.RepeatPatternExpr(getSequencePatternExpr("[A-Za-z]+"), 1, 2)),
          getNodePatternExpr("of"),
          new SequencePattern.GroupPatternExpr(
              new SequencePattern.RepeatPatternExpr(getSequencePatternExpr("[A-Za-z]+"), 1, 3))));
  TokenSequenceMatcher matcher = pattern.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));

  // Each row: { whole match, group 1, group 2 }.
  String[][] expectedFirstVariant = {
      {"first Bishop of London", "first Bishop", "London"},
      {"third Archbishop of Canterbury", "third Archbishop", "Canterbury"},
      {"a member of the Gregorian mission", "a member", "the Gregorian mission"},
      {"as Bishop of London in", "as Bishop", "London in"},
  };
  for (String[] row : expectedFirstVariant) {
    assertTrue(matcher.find());
    assertEquals(2, matcher.groupCount());
    assertEquals(row[0], matcher.group());
    assertEquals(row[1], matcher.group(1));
    assertEquals(row[2], matcher.group(2));
  }
  assertFalse(matcher.find());

  // Second variant: exactly two leading words, and a trailing repeat built with
  // an extra 'false' argument.
  // NOTE(review): that flag presumably turns off greedy matching — the expected
  // second groups below are single words; confirm against RepeatPatternExpr.
  pattern = TokenSequencePattern.compile(
      new SequencePattern.SequencePatternExpr(
          new SequencePattern.GroupPatternExpr(
              new SequencePattern.RepeatPatternExpr(getNodePatternExpr("[A-Za-z]+"), 2, 2)),
          getNodePatternExpr("of"),
          new SequencePattern.GroupPatternExpr(
              new SequencePattern.RepeatPatternExpr(getNodePatternExpr("[A-Za-z]+"), 1, 3, false))));
  matcher = pattern.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));

  String[][] expectedSecondVariant = {
      {"first Bishop of London", "first Bishop", "London"},
      {"third Archbishop of Canterbury", "third Archbishop", "Canterbury"},
      {"a member of the", "a member", "the"},
      {"as Bishop of London", "as Bishop", "London"},
  };
  for (String[] row : expectedSecondVariant) {
    assertTrue(matcher.find());
    assertEquals(2, matcher.groupCount());
    assertEquals(row[0], matcher.group());
    assertEquals(row[1], matcher.group(1));
    assertEquals(row[2], matcher.group(2));
  }
  assertFalse(matcher.find());
}
use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
the class TokenSequenceMatcherITest method _testTokenSequenceFindsWildcard.
/**
 * Checks wildcard ({@code []}) patterns against a two-token document: each
 * pattern must match once, covering both tokens, with no capture groups, and
 * then report no further match.
 *
 * NOTE(review): the leading underscore in the method name presumably keeps a
 * name-based test runner from picking this up — confirm whether it is
 * intentionally disabled.
 *
 * @throws IOException declared by the original test signature
 */
public void _testTokenSequenceFindsWildcard() throws IOException {
  CoreMap doc = createDocument("word1 word2");

  // Alternation with the two-token branch first, the same alternation reversed,
  // and the exact-two quantifier on its own.
  // NOTE(review): the original comment on the third case read "Using {1,2}",
  // but the pattern literal is "[]{2}" — confirm which one was intended.
  String[] wildcardPatterns = {
      "[]{2}|[]",
      "[]|[]{2}",
      "[]{2}",
  };

  for (String source : wildcardPatterns) {
    TokenSequencePattern pattern = TokenSequencePattern.compile(source);
    TokenSequenceMatcher matcher = pattern.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));

    assertTrue(matcher.find());
    assertEquals(0, matcher.groupCount());
    assertEquals("word1 word2", matcher.group());
    assertFalse(matcher.find());
  }
}
use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
the class TokenSequenceMatcherITest method testTokenSequenceMatcherNonOverlapping.
/**
 * Matches a "date followed by one or more product/price pairs" pattern against
 * a document holding two such runs, and checks the three capture groups of
 * each match: the date token, the full run of pairs, and the last pair.
 * A third {@code find()} must fail.
 *
 * @throws IOException declared by the original test signature
 */
public void testTokenSequenceMatcherNonOverlapping() throws IOException {
  CoreMap doc = createDocument(
      "DATE1 PROD1 PRICE1 PROD2 PRICE2 PROD3 PRICE3 DATE2 PROD4 PRICE4 PROD5 PRICE5 PROD6 PRICE6");
  TokenSequencePattern pattern = TokenSequencePattern.compile("(/DATE.*/) ((/PROD.*/ /PRICE.*/)+)");
  TokenSequenceMatcher matcher = pattern.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));

  // Each row lists the expected text of groups 1, 2 and 3 for one match.
  String[][] expectedGroups = {
      {"DATE1", "PROD1 PRICE1 PROD2 PRICE2 PROD3 PRICE3", "PROD3 PRICE3"},
      {"DATE2", "PROD4 PRICE4 PROD5 PRICE5 PROD6 PRICE6", "PROD6 PRICE6"},
  };
  for (String[] groups : expectedGroups) {
    assertTrue(matcher.find());
    assertEquals(3, matcher.groupCount());
    for (int g = 1; g <= groups.length; g++) {
      assertEquals(groups[g - 1], matcher.group(g));
    }
  }
  assertFalse(matcher.find());
}
Aggregations