use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
the class NumberNormalizerITest method testOrdinals.
/**
 * Runs {@code NumberNormalizer.findAndAnnotateNumericExpressions} over a list of ordinals written
 * as digits, single words and multi-word phrases, then walks the numerized tokens that carry a
 * composite numeric type and compares each one, in order, against the expected surface text,
 * the expected type ("ORDINAL") and the expected value.
 *
 * <p>Values are compared through {@code toString()}, so the expected list deliberately mixes
 * integer and floating-point literals (see the TODO below).
 *
 * @throws IOException declared by the test signature
 */
public void testOrdinals() throws IOException {
  // Input text: ordinal expressions separated by commas and newlines.
  String testText =
      "0th, 1st, 2nd, 3rd, 4th, 5th, 6th, 7th, 8th, 9th, 10th\n"
      + "zeroth, first, second, third, fourth, fifth, sixth, seventh, eighth, ninth, tenth\n"
      + "11th, 12th, 13th, 14th, 15th, 16th, 17th, 18th, 19th, 20th\n"
      + "Eleventh, twelfth, thirteenth, Fourteenth, fifteenth, Sixteenth, seventeenth, eighteenth, nineteenth, twentieth\n"
      + "Twenty-first, twenty first, twenty second, twenty third, twenty fourth\n"
      + "thirtieth, thirty first, thirty-second,"
      + "fortieth, one hundredth, two hundredth, one hundred and fifty first, one hundred fifty first";

  // TODO: Fix consistency of number representation
  // Expected values, one per expression above and grouped by input line. The int/double mix is
  // significant because the comparison below is done on the string form.
  Iterator<? extends Number> expectedNumbers = Arrays.asList(
      0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
      0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
      11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
      11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
      21.0, 21.0, 22.0, 23.0, 24.0,
      30, 31.0, 32.0, 40, 100.0, 200.0, 151.0, 151.0).iterator();
  // Expected surface forms are recovered from the input itself by splitting on the separators.
  Iterator<String> expectedTexts = Arrays.asList(testText.split("\\s*[,\\n]+\\s*")).iterator();
  // Every expression in this test is expected to be typed the same way.
  String expectedType = "ORDINAL";

  // Build the document and annotate its numeric expressions.
  Annotation document = createDocument(testText);
  NumberNormalizer.findAndAnnotateNumericExpressions(document);

  // Compare each composite numeric expression with the next expected entry.
  for (CoreMap numerized : document.get(CoreAnnotations.NumerizedTokensAnnotation.class)) {
    if (!numerized.containsKey(CoreAnnotations.NumericCompositeTypeAnnotation.class)) {
      // Tokens without a composite type are not counted against the expected lists.
      continue;
    }
    Number wantedNumber = expectedNumbers.next();
    String wantedText = expectedTexts.next();

    // Slice the covered text out of the document using the expression's character offsets.
    String documentText = document.get(CoreAnnotations.TextAnnotation.class);
    int beginOffset = numerized.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
    int endOffset = numerized.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
    String actualText = documentText.substring(beginOffset, endOffset);

    assertEquals(wantedText, actualText);
    assertEquals("Type for " + wantedText, expectedType,
        numerized.get(CoreAnnotations.NumericCompositeTypeAnnotation.class));
    assertEquals(wantedNumber.toString(),
        numerized.get(CoreAnnotations.NumericCompositeValueAnnotation.class).toString());
  }

  // Every expected value must have been consumed.
  assertFalse(expectedNumbers.hasNext());
}
use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
the class TokenSequenceMatcherITest method testCompile.
/**
 * Smoke test: checks that a pattern string with a named group and an alternation compiles, and
 * that the compiled pattern finds at least one match in a short document.
 */
public void testCompile() {
  CoreMap doc = createDocument("does this do matching this");
  TokenSequencePattern pattern =
      TokenSequencePattern.compile("(?$se \"matching\" \"this\"|\"don't\")");
  TokenSequenceMatcher matcher = pattern.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
  assertTrue(matcher.find());
  // The matched text itself is not verified; the check below is left disabled.
  // assertEquals("matching this", matcher.group());
}
use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
the class TokenSequenceMatcherITest method testTokenSequenceMatcher1.
/**
 * Exercises {@code TokenSequenceMatcher} on {@code testText1} with patterns built from pattern
 * expression objects: a plain token sequence (including {@code reset()} and {@code matches()}),
 * an alternation of two sequences, and sequences anchored to the begin or end of the token list.
 *
 * @throws IOException declared by the test signature
 */
public void testTokenSequenceMatcher1() throws IOException {
  CoreMap doc = createDocument(testText1);

  // --- Simple sequence: exactly one hit, and reset() makes it findable again ---
  TokenSequencePattern pattern =
      TokenSequencePattern.compile(getSequencePatternExpr("Archbishop", "of", "Canterbury"));
  TokenSequenceMatcher matcher = pattern.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
  assertTrue(matcher.find());
  assertEquals("Archbishop of Canterbury", matcher.group());
  assertFalse(matcher.find());

  matcher.reset();
  assertTrue(matcher.find());
  assertEquals("Archbishop of Canterbury", matcher.group());

  // matches() is expected to fail for this three-token pattern against the whole document.
  matcher.reset();
  assertFalse(matcher.matches());

  // --- Alternation of two sequences: three hits in this order, no capture groups ---
  pattern = TokenSequencePattern.compile(new SequencePattern.OrPatternExpr(
      getSequencePatternExpr("Archbishop", "of", "Canterbury"),
      getSequencePatternExpr("Bishop", "of", "London")));
  matcher = pattern.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
  String[] alternationHits = {"Bishop of London", "Archbishop of Canterbury", "Bishop of London"};
  for (String hit : alternationHits) {
    assertTrue(matcher.find());
    assertEquals(0, matcher.groupCount());
    assertEquals(hit, matcher.group());
  }
  assertFalse(matcher.find());

  // --- Begin anchor followed by a phrase the test expects not to be found there ---
  pattern = TokenSequencePattern.compile(new SequencePattern.SequencePatternExpr(
      SequencePattern.SEQ_BEGIN_PATTERN_EXPR,
      getSequencePatternExpr("Archbishop", "of", "Canterbury")));
  matcher = pattern.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
  assertFalse(matcher.find());

  // --- Begin anchor followed by a phrase the test expects to be found exactly once ---
  pattern = TokenSequencePattern.compile(new SequencePattern.SequencePatternExpr(
      SequencePattern.SEQ_BEGIN_PATTERN_EXPR,
      getSequencePatternExpr("Mellitus", "was", "the")));
  matcher = pattern.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
  assertTrue(matcher.find());
  assertEquals(0, matcher.groupCount());
  assertEquals("Mellitus was the", matcher.group());
  assertFalse(matcher.find());

  // --- Phrase followed by the end anchor, expected not to be found ---
  pattern = TokenSequencePattern.compile(new SequencePattern.SequencePatternExpr(
      getSequencePatternExpr("Archbishop", "of", "Canterbury"),
      SequencePattern.SEQ_END_PATTERN_EXPR));
  matcher = pattern.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
  assertFalse(matcher.find());

  // --- Phrase followed by the end anchor, expected to be found exactly once ---
  pattern = TokenSequencePattern.compile(new SequencePattern.SequencePatternExpr(
      getSequencePatternExpr("London", "in", "604", "."),
      SequencePattern.SEQ_END_PATTERN_EXPR));
  matcher = pattern.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
  assertTrue(matcher.find());
  assertEquals(0, matcher.groupCount());
  assertEquals("London in 604.", matcher.group());
  assertFalse(matcher.find());
}
use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
the class TokenSequenceMatcherITest method testTokenSequenceMatcher2.
/**
 * Matches a four-token "any any of any" sequence against {@code testText1}, first without
 * capture groups and then with the first two tokens and the last token wrapped in groups, and
 * checks the matched text (and group text) of each successive hit.
 *
 * @throws IOException declared by the test signature
 */
public void testTokenSequenceMatcher2() throws IOException {
  CoreMap doc = createDocument(testText1);

  // --- Ungrouped pattern: four hits, each reporting zero capture groups ---
  TokenSequencePattern pattern =
      TokenSequencePattern.compile(getSequencePatternExpr(".*", ".*", "of", ".*"));
  TokenSequenceMatcher matcher = pattern.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
  String[] plainHits = {
      "first Bishop of London",
      "third Archbishop of Canterbury",
      "a member of the",
      "as Bishop of London",
  };
  for (String hit : plainHits) {
    assertTrue(matcher.find());
    assertEquals(0, matcher.groupCount());
    assertEquals(hit, matcher.group());
  }
  assertFalse(matcher.find());

  // --- Test sequence with groups: same hits, now exposing two capture groups ---
  pattern = TokenSequencePattern.compile(new SequencePattern.SequencePatternExpr(
      new SequencePattern.GroupPatternExpr(getSequencePatternExpr(".*", ".*")),
      getNodePatternExpr("of"),
      new SequencePattern.GroupPatternExpr(getSequencePatternExpr(".*"))));
  matcher = pattern.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
  // Each row: whole match, group 1, group 2.
  String[][] groupedHits = {
      {"first Bishop of London", "first Bishop", "London"},
      {"third Archbishop of Canterbury", "third Archbishop", "Canterbury"},
      {"a member of the", "a member", "the"},
      {"as Bishop of London", "as Bishop", "London"},
  };
  for (String[] hit : groupedHits) {
    assertTrue(matcher.find());
    assertEquals(2, matcher.groupCount());
    assertEquals(hit[0], matcher.group());
    assertEquals(hit[1], matcher.group(1));
    assertEquals(hit[2], matcher.group(2));
  }
  assertFalse(matcher.find());
}
use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
the class TokenSequenceMatcherITest method testTokenSequenceMatcher6.
/**
 * Compiles two patterns from their string form, one without and one with parenthesized capture
 * groups, matches them against {@code testText1}, and checks the matched text (and group text)
 * of each successive hit.
 *
 * @throws IOException declared by the test signature
 */
public void testTokenSequenceMatcher6() throws IOException {
  CoreMap doc = createDocument(testText1);

  // --- Ungrouped pattern string: four hits, each reporting zero capture groups ---
  TokenSequencePattern pattern = TokenSequencePattern.compile("[ /.*/ ] [ /.*/ ] [/of/] [/.*/]");
  TokenSequenceMatcher matcher = pattern.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
  String[] plainHits = {
      "first Bishop of London",
      "third Archbishop of Canterbury",
      "a member of the",
      "as Bishop of London",
  };
  for (String hit : plainHits) {
    assertTrue(matcher.find());
    assertEquals(0, matcher.groupCount());
    assertEquals(hit, matcher.group());
  }
  assertFalse(matcher.find());

  // --- Grouped pattern string: same hits, now exposing two capture groups ---
  pattern = TokenSequencePattern.compile("([ /.*/ ] [ /.*/ ]) [/of/] ([/.*/])");
  matcher = pattern.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
  // Each row: whole match, group 1, group 2.
  String[][] groupedHits = {
      {"first Bishop of London", "first Bishop", "London"},
      {"third Archbishop of Canterbury", "third Archbishop", "Canterbury"},
      {"a member of the", "a member", "the"},
      {"as Bishop of London", "as Bishop", "London"},
  };
  for (String[] hit : groupedHits) {
    assertTrue(matcher.find());
    assertEquals(2, matcher.groupCount());
    assertEquals(hit[0], matcher.group());
    assertEquals(hit[1], matcher.group(1));
    assertEquals(hit[2], matcher.group(2));
  }
  assertFalse(matcher.find());
}
Aggregations