Search in sources:

Example 6 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class NumberNormalizerITest method testOrdinals.

/**
 * Checks that ordinal expressions, written with digits and with words, are annotated by
 * {@code NumberNormalizer.findAndAnnotateNumericExpressions} with the composite type
 * {@code "ORDINAL"}, the expected source text span, and the expected numeric value.
 *
 * @throws IOException declared by the test signature
 */
public void testOrdinals() throws IOException {
    // Input: ordinal expressions separated by commas and/or newlines.
    String testText = "0th, 1st, 2nd, 3rd, 4th, 5th, 6th, 7th, 8th, 9th, 10th\n"
        + "zeroth, first, second, third, fourth, fifth, sixth, seventh, eighth, ninth, tenth\n"
        + "11th, 12th, 13th, 14th, 15th, 16th, 17th, 18th, 19th, 20th\n"
        + "Eleventh, twelfth, thirteenth, Fourteenth, fifteenth, Sixteenth, seventeenth, eighteenth, nineteenth, twentieth\n"
        + "Twenty-first, twenty first, twenty second, twenty third, twenty fourth\n"
        + "thirtieth, thirty first, thirty-second,"
        + "fortieth, one hundredth, two hundredth, one hundred and fifty first, one hundred fifty first";

    // TODO: Fix consistency of number representation
    // Expected values, one row per input line. They are compared through toString() below,
    // so the integer vs. floating-point form of each entry (e.g. 21.0 rather than 21) matters.
    Iterator<? extends Number> expectedNumbers = Arrays.asList(
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
        11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
        11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
        21.0, 21.0, 22.0, 23.0, 24.0,
        30, 31.0, 32.0, 40, 100.0, 200.0, 151.0, 151.0).iterator();

    // Expected surface strings: the input split on runs of commas/newlines and surrounding whitespace.
    Iterator<String> expectedTexts = Arrays.asList(testText.split("\\s*[,\\n]+\\s*")).iterator();

    // Build the document and run the number annotator over it.
    Annotation document = createDocument(testText);
    NumberNormalizer.findAndAnnotateNumericExpressions(document);

    // Walk the numerized tokens; only those carrying a composite type are checked.
    for (CoreMap numerized : document.get(CoreAnnotations.NumerizedTokensAnnotation.class)) {
        if (!numerized.containsKey(CoreAnnotations.NumericCompositeTypeAnnotation.class)) {
            continue;
        }
        Number wantValue = expectedNumbers.next();
        String wantText = expectedTexts.next();

        // Recover the surface text from the document using the token's character offsets.
        String documentText = document.get(CoreAnnotations.TextAnnotation.class);
        int beginOffset = numerized.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
        int endOffset = numerized.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
        String gotText = documentText.substring(beginOffset, endOffset);

        assertEquals(wantText, gotText);
        assertEquals("Type for " + wantText, "ORDINAL", numerized.get(CoreAnnotations.NumericCompositeTypeAnnotation.class));
        assertEquals(wantValue.toString(), numerized.get(CoreAnnotations.NumericCompositeValueAnnotation.class).toString());
    }

    // Every expected value must have been consumed.
    assertFalse(expectedNumbers.hasNext());
}
Also used : CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 7 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class TokenSequenceMatcherITest method testCompile.

/**
 * Smoke test: verifies that the pattern string compiles and that the compiled pattern
 * finds a match in the sample tokens. The matched text itself is not asserted.
 */
public void testCompile() {
    CoreMap doc = createDocument("does this do matching this");
    String patternText = "(?$se \"matching\" \"this\"|\"don't\")";
    TokenSequencePattern pattern = TokenSequencePattern.compile(patternText);
    TokenSequenceMatcher matcher = pattern.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    assertTrue(matcher.find());
}
Also used : CoreMap(edu.stanford.nlp.util.CoreMap)

Example 8 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class TokenSequenceMatcherITest method testTokenSequenceMatcher1.

/**
 * Exercises {@code TokenSequenceMatcher} over {@code testText1} with programmatically built
 * pattern expressions: a plain token sequence, an alternation of two sequences, and
 * sequences anchored with the begin/end pattern expressions.
 *
 * @throws IOException declared by the test signature
 */
public void testTokenSequenceMatcher1() throws IOException {
    CoreMap doc = createDocument(testText1);

    // --- Simple sequence: found exactly once ---
    TokenSequencePattern pattern = TokenSequencePattern.compile(getSequencePatternExpr("Archbishop", "of", "Canterbury"));
    TokenSequenceMatcher matcher = pattern.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    assertTrue(matcher.find());
    assertEquals("Archbishop of Canterbury", matcher.group());
    assertFalse(matcher.find());

    // After reset(), find() locates the same match again.
    matcher.reset();
    assertTrue(matcher.find());
    assertEquals("Archbishop of Canterbury", matcher.group());

    // matches() is expected to be false for this pattern on this document.
    matcher.reset();
    assertFalse(matcher.matches());

    // --- Alternation of two sequences: three hits in document order, no capture groups ---
    pattern = TokenSequencePattern.compile(new SequencePattern.OrPatternExpr(getSequencePatternExpr("Archbishop", "of", "Canterbury"), getSequencePatternExpr("Bishop", "of", "London")));
    matcher = pattern.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    assertTrue(matcher.find());
    assertEquals(0, matcher.groupCount());
    assertEquals("Bishop of London", matcher.group());
    assertTrue(matcher.find());
    assertEquals(0, matcher.groupCount());
    assertEquals("Archbishop of Canterbury", matcher.group());
    assertTrue(matcher.find());
    assertEquals(0, matcher.groupCount());
    assertEquals("Bishop of London", matcher.group());
    assertFalse(matcher.find());

    // --- Begin anchor followed by a sequence that is expected not to match ---
    pattern = TokenSequencePattern.compile(new SequencePattern.SequencePatternExpr(SequencePattern.SEQ_BEGIN_PATTERN_EXPR, getSequencePatternExpr("Archbishop", "of", "Canterbury")));
    matcher = pattern.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    assertFalse(matcher.find());

    // --- Begin anchor followed by a sequence that is expected to match once ---
    pattern = TokenSequencePattern.compile(new SequencePattern.SequencePatternExpr(SequencePattern.SEQ_BEGIN_PATTERN_EXPR, getSequencePatternExpr("Mellitus", "was", "the")));
    matcher = pattern.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    assertTrue(matcher.find());
    assertEquals(0, matcher.groupCount());
    assertEquals("Mellitus was the", matcher.group());
    assertFalse(matcher.find());

    // --- Sequence followed by the end anchor, expected not to match ---
    pattern = TokenSequencePattern.compile(new SequencePattern.SequencePatternExpr(getSequencePatternExpr("Archbishop", "of", "Canterbury"), SequencePattern.SEQ_END_PATTERN_EXPR));
    matcher = pattern.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    assertFalse(matcher.find());

    // --- Sequence followed by the end anchor, expected to match once ---
    pattern = TokenSequencePattern.compile(new SequencePattern.SequencePatternExpr(getSequencePatternExpr("London", "in", "604", "."), SequencePattern.SEQ_END_PATTERN_EXPR));
    matcher = pattern.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
    assertTrue(matcher.find());
    assertEquals(0, matcher.groupCount());
    assertEquals("London in 604.", matcher.group());
    assertFalse(matcher.find());
}
Also used : CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 9 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class TokenSequenceMatcherITest method testTokenSequenceMatcher2.

/**
 * Exercises wildcard token expressions around the literal token "of" over {@code testText1},
 * first without capture groups and then with two groups wrapping the tokens before and
 * after "of". Patterns are built from pattern-expression objects rather than parsed strings.
 *
 * @throws IOException declared by the test signature
 */
public void testTokenSequenceMatcher2() throws IOException {
    CoreMap doc = createDocument(testText1);

    // --- Ungrouped: two arbitrary tokens, "of", one arbitrary token ---
    TokenSequencePattern pattern = TokenSequencePattern.compile(getSequencePatternExpr(".*", ".*", "of", ".*"));
    TokenSequenceMatcher matcher = pattern.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));

    assertTrue(matcher.find());
    assertEquals(0, matcher.groupCount());
    assertEquals("first Bishop of London", matcher.group());

    assertTrue(matcher.find());
    assertEquals(0, matcher.groupCount());
    assertEquals("third Archbishop of Canterbury", matcher.group());

    assertTrue(matcher.find());
    assertEquals(0, matcher.groupCount());
    assertEquals("a member of the", matcher.group());

    assertTrue(matcher.find());
    assertEquals(0, matcher.groupCount());
    assertEquals("as Bishop of London", matcher.group());

    assertFalse(matcher.find());

    // --- Grouped: group 1 is the two tokens before "of", group 2 the token after it ---
    pattern = TokenSequencePattern.compile(new SequencePattern.SequencePatternExpr(new SequencePattern.GroupPatternExpr(getSequencePatternExpr(".*", ".*")), getNodePatternExpr("of"), new SequencePattern.GroupPatternExpr(getSequencePatternExpr(".*"))));
    matcher = pattern.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));

    assertTrue(matcher.find());
    assertEquals(2, matcher.groupCount());
    assertEquals("first Bishop of London", matcher.group());
    assertEquals("first Bishop", matcher.group(1));
    assertEquals("London", matcher.group(2));

    assertTrue(matcher.find());
    assertEquals(2, matcher.groupCount());
    assertEquals("third Archbishop of Canterbury", matcher.group());
    assertEquals("third Archbishop", matcher.group(1));
    assertEquals("Canterbury", matcher.group(2));

    assertTrue(matcher.find());
    assertEquals(2, matcher.groupCount());
    assertEquals("a member of the", matcher.group());
    assertEquals("a member", matcher.group(1));
    assertEquals("the", matcher.group(2));

    assertTrue(matcher.find());
    assertEquals(2, matcher.groupCount());
    assertEquals("as Bishop of London", matcher.group());
    assertEquals("as Bishop", matcher.group(1));
    assertEquals("London", matcher.group(2));

    assertFalse(matcher.find());
}
Also used : CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 10 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class TokenSequenceMatcherITest method testTokenSequenceMatcher6.

/**
 * Exercises wildcard token patterns around the literal token "of" over {@code testText1},
 * with the patterns compiled from their string form: first without capture groups, then
 * with two parenthesized groups around the tokens before and after "of".
 *
 * @throws IOException declared by the test signature
 */
public void testTokenSequenceMatcher6() throws IOException {
    CoreMap doc = createDocument(testText1);

    // --- Ungrouped string pattern ---
    TokenSequencePattern pattern = TokenSequencePattern.compile("[ /.*/ ] [ /.*/ ] [/of/] [/.*/]");
    TokenSequenceMatcher matcher = pattern.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));

    assertTrue(matcher.find());
    assertEquals(0, matcher.groupCount());
    assertEquals("first Bishop of London", matcher.group());

    assertTrue(matcher.find());
    assertEquals(0, matcher.groupCount());
    assertEquals("third Archbishop of Canterbury", matcher.group());

    assertTrue(matcher.find());
    assertEquals(0, matcher.groupCount());
    assertEquals("a member of the", matcher.group());

    assertTrue(matcher.find());
    assertEquals(0, matcher.groupCount());
    assertEquals("as Bishop of London", matcher.group());

    assertFalse(matcher.find());

    // --- Same pattern with two capture groups ---
    pattern = TokenSequencePattern.compile("([ /.*/ ] [ /.*/ ]) [/of/] ([/.*/])");
    matcher = pattern.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));

    assertTrue(matcher.find());
    assertEquals(2, matcher.groupCount());
    assertEquals("first Bishop of London", matcher.group());
    assertEquals("first Bishop", matcher.group(1));
    assertEquals("London", matcher.group(2));

    assertTrue(matcher.find());
    assertEquals(2, matcher.groupCount());
    assertEquals("third Archbishop of Canterbury", matcher.group());
    assertEquals("third Archbishop", matcher.group(1));
    assertEquals("Canterbury", matcher.group(2));

    assertTrue(matcher.find());
    assertEquals(2, matcher.groupCount());
    assertEquals("a member of the", matcher.group());
    assertEquals("a member", matcher.group(1));
    assertEquals("the", matcher.group(2));

    assertTrue(matcher.find());
    assertEquals(2, matcher.groupCount());
    assertEquals("as Bishop of London", matcher.group());
    assertEquals("as Bishop", matcher.group(1));
    assertEquals("London", matcher.group(2));

    assertFalse(matcher.find());
}
Also used : CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap)

Aggregations

CoreMap (edu.stanford.nlp.util.CoreMap) 251, CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations) 170, CoreLabel (edu.stanford.nlp.ling.CoreLabel) 101, SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) 61, TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations) 53, ArrayList (java.util.ArrayList) 52, Annotation (edu.stanford.nlp.pipeline.Annotation) 47, Tree (edu.stanford.nlp.trees.Tree) 27, Properties (java.util.Properties) 22, SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph) 20, List (java.util.List) 20, StanfordCoreNLP (edu.stanford.nlp.pipeline.StanfordCoreNLP) 19, Mention (edu.stanford.nlp.coref.data.Mention) 17, ArrayCoreMap (edu.stanford.nlp.util.ArrayCoreMap) 17, CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations) 13, ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint) 12, SentencesAnnotation (edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation) 11, MachineReadingAnnotations (edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations) 9, IndexedWord (edu.stanford.nlp.ling.IndexedWord) 9, IntPair (edu.stanford.nlp.util.IntPair) 9