Search in sources :

Example 41 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class TimexTreeAnnotator method beginOffset.

/**
 * Resolves the begin offset for a tree by reading the
 * {@code BeginIndexAnnotation} stored on the tree's label, picking the token
 * at that index, and delegating to the token-level {@code beginOffset} overload.
 *
 * @param tree   tree whose label is cast to {@link CoreMap} and must carry a begin index
 * @param tokens token list indexed by the begin index read from the label
 * @return whatever the token-level overload returns for the selected token
 */
private static int beginOffset(Tree tree, List<CoreLabel> tokens) {
    final CoreMap treeLabel = (CoreMap) tree.label();
    // Unboxing here fails fast (NullPointerException) when the label has no begin index.
    final int firstTokenIndex = treeLabel.get(CoreAnnotations.BeginIndexAnnotation.class);
    final CoreLabel firstToken = tokens.get(firstTokenIndex);
    return beginOffset(firstToken);
}
Also used : TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap)

Example 42 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class SUTimeITest method testSUTimeDateTime.

/**
 * Checks date/time expressions found by the time annotator, first on a document
 * created from the text alone and then on a document created with "20050812"
 * as the second {@code createDocument} argument (the variable name suggests it
 * is the reference time).
 *
 * <p>Each pass walks the annotator's output in order against its own list of
 * expected {@link Timex} values and then asserts that list was fully consumed.
 *
 * @throws IOException declared by the original test; which call raises it is not visible here
 */
public void testSUTimeDateTime() throws IOException {
    // Set up test text
    String testText = "The vase of 14 fell early Friday evening.\n"
            + "The bus is not coming until 8:00 pm.\n"
            + "They were to have lunch at 12:15 on Thursday.\n"
            + "Or was it quarter to twelve on Wed?\n"
            + "He got home at twelve o'clock midnight.\n"
            + "Next Tuesday is Tuesday the 18th.\n"
            + "It happened early yesterday morning.\n"
            + "It happened late afternoon.\n"
            + "It happened late this afternoon.\n"
            + "It happened at 1800 hours.\n"
            + "The early nineteen fifties.\n"
            + "The story broke in the last week of October.\n"
            + "It was 7pm and then 7:20pm.";
    // set up expected results
    Iterator<Timex> expectedTimexes = Arrays.asList(
            Timex.fromXml("<TIMEX3 tid=\"t1\" value=\"XXXX-WXX-5TEV\" type=\"TIME\" mod=\"EARLY\">early Friday evening</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t2\" value=\"T20:00\" type=\"TIME\">8:00 pm</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t3\" value=\"XXXX-WXX-4T12:15\" type=\"TIME\">12:15 on Thursday</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t4\" value=\"XXXX-WXX-3T11:45\" type=\"TIME\">quarter to twelve on Wed</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t5\" value=\"T00:00\" type=\"TIME\">twelve o'clock midnight</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t6\" alt_value=\"XXXX-WXX-2 OFFSET P1W\" type=\"DATE\" temporalFunction=\"true\" valueFromFunction=\"tf0\" anchorTimeID=\"t7\">Next Tuesday</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t8\" value=\"XXXX-WXX-2\" type=\"DATE\">Tuesday the 18th</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t9\" alt_value=\"OFFSET P-1D INTERSECT MO\" type=\"DATE\" mod=\"EARLY\" temporalFunction=\"true\" valueFromFunction=\"tf1\" anchorTimeID=\"t10\">early yesterday morning</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t11\" value=\"TAF\" type=\"TIME\" mod=\"LATE\">late afternoon</TIMEX3>"),
            // TODO: time
            Timex.fromXml("<TIMEX3 tid=\"t12\" alt_value=\"THIS AF\" type=\"DATE\" mod=\"LATE\" temporalFunction=\"true\" valueFromFunction=\"tf2\" anchorTimeID=\"t0\">late this afternoon</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t13\" value=\"T18:00\" type=\"TIME\">1800 hours</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t14\" value=\"195X\" type=\"DATE\" mod=\"EARLY\">The early nineteen fifties</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t15\" alt_value=\"PREV_IMMEDIATE P1W INTERSECT XXXX-10\" type=\"DATE\" temporalFunction=\"true\" valueFromFunction=\"tf3\" anchorTimeID=\"t16\">the last week of October</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t17\" value=\"T19:00\" type=\"TIME\">7pm</TIMEX3>"),
            // TODO: the period should be dropped
            Timex.fromXml("<TIMEX3 tid=\"t18\" value=\"T19:20\" type=\"TIME\">7:20pm.</TIMEX3>")).iterator();
    Iterator<Timex> expectedTimexesResolved = Arrays.asList(
            Timex.fromXml("<TIMEX3 tid=\"t1\" value=\"2005-08-12TEV\" type=\"TIME\" mod=\"EARLY\">early Friday evening</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t2\" value=\"2005-08-12T20:00\" type=\"TIME\">8:00 pm</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t3\" value=\"2005-08-11T12:15\" type=\"TIME\">12:15 on Thursday</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t4\" value=\"2005-08-10T11:45\" type=\"TIME\">quarter to twelve on Wed</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t5\" value=\"2005-08-12T00:00\" type=\"TIME\">twelve o'clock midnight</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t6\" value=\"2005-08-16\" type=\"DATE\">Next Tuesday</TIMEX3>"),
            // TODO: Tuesday, the 18th  flag inconsistency (18th is thursday)
            Timex.fromXml("<TIMEX3 tid=\"t7\" value=\"2005-08-18\" type=\"DATE\">Tuesday the 18th</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t8\" value=\"2005-08-11TMO\" type=\"TIME\" mod=\"EARLY\">early yesterday morning</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t9\" value=\"2005-08-12TAF\" type=\"TIME\" mod=\"LATE\">late afternoon</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t10\" value=\"2005-08-12TAF\" type=\"TIME\" mod=\"LATE\">late this afternoon</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t11\" value=\"2005-08-12T18:00\" type=\"TIME\">1800 hours</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t12\" value=\"195X\" type=\"DATE\" mod=\"EARLY\">The early nineteen fifties</TIMEX3>"),
            // TODO: Resolve
            Timex.fromXml("<TIMEX3 tid=\"t13\" alt_value=\"PREV_IMMEDIATE P1W INTERSECT XXXX-10\" type=\"DATE\" temporalFunction=\"true\" valueFromFunction=\"tf0\" anchorTimeID=\"t14\">the last week of October</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t15\" value=\"2005-08-12T19:00\" type=\"TIME\">7pm</TIMEX3>"),
            // TODO: the period should be dropped
            Timex.fromXml("<TIMEX3 tid=\"t16\" value=\"2005-08-12T19:20\" type=\"TIME\">7:20pm.</TIMEX3>")).iterator();
    // create document
    Annotation document = createDocument(testText);
    // Time annotate
    TimeAnnotator sutime = getTimeAnnotator();
    sutime.annotate(document);
    // Check answers
    for (CoreMap timexAnn : document.get(TimeAnnotations.TimexAnnotations.class)) {
        Timex expectedTimex = expectedTimexes.next();
        checkTimex(testText, expectedTimex.text(), expectedTimex, timexAnn);
    }
    // Every expected unresolved timex must have been matched.
    assertFalse(expectedTimexes.hasNext());
    Annotation documentWithRefTime = createDocument(testText, "20050812");
    sutime.annotate(documentWithRefTime);
    for (CoreMap timexAnn : documentWithRefTime.get(TimeAnnotations.TimexAnnotations.class)) {
        Timex expectedTimex = expectedTimexesResolved.next();
        checkTimex(testText, expectedTimex.text(), expectedTimex, timexAnn);
    }
    // Check the resolved iterator here: re-checking expectedTimexes would pass
    // trivially and hide missing annotations from the reference-time pass.
    assertFalse(expectedTimexesResolved.hasNext());
}
Also used : CoreMap(edu.stanford.nlp.util.CoreMap)

Example 43 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class SUTimeITest method testSUTimeDurations.

// TODO: better to have file of test text, and expected results?
/**
 * Checks duration expressions found by the time annotator, first on a document
 * created from the text alone and then on a document created with "20100217"
 * as the second {@code createDocument} argument (the variable name suggests it
 * is the reference time).
 *
 * <p>Each pass walks the annotator's output in order against its own list of
 * expected {@link Timex} values and then asserts that list was fully consumed,
 * so a run that yields too few annotations fails instead of passing silently.
 *
 * @throws IOException declared by the original test; which call raises it is not visible here
 */
public void testSUTimeDurations() throws IOException {
    // Set up test text
    String testText = "It was a 3-year long drought.\n"
            + "The four-month old baby slept peacefully.\n"
            + "Over the past twenty four years, the number of crashes has decreased.\n"
            // following the crash not included (TODO in perl)
            + "In the 2 months following the crash, the investigators checked all the records.\n"
            // before leaving not included (TODO in perl)
            + "He was preoccupied for ten days before leaving.\n"
            + "Sales rose for the fifth straight year.\n"
            + "Business was slow for the third straight month in a row.\n"
            + "There are no more than 60 days.\n"
            + "In no more than 20 years, the city completely changed.\n"
            + "It has been more than 60 days.\n"
            + "Has it been more than 20 years?\n"
            + "There was at least sixty days.\n"
            + "The book was completed in four years.\n"
            + "That took a decade.\n"
            + "After a few decades, old memories faded.\n"
            + "After a few hundred decades, everything changed.\n"
            + "It has been warm in recent weeks.\n"
            + "Did it rain on the ninth day consecutively?\n"
            + "The meeting was two days ago.\n";
    // set up expected results
    Iterator<Timex> expectedTimexes = Arrays.asList(
            Timex.fromXml("<TIMEX3 tid=\"t1\" value=\"P3Y\" type=\"DURATION\">3-year</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t2\" value=\"P4M\" type=\"DURATION\">four-month old</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t4\" value=\"P24Y\" type=\"DURATION\" beginPoint=\"t3\" endPoint=\"t0\">the past twenty four years</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t5\" value=\"P2M\" type=\"DURATION\">the 2 months</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t6\" value=\"P10D\" type=\"DURATION\">ten days</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t8\" value=\"P5Y\" type=\"DURATION\" beginPoint=\"t7\" endPoint=\"t0\">the fifth straight year</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t10\" value=\"P3M\" type=\"DURATION\" beginPoint=\"t9\" endPoint=\"t0\">the third straight month in a row</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t11\" value=\"P60D\" type=\"DURATION\" mod=\"EQUAL_OR_LESS\">no more than 60 days</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t12\" value=\"P20Y\" type=\"DURATION\" mod=\"EQUAL_OR_LESS\">no more than 20 years</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t13\" value=\"P60D\" type=\"DURATION\" mod=\"MORE_THAN\">more than 60 days</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t14\" value=\"P20Y\" type=\"DURATION\" mod=\"MORE_THAN\">more than 20 years</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t15\" value=\"P60D\" type=\"DURATION\" mod=\"EQUAL_OR_MORE\">at least sixty days</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t16\" value=\"P4Y\" type=\"DURATION\">four years</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t17\" value=\"P10Y\" type=\"DURATION\">a decade</TIMEX3>"),
            // TODO: Expect PX0Y?
            Timex.fromXml("<TIMEX3 tid=\"t18\" value=\"PXY\" type=\"DURATION\">a few decades</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t19\" value=\"P1000Y\" type=\"DURATION\">hundred decades</TIMEX3>"),
            // TODO: Expect PXD?
            Timex.fromXml("<TIMEX3 tid=\"t20\" value=\"PXW\" type=\"DURATION\">recent weeks</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t22\" value=\"P9D\" type=\"DURATION\" beginPoint=\"t21\" endPoint=\"t0\">the ninth day consecutively</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t23\" alt_value=\"OFFSET P-2D\" type=\"DATE\" temporalFunction=\"true\" valueFromFunction=\"tf0\" anchorTimeID=\"t0\">two days ago</TIMEX3>")).iterator();
    Iterator<Timex> expectedTimexesResolved = Arrays.asList(
            Timex.fromXml("<TIMEX3 tid=\"t1\" value=\"P3Y\" type=\"DURATION\">3-year</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t2\" value=\"P4M\" type=\"DURATION\">four-month old</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t4\" value=\"P24Y\" type=\"DURATION\" beginPoint=\"t3\" endPoint=\"t0\">the past twenty four years</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t5\" value=\"P2M\" type=\"DURATION\">the 2 months</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t6\" value=\"P10D\" type=\"DURATION\">ten days</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t8\" value=\"P5Y\" type=\"DURATION\" beginPoint=\"t7\" endPoint=\"t0\">the fifth straight year</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t10\" value=\"P3M\" type=\"DURATION\" beginPoint=\"t9\" endPoint=\"t0\">the third straight month in a row</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t11\" value=\"P60D\" type=\"DURATION\" mod=\"EQUAL_OR_LESS\">no more than 60 days</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t12\" value=\"P20Y\" type=\"DURATION\" mod=\"EQUAL_OR_LESS\">no more than 20 years</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t13\" value=\"P60D\" type=\"DURATION\" mod=\"MORE_THAN\">more than 60 days</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t14\" value=\"P20Y\" type=\"DURATION\" mod=\"MORE_THAN\">more than 20 years</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t15\" value=\"P60D\" type=\"DURATION\" mod=\"EQUAL_OR_MORE\">at least sixty days</TIMEX3>"),
            // Timex.fromXml("<TIMEX3 tid=\"t20\" value=\"P60D\" type=\"DURATION\" mod=\"EQUAL_OR_MORE\" beginPoint=\"t0\" endPoint=\"t19\">at least sixty days</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t16\" value=\"P4Y\" type=\"DURATION\">four years</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t17\" value=\"P10Y\" type=\"DURATION\">a decade</TIMEX3>"),
            // TODO: Expect PX0Y?
            Timex.fromXml("<TIMEX3 tid=\"t18\" value=\"PXY\" type=\"DURATION\">a few decades</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t19\" value=\"P1000Y\" type=\"DURATION\">hundred decades</TIMEX3>"),
            // TODO: Expect PXD?
            Timex.fromXml("<TIMEX3 tid=\"t20\" value=\"PXW\" type=\"DURATION\">recent weeks</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t22\" value=\"P9D\" type=\"DURATION\" beginPoint=\"t21\" endPoint=\"t0\">the ninth day consecutively</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t23\" value=\"2010-02-15\" type=\"DATE\">two days ago</TIMEX3>")).iterator();
    // create document
    Annotation document = createDocument(testText);
    // Time annotate
    TimeAnnotator sutime = getTimeAnnotator();
    sutime.annotate(document);
    // Check answers
    for (CoreMap timexAnn : document.get(TimeAnnotations.TimexAnnotations.class)) {
        Timex expectedTimex = expectedTimexes.next();
        checkTimex(testText, expectedTimex.text(), expectedTimex, timexAnn);
    }
    // Without this check, an annotator that returns too few timexes would pass.
    assertFalse(expectedTimexes.hasNext());
    Annotation documentWithRefTime = createDocument(testText, "20100217");
    sutime.annotate(documentWithRefTime);
    for (CoreMap timexAnn : documentWithRefTime.get(TimeAnnotations.TimexAnnotations.class)) {
        Timex expectedTimex = expectedTimexesResolved.next();
        checkTimex(testText, expectedTimex.text(), expectedTimex, timexAnn);
    }
    assertFalse(expectedTimexesResolved.hasNext());
}
Also used : CoreMap(edu.stanford.nlp.util.CoreMap)

Example 44 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class SUTimeITest method testSUTimeHolidays.

/**
 * Checks holiday-plus-year expressions found by the time annotator, first on a
 * document created from the text alone and then on a document created with
 * "20100217" as the second {@code createDocument} argument (the variable name
 * suggests it is the reference time). Both passes expect the same values.
 *
 * <p>Each pass walks the annotator's output in order against its own list of
 * expected {@link Timex} values and then asserts that list was fully consumed.
 *
 * @throws IOException declared by the original test; which call raises it is not visible here
 */
public void testSUTimeHolidays() throws IOException {
    // Set up test text
    String testText = "When is mother's day 2012?\n"
            + "When is Christmas 2010?\n"
            + "When is Easter 2011?\n";
    // set up expected results
    Iterator<Timex> expectedTimexes = Arrays.asList(
            Timex.fromXml("<TIMEX3 tid=\"t1\" value=\"2012-05-13\" type=\"DATE\">mother's day 2012</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t2\" value=\"2010-12-25\" type=\"DATE\">Christmas 2010</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t3\" value=\"2011-04-24\" type=\"DATE\">Easter 2011</TIMEX3>")).iterator();
    Iterator<Timex> expectedTimexesResolved = Arrays.asList(
            Timex.fromXml("<TIMEX3 tid=\"t1\" value=\"2012-05-13\" type=\"DATE\">mother's day 2012</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t2\" value=\"2010-12-25\" type=\"DATE\">Christmas 2010</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t3\" value=\"2011-04-24\" type=\"DATE\">Easter 2011</TIMEX3>")).iterator();
    // create document
    Annotation document = createDocument(testText);
    // Time annotate
    TimeAnnotator sutime = getTimeAnnotator();
    sutime.annotate(document);
    // Check answers
    for (CoreMap timexAnn : document.get(TimeAnnotations.TimexAnnotations.class)) {
        Timex expectedTimex = expectedTimexes.next();
        checkTimex(testText, expectedTimex.text(), expectedTimex, timexAnn);
    }
    // Every expected timex from the first pass must have been matched.
    assertFalse(expectedTimexes.hasNext());
    Annotation documentWithRefTime = createDocument(testText, "20100217");
    sutime.annotate(documentWithRefTime);
    for (CoreMap timexAnn : documentWithRefTime.get(TimeAnnotations.TimexAnnotations.class)) {
        Timex expectedTimex = expectedTimexesResolved.next();
        checkTimex(testText, expectedTimex.text(), expectedTimex, timexAnn);
    }
    // Check the resolved iterator here: re-checking expectedTimexes would pass
    // trivially and hide missing annotations from the reference-time pass.
    assertFalse(expectedTimexesResolved.hasNext());
}
Also used : CoreMap(edu.stanford.nlp.util.CoreMap)

Example 45 with CoreMap

use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.

the class SUTimeITest method testSUTimeInexactTime.

/**
 * Checks inexact time-of-day expressions ("morning", "last night") found by the
 * time annotator, first on a document created from the text alone and then on
 * a document created with "20030414" as the second {@code createDocument}
 * argument (the variable name suggests it is the reference time).
 *
 * <p>Each pass walks the annotator's output in order against its own list of
 * expected {@link Timex} values and then asserts that list was fully consumed.
 *
 * @throws IOException declared by the original test; which call raises it is not visible here
 */
public void testSUTimeInexactTime() throws IOException {
    String testText = "The morning of January 31 was very cold.\n"
            + "He arrived late last night.\n"
            + "He arrived last night.\n";
    Iterator<Timex> expectedTimexes = Arrays.asList(
            Timex.fromXml("<TIMEX3 tid=\"t1\" value=\"XXXX-01-31TMO\" type=\"TIME\">The morning of January 31</TIMEX3>"),
            // TODO: time
            Timex.fromXml("<TIMEX3 tid=\"t2\" alt_value=\"TNI OFFSET P-1D\" type=\"DATE\" mod=\"LATE\" temporalFunction=\"true\" valueFromFunction=\"tf0\" anchorTimeID=\"t3\">late last night</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t4\" alt_value=\"TNI OFFSET P-1D\" type=\"DATE\" temporalFunction=\"true\" valueFromFunction=\"tf1\" anchorTimeID=\"t5\">last night</TIMEX3>")).iterator();
    Iterator<Timex> expectedTimexesResolved = Arrays.asList(
            Timex.fromXml("<TIMEX3 tid=\"t1\" value=\"2003-01-31TMO\" type=\"TIME\">The morning of January 31</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t2\" value=\"2003-04-13TNI\" type=\"TIME\" mod=\"LATE\">late last night</TIMEX3>"),
            Timex.fromXml("<TIMEX3 tid=\"t3\" value=\"2003-04-13TNI\" type=\"TIME\">last night</TIMEX3>")).iterator();
    // create document
    Annotation document = createDocument(testText);
    // Time annotate
    TimeAnnotator sutime = getTimeAnnotator();
    sutime.annotate(document);
    // Check answers
    for (CoreMap timexAnn : document.get(TimeAnnotations.TimexAnnotations.class)) {
        Timex expectedTimex = expectedTimexes.next();
        checkTimex(testText, expectedTimex.text(), expectedTimex, timexAnn);
    }
    // Every expected unresolved timex must have been matched.
    assertFalse(expectedTimexes.hasNext());
    Annotation documentWithRefTime = createDocument(testText, "20030414");
    sutime.annotate(documentWithRefTime);
    for (CoreMap timexAnn : documentWithRefTime.get(TimeAnnotations.TimexAnnotations.class)) {
        Timex expectedTimex = expectedTimexesResolved.next();
        checkTimex(testText, expectedTimex.text(), expectedTimex, timexAnn);
    }
    // Check the resolved iterator here: re-checking expectedTimexes would pass
    // trivially and hide missing annotations from the reference-time pass.
    assertFalse(expectedTimexesResolved.hasNext());
}
Also used : CoreMap(edu.stanford.nlp.util.CoreMap)

Aggregations

CoreMap (edu.stanford.nlp.util.CoreMap)251 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)170 CoreLabel (edu.stanford.nlp.ling.CoreLabel)101 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)61 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)53 ArrayList (java.util.ArrayList)52 Annotation (edu.stanford.nlp.pipeline.Annotation)47 Tree (edu.stanford.nlp.trees.Tree)27 Properties (java.util.Properties)22 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)20 List (java.util.List)20 StanfordCoreNLP (edu.stanford.nlp.pipeline.StanfordCoreNLP)19 Mention (edu.stanford.nlp.coref.data.Mention)17 ArrayCoreMap (edu.stanford.nlp.util.ArrayCoreMap)17 CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)13 ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint)12 SentencesAnnotation (edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation)11 MachineReadingAnnotations (edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations)9 IndexedWord (edu.stanford.nlp.ling.IndexedWord)9 IntPair (edu.stanford.nlp.util.IntPair)9