use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
the class TimexTreeAnnotator method beginOffset.
/**
 * Computes the begin offset for a tree by delegating to the single-token
 * {@code beginOffset} overload on the token the tree starts at.
 *
 * <p>The tree's label is cast to a {@link CoreMap} and its
 * {@code BeginIndexAnnotation} value is used as an index into {@code tokens}.
 * NOTE(review): presumably the annotation is always set on these labels; the
 * unboxing to {@code int} would fail if it were absent - confirm against callers.
 *
 * @param tree   tree whose label carries a {@code BeginIndexAnnotation}
 * @param tokens token list that the begin index refers into
 * @return whatever the single-token {@code beginOffset} overload returns for
 *         the token at the tree's begin index
 */
private static int beginOffset(Tree tree, List<CoreLabel> tokens) {
  CoreMap treeLabel = (CoreMap) tree.label();
  int firstTokenIndex = treeLabel.get(CoreAnnotations.BeginIndexAnnotation.class);
  CoreLabel firstToken = tokens.get(firstTokenIndex);
  return beginOffset(firstToken);
}
use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
the class SUTimeITest method testSUTimeDateTime.
/**
 * Checks the timexes produced for a set of date/time sentences, first on a
 * document created without a reference date and then on one created with the
 * reference date {@code "20050812"}.
 *
 * <p>Each annotation returned under {@code TimexAnnotations} is compared, in
 * order, with the next expected {@link Timex} via {@code checkTimex}. After
 * each pass the matching expectation iterator must be exhausted, so that a
 * missing annotation is reported instead of silently ignored.
 *
 * @throws IOException declared by the test signature
 */
public void testSUTimeDateTime() throws IOException {
  // Set up test text
  String testText = "The vase of 14 fell early Friday evening.\n"
      + "The bus is not coming until 8:00 pm.\n"
      + "They were to have lunch at 12:15 on Thursday.\n"
      + "Or was it quarter to twelve on Wed?\n"
      + "He got home at twelve o'clock midnight.\n"
      + "Next Tuesday is Tuesday the 18th.\n"
      + "It happened early yesterday morning.\n"
      + "It happened late afternoon.\n"
      + "It happened late this afternoon.\n"
      + "It happened at 1800 hours.\n"
      + "The early nineteen fifties.\n"
      + "The story broke in the last week of October.\n"
      + "It was 7pm and then 7:20pm.";

  // Expected results when no reference date is supplied
  Iterator<Timex> expectedTimexes = Arrays.asList(
      Timex.fromXml("<TIMEX3 tid=\"t1\" value=\"XXXX-WXX-5TEV\" type=\"TIME\" mod=\"EARLY\">early Friday evening</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t2\" value=\"T20:00\" type=\"TIME\">8:00 pm</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t3\" value=\"XXXX-WXX-4T12:15\" type=\"TIME\">12:15 on Thursday</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t4\" value=\"XXXX-WXX-3T11:45\" type=\"TIME\">quarter to twelve on Wed</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t5\" value=\"T00:00\" type=\"TIME\">twelve o'clock midnight</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t6\" alt_value=\"XXXX-WXX-2 OFFSET P1W\" type=\"DATE\" temporalFunction=\"true\" valueFromFunction=\"tf0\" anchorTimeID=\"t7\">Next Tuesday</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t8\" value=\"XXXX-WXX-2\" type=\"DATE\">Tuesday the 18th</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t9\" alt_value=\"OFFSET P-1D INTERSECT MO\" type=\"DATE\" mod=\"EARLY\" temporalFunction=\"true\" valueFromFunction=\"tf1\" anchorTimeID=\"t10\">early yesterday morning</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t11\" value=\"TAF\" type=\"TIME\" mod=\"LATE\">late afternoon</TIMEX3>"),
      // TODO: time
      Timex.fromXml("<TIMEX3 tid=\"t12\" alt_value=\"THIS AF\" type=\"DATE\" mod=\"LATE\" temporalFunction=\"true\" valueFromFunction=\"tf2\" anchorTimeID=\"t0\">late this afternoon</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t13\" value=\"T18:00\" type=\"TIME\">1800 hours</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t14\" value=\"195X\" type=\"DATE\" mod=\"EARLY\">The early nineteen fifties</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t15\" alt_value=\"PREV_IMMEDIATE P1W INTERSECT XXXX-10\" type=\"DATE\" temporalFunction=\"true\" valueFromFunction=\"tf3\" anchorTimeID=\"t16\">the last week of October</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t17\" value=\"T19:00\" type=\"TIME\">7pm</TIMEX3>"),
      // TODO: the period should be dropped
      Timex.fromXml("<TIMEX3 tid=\"t18\" value=\"T19:20\" type=\"TIME\">7:20pm.</TIMEX3>")).iterator();

  // Expected results when the document carries a reference date
  Iterator<Timex> expectedTimexesResolved = Arrays.asList(
      Timex.fromXml("<TIMEX3 tid=\"t1\" value=\"2005-08-12TEV\" type=\"TIME\" mod=\"EARLY\">early Friday evening</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t2\" value=\"2005-08-12T20:00\" type=\"TIME\">8:00 pm</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t3\" value=\"2005-08-11T12:15\" type=\"TIME\">12:15 on Thursday</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t4\" value=\"2005-08-10T11:45\" type=\"TIME\">quarter to twelve on Wed</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t5\" value=\"2005-08-12T00:00\" type=\"TIME\">twelve o'clock midnight</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t6\" value=\"2005-08-16\" type=\"DATE\">Next Tuesday</TIMEX3>"),
      // TODO: Tuesday, the 18th flag inconsistency (18th is thursday)
      Timex.fromXml("<TIMEX3 tid=\"t7\" value=\"2005-08-18\" type=\"DATE\">Tuesday the 18th</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t8\" value=\"2005-08-11TMO\" type=\"TIME\" mod=\"EARLY\">early yesterday morning</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t9\" value=\"2005-08-12TAF\" type=\"TIME\" mod=\"LATE\">late afternoon</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t10\" value=\"2005-08-12TAF\" type=\"TIME\" mod=\"LATE\">late this afternoon</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t11\" value=\"2005-08-12T18:00\" type=\"TIME\">1800 hours</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t12\" value=\"195X\" type=\"DATE\" mod=\"EARLY\">The early nineteen fifties</TIMEX3>"),
      // TODO: Resolve
      Timex.fromXml("<TIMEX3 tid=\"t13\" alt_value=\"PREV_IMMEDIATE P1W INTERSECT XXXX-10\" type=\"DATE\" temporalFunction=\"true\" valueFromFunction=\"tf0\" anchorTimeID=\"t14\">the last week of October</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t15\" value=\"2005-08-12T19:00\" type=\"TIME\">7pm</TIMEX3>"),
      // TODO: the period should be dropped
      Timex.fromXml("<TIMEX3 tid=\"t16\" value=\"2005-08-12T19:20\" type=\"TIME\">7:20pm.</TIMEX3>")).iterator();

  // create document
  Annotation document = createDocument(testText);

  // Time annotate
  TimeAnnotator sutime = getTimeAnnotator();
  sutime.annotate(document);

  // Check answers
  for (CoreMap timexAnn : document.get(TimeAnnotations.TimexAnnotations.class)) {
    Timex expectedTimex = expectedTimexes.next();
    checkTimex(testText, expectedTimex.text(), expectedTimex, timexAnn);
  }
  assertFalse(expectedTimexes.hasNext());

  // Second pass: same text, annotated against a reference date
  Annotation documentWithRefTime = createDocument(testText, "20050812");
  sutime.annotate(documentWithRefTime);
  for (CoreMap timexAnn : documentWithRefTime.get(TimeAnnotations.TimexAnnotations.class)) {
    Timex expectedTimex = expectedTimexesResolved.next();
    checkTimex(testText, expectedTimex.text(), expectedTimex, timexAnn);
  }
  // Must check the resolved iterator here; the unresolved one was already
  // verified above and would make this assertion trivially true.
  assertFalse(expectedTimexesResolved.hasNext());
}
use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
the class SUTimeITest method testSUTimeDurations.
// TODO: better to have file of test text, and expected results?
/**
 * Checks the timexes produced for a set of duration sentences, first on a
 * document created without a reference date and then on one created with the
 * reference date {@code "20100217"}.
 *
 * <p>Each annotation returned under {@code TimexAnnotations} is compared, in
 * order, with the next expected {@link Timex} via {@code checkTimex}. After
 * each pass the matching expectation iterator must be exhausted; without that
 * check the test would pass even if the annotator returned fewer timexes than
 * expected (or none at all).
 *
 * @throws IOException declared by the test signature
 */
public void testSUTimeDurations() throws IOException {
  // Set up test text
  String testText = "It was a 3-year long drought.\n"
      + "The four-month old baby slept peacefully.\n"
      + "Over the past twenty four years, the number of crashes has decreased.\n"
      // following the crash not included (TODO in perl)
      + "In the 2 months following the crash, the investigators checked all the records.\n"
      // before leaving not included (TODO in perl)
      + "He was preoccupied for ten days before leaving.\n"
      + "Sales rose for the fifth straight year.\n"
      + "Business was slow for the third straight month in a row.\n"
      + "There are no more than 60 days.\n"
      + "In no more than 20 years, the city completely changed.\n"
      + "It has been more than 60 days.\n"
      + "Has it been more than 20 years?\n"
      + "There was at least sixty days.\n"
      + "The book was completed in four years.\n"
      + "That took a decade.\n"
      + "After a few decades, old memories faded.\n"
      + "After a few hundred decades, everything changed.\n"
      + "It has been warm in recent weeks.\n"
      + "Did it rain on the ninth day consecutively?\n"
      + "The meeting was two days ago.\n";

  // Expected results when no reference date is supplied
  Iterator<Timex> expectedTimexes = Arrays.asList(
      Timex.fromXml("<TIMEX3 tid=\"t1\" value=\"P3Y\" type=\"DURATION\">3-year</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t2\" value=\"P4M\" type=\"DURATION\">four-month old</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t4\" value=\"P24Y\" type=\"DURATION\" beginPoint=\"t3\" endPoint=\"t0\">the past twenty four years</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t5\" value=\"P2M\" type=\"DURATION\">the 2 months</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t6\" value=\"P10D\" type=\"DURATION\">ten days</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t8\" value=\"P5Y\" type=\"DURATION\" beginPoint=\"t7\" endPoint=\"t0\">the fifth straight year</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t10\" value=\"P3M\" type=\"DURATION\" beginPoint=\"t9\" endPoint=\"t0\">the third straight month in a row</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t11\" value=\"P60D\" type=\"DURATION\" mod=\"EQUAL_OR_LESS\">no more than 60 days</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t12\" value=\"P20Y\" type=\"DURATION\" mod=\"EQUAL_OR_LESS\">no more than 20 years</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t13\" value=\"P60D\" type=\"DURATION\" mod=\"MORE_THAN\">more than 60 days</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t14\" value=\"P20Y\" type=\"DURATION\" mod=\"MORE_THAN\">more than 20 years</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t15\" value=\"P60D\" type=\"DURATION\" mod=\"EQUAL_OR_MORE\">at least sixty days</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t16\" value=\"P4Y\" type=\"DURATION\">four years</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t17\" value=\"P10Y\" type=\"DURATION\">a decade</TIMEX3>"),
      // TODO: Expect PX0Y?
      Timex.fromXml("<TIMEX3 tid=\"t18\" value=\"PXY\" type=\"DURATION\">a few decades</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t19\" value=\"P1000Y\" type=\"DURATION\">hundred decades</TIMEX3>"),
      // TODO: Expect PXD?
      Timex.fromXml("<TIMEX3 tid=\"t20\" value=\"PXW\" type=\"DURATION\">recent weeks</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t22\" value=\"P9D\" type=\"DURATION\" beginPoint=\"t21\" endPoint=\"t0\">the ninth day consecutively</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t23\" alt_value=\"OFFSET P-2D\" type=\"DATE\" temporalFunction=\"true\" valueFromFunction=\"tf0\" anchorTimeID=\"t0\">two days ago</TIMEX3>")).iterator();

  // Expected results when the document carries a reference date
  Iterator<Timex> expectedTimexesResolved = Arrays.asList(
      Timex.fromXml("<TIMEX3 tid=\"t1\" value=\"P3Y\" type=\"DURATION\">3-year</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t2\" value=\"P4M\" type=\"DURATION\">four-month old</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t4\" value=\"P24Y\" type=\"DURATION\" beginPoint=\"t3\" endPoint=\"t0\">the past twenty four years</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t5\" value=\"P2M\" type=\"DURATION\">the 2 months</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t6\" value=\"P10D\" type=\"DURATION\">ten days</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t8\" value=\"P5Y\" type=\"DURATION\" beginPoint=\"t7\" endPoint=\"t0\">the fifth straight year</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t10\" value=\"P3M\" type=\"DURATION\" beginPoint=\"t9\" endPoint=\"t0\">the third straight month in a row</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t11\" value=\"P60D\" type=\"DURATION\" mod=\"EQUAL_OR_LESS\">no more than 60 days</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t12\" value=\"P20Y\" type=\"DURATION\" mod=\"EQUAL_OR_LESS\">no more than 20 years</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t13\" value=\"P60D\" type=\"DURATION\" mod=\"MORE_THAN\">more than 60 days</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t14\" value=\"P20Y\" type=\"DURATION\" mod=\"MORE_THAN\">more than 20 years</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t15\" value=\"P60D\" type=\"DURATION\" mod=\"EQUAL_OR_MORE\">at least sixty days</TIMEX3>"),
      // Timex.fromXml("<TIMEX3 tid=\"t20\" value=\"P60D\" type=\"DURATION\" mod=\"EQUAL_OR_MORE\" beginPoint=\"t0\" endPoint=\"t19\">at least sixty days</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t16\" value=\"P4Y\" type=\"DURATION\">four years</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t17\" value=\"P10Y\" type=\"DURATION\">a decade</TIMEX3>"),
      // TODO: Expect PX0Y?
      Timex.fromXml("<TIMEX3 tid=\"t18\" value=\"PXY\" type=\"DURATION\">a few decades</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t19\" value=\"P1000Y\" type=\"DURATION\">hundred decades</TIMEX3>"),
      // TODO: Expect PXD?
      Timex.fromXml("<TIMEX3 tid=\"t20\" value=\"PXW\" type=\"DURATION\">recent weeks</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t22\" value=\"P9D\" type=\"DURATION\" beginPoint=\"t21\" endPoint=\"t0\">the ninth day consecutively</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t23\" value=\"2010-02-15\" type=\"DATE\">two days ago</TIMEX3>")).iterator();

  // create document
  Annotation document = createDocument(testText);

  // Time annotate
  TimeAnnotator sutime = getTimeAnnotator();
  sutime.annotate(document);

  // Check answers
  for (CoreMap timexAnn : document.get(TimeAnnotations.TimexAnnotations.class)) {
    Timex expectedTimex = expectedTimexes.next();
    checkTimex(testText, expectedTimex.text(), expectedTimex, timexAnn);
  }
  // Every expected timex must have been matched by an annotation.
  assertFalse(expectedTimexes.hasNext());

  // Second pass: same text, annotated against a reference date
  Annotation documentWithRefTime = createDocument(testText, "20100217");
  sutime.annotate(documentWithRefTime);
  for (CoreMap timexAnn : documentWithRefTime.get(TimeAnnotations.TimexAnnotations.class)) {
    Timex expectedTimex = expectedTimexesResolved.next();
    checkTimex(testText, expectedTimex.text(), expectedTimex, timexAnn);
  }
  // Every expected resolved timex must have been matched as well.
  assertFalse(expectedTimexesResolved.hasNext());
}
use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
the class SUTimeITest method testSUTimeHolidays.
/**
 * Checks the timexes produced for holiday-plus-year expressions, first on a
 * document created without a reference date and then on one created with the
 * reference date {@code "20100217"}.
 *
 * <p>Each annotation returned under {@code TimexAnnotations} is compared, in
 * order, with the next expected {@link Timex} via {@code checkTimex}. After
 * each pass the matching expectation iterator must be exhausted, so that a
 * missing annotation is reported instead of silently ignored.
 *
 * @throws IOException declared by the test signature
 */
public void testSUTimeHolidays() throws IOException {
  // Set up test text
  String testText = "When is mother's day 2012?\n"
      + "When is Christmas 2010?\n"
      + "When is Easter 2011?\n";

  // Expected results when no reference date is supplied
  Iterator<Timex> expectedTimexes = Arrays.asList(
      Timex.fromXml("<TIMEX3 tid=\"t1\" value=\"2012-05-13\" type=\"DATE\">mother's day 2012</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t2\" value=\"2010-12-25\" type=\"DATE\">Christmas 2010</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t3\" value=\"2011-04-24\" type=\"DATE\">Easter 2011</TIMEX3>")).iterator();

  // Expected results when the document carries a reference date
  Iterator<Timex> expectedTimexesResolved = Arrays.asList(
      Timex.fromXml("<TIMEX3 tid=\"t1\" value=\"2012-05-13\" type=\"DATE\">mother's day 2012</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t2\" value=\"2010-12-25\" type=\"DATE\">Christmas 2010</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t3\" value=\"2011-04-24\" type=\"DATE\">Easter 2011</TIMEX3>")).iterator();

  // create document
  Annotation document = createDocument(testText);

  // Time annotate
  TimeAnnotator sutime = getTimeAnnotator();
  sutime.annotate(document);

  // Check answers
  for (CoreMap timexAnn : document.get(TimeAnnotations.TimexAnnotations.class)) {
    Timex expectedTimex = expectedTimexes.next();
    checkTimex(testText, expectedTimex.text(), expectedTimex, timexAnn);
  }
  assertFalse(expectedTimexes.hasNext());

  // Second pass: same text, annotated against a reference date
  Annotation documentWithRefTime = createDocument(testText, "20100217");
  sutime.annotate(documentWithRefTime);
  for (CoreMap timexAnn : documentWithRefTime.get(TimeAnnotations.TimexAnnotations.class)) {
    Timex expectedTimex = expectedTimexesResolved.next();
    checkTimex(testText, expectedTimex.text(), expectedTimex, timexAnn);
  }
  // Must check the resolved iterator here; the unresolved one was already
  // verified above and would make this assertion trivially true.
  assertFalse(expectedTimexesResolved.hasNext());
}
use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
the class SUTimeITest method testSUTimeInexactTime.
/**
 * Checks the timexes produced for inexact time-of-day expressions, first on a
 * document created without a reference date and then on one created with the
 * reference date {@code "20030414"}.
 *
 * <p>Each annotation returned under {@code TimexAnnotations} is compared, in
 * order, with the next expected {@link Timex} via {@code checkTimex}. After
 * each pass the matching expectation iterator must be exhausted, so that a
 * missing annotation is reported instead of silently ignored.
 *
 * @throws IOException declared by the test signature
 */
public void testSUTimeInexactTime() throws IOException {
  String testText = "The morning of January 31 was very cold.\n"
      + "He arrived late last night.\n"
      + "He arrived last night.\n";

  // Expected results when no reference date is supplied
  Iterator<Timex> expectedTimexes = Arrays.asList(
      Timex.fromXml("<TIMEX3 tid=\"t1\" value=\"XXXX-01-31TMO\" type=\"TIME\">The morning of January 31</TIMEX3>"),
      // TODO: time
      Timex.fromXml("<TIMEX3 tid=\"t2\" alt_value=\"TNI OFFSET P-1D\" type=\"DATE\" mod=\"LATE\" temporalFunction=\"true\" valueFromFunction=\"tf0\" anchorTimeID=\"t3\">late last night</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t4\" alt_value=\"TNI OFFSET P-1D\" type=\"DATE\" temporalFunction=\"true\" valueFromFunction=\"tf1\" anchorTimeID=\"t5\">last night</TIMEX3>")).iterator();

  // Expected results when the document carries a reference date
  Iterator<Timex> expectedTimexesResolved = Arrays.asList(
      Timex.fromXml("<TIMEX3 tid=\"t1\" value=\"2003-01-31TMO\" type=\"TIME\">The morning of January 31</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t2\" value=\"2003-04-13TNI\" type=\"TIME\" mod=\"LATE\">late last night</TIMEX3>"),
      Timex.fromXml("<TIMEX3 tid=\"t3\" value=\"2003-04-13TNI\" type=\"TIME\">last night</TIMEX3>")).iterator();

  // create document
  Annotation document = createDocument(testText);

  // Time annotate
  TimeAnnotator sutime = getTimeAnnotator();
  sutime.annotate(document);

  // Check answers
  for (CoreMap timexAnn : document.get(TimeAnnotations.TimexAnnotations.class)) {
    Timex expectedTimex = expectedTimexes.next();
    checkTimex(testText, expectedTimex.text(), expectedTimex, timexAnn);
  }
  assertFalse(expectedTimexes.hasNext());

  // Second pass: same text, annotated against a reference date
  Annotation documentWithRefTime = createDocument(testText, "20030414");
  sutime.annotate(documentWithRefTime);
  for (CoreMap timexAnn : documentWithRefTime.get(TimeAnnotations.TimexAnnotations.class)) {
    Timex expectedTimex = expectedTimexesResolved.next();
    checkTimex(testText, expectedTimex.text(), expectedTimex, timexAnn);
  }
  // Must check the resolved iterator here; the unresolved one was already
  // verified above and would make this assertion trivially true.
  assertFalse(expectedTimexesResolved.hasNext());
}
Aggregations