Example use of edu.illinois.cs.cogcomp.pos.POSAnnotator in the cogcomp-nlp project by CogComp.
From class TemporalNormalizerBenchmark, method testTemporalChunker.
/**
 * Normalizes the dataset using our Chunker for temporal phrase extraction and writes one
 * {@code <docID>.tml} file per document into {@code outputFolder}.
 *
 * <p>The run has two phases: every entry of {@code testText} is tokenized and POS-tagged
 * first, then each resulting annotation is passed to {@code tca} together with the matching
 * document creation time from {@code DCTs}. {@code testText}, {@code DCTs}, {@code docIDs}
 * and {@code tca} are members declared outside this method and are assumed to be index-aligned
 * and initialized before the call (TODO confirm against the enclosing class).
 *
 * @param outputFolder directory that receives the output files; it is created, including any
 *        missing parent directories, when it does not exist yet
 * @param verbose when {@code true}, prints the document ids, the extracted TIMEX strings and
 *        the timing figures to standard output
 * @throws Exception if any step of building the annotations or writing the output fails with
 *         an exception that is not handled inside this method
 */
public void testTemporalChunker(String outputFolder, boolean verbose) throws Exception {
    TextAnnotationBuilder tab = new TokenizerTextAnnotationBuilder(new StatefulTokenizer(false, false));
    ResourceManager nerRm = new TemporalChunkerConfigurator().getDefaultConfig();
    // The return value is not used; the call is kept exactly as in the original.
    IOUtilities.existsInClasspath(TemporalChunkerAnnotator.class, nerRm.getString("modelDirPath"));
    java.util.logging.Logger.getLogger("HeidelTimeStandalone").setLevel(Level.OFF);

    // Phase 1: tokenize and POS-tag every document up front so it can be timed separately.
    List<TextAnnotation> taList = new ArrayList<>();
    long preprocessTime = System.currentTimeMillis();
    POSAnnotator annotator = new POSAnnotator();
    for (int j = 0; j < testText.size(); j++) {
        TextAnnotation ta = tab.createTextAnnotation("corpus", "id", testText.get(j));
        try {
            annotator.getView(ta);
        } catch (AnnotatorException e) {
            // Same failure type as fail(), but the original exception is kept as the cause.
            throw new AssertionError("AnnotatorException thrown!\n" + e.getMessage(), e);
        }
        taList.add(ta);
    }
    if (verbose) {
        System.out.println("Start");
    }

    // Phase 2: temporal chunking + normalization, one output file per document.
    long startTime = System.currentTimeMillis();
    File outDir = new File(outputFolder);
    // mkdirs() also creates missing parents; a failure is reported instead of being ignored.
    if (!outDir.isDirectory() && !outDir.mkdirs()) {
        fail("Could not create output directory " + outDir);
    }
    for (int j = 0; j < testText.size(); j++) {
        tca.addDocumentCreationTime(DCTs.get(j));
        TextAnnotation ta = taList.get(j);
        try {
            tca.addView(ta);
        } catch (AnnotatorException e) {
            // Concatenating e.getStackTrace() would only print the array identity;
            // attach the exception as the cause so the real trace is reported.
            throw new AssertionError("Exception while adding TIMEX3 VIEW " + e.getMessage(), e);
        }
        // Resolve the file against the directory created above so both always agree,
        // including when outputFolder is an absolute path.
        String outputFileName = new File(outDir, docIDs.get(j) + ".tml").getPath();
        if (verbose) {
            System.out.println(docIDs.get(j));
            for (TimexChunk tc : tca.getTimex()) {
                System.out.println(tc.toTIMEXString());
            }
            System.out.println("\n");
        }
        tca.write2Text(outputFileName, docIDs.get(j), testText.get(j));
        // Clear the collected TIMEX chunks before the next document is processed.
        tca.deleteTimex();
    }
    long endTime = System.currentTimeMillis();
    long totalTime = endTime - startTime;
    if (verbose) {
        System.out.println("Process time: " + totalTime);
        System.out.println("Preprocess + process time: " + (endTime - preprocessTime));
    }
}
Example use of edu.illinois.cs.cogcomp.pos.POSAnnotator in the cogcomp-nlp project by CogComp.
From class TemporalNormalizerBenchmark, method testNormalizationWithTrueExtraction.
/**
 * Normalizes the dataset using the real (gold) temporal extractions instead of the chunker's
 * own predictions, and writes one {@code <docID>.tml} file per document into
 * {@code outputFolder}.
 *
 * <p>Every document is tokenized and POS-tagged first (with newlines replaced by spaces);
 * afterwards the TIMEX chunks are read via {@code tca.extractTimexFromFile}, handed back to
 * {@code tca} and written out. {@code te3inputText}, {@code testText}, {@code DCTs},
 * {@code docIDs} and {@code tca} are members declared outside this method.
 *
 * <p>NOTE(review): both loops are bounded by {@code te3inputText.size()} while indexing
 * {@code testText}, {@code DCTs} and {@code docIDs}; this presumes all four collections have
 * the same length and order — confirm against the code that fills them.
 *
 * @param outputFolder directory that receives the output files; it is created, including any
 *        missing parent directories, when it does not exist yet
 * @param verbose when {@code true}, prints each document id and is forwarded to
 *        {@code extractTimexFromFile}
 * @throws Exception if any step fails with an exception that is not handled inside this method
 */
public void testNormalizationWithTrueExtraction(String outputFolder, boolean verbose) throws Exception {
    TextAnnotationBuilder tab = new TokenizerTextAnnotationBuilder(new StatefulTokenizer(false, false));
    System.out.println("Working Directory = " + System.getProperty("user.dir"));
    ResourceManager nerRm = new TemporalChunkerConfigurator().getDefaultConfig();
    // The return value is not used; the call is kept exactly as in the original.
    IOUtilities.existsInClasspath(TemporalChunkerAnnotator.class, nerRm.getString("modelDirPath"));
    java.util.logging.Logger.getLogger("HeidelTimeStandalone").setLevel(Level.OFF);

    // Phase 1: tokenize and POS-tag every document up front so it can be timed separately.
    long preprocessTime = System.currentTimeMillis();
    List<TextAnnotation> taList = new ArrayList<>();
    POSAnnotator annotator = new POSAnnotator();
    for (int j = 0; j < te3inputText.size(); j++) {
        String text = testText.get(j);
        // Literal replacement; no regular expression is needed to swap newlines for spaces.
        text = text.replace("\n", " ");
        TextAnnotation ta = tab.createTextAnnotation("corpus", "id", text);
        try {
            annotator.getView(ta);
        } catch (AnnotatorException e) {
            // Same failure type as fail(), but the original exception is kept as the cause.
            throw new AssertionError("AnnotatorException thrown!\n" + e.getMessage(), e);
        }
        taList.add(ta);
    }

    // Phase 2: read the provided extractions, normalize them and write one file per document.
    long startTime = System.currentTimeMillis();
    int numTimex = 0;
    File outDir = new File(outputFolder);
    // mkdirs() also creates missing parents; a failure is reported instead of being ignored.
    if (!outDir.isDirectory() && !outDir.mkdirs()) {
        fail("Could not create output directory " + outDir);
    }
    for (int j = 0; j < te3inputText.size(); j++) {
        TextAnnotation ta = taList.get(j);
        tca.addDocumentCreationTime(DCTs.get(j));
        if (verbose) {
            System.out.println(docIDs.get(j));
        }
        try {
            List<TimexChunk> timex = tca.extractTimexFromFile(te3inputText.get(j), testText.get(j), ta, verbose);
            tca.setTimex(timex);
            String outputFileName = outputFolder + "/" + docIDs.get(j) + ".tml";
            tca.write2Text(outputFileName, docIDs.get(j), testText.get(j));
            numTimex += timex.size();
            // Clear the collected TIMEX chunks before the next document is processed.
            tca.deleteTimex();
        } catch (AnnotatorException e) {
            // Concatenating e.getStackTrace() would only print the array identity;
            // attach the exception as the cause so the real trace is reported.
            throw new AssertionError("Exception while adding TIMEX3 VIEW " + e.getMessage(), e);
        }
    }
    long endTime = System.currentTimeMillis();
    long totalTime = endTime - startTime;
    System.out.println("Process time: " + totalTime);
    System.out.println("Preprocess + process time: " + (endTime - preprocessTime));
    System.out.println("Total timex3: " + numTimex);
}
Example use of edu.illinois.cs.cogcomp.pos.POSAnnotator in the cogcomp-nlp project by CogComp.
From class TestTemporalChunker, method testTemporalChunkerWithPlainText.
/**
 * Runs the temporal chunker on a fixed plain-text news article and checks the label of the
 * first constituent in the resulting TIMEX3 view.
 *
 * @throws Exception if building the annotation or adding the TIMEX3 view fails
 */
@Test
public void testTemporalChunkerWithPlainText() throws Exception {
    String text = "The flu season is winding down, and it has killed 105 children so far - about the average toll.\n"
            + "\n"
            + "The season started about a month earlier than usual, sparking concerns it might turn into the worst in "
            + "a decade. It ended up being very hard on the elderly, but was moderately severe overall, according to "
            + "the Centers for Disease Control and Prevention.\n"
            + "\n"
            + "Six of the pediatric deaths were reported in the last week, and it's possible there will be more, said "
            + "the CDC's Dr. Michael Jhung said Friday.\n"
            + "\n"
            + "Roughly 100 children die in an average flu season. One exception was the swine flu pandemic of "
            + "2009-2010, when 348 children died.\n"
            + "\n"
            + "The CDC recommends that all children ages 6 months and older be vaccinated against flu each season, "
            + "though only about half get a flu shot or nasal spray.\n"
            + "\n"
            + "All but four of the children who died were old enough to be vaccinated, but 90 percent of them did "
            + "not get vaccinated, CDC officials said.\n"
            + "\n"
            + "This year's vaccine was considered effective in children, though it didn't work very well in older "
            + "people. And the dominant flu strain early in the season was one that tends to "
            + "cause more severe illness.\n"
            + "\n"
            + "The government only does a national flu death count for children. But it does track hospitalization "
            + "rates for people 65 and older, and those statistics have been grim.\n"
            + "\n"
            + "In that group, 177 out of every 100,000 were hospitalized with flu-related illness in the past "
            + "several months. That's more than 2 1/2 times higher than any other recent season.\n"
            + "\n"
            + "This flu season started in early December, a month earlier than usual, and peaked by the end "
            + "of year. Since then, flu reports have been dropping off throughout the country.\n"
            + "\n"
            + "\"We appear to be getting close to the end of flu season,\" Jhung said.";

    // Tokenize the article, then POS-tag it before the temporal chunker runs.
    TextAnnotationBuilder builder = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
    TextAnnotation annotation = builder.createTextAnnotation("corpus", "id", text);
    POSAnnotator posTagger = new POSAnnotator();
    try {
        posTagger.getView(annotation);
    } catch (AnnotatorException posFailure) {
        fail("AnnotatorException thrown!\n" + posFailure.getMessage());
    }

    // tca is declared outside this method; it adds the TIMEX3 view to the annotation.
    tca.addView(annotation);
    View timexView = annotation.getView(ViewNames.TIMEX3);
    List<Constituent> timexes = timexView.getConstituents();
    Constituent firstTimex = timexes.get(0);
    assertEquals("<TIMEX3 type=\"DURATION\" value=\"P1M\">", firstTimex.getLabel());
}
Example use of edu.illinois.cs.cogcomp.pos.POSAnnotator in the cogcomp-nlp project by CogComp.
From class ACERelationTester, method testRandomText.
/**
 * Annotates an arbitrary piece of text with POS, Stanford dependency, chunk, mention and
 * relation views (in that order) and prints every relation found in the RELATION view.
 *
 * <p>Any exception raised while building or applying the annotators is caught and its stack
 * trace is printed; nothing is rethrown to the caller.
 *
 * @param text the raw text to annotate
 */
public static void testRandomText(String text) {
    // Corpus id and text id are both left empty for ad-hoc input.
    TextAnnotationBuilder builder = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
    TextAnnotation annotation = builder.createTextAnnotation("", "", text);
    try {
        POSAnnotator cogcompPos = new POSAnnotator();
        ChunkerAnnotator shallowParser = new ChunkerAnnotator(true);
        shallowParser.initialize(new ChunkerConfigurator().getDefaultConfig());

        // Configuration shared by the Stanford POS tagger and parser.
        // NOTE(review): values stored with put() that are not Strings are not returned by
        // Properties.getProperty(); confirm the Stanford annotators see these settings.
        Properties parserProps = new Properties();
        parserProps.put("annotators", "pos, parse");
        parserProps.put("parse.originalDependencies", true);
        parserProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
        parserProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE);
        POSTaggerAnnotator stanfordPos = new POSTaggerAnnotator("pos", parserProps);
        ParserAnnotator stanfordParser = new ParserAnnotator("parse", parserProps);
        StanfordDepHandler depHandler = new StanfordDepHandler(stanfordPos, stanfordParser);

        MentionAnnotator mentions = new MentionAnnotator("ACE_TYPE");
        RelationAnnotator relations = new RelationAnnotator();

        // Add the views in the same order as the annotators were applied originally.
        annotation.addView(cogcompPos);
        depHandler.addView(annotation);
        shallowParser.addView(annotation);
        mentions.addView(annotation);
        relations.addView(annotation);

        for (Relation relation : annotation.getView(ViewNames.RELATION).getRelations()) {
            IOHelper.printRelation(relation);
        }
    } catch (Exception failure) {
        failure.printStackTrace();
    }
}
Example use of edu.illinois.cs.cogcomp.pos.POSAnnotator in the cogcomp-nlp project by CogComp.
From class Demo, method main.
/**
 * Command-line demo of the temporal normalizer: POS-tags the input text, runs the temporal
 * chunker with the {@code useHeidelTime} property set to {@code "False"}, and prints every
 * constituent of the resulting TIMEX3 view with its {@code type} and {@code value} attributes.
 *
 * <p>The text is taken from the optional {@code -t}/{@code --text} argument; when it is absent
 * a built-in news article is used. If the command line cannot be parsed, the error and the
 * usage help are printed and the JVM exits with status 1.
 *
 * @param args command-line arguments; only {@code -t <text>} / {@code --text <text>} is defined
 * @throws IOException declared by the original signature and kept for compatibility
 * @throws AnnotatorException if POS tagging or temporal chunking fails; the exception is
 *         propagated directly rather than being converted into a test assertion failure
 */
public static void main(String[] args) throws IOException, AnnotatorException {
    Options options = new Options();
    Option inputtext = new Option("t", "text", true, "input text to be processed");
    inputtext.setRequired(false);
    options.addOption(inputtext);
    CommandLineParser parser = new DefaultParser();
    HelpFormatter formatter = new HelpFormatter();
    try {
        CommandLine cmd = parser.parse(options, args);
        String defaultText = "The flu season is winding down, and it has killed 105 children so far - about the average toll.\n"
                + "\n"
                + "The season started about a month earlier than usual, sparking concerns it might turn into the worst in "
                + "a decade. It ended up being very hard on the elderly, but was moderately severe overall, according to "
                + "the Centers for Disease Control and Prevention.\n"
                + "\n"
                + "Six of the pediatric deaths were reported in the last week, and it's possible there will be more, said "
                + "the CDC's Dr. Michael Jhung said Friday.\n"
                + "\n"
                + "Roughly 100 children die in an average flu season. One exception was the swine flu pandemic of "
                + "2009-2010, when 348 children died.\n"
                + "\n"
                + "The CDC recommends that all children ages 6 months and older be vaccinated against flu each season, "
                + "though only about half get a flu shot or nasal spray.\n"
                + "\n"
                + "All but four of the children who died were old enough to be vaccinated, but 90 percent of them did "
                + "not get vaccinated, CDC officials said.\n"
                + "\n"
                + "This year's vaccine was considered effective in children, though it didn't work very well in older "
                + "people. And the dominant flu strain early in the season was one that tends to "
                + "cause more severe illness.\n"
                + "\n"
                + "The government only does a national flu death count for children. But it does track hospitalization "
                + "rates for people 65 and older, and those statistics have been grim.\n"
                + "\n"
                + "In that group, 177 out of every 100,000 were hospitalized with flu-related illness in the past "
                + "several months. That's more than 2 1/2 times higher than any other recent season.\n"
                + "\n"
                + "This flu season started in early December, a month earlier than usual, and peaked by the end "
                + "of year. Since then, flu reports have been dropping off throughout the country.\n"
                + "\n"
                + "\"We appear to be getting close to the end of flu season,\" Jhung said.";
        String text = cmd.getOptionValue("text", defaultText);

        TextAnnotationBuilder tab = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
        TextAnnotation ta = tab.createTextAnnotation("corpus", "id", text);

        // A demo should not depend on the test framework's fail(); the method already
        // declares AnnotatorException, so a tagging failure simply propagates with its trace.
        POSAnnotator annotator = new POSAnnotator();
        annotator.getView(ta);

        Properties rmProps = new TemporalChunkerConfigurator().getDefaultConfig().getProperties();
        rmProps.setProperty("useHeidelTime", "False");
        TemporalChunkerAnnotator tca = new TemporalChunkerAnnotator(new ResourceManager(rmProps));
        tca.addView(ta);

        View temporalViews = ta.getView(ViewNames.TIMEX3);
        List<Constituent> constituents = temporalViews.getConstituents();
        System.out.printf("There're %d time expressions (TIMEX) in total.\n", constituents.size());
        // Iterate by position: indexOf() would rescan the list on every iteration and would
        // report the first match's index for constituents that compare equal.
        for (int i = 0; i < constituents.size(); i++) {
            Constituent c = constituents.get(i);
            System.out.printf("TIMEX #%d: Text=%s, Type=%s, Value=%s\n", i, c, c.getAttribute("type"), c.getAttribute("value"));
        }
    } catch (ParseException e) {
        System.out.println(e.getMessage());
        formatter.printHelp("Temporal Normalizer Demo", options);
        System.exit(1);
    }
}
Aggregations