Search in sources :

Example 1 with TimexChunk

use of edu.illinois.cs.cogcomp.temporal.normalizer.main.timex2interval.TimexChunk in project cogcomp-nlp by CogComp.

Shown below: the method testTemporalChunker of the class TemporalNormalizerBenchmark.

/**
 * Normalizes the test dataset, using our chunker to extract the temporal phrases, and
 * writes one {@code .tml} file per document.
 *
 * <p>Every text in {@code testText} is first tokenized and POS-tagged. Then, per document,
 * the matching entry of {@code DCTs} is registered with {@code tca} as document creation
 * time, the TIMEX3 view is added, and the result is written to
 * {@code <outputFolder>/<docID>.tml}.
 *
 * @param outputFolder directory receiving the {@code .tml} files; it is created, together
 *                     with any missing parent directories, when it does not exist yet
 * @param verbose      when {@code true}, progress, the extracted TIMEX strings and the
 *                     timings are printed to standard output
 * @throws Exception if the output directory cannot be created, or propagated from the
 *                   annotation and writing calls
 */
public void testTemporalChunker(String outputFolder, boolean verbose) throws Exception {
    TextAnnotationBuilder tab = new TokenizerTextAnnotationBuilder(new StatefulTokenizer(false, false));
    ResourceManager nerRm = new TemporalChunkerConfigurator().getDefaultConfig();
    // The boolean result is not used here.
    // NOTE(review): presumably a probe for the model directory - confirm it is still needed.
    IOUtilities.existsInClasspath(TemporalChunkerAnnotator.class, nerRm.getString("modelDirPath"));
    // Silence the "HeidelTimeStandalone" logger for the duration of the run.
    java.util.logging.Logger.getLogger("HeidelTimeStandalone").setLevel(Level.OFF);

    // Phase 1: tokenize and POS-tag every document up front.
    List<TextAnnotation> taList = new ArrayList<>(testText.size());
    long preprocessTime = System.currentTimeMillis();
    POSAnnotator annotator = new POSAnnotator();
    for (int j = 0; j < testText.size(); j++) {
        TextAnnotation ta = tab.createTextAnnotation("corpus", "id", testText.get(j));
        try {
            annotator.getView(ta);
        } catch (AnnotatorException e) {
            fail("AnnotatorException thrown!\n" + e.getMessage());
        }
        taList.add(ta);
    }
    if (verbose) {
        System.out.println("Start");
    }

    // Phase 2: add the TIMEX3 view and write one file per document.
    long startTime = System.currentTimeMillis();
    // Resolve once against the working directory so that the directory created below and
    // the files written into it always agree, for relative and absolute folders alike.
    File outDir = new File(outputFolder).getAbsoluteFile();
    if (!outDir.isDirectory() && !outDir.mkdirs()) {
        throw new java.io.IOException("Could not create output directory " + outDir);
    }
    for (int j = 0; j < testText.size(); j++) {
        tca.addDocumentCreationTime(DCTs.get(j));
        TextAnnotation ta = taList.get(j);
        try {
            tca.addView(ta);
        } catch (AnnotatorException e) {
            // Concatenating the StackTraceElement[] directly would only print the array's
            // identity, so render the exception and its frames explicitly.
            fail("Exception while adding TIMEX3 VIEW " + e + "\n"
                    + java.util.Arrays.toString(e.getStackTrace()));
        }
        String outputFileName = new File(outDir, docIDs.get(j) + ".tml").getPath();
        if (verbose) {
            System.out.println(docIDs.get(j));
            for (TimexChunk tc : tca.getTimex()) {
                System.out.println(tc.toTIMEXString());
            }
            System.out.println("\n");
        }
        tca.write2Text(outputFileName, docIDs.get(j), testText.get(j));
        // Clear the annotator's TIMEX list before the next document is processed.
        tca.deleteTimex();
    }

    long endTime = System.currentTimeMillis();
    long totalTime = endTime - startTime;
    if (verbose) {
        System.out.println("Process time: " + totalTime);
        System.out.println("Preprocess + process time: " + (endTime - preprocessTime));
    }
}
Also used : TextAnnotationBuilder(edu.illinois.cs.cogcomp.annotation.TextAnnotationBuilder) TokenizerTextAnnotationBuilder(edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder) TimexChunk(edu.illinois.cs.cogcomp.temporal.normalizer.main.timex2interval.TimexChunk) POSAnnotator(edu.illinois.cs.cogcomp.pos.POSAnnotator) ArrayList(java.util.ArrayList) AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) ResourceManager(edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager) TokenizerTextAnnotationBuilder(edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder) StatefulTokenizer(edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)

Example 2 with TimexChunk

use of edu.illinois.cs.cogcomp.temporal.normalizer.main.timex2interval.TimexChunk in project cogcomp-nlp by CogComp.

Shown below: the method testNormalizationWithTrueExtraction of the class TemporalNormalizerBenchmark.

/**
 * Normalizes the dataset using the provided ("true") extraction instead of the chunker's
 * own one, and writes one {@code .tml} file per document.
 *
 * <p>Every text is first tokenized (with newlines replaced by spaces) and POS-tagged. Then,
 * per document, the TIMEX chunks are obtained via {@code tca.extractTimexFromFile} from the
 * matching {@code te3inputText} entry, handed to {@code tca}, and written to
 * {@code <outputFolder>/<docID>.tml}. The working directory, the timings and the total
 * number of TIMEX chunks are always printed to standard output.
 *
 * @param outputFolder directory receiving the {@code .tml} files; it is created, together
 *                     with any missing parent directories, when it does not exist yet
 * @param verbose      when {@code true}, each document id is printed; the flag is also
 *                     forwarded to {@code tca.extractTimexFromFile}
 * @throws Exception if the output directory cannot be created, or propagated from the
 *                   annotation and writing calls
 */
public void testNormalizationWithTrueExtraction(String outputFolder, boolean verbose) throws Exception {
    TextAnnotationBuilder tab = new TokenizerTextAnnotationBuilder(new StatefulTokenizer(false, false));
    System.out.println("Working Directory = " + System.getProperty("user.dir"));
    ResourceManager nerRm = new TemporalChunkerConfigurator().getDefaultConfig();
    // The boolean result is not used here.
    // NOTE(review): presumably a probe for the model directory - confirm it is still needed.
    IOUtilities.existsInClasspath(TemporalChunkerAnnotator.class, nerRm.getString("modelDirPath"));
    // Silence the "HeidelTimeStandalone" logger for the duration of the run.
    java.util.logging.Logger.getLogger("HeidelTimeStandalone").setLevel(Level.OFF);

    // Phase 1: tokenize and POS-tag every document up front.
    long preprocessTime = System.currentTimeMillis();
    List<TextAnnotation> taList = new ArrayList<>(te3inputText.size());
    POSAnnotator annotator = new POSAnnotator();
    // NOTE(review): the loop is bounded by te3inputText but reads testText; this assumes
    // both lists are parallel and equally long - confirm against where they are filled.
    for (int j = 0; j < te3inputText.size(); j++) {
        // Plain character replacement; no regular expression is needed for a newline.
        String text = testText.get(j).replace('\n', ' ');
        TextAnnotation ta = tab.createTextAnnotation("corpus", "id", text);
        try {
            annotator.getView(ta);
        } catch (AnnotatorException e) {
            fail("AnnotatorException thrown!\n" + e.getMessage());
        }
        taList.add(ta);
    }

    // Phase 2: take the TIMEX chunks from the provided input and write one file per document.
    long startTime = System.currentTimeMillis();
    int numTimex = 0;
    // Resolve once against the working directory so that the directory created below and
    // the files written into it always agree.
    File outDir = new File(outputFolder).getAbsoluteFile();
    if (!outDir.isDirectory() && !outDir.mkdirs()) {
        throw new java.io.IOException("Could not create output directory " + outDir);
    }
    for (int j = 0; j < te3inputText.size(); j++) {
        TextAnnotation ta = taList.get(j);
        tca.addDocumentCreationTime(DCTs.get(j));
        if (verbose) {
            System.out.println(docIDs.get(j));
        }
        try {
            List<TimexChunk> timex = tca.extractTimexFromFile(te3inputText.get(j), testText.get(j), ta, verbose);
            tca.setTimex(timex);
            String outputFileName = new File(outDir, docIDs.get(j) + ".tml").getPath();
            tca.write2Text(outputFileName, docIDs.get(j), testText.get(j));
            numTimex += timex.size();
            // Clear the annotator's TIMEX list before the next document is processed.
            tca.deleteTimex();
        } catch (AnnotatorException e) {
            // Concatenating the StackTraceElement[] directly would only print the array's
            // identity, so render the exception and its frames explicitly.
            fail("Exception while adding TIMEX3 VIEW " + e + "\n"
                    + java.util.Arrays.toString(e.getStackTrace()));
        }
    }

    long endTime = System.currentTimeMillis();
    long totalTime = endTime - startTime;
    System.out.println("Process time: " + totalTime);
    System.out.println("Preprocess + process time: " + (endTime - preprocessTime));
    System.out.println("Total timex3: " + numTimex);
}
Also used : TextAnnotationBuilder(edu.illinois.cs.cogcomp.annotation.TextAnnotationBuilder) TokenizerTextAnnotationBuilder(edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder) TimexChunk(edu.illinois.cs.cogcomp.temporal.normalizer.main.timex2interval.TimexChunk) POSAnnotator(edu.illinois.cs.cogcomp.pos.POSAnnotator) ArrayList(java.util.ArrayList) AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) ResourceManager(edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager) TokenizerTextAnnotationBuilder(edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder) StatefulTokenizer(edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)

Aggregations

AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)2 TextAnnotationBuilder (edu.illinois.cs.cogcomp.annotation.TextAnnotationBuilder)2 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)2 ResourceManager (edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager)2 StatefulTokenizer (edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer)2 TokenizerTextAnnotationBuilder (edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder)2 POSAnnotator (edu.illinois.cs.cogcomp.pos.POSAnnotator)2 TimexChunk (edu.illinois.cs.cogcomp.temporal.normalizer.main.timex2interval.TimexChunk)2 ArrayList (java.util.ArrayList)2