Search in sources :

Example 36 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class CachingPipelineTest method stanfordFailTest.

/**
 * Reads a document from the test resources, builds a basic TextAnnotation for it and then asks
 * the processor for the Stanford dependency view. Failing to read the file or to build the basic
 * annotation fails the test; an exception while adding the dependency view is tolerated and only
 * reported on the console. The annotation is printed at the end either way.
 */
@Test
public void stanfordFailTest() {
    final String examplePath = "src/test/resources/stanfordFailExample.txt";

    // Step 1: load the raw document text; a missing resource aborts the test.
    String rawText = null;
    try {
        rawText = LineIO.slurp(examplePath);
    } catch (FileNotFoundException missingFile) {
        missingFile.printStackTrace();
        fail(missingFile.getMessage());
    }

    // Step 2: build the basic annotation; this step is required to succeed.
    TextAnnotation annotation = null;
    try {
        annotation = processor.createBasicTextAnnotation("test", "test", rawText);
    } catch (AnnotatorException creationFailure) {
        creationFailure.printStackTrace();
        fail(creationFailure.getMessage());
    }

    // Step 3: request the Stanford dependency view; an exception here does not fail the test.
    try {
        processor.addView(annotation, ViewNames.DEPENDENCY_STANFORD);
    } catch (RuntimeException | AnnotatorException stanfordFailure) {
        stanfordFailure.printStackTrace();
        System.out.println("Expected exception from stanford.");
    }

    String rendered = annotation.toString();
    System.out.println(rendered);
}
Also used : FileNotFoundException(java.io.FileNotFoundException) AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)

Example 37 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class ViewConstructorPipelineTest method main.

/**
 * Reads the text stored in {@code textFile}, builds a pipeline for the POS view, annotates the
 * text and prints how many constituents the POS view contains. The process exits with status -1
 * if the file cannot be read, the pipeline cannot be built, or annotation fails.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
    String input = null;
    try {
        input = LineIO.slurp(textFile);
    } catch (FileNotFoundException e) {
        e.printStackTrace();
        System.exit(-1);
    }
    System.out.println("input from " + textFile + " is " + input.length() + " characters long.");
    AnnotatorService as = null;
    try {
        as = PipelineFactory.buildPipeline(ViewNames.POS);
    } catch (IOException | AnnotatorException e) {
        e.printStackTrace();
        System.exit(-1);
    }
    TextAnnotation ta = null;
    try {
        ta = as.createAnnotatedTextAnnotation("test", "test", input);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        System.exit(-1);
    }
    // Report the number of constituents; concatenating the collection itself would dump every
    // constituent into a message that is worded as a count.
    int posConstituentCount = ta.getView(ViewNames.POS).getConstituents().size();
    System.out.println("found " + posConstituentCount + " POS constituents.");
}
Also used : AnnotatorService(edu.illinois.cs.cogcomp.annotation.AnnotatorService) FileNotFoundException(java.io.FileNotFoundException) AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) IOException(java.io.IOException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)

Example 38 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class RunPipeline method RunPipelineWithCorpusReader.

/**
 * Runs the pipeline over every TextAnnotation supplied by the given reader, without replacing
 * views that are already present. A document whose annotation fails is logged as an error
 * (with the causing exception) and skipped; processing continues with the next document.
 *
 * @param reader source of the TextAnnotation objects to annotate
 */
public void RunPipelineWithCorpusReader(AnnotationReader reader) {
    Iterator<TextAnnotation> iter = reader.iterator();
    boolean replaceExistingViews = false;
    while (iter.hasNext()) {
        TextAnnotation ta = iter.next();
        try {
            ta = pipeline.annotateTextAnnotation(ta, replaceExistingViews);
        } catch (AnnotatorException e) {
            // Route the failure (including its stack trace) through the logger, and do not
            // report this document as processed below.
            logger.error("failed to preprocess ta with id '" + ta.getId() + "'.", e);
            continue;
        }
        logger.debug("processed file (assumed this is TextAnnotation docid): '" + ta.getId() + "'");
    }
}
Also used : AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)

Example 39 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class RunPipeline method RunPipelineOnDataset.

/**
 * Runs a caching pipeline on an entire data set. Expects one document per file. Reports files
 * for which TextAnnotation objects could not be created. Will process every non-directory file
 * in the specified data directory, and write a file with the same name to the specified output
 * directory in json format. It will overwrite a file with the same name in the output location.
 * In addition, TextAnnotations are written to the cache as directed by the configuration
 * (default PipelineConfigurator/AnnotatorServiceConfigurator or config file).
 * <p>
 * Output files are named after the last path element of the input file only, so input files
 * with the same name in different subdirectories map to the same output file.
 *
 * @param dataDirectory directory containing source documents; may have subdirectories
 * @param outDirectory directory to which the serialized TextAnnotations are written
 * @throws IOException if {@code dataDirectory} does not exist or is not a directory
 * @throws IllegalArgumentException if an output file would be the same file as its input file
 * @throws AnnotatorException
 */
public void RunPipelineOnDataset(Path dataDirectory, Path outDirectory) throws IOException, AnnotatorException {
    if (!(dataDirectory.toFile().exists())) {
        throw new IOException("Directory '" + dataDirectory + "' does not exist.");
    } else if (!(dataDirectory.toFile().isDirectory())) {
        throw new IOException("Directory '" + dataDirectory + "' exists but is not a directory.");
    }
    // if ( !this.pipeline.isCacheEnabled() )
    // throw new IllegalStateException(
    // "Pipeline cache is disabled. Change the settings in the config file " +
    // "or the properties passed to the pipeline constructor." );
    // Accept only entries that exist and are not directories.
    FilenameFilter filter = new FilenameFilter() {

        @Override
        public boolean accept(File dir, String name) {
            File f = new File(dir.getAbsolutePath() + "/" + name);
            return f.exists() && !f.isDirectory();
        }
    };
    String[] files = IOUtils.lsFilesRecursive(dataDirectory.toString(), filter);
    for (String f : files) {
        Path inPath = Paths.get(f);
        String inFileName = inPath.getFileName().toString();
        String outFileName = outDirectory.toString() + "/" + inFileName;
        // Compare the full, normalized locations: comparing the output path against the bare
        // input file name can never match, which would let the output overwrite its own input.
        Path resolvedIn = inPath.toAbsolutePath().normalize();
        Path resolvedOut = Paths.get(outFileName).toAbsolutePath().normalize();
        if (resolvedOut.equals(resolvedIn)) {
            throw new IllegalArgumentException("Output file '" + outFileName + "' ended up same as input file '" + f + "'. Aborting. Please check the dataDirectory and outDirectory arguments.");
        }
        TextAnnotation ta = RunPipelineOnFile(f);
        if (null == ta) {
            logger.warn("Could not generate textAnnotation for file '" + f + "'.");
        } else {
            logger.debug("processed file '{}', TextAnnotation id is '{}'.", f, ta.getId());
            SerializationHelper.serializeTextAnnotationToFile(ta, outFileName, true, true);
        }
    }
}
Also used : Path(java.nio.file.Path) FilenameFilter(java.io.FilenameFilter) IOException(java.io.IOException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) File(java.io.File)

Example 40 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class MainServer method addAdditionalViewToTA.

/**
 * Deserializes a TextAnnotation from json, asks the pipeline to add each requested view to it,
 * and returns the re-serialized json.
 * <p>
 * Failures are handled on a best-effort basis: if the json cannot be deserialized the input
 * string is returned unchanged, and a view that cannot be added is logged and skipped while the
 * remaining views are still attempted.
 *
 * @param finalPipeline pipeline used to add the views
 * @param jsonStrTA json serialization of the TextAnnotation to extend
 * @param views comma-separated list of view names; surrounding whitespace is ignored
 * @param logger logger receiving progress and error messages
 * @return the json serialization of the extended TextAnnotation; the unchanged input if it could
 *         not be deserialized; or an explanatory message if a required argument is null
 * @throws AnnotatorException declared for callers; view-level failures are caught and logged
 */
private static String addAdditionalViewToTA(AnnotatorService finalPipeline, String jsonStrTA, String views, Logger logger) throws AnnotatorException {
    if (views == null || jsonStrTA == null) {
        return "The parameters 'jsonstr' and/or 'views' are not specified.";
    }
    logger.info("------------------------------");
    logger.info("Views to add: " + views);
    String[] viewsInArray = views.split(",");
    logger.info("Adding the basic annotations . . . ");
    TextAnnotation ta = null;
    try {
        ta = SerializationHelper.deserializeFromJson(jsonStrTA);
    } catch (Exception e) {
        // Keep the cause in the log instead of dropping it.
        logger.error("Error reading TA from JsonStr . . . ", e);
    }
    if (ta == null) {
        logger.info("Error reading TA from JsonStr. Views cannot be added.");
        return jsonStrTA;
    }
    for (String vuName : viewsInArray) {
        String viewName = vuName.trim();
        if (viewName.isEmpty()) {
            // Produced by inputs such as "POS,,NER" or a trailing blank entry; nothing to add.
            continue;
        }
        logger.info("Adding the view: ->" + viewName + "<-");
        try {
            finalPipeline.addView(ta, viewName);
        } catch (Exception e) {
            // Best effort: report the failure through the logger and go on to the next view.
            logger.error("Failed to add the view: ->" + viewName + "<-", e);
        }
        printMemoryDetails(logger);
    }
    logger.info("Done adding the views. Serializing the view now.");
    String output = SerializationHelper.serializeToJson(ta);
    logger.info("Done. Sending the result back. ");
    return output;
}
Also used : TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) IOException(java.io.IOException) HelpScreenException(net.sourceforge.argparse4j.internal.HelpScreenException) ArgumentParserException(net.sourceforge.argparse4j.inf.ArgumentParserException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException)

Aggregations

TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)292 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)121 Test (org.junit.Test)84 View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View)60 Feature (edu.illinois.cs.cogcomp.edison.features.Feature)48 AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)40 ArrayList (java.util.ArrayList)33 TokenizerTextAnnotationBuilder (edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder)32 DiscreteFeature (edu.illinois.cs.cogcomp.edison.features.DiscreteFeature)28 TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView)27 IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair)24 EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException)22 IOException (java.io.IOException)22 LinkedHashSet (java.util.LinkedHashSet)21 SpanLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView)20 StatefulTokenizer (edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer)19 PredicateArgumentView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView)18 Relation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation)18 File (java.io.File)18 XmlTextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.XmlTextAnnotation)16