use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class CachingPipelineTest method stanfordFailTest.
/**
 * Pushes the contents of {@code stanfordFailExample.txt} through the pipeline and requests the
 * Stanford dependency view. A failure while reading the file or building the basic
 * TextAnnotation fails the test; an exception raised while adding the dependency view is
 * tolerated and merely reported on stdout.
 */
@Test
public void stanfordFailTest() {
    final String inputFile = "src/test/resources/stanfordFailExample.txt";

    // Both setup steps share the same failure handling, so they live in one try block.
    TextAnnotation basicTextAnnotation = null;
    try {
        final String text = LineIO.slurp(inputFile);
        basicTextAnnotation = processor.createBasicTextAnnotation("test", "test", text);
    } catch (FileNotFoundException | AnnotatorException setupFailure) {
        setupFailure.printStackTrace();
        fail(setupFailure.getMessage());
    }

    // The dependency parser may reject this input; that outcome is acceptable here.
    try {
        processor.addView(basicTextAnnotation, ViewNames.DEPENDENCY_STANFORD);
    } catch (RuntimeException | AnnotatorException parserFailure) {
        parserFailure.printStackTrace();
        System.out.println("Expected exception from stanford.");
    }

    final String rendered = basicTextAnnotation.toString();
    System.out.println(rendered);
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class ViewConstructorPipelineTest method main.
/**
 * Reads the file named by {@code textFile}, builds a pipeline providing the POS view, annotates
 * the text and prints how many constituents the POS view contains.
 *
 * Any failure (missing file, pipeline construction, annotation) prints the stack trace and
 * terminates the JVM with exit status -1.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    String input = null;
    try {
        input = LineIO.slurp(textFile);
    } catch (FileNotFoundException e) {
        e.printStackTrace();
        System.exit(-1);
    }
    System.out.println("input from " + textFile + " is " + input.length() + " characters long.");

    AnnotatorService as = null;
    try {
        as = PipelineFactory.buildPipeline(ViewNames.POS);
    } catch (IOException | AnnotatorException e) {
        e.printStackTrace();
        System.exit(-1);
    }

    TextAnnotation ta = null;
    try {
        ta = as.createAnnotatedTextAnnotation("test", "test", input);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        System.exit(-1);
    }

    // Report the number of constituents; the original concatenated the whole collection,
    // which dumped every constituent into a message that is phrased as a count.
    int posConstituentCount = ta.getView(ViewNames.POS).getConstituents().size();
    System.out.println("found " + posConstituentCount + " POS constituents.");
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class RunPipeline method RunPipelineWithCorpusReader.
/**
 * Annotates every TextAnnotation produced by the given reader with this object's pipeline.
 * Existing views are not replaced. A document whose annotation fails is logged (with the
 * causing exception) and skipped; processing continues with the next document.
 *
 * @param reader source of the TextAnnotation objects to annotate
 */
public void RunPipelineWithCorpusReader(AnnotationReader reader) {
    Iterator<TextAnnotation> iter = reader.iterator();
    boolean replaceExistingViews = false;
    while (iter.hasNext()) {
        TextAnnotation ta = iter.next();
        try {
            ta = pipeline.annotateTextAnnotation(ta, replaceExistingViews);
        } catch (AnnotatorException e) {
            // Route the stack trace through the logger instead of stderr, and do not fall
            // through to the "processed" message for a document that was not processed.
            logger.error("failed to preprocess ta with id '" + ta.getId() + "'.", e);
            continue;
        }
        logger.debug("processed file (assumed this is TextAnnotation docid): '" + ta.getId() + "'");
    }
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class RunPipeline method RunPipelineOnDataset.
/**
 * Runs a caching pipeline on an entire data set. Expects one document per file. Reports files
 * for which TextAnnotation objects could not be created. Will process every non-directory file
 * in the specified data directory, and write a file with the same name to the specified output
 * directory in json format. It will overwrite a file with the same name in the output location.
 * In addition, TextAnnotations are written to the cache as directed by the configuration
 * (default PipelineConfigurator/AnnotatorServiceConfigurator or config file).
 *
 * Output files are named after the last path element of the input file only, so input files
 * that share a name (e.g. in different subdirectories) map to the same output file.
 *
 * @param dataDirectory directory containing source documents; may have subdirectories
 * @param outDirectory directory into which the json-serialized TextAnnotations are written
 * @throws IOException if {@code dataDirectory} does not exist or is not a directory
 * @throws AnnotatorException
 * @throws IllegalArgumentException if an output file would resolve to the same path as its
 *         input file
 */
public void RunPipelineOnDataset(Path dataDirectory, Path outDirectory) throws IOException, AnnotatorException {
    if (!(dataDirectory.toFile().exists())) {
        throw new IOException("Directory '" + dataDirectory + "' does not exist.");
    } else if (!(dataDirectory.toFile().isDirectory())) {
        throw new IOException("Directory '" + dataDirectory + "' exists but is not a directory.");
    }
    // if ( !this.pipeline.isCacheEnabled() )
    // throw new IllegalStateException(
    // "Pipeline cache is disabled. Change the settings in the config file " +
    // "or the properties passed to the pipeline constructor." );

    // Accept anything that exists and is not a directory.
    FilenameFilter filter = new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            File f = new File(dir.getAbsolutePath() + "/" + name);
            return f.exists() && !f.isDirectory();
        }
    };
    String[] files = IOUtils.lsFilesRecursive(dataDirectory.toString(), filter);
    for (String f : files) {
        Path inPath = Paths.get(f);
        String inFileName = inPath.getName(inPath.getNameCount() - 1).toString();
        String outFileName = outDirectory.toString() + "/" + inFileName;

        // Guard against overwriting the source document. Compare the resolved output path
        // with the resolved input path; comparing against the bare file name (as before)
        // could never match, so the guard was ineffective.
        Path resolvedOut = Paths.get(outFileName).toAbsolutePath().normalize();
        Path resolvedIn = inPath.toAbsolutePath().normalize();
        if (resolvedOut.equals(resolvedIn)) {
            throw new IllegalArgumentException("Output file '" + outFileName + "' ended up same as input file '" + f + "'. Aborting. Please check the dataDirectory and outDirectory arguments.");
        }

        TextAnnotation ta = RunPipelineOnFile(f);
        if (null == ta) {
            logger.warn("Could not generate textAnnotation for file '" + f + "'.");
        } else {
            logger.debug("processed file '{}', TextAnnotation id is '{}'.", f, ta.getId());
            SerializationHelper.serializeTextAnnotationToFile(ta, outFileName, true, true);
        }
    }
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class MainServer method addAdditionalViewToTA.
/**
 * Deserializes a TextAnnotation from json, adds each requested view to it using the given
 * pipeline, and returns the result re-serialized as json.
 *
 * A view that cannot be added is logged and skipped; the remaining views are still attempted.
 *
 * @param finalPipeline pipeline used to add the views
 * @param jsonStrTA json serialization of the TextAnnotation to extend
 * @param views comma-separated view names; surrounding whitespace of each name is trimmed
 * @param logger destination for progress and error messages
 * @return the json of the extended TextAnnotation; the unmodified {@code jsonStrTA} if it
 *         could not be deserialized; or an explanatory message if {@code views} or
 *         {@code jsonStrTA} is null
 * @throws AnnotatorException declared for callers; failures while adding individual views are
 *         caught and logged here
 */
private static String addAdditionalViewToTA(AnnotatorService finalPipeline, String jsonStrTA, String views, Logger logger) throws AnnotatorException {
    if (views == null || jsonStrTA == null) {
        return "The parameters 'jsonstr' and/or 'views' are not specified.";
    }

    logger.info("------------------------------");
    logger.info("Views to add: " + views);
    String[] viewsInArray = views.split(",");
    logger.info("Adding the basic annotations . . . ");

    TextAnnotation ta = null;
    try {
        ta = SerializationHelper.deserializeFromJson(jsonStrTA);
    } catch (Exception e) {
        // Keep the cause in the log; previously the exception was dropped entirely.
        logger.error("Error reading TA from JsonStr . . . ", e);
    }
    if (ta == null) {
        logger.info("Error reading TA from JsonStr. Views cannot be added.");
        return jsonStrTA;
    }

    for (String vuName : viewsInArray) {
        String viewName = vuName.trim();
        logger.info("Adding the view: ->" + viewName + "<-");
        try {
            finalPipeline.addView(ta, viewName);
        } catch (Exception e) {
            // Best effort: report through the supplied logger (not stderr) and carry on.
            logger.error("Error adding the view: ->" + viewName + "<-", e);
        }
        printMemoryDetails(logger);
    }

    logger.info("Done adding the views. Serializing the view now.");
    String output = SerializationHelper.serializeToJson(ta);
    logger.info("Done. Sending the result back. ");
    return output;
}
Aggregations