Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
Class Main, method processInputString.
/**
 * Annotates a single input string and publishes the rendered result.
 *
 * <p>The text is HTML-unescaped, turned into a {@link TextAnnotation}, run through the NER
 * annotator, rendered with {@code produceOutput}, and handed to the result processor under a
 * name made of the current time in milliseconds plus {@code ".txt"}. Where the published
 * output ends up (standard out, an output directory, or a specific file) is decided by the
 * result processor and is not visible in this method.
 *
 * @param data the string to process
 * @throws Exception if anything goes wrong.
 */
private void processInputString(String data) throws Exception {
    final String unescapedText = StringEscapeUtils.unescapeHtml4(data);
    final TextAnnotation annotation = tab.createTextAnnotation(unescapedText);
    final String rendered =
            this.produceOutput(this.nerAnnotator.getView(annotation), annotation);
    this.getResultProcessor().publish(rendered, System.currentTimeMillis() + ".txt");
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
Class LemmatizerTATest, method testCreateTextAnnotationLemmaView.
/**
 * Checks that the lemma view created for {@code inputTa} carries the expected lemma labels at
 * selected constituent positions (0, 1, 2, 6 and 15).
 */
@Test
public void testCreateTextAnnotationLemmaView() {
    View lemmaView = null;
    try {
        lemmaView = lem.createLemmaView(inputTa);
    } catch (IOException e) {
        // fail() only carries the message, so print the trace to keep the cause visible.
        e.printStackTrace();
        fail(e.getMessage());
    }
    if (lemmaView == null) {
        fail("createLemmaView returned null, so no lemma could be checked.");
    }

    List<Constituent> spans = lemmaView.getConstituents();
    printConstituents(System.out, spans);

    // JUnit's assertEquals takes (expected, actual); keeping that order makes failure messages
    // read "expected:<the> but was:<...>" rather than the reverse.
    // NOTE(review): earlier inline notes named the surface tokens as 'The', 'men', 'have',
    // 'had' and 'examinations', which do not all match the lemmas asserted below -- verify
    // the indices against the text behind inputTa.
    assertEquals("the", spans.get(0).getLabel());
    assertEquals("cia", spans.get(1).getLabel());
    assertEquals("think", spans.get(2).getLabel());
    assertEquals("have", spans.get(6).getLabel());
    assertEquals("be", spans.get(15).getLabel());
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
Class RunPipeline, method RunPipelineWithCorpusReader.
/**
 * Runs the pipeline over every {@link TextAnnotation} supplied by the given reader.
 *
 * <p>Views already present on an annotation are not replaced. A failure on one annotation is
 * logged together with its cause and processing continues with the next one. The annotated
 * objects are not returned or stored by this method; presumably the pipeline persists them
 * (e.g. via its cache) -- confirm against the pipeline configuration.
 *
 * @param reader source of the TextAnnotation objects to process
 */
public void RunPipelineWithCorpusReader(AnnotationReader reader) {
    final boolean replaceExistingViews = false;
    Iterator<TextAnnotation> iter = reader.iterator();
    while (iter.hasNext()) {
        TextAnnotation ta = iter.next();
        try {
            ta = pipeline.annotateTextAnnotation(ta, replaceExistingViews);
        } catch (AnnotatorException e) {
            // Pass the exception to the logger so the cause lands in the log, not on stderr.
            logger.error("failed to preprocess ta with id '" + ta.getId() + "'.", e);
            // Skip the "processed" message below: this annotation was not processed.
            continue;
        }
        logger.debug("processed file (assumed this is TextAnnotation docid): '" + ta.getId() + "'");
    }
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
Class RunPipeline, method RunPipelineOnDataset.
/**
 * Runs the pipeline on an entire data set. Expects one document per file. Reports files for
 * which TextAnnotation objects could not be created. Every non-directory file found
 * (recursively) under the data directory is processed, and the result is written to a file
 * with the same name directly inside the output directory. Files that share a name, e.g. in
 * different subdirectories of the data directory, therefore map to the same output file.
 *
 * <p>Per the original documentation the output is json and an existing output file is
 * overwritten; this presumably corresponds to the two {@code true} flags passed to
 * {@code SerializationHelper.serializeTextAnnotationToFile} -- confirm against that API.
 * Any caching of TextAnnotations is governed by the pipeline configuration, not by this
 * method.
 *
 * @param dataDirectory directory containing source documents; may have subdirectories
 * @param outDirectory directory that receives one output file per input file
 * @throws IOException if {@code dataDirectory} does not exist or is not a directory, or if
 *         reading/writing fails
 * @throws AnnotatorException
 * @throws IllegalArgumentException if an output file would be the same file as its input
 */
public void RunPipelineOnDataset(Path dataDirectory, Path outDirectory) throws IOException, AnnotatorException {
    File dataDir = dataDirectory.toFile();
    if (!dataDir.exists()) {
        throw new IOException("Directory '" + dataDirectory + "' does not exist.");
    }
    if (!dataDir.isDirectory()) {
        throw new IOException("Directory '" + dataDirectory + "' exists but is not a directory.");
    }
    // if ( !this.pipeline.isCacheEnabled() )
    // throw new IllegalStateException(
    // "Pipeline cache is disabled. Change the settings in the config file " +
    // "or the properties passed to the pipeline constructor." );

    // Accept only entries that exist and are regular (non-directory) files.
    FilenameFilter filter = new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            File candidate = new File(dir, name);
            return candidate.exists() && !candidate.isDirectory();
        }
    };
    String[] files = IOUtils.lsFilesRecursive(dataDirectory.toString(), filter);
    for (String f : files) {
        Path inPath = Paths.get(f);
        String inFileName = inPath.getFileName().toString();
        Path outPath = outDirectory.resolve(inFileName);
        // Compare full, normalized paths: comparing the output path with the bare input file
        // name could never match, so the overwrite-the-input guard never fired.
        if (outPath.toAbsolutePath().normalize().equals(inPath.toAbsolutePath().normalize())) {
            throw new IllegalArgumentException("Output file '" + outPath + "' ended up same as input file '" + inPath + "'. Aborting. Please check the dataDirectory and outDirectory arguments.");
        }
        TextAnnotation ta = RunPipelineOnFile(f);
        if (null == ta) {
            logger.warn("Could not generate textAnnotation for file '" + f + "'.");
        } else {
            logger.debug("processed file '{}', TextAnnotation id is '{}'.", f, ta.getId());
            SerializationHelper.serializeTextAnnotationToFile(ta, outPath.toString(), true, true);
        }
    }
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
Class MainServer, method annotateText.
/**
 * Creates a basic TextAnnotation for {@code text}, adds each requested view to it, and
 * returns the annotation serialized to JSON.
 *
 * <p>Adding views is best-effort: if one view fails, the failure is logged and the remaining
 * views are still attempted, so the result may lack some requested views.
 *
 * @param finalPipeline annotator service used to create the annotation and add views
 * @param text the text to annotate; may be null
 * @param views comma-separated view names, e.g. {@code "POS,NER"}; may be null
 * @param logger logger receiving progress and error messages
 * @return the JSON serialization of the annotation, or a usage message if {@code text} or
 *         {@code views} is null
 * @throws AnnotatorException
 */
private static String annotateText(AnnotatorService finalPipeline, String text, String views, Logger logger) throws AnnotatorException {
    if (views == null || text == null) {
        return "The parameters 'text' and/or 'views' are not specified. Here is a sample input: \n ?text=\"This is a sample sentence. I'm happy.\"&views=POS,NER";
    }

    logger.info("------------------------------");
    logger.info("Text: " + text);
    logger.info("Views to add: " + views);
    logger.info("Adding the basic annotations . . . ");
    TextAnnotation ta = finalPipeline.createBasicTextAnnotation("", "", text);

    for (String rawName : views.split(",")) {
        String viewName = rawName.trim();
        if (viewName.isEmpty()) {
            // Input such as "POS,,NER" or "" yields blank entries; there is no view to add.
            continue;
        }
        logger.info("Adding the view: ->" + viewName + "<-");
        try {
            finalPipeline.addView(ta, viewName);
        } catch (Exception e) {
            // Deliberately best-effort: record the failure with its cause and keep going.
            logger.error("Failed to add the view '" + viewName + "'; continuing with the remaining views.", e);
        }
        printMemoryDetails(logger);
    }

    logger.info("Done adding the views. Serializing the annotation now.");
    String output = SerializationHelper.serializeToJson(ta);
    logger.info("Done. Sending the result back. ");
    return output;
}
Aggregations