use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class ViewConstructorPipelineTest method main.
/**
 * Reads the contents of {@code textFile}, builds a pipeline for the POS view, annotates the
 * text and prints how many POS constituents were produced.
 *
 * <p>Any failure (missing input file, pipeline construction error, annotation error) prints
 * the stack trace and terminates the JVM with exit status -1.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    String input = null;
    try {
        input = LineIO.slurp(textFile);
    } catch (FileNotFoundException e) {
        e.printStackTrace();
        System.exit(-1);
    }
    System.out.println("input from " + textFile + " is " + input.length() + " characters long.");

    AnnotatorService as = null;
    try {
        as = PipelineFactory.buildPipeline(ViewNames.POS);
    } catch (IOException | AnnotatorException e) {
        e.printStackTrace();
        System.exit(-1);
    }

    TextAnnotation ta = null;
    try {
        ta = as.createAnnotatedTextAnnotation("test", "test", input);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        System.exit(-1);
    }

    // Report the number of constituents; concatenating the list itself would dump every
    // constituent into a message that is worded as a count.
    System.out.println("found " + ta.getView(ViewNames.POS).getConstituents().size()
            + " POS constituents.");
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class NERAnnotatorTest method testTokenization.
/**
 * Test that tokenization produces the correct number of constituents: two for
 * {@code TOKEN_TEST} and three for a longer news-style sentence.
 */
@Test
public void testTokenization() {
    TextAnnotation ta = tab.createTextAnnotation(TOKEN_TEST);
    View nerView = null;
    try {
        nerView = getView(ta);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    // Expected value goes first so a failure message reports expected/actual correctly.
    assertEquals(2, nerView.getConstituents().size());

    String tokTestB = "Grigory Pasko, crusading Russian journalist who documented Russian Navy's mishandling of " + "nuclear waste, is released on parole after serving two-thirds of his four-year prison sentence.";
    ta = tab.createTextAnnotation(tokTestB);
    try {
        nerView = getView(ta);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    assertEquals(3, nerView.getNumberOfConstituents());
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class NERAnnotatorTest method evaluatePerformance.
/**
 * Checks that NER annotation of {@code TEST_INPUT} runs in reasonable time. The time budget
 * comes from {@code measureMachinePerformance()}, so the bound is relative to the machine the
 * test runs on.
 */
// @Test
public void evaluatePerformance() {
    final int iterations = 100;

    // Run the annotator once up front so any lazy loading happens outside the timed section.
    final TextAnnotation warmUp = tab.createTextAnnotation(TEST_INPUT);
    try {
        getView(warmUp);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    final long expectedPerformance = this.measureMachinePerformance();
    logger.info("Expect " + expectedPerformance);

    // One more untimed annotation, this time checking that a view is actually returned.
    final TextAnnotation untimed = tab.createTextAnnotation(TEST_INPUT);
    View untimedView = null;
    try {
        untimedView = getView(untimed);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
    assertTrue(untimedView != null);

    // Timed section: annotate the same input repeatedly and validate every constituent.
    final long begin = System.currentTimeMillis();
    int run = 0;
    while (run < iterations) {
        final TextAnnotation annotation = tab.createTextAnnotation(TEST_INPUT);
        View nerView = null;
        try {
            nerView = getView(annotation);
        } catch (AnnotatorException e) {
            e.printStackTrace();
            fail(e.getMessage());
        }
        assertTrue(nerView != null);
        for (Constituent constituent : nerView.getConstituents()) {
            assertTrue("No entity named \"" + constituent.toString() + "\"",
                    entities.contains(constituent.toString()));
        }
        run++;
    }

    // Average wall-clock milliseconds per annotation (integer division, as before).
    final long averageMillis = (System.currentTimeMillis() - begin) / iterations;
    System.out.printf("For text size = %d, average NER runtime = %d, normalized = %f", TEST_INPUT.length(), averageMillis, (double) averageMillis / (double) expectedPerformance);
    assertTrue(averageMillis <= expectedPerformance);
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class NerInitTest method testInit.
/**
 * Builds an NER annotator for the {@code NER_CONLL} view from a configuration in which
 * {@code GAZETTEER_FEATURES} and {@code BROWN_CLUSTER_PATHS} are both set to "0", runs it on
 * {@code TESTSTR}, and checks that the view is added with exactly two constituents.
 */
@Test
public void testInit() {
    Properties props = new Properties();
    props.setProperty(NerBaseConfigurator.GAZETTEER_FEATURES, "0");
    props.setProperty(NerBaseConfigurator.BROWN_CLUSTER_PATHS, "0");
    ResourceManager rm = (new NerBaseConfigurator()).getConfig(new ResourceManager(props));
    NERAnnotator ner = NerAnnotatorManager.buildNerAnnotator(rm, ViewNames.NER_CONLL);
    assertNotNull(ner);

    TextAnnotationBuilder tab = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
    TextAnnotation ta = tab.createTextAnnotation(TESTSTR);
    try {
        ner.getView(ta);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    // The Java `assert` keyword is skipped unless the JVM runs with -ea, so check explicitly
    // to make sure the test always verifies that the view was added.
    if (!ta.hasView(ViewNames.NER_CONLL)) {
        fail("Expected view " + ViewNames.NER_CONLL + " to be present after annotation");
    }
    // Expected value first, so failure output labels expected/actual correctly.
    assertEquals(2, ta.getView(ViewNames.NER_CONLL).getConstituents().size());
}
use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.
the class PrepSRLDataReader method readData.
/**
 * Reads every XML file of the selected corpus split and turns each {@code <instance>} element
 * into a {@link TextAnnotation}.
 *
 * <p>The data directory is the train directory when {@code corpusName} equals "train" and the
 * test directory otherwise. For the "test" corpus, the matching {@code .key} file is loaded
 * into {@code keys} before the instances of that file are processed.
 *
 * @return the result of {@code consolidate} applied to all annotations that were created, or
 *         {@code null} if an XML file cannot be parsed or a key file is not found
 */
@Override
public List<TextAnnotation> readData() {
    lazyReadMaps();
    final List<TextAnnotation> textAnnotations = new ArrayList<>();
    final String dataDir = file + File.separator
            + (corpusName.equals("train") ? semevalTrainDataDirectory : semevalTestDataDirectory);

    for (String currentFile : getFiles(dataDir)) {
        // Parse the XML document and collect its <instance> elements.
        NodeList instanceNodeList;
        try {
            instanceNodeList = XMLUtils.getXMLDOM(dataDir + File.separator + currentFile + ".xml")
                    .getDocumentElement()
                    .getElementsByTagName("instance");
        } catch (Exception ex) {
            System.err.println("Unable to get the DOM" + ex);
            return null;
        }

        // The test split additionally needs the key file for this document.
        if (corpusName.equals("test")) {
            // The key file name is the part of the file name between the first '-' and the
            // first '.'.
            final String keyFileName = currentFile.substring(
                    currentFile.indexOf('-') + 1, currentFile.indexOf('.'));
            final String keyPath = file + File.separator + semevalKeyDirectory + File.separator
                    + keyFileName + ".key";
            try {
                keys = new Hashtable<>();
                LineIO.read(keyPath, new ITransformer<String, Void>() {
                    public Void transform(String input) {
                        // Space-separated line: second field is the key, third the value.
                        String[] fields = input.split(" ");
                        keys.put(fields[1], fields[2]);
                        return null;
                    }
                });
            } catch (FileNotFoundException e) {
                System.err.println("File " + semevalKeyDirectory + File.separator + keyFileName + ".key not found" + e);
                return null;
            }
        }

        // Continue from the current value of the currentNodeId field; it is advanced once per
        // instance whether or not an annotation could be built.
        for (; currentNodeId < instanceNodeList.getLength(); currentNodeId++) {
            final Element instance = (Element) instanceNodeList.item(currentNodeId);
            final TextAnnotation ta = makeNewTextAnnotation(instance);
            if (ta != null) {
                textAnnotations.add(ta);
            } else {
                logger.error("{} returned null.", instance);
            }
        }
    }
    return consolidate(textAnnotations);
}
Aggregations