use of edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder in project cogcomp-nlp by CogComp.
the class XmlTextAnnotationMakerOntonotesTest method testNestedNames.
/**
 * Exercises offset mapping for nested XML tags: every markup span reported for the input,
 * once its original-text offsets are converted to offsets in the cleaned text, must select a
 * substring that is listed in REF_ENTITIES.
 */
@Test
public void testNestedNames() {
    String text = "He spoke with Paul <ENAMEX TYPE=\"PERSON\"><ENAMEX TYPE=\"PERSON\" E_OFF=\"1\">Paula</ENAMEX> Zahn</ENAMEX> .";

    // Processor configured with the test's tag sets (the original author's note: "we keep everything").
    XmlDocumentProcessor xmlProcessor = new XmlDocumentProcessor(tagsWithText, tagsWithAtts, dropTags, true);
    TokenizerTextAnnotationBuilder taBuilder = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
    XmlTextAnnotationMaker annotationMaker = new XmlTextAnnotationMaker(taBuilder, xmlProcessor);

    // Build the annotation from the in-memory string, then pull out the pieces under test.
    XmlTextAnnotation xmlAnnotation = annotationMaker.createTextAnnotation(text, "OntoNotes 5.0", "test");
    TextAnnotation ta = xmlAnnotation.getTextAnnotation();
    List<XmlDocumentProcessor.SpanInfo> markupSpans = xmlAnnotation.getXmlMarkup();
    StringTransformation transformation = xmlAnnotation.getXmlSt();
    String cleanText = ta.getText();

    for (XmlDocumentProcessor.SpanInfo span : markupSpans) {
        // Span offsets refer to the original (XML) text; translate them before slicing.
        int cleanStart = transformation.computeModifiedOffsetFromOriginal(span.spanOffsets.getFirst());
        int cleanEnd = transformation.computeModifiedOffsetFromOriginal(span.spanOffsets.getSecond());
        String entityText = cleanText.substring(cleanStart, cleanEnd);
        assertTrue(REF_ENTITIES.contains(entityText));
    }
}
use of edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder in project cogcomp-nlp by CogComp.
the class ESADatalessAnnotator method main.
/**
 * Command-line entry point: annotates a test document with the dataless ESA view and prints
 * the predicted label IDs followed by the corresponding label names.
 *
 * <p>The configuration is read from the {@code config} option (default
 * {@code config/project.properties}); if that file cannot be read, the default configuration
 * is used and a warning is logged. The document is read from the {@code testFile} option
 * (default {@code data/graphicsTestDocument.txt}); its lines are joined with single spaces.
 * Any failure while reading or annotating the document is logged and the JVM exits with
 * status -1.
 *
 * @param args config: config file path testFile: Test File
 */
public static void main(String[] args) {
    CommandLine cmd = getCMDOpts(args);

    String configFile = cmd.getOptionValue("config", "config/project.properties");
    ResourceManager rm;
    try {
        rm = new ESADatalessConfigurator().getConfig(new ResourceManager(configFile));
    } catch (IOException e) {
        // Falling back is deliberate, but do it visibly so a mistyped path is not silently ignored.
        logger.warn("Could not read config file " + configFile + " ... using default configuration", e);
        rm = new ESADatalessConfigurator().getDefaultConfig();
    }

    String testFile = cmd.getOptionValue("testFile", "data/graphicsTestDocument.txt");
    StringBuilder sb = new StringBuilder();
    String line;
    try (BufferedReader br = new BufferedReader(new FileReader(new File(testFile)))) {
        // Collapse the document into one space-separated string.
        while ((line = br.readLine()) != null) {
            sb.append(line);
            sb.append(" ");
        }
        String text = sb.toString().trim();

        TokenizerTextAnnotationBuilder taBuilder = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
        TextAnnotation ta = taBuilder.createTextAnnotation(text);
        ESADatalessAnnotator datalessAnnotator = new ESADatalessAnnotator(rm);
        datalessAnnotator.addView(ta);

        List<Constituent> annots = ta.getView(ViewNames.DATALESS_ESA).getConstituents();
        System.out.println("Predicted LabelIDs:");
        for (Constituent annot : annots) {
            System.out.println(annot.getLabel());
        }

        Map<String, String> labelNameMap = DatalessAnnotatorUtils.getLabelNameMap(rm.getString(DatalessConfigurator.LabelName_Path.key));
        System.out.println("Predicted Labels:");
        for (Constituent annot : annots) {
            System.out.println(labelNameMap.get(annot.getLabel()));
        }
    } catch (FileNotFoundException e) {
        // Hand the exception to the logger so the stack trace lands next to the message.
        logger.error("Test File not found at " + testFile + " ... exiting", e);
        System.exit(-1);
    } catch (IOException e) {
        logger.error("IO Error while reading the test file ... exiting", e);
        System.exit(-1);
    } catch (AnnotatorException e) {
        logger.error("Error Annotating the Test Document with the Dataless View ... exiting", e);
        System.exit(-1);
    }
}
use of edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder in project cogcomp-nlp by CogComp.
the class ACERelationTester method testRandomText.
/**
 * Runs the relation-extraction pipeline on a raw text string and prints every relation found
 * in the RELATION view.
 *
 * <p>The text is tokenized with a StatefulTokenizer (empty corpus and text ids), then
 * annotated in order with POS tags, the Stanford dependency parse, chunks, mentions and
 * relations. Any exception raised while building or running the annotators is caught and its
 * stack trace printed; nothing is rethrown.
 *
 * @param text the raw text to annotate
 */
public static void testRandomText(String text) {
    String corpus = "";
    String textId = "";
    TextAnnotationBuilder stab = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
    TextAnnotation ta = stab.createTextAnnotation(corpus, textId, text);
    try {
        POSAnnotator pos_annotator = new POSAnnotator();
        ChunkerAnnotator chunker = new ChunkerAnnotator(true);
        chunker.initialize(new ChunkerConfigurator().getDefaultConfig());

        Properties stanfordProps = new Properties();
        stanfordProps.setProperty("annotators", "pos, parse");
        // Properties.getProperty() returns null for non-String values, so a Boolean stored with
        // put() is invisible to any reader that uses getProperty(). Store the flag as a String.
        stanfordProps.setProperty("parse.originalDependencies", "true");
        // NOTE(review): the type of these two constants is not visible here. If they are not
        // Strings they have the same getProperty() visibility problem -- confirm and convert.
        stanfordProps.put("parse.maxlen", Stanford331Configurator.STFRD_MAX_SENTENCE_LENGTH);
        stanfordProps.put("parse.maxtime", Stanford331Configurator.STFRD_TIME_PER_SENTENCE);

        POSTaggerAnnotator posAnnotator = new POSTaggerAnnotator("pos", stanfordProps);
        ParserAnnotator parseAnnotator = new ParserAnnotator("parse", stanfordProps);
        StanfordDepHandler stanfordDepHandler = new StanfordDepHandler(posAnnotator, parseAnnotator);
        MentionAnnotator mentionAnnotator = new MentionAnnotator("ACE_TYPE");
        RelationAnnotator relationAnnotator = new RelationAnnotator();

        // Views are added in this fixed order: POS, dependency parse, chunks, mentions, relations.
        ta.addView(pos_annotator);
        stanfordDepHandler.addView(ta);
        chunker.addView(ta);
        mentionAnnotator.addView(ta);
        relationAnnotator.addView(ta);

        for (Relation r : ta.getView(ViewNames.RELATION).getRelations()) {
            IOHelper.printRelation(r);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
use of edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder in project cogcomp-nlp by CogComp.
the class ClauseAnnotatorFactory method buildPipeline.
/**
 * Assembles an annotator service from a tokenizer-backed text annotation builder, the
 * annotators returned by {@code buildAnnotators()}, and the supplied configuration.
 *
 * @param rm non-default config options
 * @return AnnotatorService with specified NLP components
 * @throws IOException declared by this factory; propagated from the calls it makes
 * @throws AnnotatorException declared by this factory; propagated from the calls it makes
 */
public static BasicAnnotatorService buildPipeline(ResourceManager rm) throws IOException, AnnotatorException {
    StatefulTokenizer tokenizer = new StatefulTokenizer(false, false);
    TextAnnotationBuilder textAnnotationBuilder = new TokenizerTextAnnotationBuilder(tokenizer);
    return new BasicAnnotatorService(textAnnotationBuilder, buildAnnotators(), rm);
}
use of edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder in project cogcomp-nlp by CogComp.
the class NerOntonotesTest method testOntonotesNer.
/**
 * Runs the OntoNotes NER annotator, built from an empty property set, over TEST_INPUT and
 * expects exactly three constituents in the resulting view. An AnnotatorException fails the
 * test with the exception's message.
 */
@Test
public void testOntonotesNer() {
    TextAnnotationBuilder builder = new TokenizerTextAnnotationBuilder(new StatefulTokenizer());
    ResourceManager emptyConfig = new ResourceManager(new Properties());
    NERAnnotator ontonotesNer = NerAnnotatorManager.buildNerAnnotator(emptyConfig, ViewNames.NER_ONTONOTES);
    TextAnnotation annotation = builder.createTextAnnotation("", "", TEST_INPUT);

    try {
        ontonotesNer.getView(annotation);
    } catch (AnnotatorException e) {
        e.printStackTrace();
        fail(e.getMessage());
    }

    int constituentCount = annotation.getView(ontonotesNer.getViewName()).getConstituents().size();
    assertEquals(3, constituentCount);
}
Aggregations