use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.
the class ChunkerAnnotator method addView.
/**
 * Adds a {@link ViewNames#SHALLOW_PARSE} view to {@code record}, built from the B/I/O style
 * labels found in each LBJava token's {@code type} field after the tagger has been run on it.
 *
 * <p>A label starting with {@code 'B'} opens a chunk, a label starting with {@code 'O'} closes
 * any open chunk, and a label starting with {@code 'I'} continues the open chunk. An
 * {@code 'I'} label seen while no chunk is open is rewritten to a {@code 'B'} label so that it
 * opens one.
 *
 * @param record the text annotation to annotate; it must already contain the token, sentence
 *        and POS views.
 * @throws AnnotatorException if a required view is missing, if the POS view has fewer
 *         constituents than there are tokens, or if a token has no label after tagging.
 * @throws IllegalArgumentException if an {@code 'I'} label is shorter than three characters.
 */
@Override
public void addView(TextAnnotation record) throws AnnotatorException {
    if (!record.hasView(tokensfield) || !record.hasView(sentencesfield) || !record.hasView(posfield)) {
        String msg = "Record must be tokenized, sentence split, and POS-tagged first.";
        logger.error(msg);
        throw new AnnotatorException(msg);
    }
    List<Constituent> tags = record.getView(posfield).getConstituents();
    List<Token> lbjTokens = LBJavaUtils.recordToLBJTokens(record);
    // Each token is paired with the POS constituent at the same index; fail with a clear
    // message instead of an IndexOutOfBoundsException half-way through the loop.
    if (tags.size() < lbjTokens.size()) {
        String msg = "POS view has " + tags.size() + " constituents but the record has "
                + lbjTokens.size() + " tokens.";
        logger.error(msg);
        throw new AnnotatorException(msg);
    }
    View chunkView = new SpanLabelView(ViewNames.SHALLOW_PARSE, this.NAME, record, 1.0);
    int currentChunkStart = 0;
    // Label of the chunk currently being built; null means "no chunk in progress". It must
    // start as null: every "is a chunk open?" test below is a null check, so any non-null
    // initial value would be emitted as a bogus chunk.
    String clabel = null;
    Constituent previous = null;
    int tcounter = 0;
    for (Token lbjtoken : lbjTokens) {
        Constituent current = tags.get(tcounter);
        // The return value is ignored; the label is read from lbjtoken.type below.
        tagger.discreteValue(lbjtoken);
        logger.debug("{} {}", lbjtoken.toString(), (null == lbjtoken.type) ? "NULL" : lbjtoken.type);
        if (null == lbjtoken.type || lbjtoken.type.isEmpty()) {
            String msg = "Chunker produced no label for token " + tcounter + " ("
                    + lbjtoken.toString() + ").";
            logger.error(msg);
            throw new AnnotatorException(msg);
        }
        // what happens if we see an Inside tag -- even if it doesn't follow a Before tag
        if (lbjtoken.type.charAt(0) == 'I') {
            if (lbjtoken.type.length() < 3) {
                throw new IllegalArgumentException("Chunker word label '" + lbjtoken.type + "' is too short!");
            }
            if (null == clabel) {
                // No chunk is open (start of document, or we just saw an Outside tag):
                // modify lbjtoken.type so the ifs below treat it as a chunk start.
                lbjtoken.type = "B" + lbjtoken.type.substring(1);
            } else if (clabel.length() >= 3 && !clabel.equals(lbjtoken.type.substring(2))) {
                // Inside tag whose label differs from the open chunk: start a new chunk.
                // NOTE(review): the length() >= 3 guard is kept from the original; it means
                // this relabelling never fires when the open chunk's label is one or two
                // characters long -- confirm whether that is intended.
                lbjtoken.type = "B" + lbjtoken.type.substring(1);
            }
        }
        char tagKind = lbjtoken.type.charAt(0);
        // A Begin or Outside tag terminates the chunk in progress, if there is one.
        if ((tagKind == 'B' || tagKind == 'O') && clabel != null && previous != null) {
            Constituent label = new Constituent(clabel, ViewNames.SHALLOW_PARSE, record,
                    currentChunkStart, previous.getEndSpan());
            chunkView.addConstituent(label);
            clabel = null;
        }
        if (tagKind == 'B') {
            currentChunkStart = current.getStartSpan();
            clabel = lbjtoken.type.substring(2);
        }
        previous = current;
        tcounter++;
    }
    // Flush a chunk that is still open when the tokens run out.
    if (clabel != null && null != previous) {
        Constituent label = new Constituent(clabel, ViewNames.SHALLOW_PARSE, record,
                currentChunkStart, previous.getEndSpan());
        chunkView.addConstituent(label);
    }
    record.addView(ViewNames.SHALLOW_PARSE, chunkView);
}
use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.
the class PennTreebankReader method next.
/**
 * Returns the next annotation object.
 *
 * <p>When every line of the current file has been consumed (or no file has been read yet),
 * the next file of the current section is loaded; when the current section has no more files,
 * the file list is refreshed through {@code updateCurrentFiles()} first.
 *
 * <p>The original contract note for this method says "Don't forget to increment
 * currentAnnotationId"; this method does not do so itself (presumably {@code findNextTree()}
 * does -- confirm).
 *
 * @return an annotation object, or {@code null} when all sections have been consumed.
 * @throws IllegalStateException if the file list of the next section cannot be obtained, if
 *         the next file cannot be read, or if building the annotation fails.
 */
@Override
public TextAnnotation next() {
    // first check if we don't have any more lines
    if (lines == null || currentLineId == lines.size()) {
        // check if the current section has no more files
        if (currentFileId == currentSectionFiles.length) {
            // check if there are more sections
            if (currentSectionId == sections.length) {
                return null;
            }
            try {
                updateCurrentFiles();
            } catch (Exception e) {
                // Continuing here would silently re-read the previous section's files.
                throw new IllegalStateException("Unable to load the file list of the next section", e);
            }
            currentFileId = 0;
        }
        String nextFile = combinedWSJHome + "/" + sections[currentSectionId - 1] + "/"
                + currentSectionFiles[currentFileId++];
        try {
            lines = LineIO.read(nextFile);
        } catch (FileNotFoundException e) {
            // Continuing here would parse the stale (or null) contents of 'lines'.
            throw new IllegalStateException("Unable to read treebank file " + nextFile, e);
        }
        treeInFile = 0;
        currentLineId = 0;
    }
    try {
        return findNextTree();
    } catch (AnnotatorException e) {
        throw new IllegalStateException(e);
    }
}
use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.
the class DepAnnotatorTest method testDepParser.
/**
 * Runs the dependency annotator on the shared test annotation and checks that a dependency
 * view is added whose first tree is rooted at the word "finished".
 */
@Test
public void testDepParser() throws Exception {
    final DepAnnotator annotator = new DepAnnotator();
    try {
        annotator.addView(ta);

        final boolean hasDependencyView = ta.hasView(ViewNames.DEPENDENCY);
        assertTrue(hasDependencyView);

        final TreeView dependencyView = (TreeView) ta.getView(ViewNames.DEPENDENCY);
        final String rootSurfaceForm = dependencyView.getTreeRoot(0).getSurfaceForm();
        assertEquals("finished", rootSurfaceForm);
    } catch (AnnotatorException annotationFailure) {
        // An annotator failure is reported as a test failure, with the trace on stderr.
        annotationFailure.printStackTrace();
        fail();
    }
}
use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.
the class DepAnnotator method addView.
/**
 * Parses {@code ta} with the dependency model and adds the result as a
 * {@link ViewNames#DEPENDENCY} view.
 *
 * <p>Only tree index 0 of the new view is populated (NOTE(review): this suggests the input is
 * expected to be a single sentence -- confirm against callers).
 *
 * @param ta the text annotation to parse; it must contain every view in
 *        {@code requiredViews}.
 * @throws AnnotatorException if a required view is missing, or if inference fails (the
 *         underlying exception is attached as the cause).
 */
@Override
public void addView(TextAnnotation ta) throws AnnotatorException {
    for (String reqView : requiredViews) {
        if (!ta.hasView(reqView)) {
            throw new AnnotatorException("TextAnnotation must have view: " + reqView);
        }
    }
    DepInst sent = new DepInst(ta);
    DepStruct deptree;
    try {
        deptree = (DepStruct) model.infSolver.getBestStructure(model.wv, sent);
    } catch (Exception e) {
        // Keep the original failure reachable via getCause() instead of discarding it.
        AnnotatorException parseFailure = new AnnotatorException("Sentence cannot be parsed");
        parseFailure.initCause(e);
        throw parseFailure;
    }
    TreeView treeView = new TreeView(ViewNames.DEPENDENCY, ta);
    int rootPos = findRoot(deptree);
    // All the node positions are -1 to account for the extra <root> node added
    Pair<String, Integer> nodePair = new Pair<>(sent.forms[rootPos], rootPos - 1);
    Tree<Pair<String, Integer>> tree = new Tree<>(nodePair);
    populateChildren(tree, deptree, sent, rootPos);
    treeView.setDependencyTree(0, tree);
    ta.addView(ViewNames.DEPENDENCY, treeView);
}
use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.
the class AnnotatorLazyInitTest method testLazy.
/**
 * Checks that a {@link SimpleGazetteerAnnotator} configured for lazy initialization reports
 * itself as uninitialized right after construction, and as initialized once a view has been
 * requested from it.
 */
@Test
public void testLazy() {
    Properties config = new Properties();
    config.setProperty(SimpleGazetteerAnnotatorConfigurator.PATH_TO_DICTIONARIES.key, "/testgazetteers/");
    config.setProperty(SimpleGazetteerAnnotatorConfigurator.PHRASE_LENGTH.key, "6");
    config.setProperty(SimpleGazetteerAnnotatorConfigurator.IS_LAZILY_INITIALIZED.key,
            SimpleGazetteerAnnotatorConfigurator.TRUE);

    SimpleGazetteerAnnotator annotator = null;
    try {
        annotator = new SimpleGazetteerAnnotator(new ResourceManager(config));
    } catch (IOException | URISyntaxException constructionFailure) {
        constructionFailure.printStackTrace();
        fail(constructionFailure.getMessage());
    }

    // Before first use: not initialized; each dictionary list is either absent or non-empty.
    assertFalse(annotator.isInitialized());
    assertTrue(annotator.dictionaries == null || annotator.dictionaries.size() > 0);
    assertTrue(annotator.dictionariesIgnoreCase == null || annotator.dictionariesIgnoreCase.size() > 0);

    TextAnnotation text =
            tab.createTextAnnotation("The CIA has no London headquarters, though General Electric does.");
    try {
        annotator.getView(text);
    } catch (AnnotatorException annotationFailure) {
        annotationFailure.printStackTrace();
        fail(annotationFailure.getMessage());
    }

    // After first use: the view exists and the annotator reports itself initialized.
    assertTrue(text.hasView(annotator.getViewName()));
    assertTrue(annotator.isInitialized());
    assertTrue(annotator.dictionaries == null || annotator.dictionaries.size() > 0);
    assertTrue(annotator.dictionariesIgnoreCase == null || annotator.dictionariesIgnoreCase.size() > 0);
    // Same check as above, repeated as in the original test.
    assertTrue(text.hasView(annotator.getViewName()));
}
Aggregations