use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.
the class Quantifier method getSpans.
/**
 * Finds quantity chunks in the input and returns them as character-offset spans.
 *
 * <p>Every constituent of the TOKENS view is labelled by {@code chunker}. The labels are
 * handled as {@code "B-<type>"}, {@code "I-<type>"} or {@code "O"}: a chunk opens on a
 * {@code B-} label (or on an {@code I-} label whose type differs from the previous label) and
 * closes when the following token is {@code "O"}, starts a new {@code B-} chunk, has a
 * different type, or when the last token is reached.
 *
 * <p>NOTE(review): a span is only added to the result when its {@code object} is non-null, and
 * {@code object} is only assigned when {@code standardized} is {@code true}. With
 * {@code standardized == false} this method therefore always returns an empty list. That
 * behaviour is preserved here; confirm whether it is intended.
 *
 * @param text the raw text; only used to build an annotation when {@code inputTA} is null
 * @param standardized whether to run the normalizer on each detected chunk
 * @param inputTA the tokenized annotation of text input. If this parameter is not available,
 *        the user can pass null, in which case we will tokenize it ourselves.
 * @return the detected spans whose normalized object is non-null, in token order
 * @throws AnnotatorException declared by the signature; presumably raised by the preprocessor
 *         while annotating the text (TODO confirm)
 */
public List<QuantSpan> getSpans(String text, boolean standardized, TextAnnotation inputTA) throws AnnotatorException {
    TextAnnotation annotation = (inputTA != null) ? inputTA : taBuilder.createTextAnnotation(text);
    List<QuantSpan> quantSpans = new ArrayList<QuantSpan>();
    // if there is no annotator, initialize it
    if (DataReader.preprocessor == null) {
        DataReader.preprocessor = new Preprocessor(PreprocessorConfigurator.defaults());
    }
    // if it does not include POS or NER_CONLL, add them
    DataReader.preprocessor.annotate(annotation);
    assert annotation.getAvailableViews().contains(ViewNames.POS);

    String previous = "";
    // Surface forms of the tokens in the currently open chunk, each followed by one space.
    StringBuilder chunk = new StringBuilder();
    boolean inChunk = false;
    int startPos = 0;
    // Index into the annotation's own token array; advanced at the bottom of the loop only
    // when the annotation token ends with the current constituent's surface form.
    int tokenPos = 0;
    List<Constituent> tokens = annotation.getView(ViewNames.TOKENS).getConstituents();
    for (int i = 0; i < tokens.size(); ++i) {
        String prediction = chunker.discreteValue(tokens.get(i));

        boolean opensChunk = prediction.startsWith("B-")
                || (prediction.startsWith("I-") && !previous.endsWith(prediction.substring(2)));
        if (opensChunk && !inChunk && tokenPos < annotation.size()) {
            inChunk = true;
            startPos = annotation.getTokenCharacterOffset(tokenPos).getFirst();
        }
        if (inChunk) {
            chunk.append(tokens.get(i).getSurfaceForm()).append(' ');
        }

        if (!prediction.equals("O") && tokenPos < annotation.size()) {
            boolean chunkEnds = (i == tokens.size() - 1);
            if (!chunkEnds) {
                // Classify the lookahead token once instead of once per sub-condition.
                String next = chunker.discreteValue(tokens.get(i + 1));
                chunkEnds = next.equals("O") || next.startsWith("B-")
                        || !next.endsWith(prediction.substring(2));
            }
            if (chunkEnds) {
                int endPos = annotation.getTokenCharacterOffset(tokenPos).getSecond() - 1;
                QuantSpan span = new QuantSpan(null, startPos, endPos);
                try {
                    if (standardized) {
                        span.object = normalizer.parse(chunk.toString(), prediction.substring(2));
                    }
                } catch (Exception e) {
                    // Best effort: a chunk that cannot be normalized is reported and skipped.
                    e.printStackTrace();
                }
                if (span.object != null) {
                    quantSpans.add(span);
                }
                inChunk = false;
                chunk.setLength(0);
            }
        }

        previous = prediction;
        if (tokenPos < annotation.size()
                && annotation.getToken(tokenPos).trim().endsWith(tokens.get(i).getSurfaceForm().trim())) {
            tokenPos++;
        }
    }
    return quantSpans;
}
use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.
the class MainClass method annotate.
/**
 * Reads the file at {@code filepath} line by line, runs the preprocessor and the
 * {@code DepAnnotator} on each line, and prints the annotator's view to standard output.
 *
 * <p>A line whose annotation fails with an {@code AnnotatorException} is reported on standard
 * error and skipped; processing continues with the next line.
 *
 * @param filepath path of the text file to annotate, one input text per line
 * @throws IOException if the file cannot be opened
 */
private static void annotate(String filepath) throws IOException {
    DepAnnotator annotator = new DepAnnotator();
    TextAnnotationBuilder taBuilder = new TokenizerTextAnnotationBuilder(new StatefulTokenizer(true));
    Preprocessor preprocessor = new Preprocessor();
    // Files.lines keeps the file open until the stream is closed, so close it explicitly.
    // The type is fully qualified to avoid requiring a new import in this file.
    try (java.util.stream.Stream<String> lines = Files.lines(Paths.get(filepath))) {
        lines.forEach(line -> {
            TextAnnotation ta = taBuilder.createTextAnnotation(line);
            try {
                preprocessor.annotate(ta);
                annotator.addView(ta);
                System.out.println(ta.getView(annotator.getViewName()).toString());
            } catch (AnnotatorException e) {
                e.printStackTrace();
            }
        });
    }
}
use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.
the class BrownClusterFeatureExtractor method getWordFeatures.
/**
 * Returns the Brown-cluster features for the token at {@code wordPosition}.
 *
 * <p>If {@code ta} does not yet carry the view produced by {@code viewGenerator}, the view is
 * generated and added first. Features are then collected from every constituent of that view
 * covering the token.
 *
 * @param ta the text annotation containing the token
 * @param wordPosition index of the token in {@code ta}
 * @return the features of all clusters covering the token, in iteration order
 * @throws EdisonException if generating the cluster view fails; the underlying
 *         {@code AnnotatorException} is attached as the cause
 */
@Override
public Set<Feature> getWordFeatures(TextAnnotation ta, int wordPosition) throws EdisonException {
    lazyLoadClusters(brownClustersFile);
    String clusterViewName = viewGenerator.getViewName();
    if (!ta.hasView(clusterViewName)) {
        synchronized (BrownClusterFeatureExtractor.class) {
            // Re-check under the lock: another thread may have added the view while this one
            // was waiting, in which case generating it again would be wasted work.
            if (!ta.hasView(clusterViewName)) {
                View generated;
                try {
                    generated = viewGenerator.getView(ta);
                } catch (AnnotatorException e) {
                    // Keep the original exception as the cause so its stack trace is not lost.
                    EdisonException wrapped = new EdisonException(e.getMessage());
                    wrapped.initCause(e);
                    throw wrapped;
                }
                ta.addView(clusterViewName, generated);
            }
        }
    }
    SpanLabelView view = (SpanLabelView) ta.getView(clusterViewName);
    String word = ta.getToken(wordPosition);
    // A word can have multiple Brown clusters, so iterate over every constituent covering the
    // token rather than using view.getLabel(wordPosition), which only yields the first label.
    Set<Feature> features = new LinkedHashSet<>();
    for (Constituent c : view.getConstituentsCoveringToken(wordPosition)) {
        features.addAll(getBrownClusters(word, c.getLabel()));
    }
    return features;
}
use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.
the class CommaLabeler method addView.
/**
 * Builds the comma predicate-argument view for {@code ta} and adds it under {@code viewName}.
 *
 * <p>For every comma found by {@code CommaSRLSentence}, a one-token predicate constituent is
 * created at the comma's position and labelled with the classifier's prediction. The phrases
 * returned by {@code getPhraseToLeftOfComma(1)} and {@code getPhraseToRightOfComma(1)}, when
 * present, are copied into the view and linked to the predicate.
 *
 * @param ta the text annotation to label; must already contain every view in
 *        {@code requiredViews}
 * @throws AnnotatorException if a required view is missing
 */
@Override
public void addView(TextAnnotation ta) throws AnnotatorException {
    // Fail fast when a prerequisite view is absent.
    for (String needed : requiredViews) {
        if (!ta.hasView(needed)) {
            throw new AnnotatorException("Missing required view " + needed);
        }
    }

    CommaSRLSentence commaSentence = new CommaSRLSentence(ta, ta);
    PredicateArgumentView commaView = new PredicateArgumentView(viewName, "illinois-comma", ta, 1.0d);

    for (Comma comma : commaSentence.getCommas()) {
        String predicted = classifier.discreteValue(comma);
        int commaIndex = comma.getPosition();

        Constituent predicate = new Constituent("Predicate:" + predicted, viewName, ta, commaIndex, commaIndex + 1);
        predicate.addAttribute(PredicateArgumentView.SenseIdentifer, predicted);
        commaView.addConstituent(predicate);

        linkArgument(commaView, ta, predicate, comma.getPhraseToLeftOfComma(1), "LeftOf" + predicted);
        linkArgument(commaView, ta, predicate, comma.getPhraseToRightOfComma(1), "RightOf" + predicted);
    }

    ta.addView(viewName, commaView);
}

/**
 * Copies {@code phrase} into {@code commaView} and relates it to {@code predicate}; does
 * nothing when {@code phrase} is null.
 */
private void linkArgument(PredicateArgumentView commaView, TextAnnotation ta, Constituent predicate,
        Constituent phrase, String relationName) {
    if (phrase == null) {
        return;
    }
    Constituent argument = new Constituent(phrase.getLabel(), viewName, ta, phrase.getStartSpan(), phrase.getEndSpan());
    commaView.addConstituent(argument);
    commaView.addRelation(new Relation(relationName, predicate, argument, 1.0d));
}
use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.
the class IllinoisLemmatizer method addView.
/**
 * Creates the lemma view for {@code textAnnotation} and adds it under {@code getViewName()}.
 *
 * @param textAnnotation the annotation to lemmatize
 * @throws AnnotatorException if creating the lemma view fails with an {@code IOException};
 *         the {@code IOException} is attached as the cause
 */
@Override
public void addView(TextAnnotation textAnnotation) throws AnnotatorException {
    View lemmaView;
    try {
        lemmaView = this.createLemmaView(textAnnotation);
    } catch (IOException e) {
        String msg = NAME + ".getView(): caught IOException trying to create view: " + e.getMessage();
        // Chain the IOException so callers still see the original stack trace.
        AnnotatorException wrapped = new AnnotatorException(msg);
        wrapped.initCause(e);
        throw wrapped;
    }
    textAnnotation.addView(getViewName(), lemmaView);
}
Aggregations