Search in sources :

Example 31 with AnnotatorException

use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.

the class Quantifier method getSpans.

/**
 * Finds quantity chunks in the input and returns them as character-offset spans.
 * <p>
 * Every constituent of the TOKENS view is labelled by {@code chunker}. A chunk opens on a
 * {@code B-} label, or on an {@code I-} label whose suffix differs from the previous label's. It is
 * closed on a non-{@code O} token that is the last token, or whose successor is labelled
 * {@code O}, starts with {@code B-}, or does not end with the same label suffix.
 * <p>
 * NOTE(review): {@code span.object} is only assigned when {@code standardized} is true, and only
 * spans with a non-null {@code object} are added to the result. Assuming the first constructor
 * argument of {@code QuantSpan} initialises {@code object}, calling this method with
 * {@code standardized == false} returns an empty list -- confirm that this is intended.
 *
 * @param text the raw text; only used to build a TextAnnotation when {@code inputTA} is null
 * @param standardized whether to run {@code normalizer.parse} on each detected chunk
 * @param inputTA the tokenized annotation of text input. If this parameter is not available,
 *        the user can pass null, in which case we will tokenize it ourselves.
 * @return the spans whose {@code object} is non-null, in the order their chunks were closed
 * @throws AnnotatorException declared by the signature; presumably raised by the preprocessor's
 *         {@code annotate} call -- TODO confirm
 */
public List<QuantSpan> getSpans(String text, boolean standardized, TextAnnotation inputTA) throws AnnotatorException {
    // Use the caller's annotation when supplied; otherwise tokenize the raw text ourselves.
    TextAnnotation annotation = (inputTA != null) ? inputTA : taBuilder.createTextAnnotation(text);
    List<QuantSpan> quantSpans = new ArrayList<QuantSpan>();
    // NOTE(review): sentences is filled here but never read again within this method.
    String[] sentences = new String[annotation.getNumberOfSentences()];
    for (int i = 0; i < annotation.getNumberOfSentences(); ++i) {
        sentences[i] = annotation.getSentence(i).getText();
    }
    // Create the preprocessor held on DataReader on first use, with the default configuration.
    if (DataReader.preprocessor == null) {
        DataReader.preprocessor = new Preprocessor(PreprocessorConfigurator.defaults());
    }
    // if it does not include POS or NER_CONLL, add them
    DataReader.preprocessor.annotate(annotation);
    // Only evaluated when the JVM runs with assertions enabled (-ea).
    assert annotation.getAvailableViews().contains(ViewNames.POS);
    // Label of the previous token, used to decide whether an I- label starts a new chunk.
    String previous = "";
    // Surface forms of the tokens in the currently open chunk, space separated.
    String chunk = "";
    boolean inChunk = false;
    String prediction = "";
    // tokenPos indexes annotation's tokens and is advanced separately from the loop index i
    // (see the end of the loop body).
    int startPos = 0, endPos = 0, tokenPos = 0;
    List<Constituent> tokens = annotation.getView(ViewNames.TOKENS).getConstituents();
    for (int i = 0; i < tokens.size(); ++i) {
        prediction = chunker.discreteValue(tokens.get(i));
        // && binds tighter than ||: this is B-  OR  (I- whose suffix differs from previous).
        if (prediction.startsWith("B-") || prediction.startsWith("I-") && !previous.endsWith(prediction.substring(2))) {
            // Open a chunk only if none is open yet; remember where it starts in the text.
            if (!inChunk && tokenPos < annotation.size()) {
                inChunk = true;
                startPos = annotation.getTokenCharacterOffset(tokenPos).getFirst();
            }
        }
        if (inChunk) {
            chunk += tokens.get(i).getSurfaceForm() + " ";
        }
        // Close the chunk on a non-O token that is last, or whose successor is O, starts a new B-
        // chunk, or carries a different label suffix. The leading !equals("O") check short-circuits
        // before substring(2) is evaluated for an "O" prediction.
        if (!prediction.equals("O") && tokenPos < annotation.size() && (i == (tokens.size() - 1) || chunker.discreteValue(tokens.get(i + 1)).equals("O") || chunker.discreteValue(tokens.get(i + 1)).startsWith("B-") || !chunker.discreteValue(tokens.get(i + 1)).endsWith(prediction.substring(2)))) {
            // Presumably the offset pair's second element is exclusive, making endPos the index of
            // the chunk's last character -- TODO confirm.
            endPos = annotation.getTokenCharacterOffset(tokenPos).getSecond() - 1;
            QuantSpan span = new QuantSpan(null, startPos, endPos);
            try {
                if (standardized) {
                    // The label with its two-character B-/I- prefix stripped is passed to the normalizer.
                    span.object = normalizer.parse(chunk, chunker.discreteValue(tokens.get(i)).substring(2));
                }
            } catch (Exception e) {
                // A failure here is printed and the span is dropped below, since object was not assigned.
                e.printStackTrace();
            }
            if (span.object != null)
                quantSpans.add(span);
            // Reset the accumulator for the next chunk.
            inChunk = false;
            chunk = "";
        }
        previous = prediction;
        // Advance tokenPos only when the annotation token at tokenPos ends with the current
        // constituent's surface form (both trimmed).
        if (tokenPos < annotation.size() && annotation.getToken(tokenPos).trim().endsWith(tokens.get(i).getSurfaceForm().trim())) {
            tokenPos++;
        }
    }
    return quantSpans;
}
Also used : TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException)

Example 32 with AnnotatorException

use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.

the class MainClass method annotate.

/**
 * Annotates a text file line by line and prints the annotator's view of each line to stdout.
 * <p>
 * Each line is turned into its own {@code TextAnnotation}, preprocessed, and passed to a
 * {@code DepAnnotator}. A line whose annotation fails with an {@code AnnotatorException} has its
 * stack trace printed and is skipped; the remaining lines are still processed.
 *
 * @param filepath path of the text file to read, one input per line
 * @throws IOException if the file cannot be opened
 */
private static void annotate(String filepath) throws IOException {
    DepAnnotator annotator = new DepAnnotator();
    TextAnnotationBuilder taBuilder = new TokenizerTextAnnotationBuilder(new StatefulTokenizer(true));
    Preprocessor preprocessor = new Preprocessor();
    // Files.lines keeps the underlying file open until the stream is closed, so it must be
    // managed with try-with-resources; the fully-qualified type avoids needing a new import.
    try (java.util.stream.Stream<String> lines = Files.lines(Paths.get(filepath))) {
        lines.forEach(line -> {
            TextAnnotation ta = taBuilder.createTextAnnotation(line);
            try {
                preprocessor.annotate(ta);
                annotator.addView(ta);
                System.out.println(ta.getView(annotator.getViewName()).toString());
            } catch (AnnotatorException e) {
                // Best effort: report the failing line and continue with the next one.
                e.printStackTrace();
            }
        });
    }
}
Also used : TextAnnotationBuilder(edu.illinois.cs.cogcomp.annotation.TextAnnotationBuilder) TokenizerTextAnnotationBuilder(edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder) TokenizerTextAnnotationBuilder(edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder) StatefulTokenizer(edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer) AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) Preprocessor(edu.illinois.cs.cogcomp.depparse.io.Preprocessor) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)

Example 33 with AnnotatorException

use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.

the class BrownClusterFeatureExtractor method getWordFeatures.

/**
 * Returns the Brown-cluster features of the token at {@code wordPosition}.
 * <p>
 * If {@code ta} does not yet have the view produced by {@code viewGenerator}, the view is
 * generated and added first. Features are collected from every constituent of that view covering
 * the token, because a word can have more than one Brown cluster.
 *
 * @param ta the text annotation containing the token
 * @param wordPosition index of the token within {@code ta}
 * @return the features from all covering cluster constituents, in encounter order
 * @throws EdisonException if the cluster view cannot be generated; the originating
 *         {@code AnnotatorException} is attached as the cause
 */
@Override
public Set<Feature> getWordFeatures(TextAnnotation ta, int wordPosition) throws EdisonException {
    lazyLoadClusters(brownClustersFile);
    String viewName = viewGenerator.getViewName();
    if (!ta.hasView(viewName)) {
        synchronized (BrownClusterFeatureExtractor.class) {
            // Re-check under the lock: another thread may have added the view while this one
            // was waiting, in which case generating it again would be wasted work.
            if (!ta.hasView(viewName)) {
                View generated;
                try {
                    generated = viewGenerator.getView(ta);
                } catch (AnnotatorException e) {
                    // Keep the original failure reachable through getCause().
                    EdisonException wrapped = new EdisonException(e.getMessage());
                    wrapped.initCause(e);
                    throw wrapped;
                }
                ta.addView(viewName, generated);
            }
        }
    }
    SpanLabelView view = (SpanLabelView) ta.getView(viewName);
    String word = ta.getToken(wordPosition);
    // view.getLabel(wordPosition) would only give the first label for the word, so iterate
    // over every constituent covering the token instead.
    Set<Feature> features = new LinkedHashSet<>();
    for (Constituent c : view.getConstituentsCoveringToken(wordPosition)) {
        features.addAll(getBrownClusters(word, c.getLabel()));
    }
    return features;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 34 with AnnotatorException

use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.

the class CommaLabeler method addView.

/**
 * Builds the comma view for {@code ta} and attaches it under {@code viewName}.
 * <p>
 * Each comma becomes a one-token predicate constituent labelled with the classifier's
 * prediction. The phrases to the left and right of the comma, when present, are copied into the
 * view and linked to the predicate by {@code LeftOf<label>} / {@code RightOf<label>} relations.
 *
 * @param ta the text annotation to extend; must already contain every view in
 *        {@code requiredViews}
 * @throws AnnotatorException if a required view is missing from {@code ta}
 */
@Override
public void addView(TextAnnotation ta) throws AnnotatorException {
    // Fail fast on the first prerequisite view that is absent.
    for (String needed : requiredViews) {
        if (ta.hasView(needed)) {
            continue;
        }
        throw new AnnotatorException("Missing required view " + needed);
    }

    // Comma structure for the sentence, and the view that will receive the predictions.
    CommaSRLSentence commaSentence = new CommaSRLSentence(ta, ta);
    PredicateArgumentView commaView = new PredicateArgumentView(viewName, "illinois-comma", ta, 1.0d);

    for (Comma current : commaSentence.getCommas()) {
        String sense = classifier.discreteValue(current);
        int commaIndex = current.getPosition();

        // The predicate spans exactly the comma token.
        Constituent predicate = new Constituent("Predicate:" + sense, viewName, ta, commaIndex, commaIndex + 1);
        predicate.addAttribute(PredicateArgumentView.SenseIdentifer, sense);
        commaView.addConstituent(predicate);

        // Left-hand phrase: copied into this view and tied to the predicate.
        Constituent leftPhrase = current.getPhraseToLeftOfComma(1);
        if (leftPhrase != null) {
            Constituent leftCopy = new Constituent(leftPhrase.getLabel(), viewName, ta, leftPhrase.getStartSpan(), leftPhrase.getEndSpan());
            commaView.addConstituent(leftCopy);
            Relation leftLink = new Relation("LeftOf" + sense, predicate, leftCopy, 1.0d);
            commaView.addRelation(leftLink);
        }

        // Right-hand phrase: same treatment.
        Constituent rightPhrase = current.getPhraseToRightOfComma(1);
        if (rightPhrase != null) {
            Constituent rightCopy = new Constituent(rightPhrase.getLabel(), viewName, ta, rightPhrase.getStartSpan(), rightPhrase.getEndSpan());
            commaView.addConstituent(rightCopy);
            Relation rightLink = new Relation("RightOf" + sense, predicate, rightCopy, 1.0d);
            commaView.addRelation(rightLink);
        }
    }

    ta.addView(viewName, commaView);
}
Also used : Comma(edu.illinois.cs.cogcomp.comma.datastructures.Comma) Relation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation) AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) CommaSRLSentence(edu.illinois.cs.cogcomp.comma.datastructures.CommaSRLSentence) PredicateArgumentView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 35 with AnnotatorException

use of edu.illinois.cs.cogcomp.annotation.AnnotatorException in project cogcomp-nlp by CogComp.

the class IllinoisLemmatizer method addView.

/**
 * Creates the lemma view for {@code textAnnotation} and adds it under {@link #getViewName()}.
 *
 * @param textAnnotation the annotation to extend with a lemma view
 * @throws AnnotatorException if creating the view fails with an {@code IOException}; the
 *         {@code IOException} is attached as the cause
 */
@Override
public void addView(TextAnnotation textAnnotation) throws AnnotatorException {
    View lemmaView;
    try {
        lemmaView = this.createLemmaView(textAnnotation);
    } catch (IOException e) {
        String msg = NAME + ".getView(): caught IOException trying to create view: " + e.getMessage();
        // Attach the IOException as the cause so its stack trace travels with the rethrown
        // exception instead of only being printed to stderr.
        AnnotatorException wrapped = new AnnotatorException(msg);
        wrapped.initCause(e);
        throw wrapped;
    }
    textAnnotation.addView(getViewName(), lemmaView);
}
Also used : AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) IOException(java.io.IOException) TokenLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View)

Aggregations

AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)39 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)28 Test (org.junit.Test)14 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)11 IOException (java.io.IOException)8 View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View)7 TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView)6 FileNotFoundException (java.io.FileNotFoundException)6 ResourceManager (edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager)4 TextAnnotationBuilder (edu.illinois.cs.cogcomp.annotation.TextAnnotationBuilder)3 Tree (edu.illinois.cs.cogcomp.core.datastructures.trees.Tree)3 StatefulTokenizer (edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer)3 TokenizerTextAnnotationBuilder (edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder)3 Properties (java.util.Properties)3 AnnotatorService (edu.illinois.cs.cogcomp.annotation.AnnotatorService)2 Pair (edu.illinois.cs.cogcomp.core.datastructures.Pair)2 PredicateArgumentView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView)2 SpanLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView)2 TokenLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView)2 Token (edu.illinois.cs.cogcomp.lbjava.nlp.seg.Token)2